As I said on ctxmodel.net, I've done some experiments to simulate a linear counter in the logistic domain. For a PAQ-like architecture you need to transform all probabilities to the logistic domain, e.g.:

Code:

...
using math::Stretch; // less typing
nn.Input(5) = Stretch( histMaps[5].P1( bitHists[5][x].Touched(t) ) );
...

which creates more or less random memory accesses, hence it is cache-inefficient. I've experimented with this:

Code:

...
// nn.Input(5) = histMaps[5].L( bitHists[5][x].Touched(t) );
...

The experimental bit model:

Code:

// Bit model that keeps its state directly in the logistic (stretch) domain.
// An update is a piecewise-linear interpolation into a 65-vertex lookup
// table (lu0) holding the y=0 update curve; the y=1 case is obtained from
// the symmetry L(1-p) = -L(p) by mirroring the state around zero.
class bitmodel_log
{
public:
  static const uint LD_SCALE = 16;        // fixed-point precision of the state
  static const int SCALE = 1<<LD_SCALE;
protected:
  static const uint LD_Q = 6;             // log2 of the number of table segments
  static const int L0 = -SCALE/2;         // lowest representable state value
  sint16 l;                               // current state (stretched probability)
  static const sint16 lu0[(1<<LD_Q)+1];   // y=0 update curve vertices
public:
  // Load an explicit state value.
  inline void Set( const int m )
  { l = m; }

  // Fold one observed bit y (0 or 1) into the state; returns *this for chaining.
  inline bitmodel_log& Update( const int y )
  {
    static const int S = LD_SCALE-LD_Q;   // bits of offset inside one segment
    // Mirror the state for y=1 so a single table serves both bit values.
    const int cl = (y==0) ? l : -l;
    const int off = cl - L0;              // shift into [0, SCALE)
    const short* lu = &lu0[off >> S];     // base vertex of the segment
    const int w = off & ((1<<S) - 1);     // position within the segment
    // Linear interpolation between the two surrounding vertices.
    const int nl = (((lu[1]-lu[0]) * w) >> S) + lu[0];
    l = (y==0) ? nl : -nl;                // undo the mirroring
    return *this;
  }

  // Current state reduced to the mixer's logistic precision.
  inline int L() const
  {
    //printf( "%d\n", l>>LD_SCALE-math::LD_SCALE ); getchar();
    return l >> (LD_SCALE - math::LD_SCALE);
  }
};
// The 65 vertices of the y=0 update curve in 16-bit fixed point.
// Entry i is the post-update state for an input state of L0 + i*(SCALE>>LD_Q);
// Update() interpolates linearly between adjacent entries.
// Presumably produced by a generator like the one in main() below, but at the
// class's 16-bit scale rather than 12 — TODO confirm against the generator.
const sint16 bitmodel_log::lu0[] = {
-32768, -31760, -30736, -29712, -28688, -27664, -26640, -25616,
-24592, -23568, -22544, -21520, -20496, -19472, -18448, -17424,
-16400, -15376, -14352, -13328, -12304, -11281, -10257, -9233,
-8210, -7186, -6163, -5140, -4117, -3095, -2073, -1052,
-32, 987, 2005, 3022, 4036, 5048, 6056, 7060,
8059, 9050, 10033, 11005, 11962, 12902, 13821, 14714,
15576, 16400, 17181, 17913, 18591, 19208, 19764, 20255,
20684, 21052, 21363, 21623, 21837, 22012, 22154, 22268,
22359
};

Compression dropped (my approximation only uses 64 vertices; one could do better), resulting in a loss of 80 kB on SFC. I expected a larger speed gain due to increased cache efficiency; surprisingly, this wasn't the case.

It looks like the linear counter's probability output clusters near P(y=1) = 0 and 1, so a "Stretch(.)" lookup mostly hits cached entries anyway.

I've created the lookup like this:

Code:

#include<cstdio>
#include<cmath>
// Small generic helpers used when building the lookup table.
template<typename T> T Min( const T& a, const T& b )
{ if (a<b) return a; return b; }
template<typename T> T Max( const T& a, const T& b )
{ if (a>b) return a; return b; }
// Clamp a into [min, max].
template<typename T> T Bound( const T& a, const T& min, const T& max )
{ return a<min ? min : (a>max ? max : a); }
const double L_MIN = -8.0;   // lower clamp of the logistic domain
const double L_MAX = 8.0;    // upper clamp of the logistic domain
const double C = 1.0/256.0;  // linear-counter update weight being mimicked
const int N = 64;            // number of table segments (N+1 vertices)
// Squash: map a logistic-domain value back to a probability (inverse of L).
double P( const double l ) // logistic -> probability
{
  const double e = exp(-l);
  return 1.0/(1.0+e);
}
// Stretch: map a probability to the logistic domain (log-odds of p).
double L( const double p ) // probability -> logistic
{
  const double lp = log(p);
  const double lq = log(1.0-p);
  return lp - lq;
}
// The logistic update function (mimics a linear counter)
static const int SCALE = 1<<12;   // fixed-point scale: 12 fractional bits
static const int L0 = -8*SCALE;   // smallest representable logistic value
static const int L1 = 8*SCALE-1;  // largest representable logistic value (unused below)
static const int BITS = 16;       // total state width; BITS-6 bits index within a segment
short lu0[N+1];                   // piecewise-linear update table, filled by main()
// Advance logistic state l after observing bit y by interpolating into lu0.
// Symmetry trick: a y=1 update equals a y=0 update on the negated state.
inline int LogUpdate( const int y, int l )
{
  if (y != 0) l = -l;                          // mirror for the y=1 case
  const int off = l - L0;                      // shift state into [0, 16*SCALE)
  const short* lu = &lu0[off >> (BITS-6)];     // base vertex of the 1<<(BITS-6)-wide segment
  const int w = off & ((1 << (BITS-6)) - 1);   // position within the segment
  // Linear interpolation between the two surrounding vertices.
  const int nl = (((lu[1] - lu[0]) * w) >> (BITS-6)) + lu[0];
  return y == 0 ? nl : -nl;                    // undo the mirroring
}
// Build the N+1-vertex update table, print it, then run a tiny smoke test.
int main( int, char** )
{
  // Tabulate the y=0 update l -> L((1-C)*P(l)), clamped to the domain,
  // in 12-bit fixed point. l1 is the y=1 branch; it is computed for
  // inspection only — the runtime code derives it by symmetry.
  for (int i=0; i<=N; ++i) {
    const double l = (L_MAX-L_MIN)/N*i + L_MIN;
    const double p = P(l);
    const double l0 = Bound( L((1.0-C)*p), L_MIN, L_MAX );
    const double l1 = Bound( L((1.0-C)*p + C), L_MIN, L_MAX );
    lu0[i] = SCALE*l0;
    printf( "%6d, ", lu0[i] );
    if ((i%8)==7) putchar('\n');
  }
  // Smoke test: a few updates starting from the neutral state l = 0.
  int l = 0;
  printf( "\n%d\n", l=LogUpdate(0,l) );
  printf( "%d\n", l=LogUpdate(1,l) );
  printf( "%d\n", l=LogUpdate(1,l) );
  printf( "%d\n", l=LogUpdate(0,l) );
  getchar();
  return 0;
}

Maybe someone can tune it and insert it into LPAQ to see how good things could be. This is mostly a speed optimization.

Another counter approach I'll use for my next project:

For a future project I plan to use delayed counters and a secondary model (per primary model), which estimates probabilities based on a quantisation of the primary model's counter state. The final estimate is a static mix of both probabilities. This should make it more than competitive with the FSM approach at higher orders.