I'm trying to find the fastest algorithm for the move to front transformation. The one that's used for example in conjunction with burrows wheeler transform.
The best I've managed so far does about 15MB/s on Core i3 2.1GHz. But I'm sure that it's not optimal. Here's my best effort so far. Is there anything that's faster?
class mtf256_x {
typedef unsigned char u8;
typedef unsigned long long L;
public:
L enc[37];
u8 dec[256];
mtf256_x() {
unsigned i;
for (i=0;i<37;i++) {
enc[i]=0;
}
for (i=0;i<256;i++) {
dec[i]=i;
set(i,i);
}
}
u8 decode(u8 in) {
u8 r = dec[in];
if (in) {
memmove(dec+1,dec,in);
dec[0]=r;
}
return r;
}
u8 set(unsigned x, u8 y) {
unsigned xl = (x%7)*9;
unsigned xh = (x/7);
enc[xh] &= ~(0x1FFLLU<<xl);
enc[xh] |= ((L)y)<<xl;
}
u8 get(unsigned x) {
return enc[x/7] >> (x%7)*9;
}
u8 encode(u8 in) {
u8 r;
unsigned i;
r = get(in);
L m2 = 0x0040201008040201LLU; // 0x01 for each 9 bit int
L m1 = 0x3FDFEFF7FBFDFEFFLLU; // 0xff for each 9 bit int
L Q = (0x100+r)*m2;
L a,b,c,d;
L * l= enc;
for (i=0;i<37;i++) {
a=l[i];
a+= ((Q-a)>>8)&m2; // conditional add 1
a&=m1;
l[i]=a;
}
set(in,0);
return r;
}
};
Maybe you can try this http://kuc406.moxo.cz/projects.php
But it's more like for a smal alphabet (e.g. for the bytes), but for bigger alphabet, I'd suggest to try nburns version, or more at http://encode.ru forum.