fast algorithm for move to front transform

1.1k Views Asked by At

I'm trying to find the fastest algorithm for the move to front transformation. The one that's used for example in conjunction with burrows wheeler transform.

The best I've managed so far does about 15MB/s on Core i3 2.1GHz. But I'm sure that it's not optimal. Here's my best effort so far. Is there anything that's faster?

class mtf256_x {                                                                            
        typedef unsigned char u8;
        typedef unsigned long long L;
        public:
        L enc[37];
        u8 dec[256];
        mtf256_x() {
                unsigned i;
                for (i=0;i<37;i++) {
                        enc[i]=0;
                }
                for (i=0;i<256;i++) {
                        dec[i]=i;
                        set(i,i);
                }
        }
        u8 decode(u8 in) {
                u8 r = dec[in];                                                             
                if (in) {                                                                   
                        memmove(dec+1,dec,in);                                              
                        dec[0]=r;                                                           
                }                                                                           
                return r;                                                                   
        }                                                                                   
        u8 set(unsigned x, u8 y) {                                                          
                unsigned xl = (x%7)*9;                                                      
                unsigned xh = (x/7);                                                        
                enc[xh] &= ~(0x1FFLLU<<xl);                                                 
                enc[xh] |= ((L)y)<<xl;                                                      
        }                                                                                   
        u8 get(unsigned x) {                                                                
                return enc[x/7] >> (x%7)*9;                                                 
        }                                                                                   
        u8 encode(u8 in) {                                                                  
                u8 r;
                unsigned i;
                r = get(in);
                L m2 = 0x0040201008040201LLU; // 0x01 for each 9 bit int                    
                L m1 = 0x3FDFEFF7FBFDFEFFLLU; // 0xff for each 9 bit int                    
                L Q = (0x100+r)*m2;                                                         
                L a,b,c,d;                                                                  
                L * l= enc;                                                                 
                for (i=0;i<37;i++) {                                                        
                        a=l[i];
                        a+= ((Q-a)>>8)&m2;  // conditional add 1
                        a&=m1;
                        l[i]=a;
                }
                set(in,0);
                return r;
        }
};
1

There are 1 best solutions below

0
On

Maybe you can try this http://kuc406.moxo.cz/projects.php

int b[ uint8Max + 2 ], treshold = 0, pivot = -1;
long inFileOffst = 0, pOffset = 0, t[ uint8Max + 1 ];
int rank, c, i, p0, p1;

  // initialise list

  for( i = 0; i <= uint8Max; t[ i++ ] = 0 );
  for( i = 1; i <= uint8Max; b[ i - 1 ] = i++ );
  b[ uint8Max ] = b[ uint8Max + 1 ] = 0;

  // read data
  // input  = c; output = rank

  inFileOffst++;

  rank = 0;
  if( ( p1 = b[uint8Max + 1] ) != ( c = data[input] ) )
  {
     if( t[ c ] < pOffset )
     {
        rank += treshold++;
        t[ c ] = inFileOffst;
        p1 = pivot;
     }
     else if( t[ c ] == pOffset )
     {
        pivot = c;
        t[ c ] = pOffset = inFileOffst;
        treshold = 0;
     }
     while( true ) // passing the list
     {
        if( ( p0 = b[ p1 ] ) == c )
        {
           rank++;
           b[ p1 ] = b[ c ];
           break;
        }
        if( ( p1 = b[ p0 ] ) == c )
        {
           rank += 2;
           b[ p0 ] = b[ c ];
           break;
        }
        if( ( p0 = b[ p1 ] ) == c )
        {
           rank += 3;
           b[ p1 ] = b[ c ];
           break;
        }
        if( ( p1 = b[ p0 ] ) == c )
        {
           rank += 4;
           b[ p0 ] = b[ c ];
           break;
        }
        rank += 4;
     }
     b[ c ] = b[ uint8Max + 1 ];
     b[ uint8Max + 1 ] = c;
  }

But it's more like for a smal alphabet (e.g. for the bytes), but for bigger alphabet, I'd suggest to try nburns version, or more at http://encode.ru forum.