Implement Suffix Tree

155 Views Asked by At

Good afternoon. I am trying to rewrite the code from C ++ to python, but I get a key error: 0 in the last lines: for c in range (256): link [0] [c] = 1;

I've looked at a sample for adding a value to a SortedDict: sd ['c'] = 3, however I can't figure out what I'm doing wrong.

What needs to be fixed?

#include <string>
#include <map>

const int MAXLEN = 600000; 
std::string s;
int pos[MAXLEN], len[MAXLEN], par[MAXLEN];
std::map<char,int> to[MAXLEN], link[MAXLEN];
int sz = 2;
int path[MAXLEN];

void attach(int child, int parent, char c, int child_len)
{
    to[parent][c] = child;
    len[child] = child_len;
    par[child] = parent;
}
void extend(int i) 
{
    int v, vlen = s.size() - i, old = sz - 1, pstk = 0; 
    for (v = old; !link[v].count(s[i]); v = par[v]) {
        vlen -= len[v];
        path[pstk++] = v;
    }
    int w = link[v][s[i]]; 
    if (to[w].count(s[i + vlen])) {
        int u = to[w][s[i + vlen]]; 
        for (pos[sz] = pos[u] - len[u]; s[pos[sz]] == s[i + vlen]; pos[sz] += len[v]) {
            v = path[--pstk];
            vlen += len[v];
        }
        attach(sz, w, s[pos[u] - len[u]], len[u] - (pos[u] - pos[sz]));
        attach(u, sz, s[pos[sz]], pos[u] - pos[sz]);
        w = link[v][s[i]] = sz++;
    }
    link[old][s[i]] = sz;
    attach(sz, w, s[i + vlen], s.size() - (i + vlen));
    pos[sz++] = s.size();
}
int main()
{
    len[1] = 1; pos[1] = 0; par[1] = 0;
    for (int c = 0; c < 256; c++)
        link[0][c] = 1; 
    s = "abababasdsdfasdf";
    for (int i = s.size() - 1; i >= 0; i--)
        extend(i);
}

and python:

#pip install sortedcontainers
from sortedcontainers import SortedDict

MAXLEN = 600000
s = ""
#pos = []
#leng = []
#par = []

pos = [None]*MAXLEN
leng = [None]*MAXLEN
par = [None]*MAXLEN

to = SortedDict()
link = SortedDict()
sz = 2
path = []

def attach(child, parent, c, child_len):
    to[parent][c] = child
    leng[child] = child_len
    par[child] = parent

def extend(i): 
    vlen = len(s) - i
    old = sz - 1
    pstk = 0;
    
    v = old
    while 1:
        if link[v].count(s[i]):
            break
        vlen -= leng[v]
        path[pstk + 1] = v
        v = par[v]
     
    w = link[v][s[i]]
    
    if to[w].count(s[i + vlen]):
        u = to[w][s[i + vlen]]
        
        pos[sz] = pos[u] - leng[u]
        while 1:
            if s[pos[sz]] != s[i + vlen]:
                break
            v = path[pstk - 1]
            vlen += leng[v]
            pos[sz] += leng[v]
        
        attach(sz, w, s[pos[u] - leng[u]], leng[u] - (pos[u] - pos[sz]))
        attach(u, sz, s[pos[sz]], pos[u] - pos[sz])
        w = link[v][s[i]] = sz + 1
    
    link[old][s[i]] = sz
    attach(sz, w, s[i + vlen], len(s) - (i + vlen))
    pos[sz + 1] = len(s)

    
leng[1] = 1;
pos[1] = 0;
par[1] = 0;

for c in range(256):
    link[0][c] = 1;
s = "abababasdsdfasdf";
i = len(s) - 1;
while 1:
    if i <= 0:
        break
    extend(i)
    i -= 1
0

There are 0 best solutions below