I'm trying to compute the TextRank scores for each similarity matrix. The summarize function below produces the summary, and it is called for every list of sentences in result, but an error is raised while ranking the sentences with the PageRank algorithm. I tried debugging it by manually increasing the max_iter value passed to PageRank, but the error is still the same.

get_score function

It is called from the summarize function; the error is raised inside it.

def get_score(sim_mat):
    import networkx as nx
    # Build a weighted, undirected graph from the similarity matrix
    # and score each node (one per sentence) with PageRank
    nx_graph = nx.from_numpy_array(sim_mat)
    score = nx.pagerank(nx_graph, max_iter=500)
    return score
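For reference, a minimal usage sketch of get_score on a small, hand-made similarity matrix (the values below are invented purely for illustration); it returns a dict mapping each node index, i.e. each sentence, to its PageRank score:

import numpy as np

# Hypothetical 3x3 similarity matrix: symmetric, zero diagonal
toy_sim = np.array([
    [0.0, 0.8, 0.1],
    [0.8, 0.0, 0.3],
    [0.1, 0.3, 0.0],
])

scores = get_score(toy_sim)         # dict: {node index: PageRank score}
best = max(scores, key=scores.get)  # index of the top-ranked sentence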

Summarize function. It takes the raw text and returns the summary.

def summarize(text):

    # Split the raw text into sentences and clean each one
    sentences = sent_tokenize(text)
    t_clean_sentences = []
    for i in range(len(sentences)):
        obj = text_preprocessing(sentences[i])
        j = obj.text_cleaner()
        t_clean_sentences.append(j)
      
    # Drop sentences the classifier labels as questions
    clean_sentences = []
    for i in range(len(t_clean_sentences)):
        a = gb.predict(vectorizer.transform([t_clean_sentences[i]]))
        if a[0] != 'whQuestion' and a[0] != 'ynQuestion':
            clean_sentences.append(t_clean_sentences[i])

    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    stop_words = set(stopwords.words('english'))

    # Remove stop words from every cleaned sentence
    filtered_sentences = []

    for i in range(len(clean_sentences)):
        word_tokens = word_tokenize(clean_sentences[i])
        filtered_sentence = [w for w in word_tokens if w not in stop_words]
        filtered_sentences.append(" ".join(filtered_sentence))
    import numpy as np

    # Average 100-d word embeddings to build one vector per sentence
    sentence_vectors = []
    for i in filtered_sentences:
        if len(i) != 0:
            v = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()])/(len(i.split())+0.001)
        else:
            v = np.zeros((100,))
        sentence_vectors.append(v)

    from sklearn.metrics.pairwise import cosine_similarity

    # Pairwise cosine similarity between sentence vectors; the diagonal
    # stays 0 (see the vectorized alternative after this function)
    sim_mat = np.zeros([len(clean_sentences), len(clean_sentences)])

    for i in range(len(clean_sentences)):
        for j in range(len(clean_sentences)):
            if i != j:
                sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1, 100), sentence_vectors[j].reshape(1, 100))[0, 0]
    
    #pagerank scores
    scores = get_score(sim_mat)
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(clean_sentences)), reverse=True)
    # Generate the summary (every ranked sentence is kept here; slice
    # ranked_sentences to cap the summary length)
    summary = []
    for i in range(len(ranked_sentences)):
        summary.append(ranked_sentences[i][1].capitalize())
    return summary
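As a side note, the double loop that fills sim_mat can be replaced by a single vectorized call, which is much faster for long texts. A sketch, assuming sentence_vectors is the list built above:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

vecs = np.vstack(sentence_vectors)  # shape (n_sentences, 100)
sim_mat = cosine_similarity(vecs)   # all pairwise similarities at once
np.fill_diagonal(sim_mat, 0.0)      # zero self-similarity, matching the loop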

Function call

The size of result is 100. When I ran it on only the first 50 lists of sentences in result, it worked fine, so I built a loop that summarizes 50 lists of sentences at a time and continues until it reaches the size of result, but it still raises the same error (a sketch for isolating the failing chunk follows the call below).

# text is the raw text from the TXT file
result = list(filter(lambda x: x != '', text.split(':')))
compiled = []
for r in result:
  compiled.append(summarize(r))
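If the goal is just to find which chunk fails, the convergence error can be caught per chunk; a sketch (not part of the original code), assuming the same result list:

import networkx as nx

compiled = []
for idx, r in enumerate(result):
    try:
        compiled.append(summarize(r))
    except nx.PowerIterationFailedConvergence:
        # Log the offending chunk instead of aborting the whole run
        print(f"chunk {idx} failed to converge, skipping")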

Error

---------------------------------------------------------------------------
PowerIterationFailedConvergence           Traceback (most recent call last)
<ipython-input-22-a04a4d4d0dfb> in <module>()
      1 compiled = []
      2 for r in range(len(result)):
----> 3   compiled.append(summarize(result[r]))

3 frames
<ipython-input-21-c7462482feb4> in summarize(text)
     45 
     46     #pagerank scores
---> 47     scores = get_score(sim_mat)
     48     ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(clean_sentences)), reverse=True)
     49     # Specify number of sentences to form the summary

<ipython-input-10-798a017cf041> in get_score(sim_mat)
      2     import networkx as nx
      3     nx_graph = nx.from_numpy_array(sim_mat)
----> 4     score = nx.pagerank(nx_graph)
      5     return score

<decorator-gen-431> in pagerank(G, alpha, personalization, max_iter, tol, nstart, weight, dangling)

/usr/local/lib/python3.6/dist-packages/networkx/utils/decorators.py in _not_implemented_for(not_implement_for_func, *args, **kwargs)
     80             raise nx.NetworkXNotImplemented(msg)
     81         else:
---> 82             return not_implement_for_func(*args, **kwargs)
     83     return _not_implemented_for
     84 

/usr/local/lib/python3.6/dist-packages/networkx/algorithms/link_analysis/pagerank_alg.py in pagerank(G, alpha, personalization, max_iter, tol, nstart, weight, dangling)
    156         if err < N * tol:
    157             return x
--> 158     raise nx.PowerIterationFailedConvergence(max_iter)
    159 
    160 

PowerIterationFailedConvergence: (PowerIterationFailedConvergence(...), 'power iteration failed to converge within 100 iterations')

Answer

I found the solution. I used nx.pagerank_numpy(nx_graph) instead of nx.pagerank(nx_graph), and that solved the problem. Unlike nx.pagerank, which relies on power iteration and can fail to converge, pagerank_numpy computes the PageRank vector through a direct eigendecomposition, so there is no iteration limit to hit; it handles the dense graph built with nx_graph = nx.from_numpy_array(sim_mat) without issue.
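For completeness, a sketch of get_score with that change, assuming networkx < 3.0 (pagerank_numpy was deprecated in networkx 2.6 and removed in 3.0):

def get_score(sim_mat):
    import networkx as nx
    nx_graph = nx.from_numpy_array(sim_mat)
    # Solves for the principal eigenvector directly instead of using
    # power iteration, so it cannot fail to converge
    score = nx.pagerank_numpy(nx_graph)
    return score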