KeyError in text generator

72 Views Asked by At

I'm trying to write code that will collect statistics on the sequence of characters in the text and generate new text based on it. This works when collecting statistics with one character, but as soon as I try to increase the number of characters, I get KeyError:

import zipfile
from random import randint
from pprint import pprint


# file_zip = 'voyna-i-mir.txt.zip'

# zip = zipfile.ZipFile(file_zip, 'r')
# for file in zip.namelist():
#     zip.extract(file)

origin = 'voyna-i-mir.txt'

statistic = {}

chain = '   '
with open(origin, 'r', encoding='cp1251') as file:
    for sting in file:
        #print(sting)
        for symbol in sting:
            if chain in statistic:
                if symbol in statistic[chain]:
                    statistic[chain][symbol] += 1
                else:
                    statistic[chain][symbol] = 1
            else:
                statistic[chain] = {symbol: 1}
            chain = chain[1:] + symbol


dictionary = {}
stat_generator = {}
for chain, symbol_stat in statistic.items():
    dictionary[chain] = 0
    stat_generator[chain] = []
    for symbol, count in symbol_stat.items():
        dictionary[chain] += count
        stat_generator[chain].append([count, symbol])
    stat_generator[chain].sort(reverse=True)

gen = 1000
was_print = 0

chain = '   '
while was_print < gen:
    symbol_stat = stat_generator[chain]
    total = dictionary[chain]
    random = randint(1, total)
    position = 0
    for count, symbol in symbol_stat:
        position += count
        if random <= position:
            break
        print(symbol, end='')
        was_print += 1
        chain = chain[1:] + symbol

The error I received

Traceback (most recent call last):

  File "C:\Users\roman\Desktop\skillbox\[Skillbox] Профессия Python- 
разработчик\9. Работа с файлами и форматированный 
вывод-20210102T191733Z-001\9. Работа с файлами и форматированный 
вывод\lesson_009\python_snippets\test.py", line 46, in <module>
    symbol_stat = stat_generator[chain]
              ~~~~~~~~~~~~~~^^^^^^^
KeyError: ' -–'
0

There are 0 best solutions below