I am building a compression algorithm that takes a tab-delimited file, converts each column to binary (bitarray) using huffman coding, then converts the bitarray to hexadecimal format (ba2hex). This is using the bitarray library. I am getting the error ValueError: reached end of bitarray at the line: iter = str("\t".join(canonical_decode(binary, count, symbol))) in the decode function. My code works for many lines of the file, but not all.
Any help sorting out the reason for this error would be greatly appreciated.
def encode(stringMap):
with open (test_dir_path + "compressed.txt", 'w') as compressed_file:
for key in stringMap:
list = stringMap[key]
# print(list)
dict = totalHuffmanDict[key]
s = ""
for string in list:
binary = dict[string]
s += binary
s += "\n"
b = bitarray(s)
b.fill()
hex = ba2hex(b)
compressed_file.write(hex)
compressed_file.write("\n")
listOfLists = []
def decode(count, symbol, key):
with open (test_dir_path + "compressed.txt", "r") as compressed_file:
lineNum = 0
key = int(key)
for line in compressed_file:
lineNum = lineNum + 1
if lineNum == key:
binary = hex2ba(line.strip())
iter = str("\t".join(canonical_decode(binary, count, symbol)))
list = iter.split("\t")
list = list[0:NUM_LINES]
listOfLists.append(list)