I am getting an error in the following code while performing the sentiment lexicon. If someone could guide me here, that'd be really helpful.
"'utf-8' codec can't decode byte 0xf3"
from textblob import TextBlob
pos_count = 0
pos_correct = 0
with open("positive.txt","r") as f:
for line in f.read().split('\n'):
analysis = TextBlob(line)
if analysis.sentiment.polarity > 0:
pos_correct += 1
pos_count +=1
neg_count = 0
neg_correct = 0
with open("negative.txt","r") as f:
for line in f.read().split('\n'):
analysis = TextBlob(line)
if analysis.sentiment.polarity <= 0:
neg_correct += 1
neg_count +=1
print("Positive accuracy = {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy = {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))
Error:
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-8-91f2c31897d6> in <module>()
5
6 with open("positive.txt","r") as f:
----> 7 for line in f.read().split('\n'):
8 analysis = TextBlob(line)
9 if analysis.sentiment.polarity > 0:
/usr/lib/python3.7/codecs.py in decode(self, input, final)
320 # decode input (taking the buffer into account)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf3 in position 4645: invalid continuation byte