I have a task of language detection of a document and here is the code so far:
def train():
# read data
data = pd.read_csv('./language_detection.csv',delimiter=',')
# separating the independent and dependant features
X = data["Text"]
y = data["Language"]
print('y', y)
# converting categorical variables to numerical
le = LabelEncoder()
y = le.fit_transform(y)
data_list = []
for text in X:
text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
text = re.sub(r'[[]]', ' ', text)
text = text.lower()
data_list.append(text)
# creating bag of words using countvectorizer
cv = CountVectorizer()
X = cv.fit_transform(data_list).toarray()
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)
model = Pipeline([('MinMaxScaler', MinMaxScaler()), ('MultinomialNB',MultinomialNB()) ])
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
pred = le.inverse_transform(y_pred)
print('train_pred', pred)
ac = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)
print("Accuracy is :",ac)
print(cr)
# saving both cv and model
pickle.dump(cv, open('./transform.pkl', "wb"))
pickle.dump(model, open('./model.pkl', "wb"))
def upload_doc():
scaler = MinMaxScaler()
X = open('./test.txt','r')
data_list = []
for text in X:
text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
text = re.sub(r'[[]]', ' ', text)
text = text.lower()
data_list.append(text)
print("data_list", data_list)
# creating bag of words using countvectorizer
cv = pickle.load(open('./transform.pkl', 'rb'))
X = cv.transform(data_list).toarray()
loaded_model = pickle.load(open('./model.pkl', 'rb'))
#loaded_model.fit(X)
y_pred = loaded_model.predict(X)
y_pred = scaler.fit_transform(y_pred.reshape(-1,1))
y_pred = scaler.inverse_transform((y_pred.reshape(-1,1)))
print("y_pred", y_pred)
It works however I want to also print out the inverse_transform of the predicted label. I can do it in training but I was not able to do it for testing. Basically, I want to output the name of the language instead of the vector form.