How to print labels coming from inverse_transform when loading pkl file for test?

20 Views Asked by At

I have a task of language detection of a document and here is the code so far:

def train():
    # read data
    data = pd.read_csv('./language_detection.csv',delimiter=',')
    
    

    # separating the independent and dependant features
    X = data["Text"]
    y = data["Language"]
    print('y', y)

    # converting categorical variables to numerical
    le = LabelEncoder()
    y = le.fit_transform(y)
  
    
    data_list = []
    for text in X:
        text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
        text = re.sub(r'[[]]', ' ', text)
        text = text.lower()
        data_list.append(text)
    
    
    

    # creating bag of words using countvectorizer
    cv = CountVectorizer()
    X = cv.fit_transform(data_list).toarray()
    
    
    

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)
  
    
   
    model = Pipeline([('MinMaxScaler', MinMaxScaler()), ('MultinomialNB',MultinomialNB()) ])
    model.fit(x_train, y_train)
    
    y_pred = model.predict(x_test)
    pred = le.inverse_transform(y_pred)
    
    
    print('train_pred', pred)
    
    ac = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)
    
    
    
    
    print("Accuracy is :",ac)
    
    print(cr)
    
    
   
    
    # saving both cv and model
    pickle.dump(cv, open('./transform.pkl', "wb"))
    pickle.dump(model, open('./model.pkl', "wb"))
    
    
    
def upload_doc():         
    
    scaler = MinMaxScaler()

    X = open('./test.txt','r')
    data_list = []
    for text in X:
        text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
        text = re.sub(r'[[]]', ' ', text)
        text = text.lower()
        data_list.append(text)
    print("data_list", data_list)
        
        
        # creating bag of words using countvectorizer
   
    cv = pickle.load(open('./transform.pkl', 'rb'))
    X = cv.transform(data_list).toarray()
    
    
    loaded_model = pickle.load(open('./model.pkl', 'rb'))
    #loaded_model.fit(X)
    y_pred = loaded_model.predict(X)
    y_pred = scaler.fit_transform(y_pred.reshape(-1,1))
    y_pred = scaler.inverse_transform((y_pred.reshape(-1,1)))
 
    
    print("y_pred", y_pred)

It works however I want to also print out the inverse_transform of the predicted label. I can do it in training but I was not able to do it for testing. Basically, I want to output the name of the language instead of the vector form.

0

There are 0 best solutions below