TypeError: string indices must be integers. Dont know what im doing wrong

229 Views Asked by At

I have a dataset with two columns the one identifies whether an email is classified as spam or not and the other column shows the emails content. ive been trying to implement naive bayes with PSO as well as ABC. However I get the error TypeError: string indices must be integers.

email_train,email_test,spam_train,spam_test train_test_split(dfTotal.Email,dfTotal.Spam,test_size=0.3,random_state=0)

email_test_dtm = cv.transform(email_test)
# convert to TFIDF form
email_test_tf = tf.fit_transform(email_test_dtm)
email_test_tf

Artificial Bee Colony

 from Hive import Hive 
from Hive import Utilities
from sklearn.metrics import log_loss

# ---- SOLVE TEST CASE WITH ARTIFICIAL BEE COLONY ALGORITHM

def run(lowBounds,upBounds,evaluator):

    model = Hive.BeeHive(lower = lowBounds, # MUST BE A LIST !
                         upper = upBounds , # MUST BE A LIST ! 
                         fun       = evaluator ,
                         numb_bees =  100       ,
                         max_itrs  =  2       ,)

    # runs model
    cost,sol = model.run()

    # plots convergence
    Utilities.ConvergencePlot(cost)

    # prints out best solution
    print("Fitness Value ABC: {0}".format(model.best))
    ABC_model = MultinomialNB(alpha=10**sol[0]).fit(email_train_tf,spam_train) # Create the optimized model with best parameter
    result = ABC_model.predict(email_test_tf) # predict with the ABC_model
    return sol,result

Import Optunity

 import optunity
    import optunity.metrics

Naive Bayes

from sklearn.naive_bayes import MultinomialNB

nb = MultinomialNB()
nb.get_params()
# fit tf-idf representation to NB model
nb.fit(email_train_tf, spam_train)

# class predictions for testing set
result1 = nb.predict(email_test_tf)

def evaluator(params):

    nBayes = MultinomialNB(alpha=10**params[0]).fit(email_train_tf,spam_train)
    
    
    pred_proba = nBayes.predict_proba(email_test_tf)
    return log_loss(spam_test,pred_proba)
    

sol,result3 = run([-2],[1],evaluator)

The traceback I receive is as follows:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-44-006c296828ed> in <module>
      8 
      9 
---> 10 sol,result3 = run([-2],[1],evaluator)

<ipython-input-32-6852d973eb15> in run(lowBounds, upBounds, evaluator)
     19 
     20     # plots convergence
---> 21     Utilities.ConvergencePlot(cost)
     22 
     23     # prints out best solution

c:\users\lidak\article\src\hive\Hive\Utilities.py in ConvergencePlot(cost)
     55     labels = ["Best Cost Function", "Mean Cost Function"]
     56     plt.figure(figsize=(12.5, 4));
---> 57     plt.plot(range(len(cost["best"])), cost["best"], label=labels[0]);
     58     plt.scatter(range(len(cost["mean"])), cost["mean"], color='red', label=labels[1]);
     59     plt.xlabel("Iteration #");

TypeError: string indices must be integers

<Figure size 900x288 with 0 Axes>
0

There are 0 best solutions below