from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
from skfeature.function.statistical_based import gini_index
import numpy as np
# Rank TF-IDF features of a 10-category subset of 20 Newsgroups by Gini index.

# Newsgroups to keep; all other categories are excluded from both splits.
categories = [
    'alt.atheism',
    'comp.graphics',
    'comp.os.ms-windows.misc',
    'comp.sys.ibm.pc.hardware',
    'comp.sys.mac.hardware',
    'comp.windows.x',
    'misc.forsale',
    'rec.autos',
    'rec.motorcycles',
    'rec.sport.baseball',
]

# Load the train and test subsets with a fixed seed so shuffling is repeatable.
data_train = fetch_20newsgroups(subset='train', categories=categories,
                                shuffle=True, random_state=42)
data_test = fetch_20newsgroups(subset='test', categories=categories,
                               shuffle=True, random_state=42)

# Class labels for the training set and the test set.
y_train, y_test = data_train.target, data_test.target

# A single vectorizer is fitted on the training text only and then reused to
# transform the test text, so both matrices share one vocabulary.
vectorizer = TfidfVectorizer(stop_words='english')
# The original script bound this name to a second, never-used vectorizer;
# the name is kept as an alias so anything referring to it still resolves.
tfidf_vectorizer = vectorizer
X_train = vectorizer.fit_transform(data_train.data)
X_test = vectorizer.transform(data_test.data)

# get_feature_names() is absent from recent scikit-learn releases; prefer
# get_feature_names_out() when it exists and fall back otherwise. list() keeps
# the result type the same on either path.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = list(vectorizer.get_feature_names_out())
else:
    feature_names = list(vectorizer.get_feature_names())

# fit_transform() returns a SciPy sparse matrix. gini_index() builds a boolean
# mask with `X[:, i] <= v[j]` and indexes y with it; on a sparse matrix that
# mask is sparse too, which raises
# "IndexError: only integers, slices ... are valid indices".
# Passing a dense ndarray avoids this. X_train itself stays sparse for any
# later use.
# NOTE(review): the dense copy is n_samples x n_features floats and may need
# a lot of memory for this vocabulary; consider TfidfVectorizer(max_features=...)
# if that becomes a problem.
X_train_dense = X_train.toarray()
score = gini_index.gini_index(X_train_dense, y_train)
ranking = gini_index.feature_ranking(score)
Running this code raises the following error:
....feature_extraction(20News).py", line 31, in <module>
score = gini_index.gini_index(X_train, y_train)
File "C:\Python27\lib\site-packages\skfeature\function\statistical_based\gini_index.py", line 31, in gini_index
left_y = y[X[:, i] <= v[j]]
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices