I am creating a clothes recommendation system, with NearestNeighbors, the data is coming from 2 datasets one with ratings.csv in this case 0 and 1 based on saved to wishlist or not and clothes.csv with all the clothes, I want to passed the ID of a garment and get a list of recommended items, but I am getting an index error.
Here is the code:
user_ratings_df = pd.read_csv("ratings.csv")
user_ratings_df['IDGARMENT'] = user_ratings_df['IDGARMENT'].astype(int)
# read in data; use default pd.RangeIndex, i.e. 0, 1, 2, etc., as columns
clothes_desc = pd.read_csv("clothes.csv", on_bad_lines='skip')
clothes_metadata = clothes_desc[['IDGARMENT', 'DESCRIPTION','CATEGORY', 'BRAND', 'PRICE']]
clothes_metadata['IDGARMENT'] = clothes_metadata['IDGARMENT'].astype(int)
clothes_data = user_ratings_df.merge(clothes_metadata, on='IDGARMENT')
user_item_matrix = user_ratings_df.pivot(index=['USERID'], columns=['IDGARMENT'], values='RATING').fillna(0)
user_item_matrix
# Define a KNN model on cosine similarity
cf_knn_model= NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10, n_jobs=-1)
#lr.fit(x.reshape(-1, 1), y)
# Fitting the model on our matrix
cf_knn_model.fit(user_item_matrix)
def clothes_recommender_engine(garment_id, matrix, cf_model, n_recs):
# Fit model on matrix
cf_knn_model.fit(matrix)
# Calculate neighbour distances
distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)
clothes_rec_ids = sorted(list(zip(indices.squeeze().tolist(),distances.squeeze().tolist())),key=lambda x: x[1])[:0:-1]
# List to store recommendations
cf_recs = []
for i in clothes_rec_ids:
cf_recs.append({'Desc':clothes_desc['DESCRIPTION'][i[0]],'Distance':i[1]})
# Select top number of recommendations needed
df = pd.DataFrame(cf_recs, index = range(1,n_recs))
return df
n_recs = 10
clothes_recommender_engine(54448, user_item_matrix, cf_knn_model, n_recs)
And the error I am getting is:
> *keyError Traceback (most recent call last) File
> ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802,
> in Index.get_loc(self, key, method, tolerance) 3801 try:
> -> 3802 return self._engine.get_loc(casted_key) 3803 except KeyError as err: File
> ~/anaconda3/lib/python3.11/site-packages/pandas/_libs/index.pyx:138,
> in pandas._libs.index.IndexEngine.get_loc() File
> ~/anaconda3/lib/python3.11/site-packages/pandas/_libs/index.pyx:165,
> in pandas._libs.index.IndexEngine.get_loc() File
> pandas/_libs/hashtable_class_helper.pxi:2263, in
> pandas._libs.hashtable.Int64HashTable.get_item() File
> pandas/_libs/hashtable_class_helper.pxi:2273, in
> pandas._libs.hashtable.Int64HashTable.get_item() KeyError: 54448 The
> above exception was the direct cause of the following exception:
> KeyError Traceback (most recent call
> last) Cell In[4], line 64
> 59 return df
> 63 n_recs = 10
> ---> 64 clothes_recommender_engine(54448, user_item_matrix, cf_knn_model, n_recs) Cell In[4], line 48, in
> clothes_recommender_engine(garment_id, matrix, cf_model, n_recs)
> 42 cf_knn_model.fit(matrix)
> 44 # Extract input movie ID
> 45 #garment_id = process.extractOne(category, clothes_desc['CATEGORY'])[2]
> 46
> 47 # Calculate neighbour distances
> ---> 48 distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)
> 49 clothes_rec_ids = sorted(list(zip(indices.squeeze().tolist(),distances.squeeze().tolist())),key=lambda
> x: x[1])[:0:-1]
> 51 # List to store recommendations File ~/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3807, in
> DataFrame.__getitem__(self, key) 3805 if self.columns.nlevels > 1:
> 3806 return self._getitem_multilevel(key)
> -> 3807 indexer = self.columns.get_loc(key) 3808 if is_integer(indexer): 3809 indexer = [indexer] File
> ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3804,
> in Index.get_loc(self, key, method, tolerance) 3802 return
> self._engine.get_loc(casted_key) 3803 except KeyError as err:
> -> 3804 raise KeyError(key) from err 3805 except TypeError: 3806 # If we have a listlike key, _check_indexing_error will raise
> 3807 # InvalidIndexError. Otherwise we fall through and re-raise
> 3808 # the TypeError. 3809 self._check_indexing_error(key)
> KeyError: 54448*
The error seems to be in this line:
distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)
when passing matrix[garment_id], any idea how to solve it?