ValueError: '_index' is a reserved name for dataframe columns

1.8k Views Asked by At

I am trying to save an AnnData object to h5ad format, and it raises the following error: ValueError: '_index' is a reserved name for dataframe columns.

import pandas as pd
import scanpy as sc
import numpy as np
# --- Load the data and attach a cell-type label to every cell ---------------
data = sc.read_h5ad('f.h5ad')
annotation = pd.read_csv('n.tsv', sep='\t')
# Lookup table: barcode -> cell type, built from the annotation rows.
annotation_dict = {item['barcodes']:item['celltype'] for item in annotation.to_dict('records')}
data.obs['barcodes'] = data.obs.index
# Barcodes that are absent from the lookup table map to NaN.
data.obs['celltype'] = data.obs['barcodes'].map(annotation_dict)

# --- Filter, normalize and keep the highly variable genes -------------------
sc.pp.filter_genes(data,min_cells=686)
sc.pp.filter_cells(data,min_genes=10)
sc.pp.normalize_per_cell(data,20000)
sc.pp.log1p(data)
sc.pp.highly_variable_genes(data,n_top_genes=1000)
# Invert the log1p above (exp(x) - 1) and store X as a dense array.
data.X = np.exp(data.X.toarray())-1
data=data[:,data.var['highly_variable']]
sc.pp.normalize_per_cell(data,3800)

# --- Cluster a second copy of the same file ---------------------------------
clustered = sc.read_h5ad('f.h5ad')
# NOTE(review): this filters `data` again, not the freshly loaded `clustered`;
# possibly `clustered` was intended here -- confirm.
sc.pp.filter_cells(data,min_genes=10)
sc.pp.recipe_zheng17(clustered)

sc.tl.pca(clustered, n_comps=50)
sc.pp.neighbors(clustered, n_pcs=50)
sc.tl.louvain(clustered, resolution=0.15)
# Per-cluster counts; the result is not stored, so it is only visible when the
# line is evaluated interactively.
clustered.obs.groupby('louvain').count()
# list(...) drops the index, so labels are assigned by position.
# Assumes `data` and `clustered` hold the same cells in the same order -- TODO confirm.
data.obs['louvain'] = list(clustered.obs['louvain'])

# --- Random 80/20 train/test split and save ---------------------------------
split = pd.DataFrame(data.obs['barcodes'])
test = split.sample(frac=0.2)
d_split = {item:'test' for item in test['barcodes']}
# Barcodes not sampled into the test set are labelled 'train'.
data.obs['split'] = data.obs['barcodes'].map(d_split).fillna('train')
# This is the call reported to raise
# "ValueError: '_index' is a reserved name for dataframe columns".
# The file name was missing its opening quote in the original snippet.
data.write_h5ad('e.h5ad')
1

There is 1 best solution below

0
On

This is probably related to a known issue with the AnnData .raw object: its var dataframe ends up with a column named '_index', which is a reserved name when writing. Two workarounds (from here):

# Workaround 1: rename the reserved '_index' column in the raw object's `_var` table.
raw_attrs = vars(vars(data)['_raw'])
raw_attrs['_var'] = raw_attrs['_var'].rename(columns={'_index': 'features'})

# Workaround 2: delete the backed-up raw information.
del data.raw