I'm trying to use the scanpy plugin to infer copy number variation (CNV) from single-cell transcriptomics data.
When trying to annotate genomic positions in Jupyter using infercnvpy, I get the following error. How can I fix things?
AttributeError Traceback (most recent call last) Cell In[28], line 2 1 gtf_file=r'G:\gencode.v38.annotation.gtf' 2 cnv.io.genomic_position_from_gtf(gtf_file, adata=adata, gtf_gene_id='gene_name', inplace=True)
File ~\Anaconda3\envs\spapros\lib\site-packages\infercnvpy\io_genepos.py:41, in genomic_position_from_gtf(gtf_file, adata, gtf_gene_id, adata_gene_id, inplace) 11 def genomic_position_from_gtf( 12 gtf_file: Union[Path, str], 13 adata: Union[AnnData, None] = None, (...) 17 inplace: bool = True, 18 ) -> Union[pd.DataFrame, None]: 19 """Get genomic gene positions from a GTF file. 20 21 The GTF file needs to match the genome annotation used for your single cell dataset. (...) 39 If True, add the annotations directly to adata, otherwise return a dataframe. 40 """ 41 gtf = gtfparse.read_gtf( 42 gtf_file, usecols=["seqname", "feature", "start", "end", "gene_id", "gene_name"], result_type="pandas" 43 ) 44 gtf = ( 45 gtf.loc[ 46 gtf["feature"] == "gene", (...) 50 .rename(columns={"seqname": "chromosome"}) 51 ) 53 gene_ids_adata = (adata.var_names if adata_gene_id is None else adata.var[adata_gene_id]).values
File ~\Anaconda3\envs\spapros\lib\site-packages\gtfparse\read_gtf.py:254, in read_gtf(filepath_or_buffer, expand_attribute_column, infer_biotype_column, column_converters, usecols, features, result_type) 251 raise ValueError("GTF file does not exist: %s" % filepath_or_buffer) 253 if expand_attribute_column: 254 result_df = parse_gtf_and_expand_attributes( 255 filepath_or_buffer, 256 restrict_attribute_columns=usecols, 257 features=features) 258 else: 259 result_df = parse_gtf(result_df, features=features)
File ~\Anaconda3\envs\spapros\lib\site-packages\gtfparse\read_gtf.py:189, in parse_gtf_and_expand_attributes(filepath_or_buffer, restrict_attribute_columns, features) 166 def parse_gtf_and_expand_attributes( 167 filepath_or_buffer, 168 restrict_attribute_columns=None, 169 features=None): 170 """ 171 Parse lines into column->values dictionary and then expand 172 the 'attribute' column into multiple columns. This expansion happens (...) 187 Ignore entries which don't correspond to one of the supplied features 188 """ 189 df = parse_gtf( 190 filepath_or_buffer=filepath_or_buffer, 191 features=features, 192 split_attributes=True) 193 if type(restrict_attribute_columns) is str: 194 restrict_attribute_columns = {restrict_attribute_columns}
File ~\Anaconda3\envs\spapros\lib\site-packages\gtfparse\read_gtf.py:155, in parse_gtf(filepath_or_buffer, split_attributes, features, fix_quotes_columns) 150 def parse_gtf( 151 filepath_or_buffer, 152 split_attributes=True, 153 features=None, 154 fix_quotes_columns=["attribute"]): 155 df_lazy = parse_with_polars_lazy( 156 filepath_or_buffer=filepath_or_buffer, 157 split_attributes=split_attributes, 158 features=features, 159 fix_quotes_columns=fix_quotes_columns) 160 return df_lazy.collect()
File ~\Anaconda3\envs\spapros\lib\site-packages\gtfparse\read_gtf.py:87, in parse_with_polars_lazy(filepath_or_buffer, split_attributes, features, fix_quotes_columns) 80 def parse_with_polars_lazy( 81 filepath_or_buffer, 82 split_attributes=True, (...) 85 # use a global string cache so that all strings get intern'd into 86 # a single numbering system 87 polars.toggle_string_cache(True) 88 kwargs = dict( 89 has_header=False, 90 sep="\t", (...) 103 "frame": polars.UInt32, 104 }) 105 try:
AttributeError: module 'polars' has no attribute 'toggle_string_cache'
I've run the code below. I'm using the latest GENCODE GTF.
import infercnvpy as cnv
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd

adata = sc.read(r'G:\data.h5ad')
sc.pp.log1p(adata)

gtf_file = r'G:\gencode.v44.annotation.gtf'
cnv.io.genomic_position_from_gtf(gtf_file, adata=adata, gtf_gene_id='gene_name', inplace=True)
I'm doing this so that I can get output along the following lines and then proceed to running inferCNV:
adata.var.loc[:, ["ensg", "chromosome", "start", "end"]].head()
ensg chromosome start end
symbol
AL645933.5 ENSG00000288587.1 chr6 31400702 31463705
AC010184.1 ENSG00000288585.1 chr3 141449745 141456434
AC023296.1 ENSG00000288580.1 chr8 2923568 2926689
AL117334.2 ENSG00000288577.1 chr20 3406380 3410036
AC107294.4 ENSG00000288576.1 chr3 184778723 184780720
This looks like an error that comes from `read_gtf` in the gtfparse library. I'm getting the same error with the latest version (which switched to using polars). Downgrading to an earlier version with `pip install gtfparse==1.3.0` fixed the issue for me.