AttributeError: module 'fastai.train' has no attribute 'iloc'

91 Views Asked by At

I'm using Python 3.9 and fastai 1.0.58. I tried to implement the code from this article: https://towardsdatascience.com/fastai-with-transformers-bert-roberta-xlnet-xlm-distilbert-4f41ee18ecb2 but I'm getting an error when creating the data bunch.

train = pd.read_csv('train.tsv.zip', sep="\t")
test = pd.read_csv('test.tsv.zip', sep="\t")
print(train.shape,test.shape)
train.head()

(156060, 4) (66292, 3)

   PhraseId  SentenceId                                             Phrase  Sentiment
0         1           1  A series of escapades demonstrating the adage ...          1
1         2           1  A series of escapades demonstrating the adage ...          2
2         3           1                                           A series          2
3         4           1                                                  A          2
4         5           1                                             series          2

pad_first = bool(model_type in ['xlnet'])
pad_idx = transformer_tokenizer.pad_token_id

databunch = (TextList.from_df(train, cols='Phrase', processor=transformer_processor)
             .split_by_rand_pct(0.1,seed=seed)
             .label_from_df(cols= 'Sentiment')
             .add_test(test)
             .databunch(bs=bs, pad_first=pad_first, pad_idx=pad_idx))
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[59], line 1
----> 1 databunch = (TextList.from_df(train, cols='Phrase', processor=transformer_processor) .split_by_rand_pct(0.1,seed=seed)
      2              .label_from_df(cols= 'Sentiment')
      3              .add_test(test)
      4              .databunch(bs=bs, pad_first=pad_first, pad_idx=pad_idx))

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\data_block.py:135, in ItemList.from_df(cls, df, path, cols, processor, **kwargs)
    132 @classmethod
    133 def from_df(cls, df:DataFrame, path:PathOrStr='.', cols:IntsOrStrs=0, processor:PreProcessors=None, **kwargs)->'ItemList':
    134     "Create an `ItemList` in `path` from the inputs in the `cols` of `df`."
--> 135     inputs = df.iloc[:,df_names_to_idx(cols, df)]
    136     assert not inputs.isna().any().any(), f"You have NaN values in column(s) {cols} of your dataframe, please fix it."
    137     res = cls(items=_maybe_squeeze(inputs.values), path=path, inner_df=df, processor=processor, **kwargs)

AttributeError: module 'fastai.train' has no attribute 'iloc'

I don't know what I need to import for this module. Is there any solution for this problem?


Update: I tried this:

train = pd.read_csv('train.tsv', sep="\t")
test = pd.read_csv('test.tsv', sep="\t")
print(train.shape,test.shape)
train.head()

Then I ran the data bunch code again. It gets past the first error now, but I get another error:

BrokenProcessPool                         Traceback (most recent call last)
Cell In[85], line 1
----> 1 databunch = (TextList.from_df(train, cols='Phrase', processor=transformer_processor)
      2              .split_by_rand_pct(0.1,seed=seed)
      3              .label_from_df(cols= 'Sentiment')
      4              .add_test(test)
      5              .databunch(bs=bs, pad_first=pad_first, pad_idx=pad_idx))

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\data_block.py:480, in ItemLists.__getattr__.<locals>._inner(*args, **kwargs)
    478 self.valid = fv(*args, from_item_lists=True, **kwargs)
    479 self.__class__ = LabelLists
--> 480 self.process()
    481 return self

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\data_block.py:534, in LabelLists.process(self)
    532 "Process the inner datasets."
    533 xp,yp = self.get_processors()
--> 534 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
    535 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
    536 for ds in self.lists:

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\data_block.py:714, in LabelList.process(self, xp, yp, name, max_warn_items)
    712             p.warns = []
    713         self.x,self.y = self.x[~filt],self.y[~filt]
--> 714 self.x.process(xp)
    715 return self

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\data_block.py:84, in ItemList.process(self, processor)
     82 if processor is not None: self.processor = processor
     83 self.processor = listify(self.processor)
---> 84 for p in self.processor: p.process(self)
     85 return self

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\text\data.py:297, in TokenizeProcessor.process(self, ds)
    295 tokens = []
    296 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
--> 297     tokens += self.tokenizer.process_all(ds.items[i:i+self.chunksize])
    298 ds.items = tokens

File ~\anaconda3\envs\adtree\lib\site-packages\fastai\text\transform.py:120, in Tokenizer.process_all(self, texts)
    118 if self.n_cpus <= 1: return self._process_all_1(texts)
    119 with ProcessPoolExecutor(self.n_cpus) as e:
--> 120     return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])

File ~\anaconda3\envs\adtree\lib\concurrent\futures\process.py:562, in _chain_from_iterable_of_lists(iterable)
    556 def _chain_from_iterable_of_lists(iterable):
    557     """
    558     Specialized implementation of itertools.chain.from_iterable.
    559     Each item in *iterable* should be a list.  This function is
    560     careful not to keep references to yielded objects.
    561     """
--> 562     for element in iterable:
    563         element.reverse()
    564         while element:

File ~\anaconda3\envs\adtree\lib\concurrent\futures\_base.py:609, in Executor.map.<locals>.result_iterator()
    606 while fs:
    607     # Careful not to keep a reference to the popped future
    608     if timeout is None:
--> 609         yield fs.pop().result()
    610     else:
    611         yield fs.pop().result(end_time - time.monotonic())

File ~\anaconda3\envs\adtree\lib\concurrent\futures\_base.py:446, in Future.result(self, timeout)
    444     raise CancelledError()
    445 elif self._state == FINISHED:
--> 446     return self.__get_result()
    447 else:
    448     raise TimeoutError()

File ~\anaconda3\envs\adtree\lib\concurrent\futures\_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
0

There are 0 best solutions below