I have a list of numbers using which I am creating dictionary and then assigning results to them. List looks like - [243,244,445,446] , Dictionary looks like - _243,_244,_445,_446 . I am assigning keys to each dictionary based on their names. For example - if we use dictionary _243 = {} then the key would be - _243["243 train_id"+x.train_id] Here x.train_id return string. I am putting the code and error below -
import re
lfs = []
_243 = _244 = _445 = _446 = {}
for code in [243,244,445,446]:
dics_name = '_'+str(code)
tt = '_'+str(code)+" train_id_"
print("tt : ",tt, " dics_name : ",dics_name)
@labeling_function(name=f"check_keywords_{code}", resources=dict(code = code, dics_name = dics_name, tt = tt))
def lf(x, code, dics_name,tt):
words_1 = unigrams_mapper[code]
words_2 = bigrams_mapper[code]
keywords = words_1+words_2
pattern = '|'.join(f"\\b{k}\\b" for k in keywords)
matches = {k: 0 for k in keywords}
for webtext in x.webtext_all.lower().split('\n'):
for key in keywords:
if key == webtext.strip():
matches[key] += 1
for match in re.findall(pattern, webtext.strip()):
matches[match] += 1
for val in matches:
if matches[val] > 0:
dics_name[tt+x.train_id] = matches
dics_name["webtext_all_"+x.train_id] = x.webtext_all
return code
return -1
lfs.append(lf)
try:
applier = PandasLFApplier(lfs=lfs)
L_train = applier.apply(df=df_train)
except:
pass
Full-stack trace -
Traceback (most recent call last):
File "C:\Users\SHNANDA\AppData\Local\Temp/ipykernel_19416/3779972020.py", line 11, in <module>
L_train = applier.apply(df=df_train)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\snorkel\labeling\apply\pandas.py", line 108, in apply
labels = call_fn(apply_fn, axis=1)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\tqdm\std.py", line 814, in inner
return getattr(df, df_function)(wrapper, **kwargs)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\pandas\core\frame.py", line 8740, in apply
return op.apply()
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\pandas\core\apply.py", line 688, in apply
return self.apply_standard()
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\pandas\core\apply.py", line 812, in apply_standard
results, res_index = self.apply_series_generator()
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\pandas\core\apply.py", line 828, in apply_series_generator
results[i] = self.f(v)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\tqdm\std.py", line 809, in wrapper
return func(*args, **kwargs)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\snorkel\labeling\apply\pandas.py", line 37, in apply_lfs_to_data_point
y = f_caller(lf, x)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\snorkel\labeling\apply\core.py", line 28, in __call__
return f(x)
File "C:\Users\SHNANDA\Anaconda3\lib\site-packages\snorkel\labeling\lf\core.py", line 77, in __call__
return self._f(x, **self._resources)
File "C:\Users\SHNANDA\AppData\Local\Temp/ipykernel_19416/3568096736.py", line 45, in lf
dics_name[tt+x.train_id] = matches
TypeError: 'str' object does not support item assignment
I am getting error at -
dics_name[tt+x.train_id] = matches,
dics_name["webtext_all_"+x.train_id] = x.webtext_all