I ran some code to remove stopwords, but the output has lost the spaces between the words.
The initial input before the stopword removal is like this:
form server happen
The output looks like this:
formserverhappen
Code:
#Removing stopwords
def remove_stopwords(text, nlp, custom_stop_words=None, remove_small_tokens=True, min_len=2):
    """Return *text* with stop words (and optionally short tokens) removed.

    Args:
        text: The string to filter.
        nlp: A callable pipeline; ``nlp(text)`` must yield tokens exposing
            ``.text`` and ``.is_stop``, and ``nlp.Defaults.stop_words`` must
            be a set when ``custom_stop_words`` is given.
        custom_stop_words: Optional set of extra (lower-case) stop words.
            They are merged into ``nlp.Defaults.stop_words`` and are also
            matched directly against each token's lower-cased text.
        remove_small_tokens: When true, drop tokens whose text is not longer
            than ``min_len`` characters.
        min_len: Length threshold used when ``remove_small_tokens`` is true.

    Returns:
        The surviving token texts joined by single spaces, or ``None`` when
        no token survives.
    """
    extra_stop_words = custom_stop_words or ()
    if custom_stop_words:
        # Kept from the original behaviour: extend the pipeline's default list.
        nlp.Defaults.stop_words |= custom_stop_words

    kept = []
    for token in nlp(text):
        word = token.text
        # NOTE(review): spaCy appears to compute `is_stop` when a word is first
        # seen and cache it, so additions to the defaults made afterwards may
        # not be reflected; matching the custom words directly avoids relying
        # on that flag -- confirm against the spaCy version in use.
        if token.is_stop or word.lower() in extra_stop_words:
            continue
        # Short-token filter: only tokens strictly longer than min_len are kept.
        if remove_small_tokens and len(word) <= min_len:
            continue
        kept.append(word)

    # Joined with single spaces; None signals that nothing was left.
    return ' '.join(kept) if kept else None
# Load the spaCy pipeline with the parser and NER components disabled.
nlp = spacy.load(
    "en_core_web_sm",
    disable=["parser", "ner"],
)
# Run every row of the ctstring column through remove_stopwords, passing
# "mask" and "mandates" as additional stop words.
negative.ctstring = negative.ctstring.apply(
    lambda row_text: remove_stopwords(row_text, nlp, {"mask", "mandates"})
)
I looked at the join, but I can't figure out why the change is occurring.