What is the correct use of GATE NLP Python's 'RemoveAnn' action

37 Views Asked by At

Can anyone help me with the correct use of GATE NLP Python's 'RemoveAnn' action (gatenlp.pam.pampac.actions.RemoveAnn - docs)?

While adding annotations with PAMPAC works fine, I can't seem to remove an annotation afterwards.

For example, here I add an annotation to the word 'cat', then try to remove the annotation:

from gatenlp import Document
from gatenlp.processing.tokenizer import NLTKTokenizer
from gatenlp.pam.pampac import *
from nltk.tokenize.regexp import WhitespaceTokenizer


def pprint(doc):
    """Print the annotations returned by ``doc.annset()``, one per line.

    The listing is framed by two ``------`` separator lines. Each entry shows
    ``doc[ann]`` left-justified to six columns, followed by ``"- "`` and the
    string form of the annotation.
    """
    separator = "------"
    print(separator)
    for ann in doc.annset():
        covered = doc[ann]
        print(f"{covered.ljust(6)}- {ann!s}")
    print(separator)

# Minimal reproduction: tokenize a short text, tag the word "cat" with one
# PAMPAC rule, then try to remove that tag again with a second PAMPAC rule.

# Generate document and tokenize
text = """dog cat fish"""
tok1 = NLTKTokenizer(nltk_tokenizer=WhitespaceTokenizer())
doc1 = Document(text)
doc1 = tok1(doc1)
print("Simple document:")
pprint(doc1)

# Find cat text and annotate
pat1 = Text(text="cat")
action1 = AddAnn(type="CAT_TAG")
rule1 = Rule(pat1, action1)
pampac1 = Pampac(rule1, skip="longest", select="first")
# outset_name="" makes the new CAT_TAG annotation land in the set that
# doc1.annset() returns (the second pprint output lists it there).
# NOTE(review): annspec=[("", "Token")] presumably restricts matching to
# "Token" annotations of that same set -- confirm against the gatenlp docs.
annt1 = PampacAnnotator(pampac1, annspec=[("", "Token")], outset_name="")
annt1(doc1)
print("Annotate cat text with Annotation of type 'CAT_TAG'")
pprint(doc1)

# Find annotation and try to remove
# The match is given the name "remove"; the same string is passed to
# RemoveAnn below, presumably so the action can look the match up by name.
pat3 = AnnAt(type="CAT_TAG", name="remove")
# action3 = AddAnn(type="CAT_TAGGED_AGAIN", name="remove")  # works
action3 = RemoveAnn("remove", annset=doc1.annset())  # <-- is this the right way to specify the annset???
rule3 = Rule(pat3, action3)
pampac2 = Pampac(rule3, skip="longest", select="first")
# NOTE(review): annspec=[""] presumably selects every annotation of the set
# named "" (not only Tokens), so the CAT_TAG annotation is visible to AnnAt.
annt2 = PampacAnnotator(pampac2, annspec=[""], outset_name="")
# Per the reported traceback, this call raises KeyError from inside
# annset.remove(), so the print/pprint below are never reached.
annt2(doc1)
print("Try to remove the 'CAT_TAG' annotation from the default set")
pprint(doc1)

Output:

Simple document:
------
dog   - Annotation(0,3,Token,features=Features({}),id=0)
cat   - Annotation(4,7,Token,features=Features({}),id=1)
fish  - Annotation(8,12,Token,features=Features({}),id=2)
------
Annotate cat text with Annotation of type 'CAT_TAG'
------
dog   - Annotation(0,3,Token,features=Features({}),id=0)
cat   - Annotation(4,7,Token,features=Features({}),id=1)
cat   - Annotation(4,7,CAT_TAG,features=Features({}),id=3)
fish  - Annotation(8,12,Token,features=Features({}),id=2)
------

# Error:
#
# Traceback (most recent call last):
#   File ".../scratch.py", line 37, in <module>
#     annt2(doc1)
#   File ".../gatenlp/pam/pampac/pampac.py", line 234, in __call__
#     self.pampac.run(doc, anns, outset=outset, containing_anns=cont)
#   File ".../gatenlp/pam/pampac/pampac.py", line 106, in run
#     return self._run4span(logger, ctx, location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/pampac.py", line 133, in _run4span
#     fret = self.rules[idx].action(ret, context=ctx, location=location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/actions.py", line 123, in __call__
#     return self.actions[0](succ, context=context, location=location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/actions.py", line 421, in __call__
#     self.annset.remove(theann)
#   File ".../gatenlp/annotation_set.py", line 601, in remove
#     self._annset.remove(ann)
# KeyError: Annotation(4,7,CAT_TAG,features=Features({}),id=3)
0

There are 0 best solutions below