1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

Add stanza support for part-of-speech constraint

This commit is contained in:
k-ivey
2020-10-01 19:55:11 -04:00
parent 8c575f2006
commit f079faa61e
6 changed files with 116 additions and 2 deletions

View File

@@ -197,3 +197,26 @@ def zip_flair_result(pred, tag_type="pos-fast"):
pos_list.append(token.get_tag("ner"))
return word_list, pos_list
def zip_stanza_result(pred, tagset="universal"):
    """Flatten a `stanza` document into parallel word and part-of-speech lists.

    Every sentence in the document is visited in order, and every word of
    each sentence contributes one entry to both returned lists.

    Args:
        pred: The `stanza` `Document` to read words from.
        tagset: ``"universal"`` selects each word's ``upos`` attribute; any
            other value selects its ``xpos`` attribute.

    Returns:
        A ``(word_list, pos_list)`` tuple of equal-length lists holding the
        word texts and their corresponding tags.

    Raises:
        TypeError: If ``pred`` is not a `stanza` `Document`.
    """
    # Imported here rather than at module level, as in the original, so that
    # `stanza` is only needed when this function is actually called.
    from stanza.models.common.doc import Document

    if not isinstance(pred, Document):
        raise TypeError("Result from Stanza POS tagger must be a `Document` object.")

    # The tagset choice does not change between words, so decide it once.
    want_universal = tagset == "universal"

    # Single pass over all words of all sentences, pairing text with its tag.
    text_tag_pairs = [
        (token.text, token.upos if want_universal else token.xpos)
        for sent in pred.sentences
        for token in sent.words
    ]

    word_list = [text for text, _ in text_tag_pairs]
    pos_list = [tag for _, tag in text_tag_pairs]
    return word_list, pos_list