1
0
mirror of https://github.com/QData/TextAttack.git synced 2021-10-13 00:05:06 +03:00

allow * and _ and @ in a word when tokenizing

This commit is contained in:
qc
2020-11-06 14:17:44 -05:00
parent 07abaf5b1b
commit 5325ef633e

View File

@@ -32,7 +32,7 @@ def words_from_text(s, words_to_ignore=[]):
for c in " ".join(s.split()):
if c.isalnum():
word += c
elif c in "'-" and len(word) > 0:
elif c in "'-_*@" and len(word) > 0:
# Allow apostrophes, hyphens, underscores, asterisks, and at-signs
# as long as they don't begin the word.
word += c