mirror of
https://github.com/QData/TextAttack.git
synced 2021-10-13 00:05:06 +03:00
allow * and _ and @ in a word when tokenizing
This commit is contained in:
@@ -32,7 +32,7 @@ def words_from_text(s, words_to_ignore=[]):
|
||||
for c in " ".join(s.split()):
|
||||
if c.isalnum():
|
||||
word += c
|
||||
elif c in "'-" and len(word) > 0:
|
||||
elif c in "'-_*@" and len(word) > 0:
|
||||
# Allow apostrophes, hyphens, underscores, asterisks, and at-signs as long as they don't begin the
|
||||
# word.
|
||||
word += c
|
||||
|
||||
Reference in New Issue
Block a user