Compare commits
968 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6bf376f8f9 | ||
|
|
155dd490d7 | ||
|
|
d810489bf6 | ||
|
|
5f04781809 | ||
|
|
a03cbb1d27 | ||
|
|
b0892ce7fe | ||
|
|
e33bfc2bb6 | ||
|
|
4dc804a8f7 | ||
|
|
6838209255 | ||
|
|
305eb48cb6 | ||
|
|
b21bdc96c4 | ||
|
|
24669a90b9 | ||
|
|
be6cd8801f | ||
|
|
3e9cbcbd05 | ||
|
|
f0e27d129c | ||
|
|
e04c3777b3 | ||
|
|
170e45b714 | ||
|
|
a9ab5d4502 | ||
|
|
2df8833965 | ||
|
|
ad68251221 | ||
|
|
b148bd2818 | ||
|
|
2328b7f76c | ||
|
|
0f0649e29b | ||
|
|
de0b5b8b8b | ||
|
|
f950128ff4 | ||
|
|
16986b421e | ||
|
|
fb6088f39a | ||
|
|
03e85b39c2 | ||
|
|
acbc6c0276 | ||
|
|
16c3b0d0ed | ||
|
|
6ccfaeaa63 | ||
|
|
c04f8f4b2a | ||
|
|
740c333838 | ||
|
|
e6f07eb4de | ||
|
|
5325ef633e | ||
|
|
d9ae5f6abb | ||
|
|
07abaf5b1b | ||
|
|
a0e94792f5 | ||
|
|
8dad54a145 | ||
|
|
866301ba08 | ||
|
|
84c1681f34 | ||
|
|
dcf408be95 | ||
|
|
4893d47e6c | ||
|
|
79f3f4f8f0 | ||
|
|
bdbeae80d2 | ||
|
|
6273b19c19 | ||
|
|
3ffd776b63 | ||
|
|
21496460fe | ||
|
|
e2dd75001c | ||
|
|
4f16bb097b | ||
|
|
3c65e9d0a1 | ||
|
|
46ef3bcac5 | ||
|
|
dcce7b3434 | ||
|
|
7a89aba559 | ||
|
|
e6786b6bd7 | ||
|
|
3458a2b6c2 | ||
|
|
4478ad910b | ||
|
|
8d64e89a1f | ||
|
|
7dcf27d7bd | ||
|
|
41e747215b | ||
|
|
af029bff8b | ||
|
|
58f522b30c | ||
|
|
2509a14331 | ||
|
|
f79463903e | ||
|
|
230e39b590 | ||
|
|
6d83b2c9cb | ||
|
|
96a0360567 | ||
|
|
4c69e8656a | ||
|
|
77ff8f35e2 | ||
|
|
36dfce6bda | ||
|
|
887dca1307 | ||
|
|
b998e0f654 | ||
|
|
69a415f80b | ||
|
|
a4bd404d12 | ||
|
|
c78aa66980 | ||
|
|
c41a13e6cd | ||
|
|
23d0de6b43 | ||
|
|
95603fb73c | ||
|
|
00c80ee514 | ||
|
|
2ea690bbc6 | ||
|
|
fc7d5294d9 | ||
|
|
821c350c37 | ||
|
|
1e36c63c61 | ||
|
|
688d997694 | ||
|
|
d326796c60 | ||
|
|
e91a652715 | ||
|
|
dfe26b63c4 | ||
|
|
0c95966716 | ||
|
|
74bc5c0013 | ||
|
|
beac247faa | ||
|
|
8c726d1e83 | ||
|
|
cc6f9af25d | ||
|
|
09d5f1c2e7 | ||
|
|
ecc010de65 | ||
|
|
6737a4ee18 | ||
|
|
5a2f7b3bd5 | ||
|
|
9b120b6253 | ||
|
|
0e10c10a9f | ||
|
|
4420daf15e | ||
|
|
6b40544a5b | ||
|
|
9b2fc8df24 | ||
|
|
f3557c4180 | ||
|
|
7abd27cec7 | ||
|
|
2cb37c9f60 | ||
|
|
0728a779a6 | ||
|
|
a07171a6d9 | ||
|
|
63ad70cf17 | ||
|
|
352ab99e6f | ||
|
|
7e38ee28fd | ||
|
|
5d892ce54c | ||
|
|
c9b1bcabdd | ||
|
|
4ffa618be3 | ||
|
|
67e03ac3c0 | ||
|
|
c8e7da9213 | ||
|
|
55cb0710bf | ||
|
|
4627f210c6 | ||
|
|
1505015ff5 | ||
|
|
e7a8dec4b5 | ||
|
|
aacd8b675f | ||
|
|
d5b3e4b7c1 | ||
|
|
e257ed8146 | ||
|
|
d715a3abcb | ||
|
|
c6b9b9c280 | ||
|
|
8c777d60dc | ||
|
|
a55fa39dc2 | ||
|
|
97b67b0d21 | ||
|
|
fcc066b089 | ||
|
|
0e0c25e887 | ||
|
|
1aa8bac7ba | ||
|
|
98dc6eef60 | ||
|
|
7004f96132 | ||
|
|
8351959c3e | ||
|
|
bb009c70f9 | ||
|
|
1846a875e9 | ||
|
|
90bfe90e83 | ||
|
|
22f36b594c | ||
|
|
1ca633dc2f | ||
|
|
0c52fb6066 | ||
|
|
f817a2bb16 | ||
|
|
a131988988 | ||
|
|
224d788d3e | ||
|
|
a07deef834 | ||
|
|
38e3e917f5 | ||
|
|
69bdd25028 | ||
|
|
c341d9e04a | ||
|
|
f8a0cfe266 | ||
|
|
3b87ccedf7 | ||
|
|
e6223182d6 | ||
|
|
0e276cdc36 | ||
|
|
3390a18c52 | ||
|
|
275c77905e | ||
|
|
685a8ac38a | ||
|
|
d701d19437 | ||
|
|
4d794d3d53 | ||
|
|
8d038e1b6c | ||
|
|
753438a9f1 | ||
|
|
36c3e69db6 | ||
|
|
e53ab4653f | ||
|
|
f56f68c42d | ||
|
|
94ea52460e | ||
|
|
61d907c50b | ||
|
|
f079faa61e | ||
|
|
8c575f2006 | ||
|
|
e57634cdb0 | ||
|
|
d878843eb4 | ||
|
|
55f274b55e | ||
|
|
47d15acea9 | ||
|
|
4fb3850072 | ||
|
|
e496280286 | ||
|
|
88024e1f49 | ||
|
|
6ae5dd5065 | ||
|
|
3dae361054 | ||
|
|
5da0f427da | ||
|
|
a2f770aaf0 | ||
|
|
b44bcd0f61 | ||
|
|
5ce3fcb793 | ||
|
|
d61adba949 | ||
|
|
43e5418846 | ||
|
|
2cdf624b67 | ||
|
|
428b19a511 | ||
|
|
bbd1587e5f | ||
|
|
ee3f476888 | ||
|
|
23c06a5c74 | ||
|
|
2c67aa6106 | ||
|
|
356e6b2a97 | ||
|
|
4d28076efb | ||
|
|
2223f70c0d | ||
|
|
4f9db82164 | ||
|
|
4a6ab23e53 | ||
|
|
0f5a3f33fc | ||
|
|
4180b99af3 | ||
|
|
32b8468d80 | ||
|
|
17e1b3cd0f | ||
|
|
cd611887dc | ||
|
|
84e88304ea | ||
|
|
e2d91f0597 | ||
|
|
8f2809e313 | ||
|
|
663f951bec | ||
|
|
cc02abd4dc | ||
|
|
e10b903677 | ||
|
|
273605a5f7 | ||
|
|
5c53dfad21 | ||
|
|
9fa5874144 | ||
|
|
ae3a28a759 | ||
|
|
4269029b86 | ||
|
|
1575566804 | ||
|
|
548d751613 | ||
|
|
f97475b98d | ||
|
|
7e50816135 | ||
|
|
1d8d772928 | ||
|
|
4b5178a761 | ||
|
|
7062355a3a | ||
|
|
9d0a668210 | ||
|
|
4fc83907c6 | ||
|
|
44efdfad05 | ||
|
|
32beeb7bbf | ||
|
|
64575735d5 | ||
|
|
a9ec9ac276 | ||
|
|
419975abdd | ||
|
|
e654dde2af | ||
|
|
fc655cb2e3 | ||
|
|
2461c7be80 | ||
|
|
f1f14de651 | ||
|
|
e76b745187 | ||
|
|
be16974b3b | ||
|
|
44d08d55f4 | ||
|
|
8ea872245a | ||
|
|
4d2585e608 | ||
|
|
d6b036e8bf | ||
|
|
96c5feac64 | ||
|
|
0ece7ca4e9 | ||
|
|
02c9d76cad | ||
|
|
e47e34ebe1 | ||
|
|
c9931daf43 | ||
|
|
c1a801e81d | ||
|
|
e055f17790 | ||
|
|
c2ec986a1d | ||
|
|
d027dc07c0 | ||
|
|
af7f31d265 | ||
|
|
e66429cf80 | ||
|
|
b014a75f9d | ||
|
|
253af191d9 | ||
|
|
20116b483d | ||
|
|
63077c06d3 | ||
|
|
c0cc8552b1 | ||
|
|
5afe88a290 | ||
|
|
005336c8e4 | ||
|
|
aef56dcbbf | ||
|
|
f2e3786e3b | ||
|
|
8c28c555c2 | ||
|
|
4bdaff94cb | ||
|
|
e2217914b6 | ||
|
|
042a371c86 | ||
|
|
3f6df81c71 | ||
|
|
8af151880d | ||
|
|
4baf44607e | ||
|
|
70b035aeb3 | ||
|
|
3139e9a369 | ||
|
|
8f87b26994 | ||
|
|
d292f3ee13 | ||
|
|
65483f6aec | ||
|
|
f33dcc1630 | ||
|
|
c5d0f5702e | ||
|
|
31a69a839e | ||
|
|
5c1d03bdfa | ||
|
|
579a81d7ae | ||
|
|
b6deca335c | ||
|
|
c542479f42 | ||
|
|
0b9a022eed | ||
|
|
0df48891fa | ||
|
|
1f985a27d0 | ||
|
|
cd8326e6ea | ||
|
|
37138a6183 | ||
|
|
17415b6820 | ||
|
|
e658b202a6 | ||
|
|
0511da8366 | ||
|
|
7a2fde1520 | ||
|
|
d13971813e | ||
|
|
9697442ab2 | ||
|
|
f691ede779 | ||
|
|
e16a26d283 | ||
|
|
ec791d1d7b | ||
|
|
a78db7377e | ||
|
|
436b79de66 | ||
|
|
50f642ff37 | ||
|
|
4e9965d016 | ||
|
|
77bfdcc28c | ||
|
|
3416cd3828 | ||
|
|
916989c361 | ||
|
|
0291a6fbdb | ||
|
|
ae575cfeb5 | ||
|
|
adbda1ea3b | ||
|
|
279c8afb40 | ||
|
|
6cd3a2d9a5 | ||
|
|
505ddfee10 | ||
|
|
e979dba481 | ||
|
|
2cc7610cca | ||
|
|
15bd6bd3e0 | ||
|
|
f9ead09a82 | ||
|
|
b34af9fbb0 | ||
|
|
1c9dc3630d | ||
|
|
b0f473685d | ||
|
|
f2eadb894d | ||
|
|
739664f0ff | ||
|
|
3519a3006f | ||
|
|
97b4aa2886 | ||
|
|
4ba7fac789 | ||
|
|
d9ad73b8ff | ||
|
|
aacbed4009 | ||
|
|
cb9e16e127 | ||
|
|
81ceab3352 | ||
|
|
d849b626d4 | ||
|
|
4e45ec4da3 | ||
|
|
b589baf985 | ||
|
|
ea3911fb4c | ||
|
|
5a1dd3e3e9 | ||
|
|
f504cdc724 | ||
|
|
fe90ceaf9a | ||
|
|
89f1724ed4 | ||
|
|
f578375d6c | ||
|
|
0b99512193 | ||
|
|
5d5e1b5857 | ||
|
|
00573d24fa | ||
|
|
e2b89f076f | ||
|
|
a54a3c4e57 | ||
|
|
a04a6a847a | ||
|
|
d66d374a19 | ||
|
|
d82dd4e127 | ||
|
|
024369f5f4 | ||
|
|
c7cfa42bcf | ||
|
|
6271fe9edc | ||
|
|
ee05ca0f17 | ||
|
|
0560d4f80f | ||
|
|
7acb17aa27 | ||
|
|
46b0830508 | ||
|
|
5cce3918bf | ||
|
|
618e815eea | ||
|
|
29432d88c8 | ||
|
|
e6d671e918 | ||
|
|
a06f221443 | ||
|
|
bfd512f63c | ||
|
|
a68caf3df5 | ||
|
|
1a92d70457 | ||
|
|
9d3c068db4 | ||
|
|
5a8d74d288 | ||
|
|
7aa407496f | ||
|
|
f5e1a731d2 | ||
|
|
ac95db5611 | ||
|
|
7255a9ac28 | ||
|
|
589113f82f | ||
|
|
2e64ac8aed | ||
|
|
e6e16a3fee | ||
|
|
c50a0d8f9d | ||
|
|
289c84a48a | ||
|
|
5de2efacc1 | ||
|
|
792fc5b11d | ||
|
|
cdd9061cb4 | ||
|
|
28952235dc | ||
|
|
5db7e1c45b | ||
|
|
9c8b9b6b8a | ||
|
|
fb6b0b1511 | ||
|
|
6f4dc29d00 | ||
|
|
519a06617e | ||
|
|
32ed762a3e | ||
|
|
8094ae5f7f | ||
|
|
94a519f31f | ||
|
|
8b3b549ce8 | ||
|
|
92797cb643 | ||
|
|
7a4b83c979 | ||
|
|
e43108ef9b | ||
|
|
5ed22b52c1 | ||
|
|
a7fdef9875 | ||
|
|
055a1f6829 | ||
|
|
36ebda0452 | ||
|
|
410c77fea0 | ||
|
|
613bbf0b88 | ||
|
|
995c2d7c25 | ||
|
|
ebe050c864 | ||
|
|
d65b5963ed | ||
|
|
8f0c443e66 | ||
|
|
277fb908b2 | ||
|
|
0c26e0ae72 | ||
|
|
c46c3d06ea | ||
|
|
b9564088d8 | ||
|
|
f63a9ec83c | ||
|
|
3a48ad3e7b | ||
|
|
4d388a6cf9 | ||
|
|
c3823aca93 | ||
|
|
334671c89b | ||
|
|
073b258aa5 | ||
|
|
9cebf97c82 | ||
|
|
f9eff53df4 | ||
|
|
a0c6b78ad9 | ||
|
|
3f0fdbe227 | ||
|
|
1c6be3079a | ||
|
|
5038c1790d | ||
|
|
7382c7a1b5 | ||
|
|
50d8ffe507 | ||
|
|
22ef1bd459 | ||
|
|
60bd380417 | ||
|
|
5ac5dc4d28 | ||
|
|
8866b67650 | ||
|
|
11eec566a8 | ||
|
|
c351f537cd | ||
|
|
39e4a8e2c3 | ||
|
|
5827b6a859 | ||
|
|
ce789411e2 | ||
|
|
15bdf7a5c5 | ||
|
|
5a237c2e16 | ||
|
|
09a2e30ddb | ||
|
|
39e2a3663a | ||
|
|
9b48469f1a | ||
|
|
e83f4a0091 | ||
|
|
e63ffa921b | ||
|
|
576876257f | ||
|
|
3853928a44 | ||
|
|
ba3b412658 | ||
|
|
4dd95303a6 | ||
|
|
e937e01531 | ||
|
|
185c17eb77 | ||
|
|
d6a480abc9 | ||
|
|
c2fa2c5051 | ||
|
|
3a4b8b2f7e | ||
|
|
09ea975796 | ||
|
|
1394770562 | ||
|
|
77877daa44 | ||
|
|
3ad73f7dc3 | ||
|
|
da7273ac6e | ||
|
|
eb742affad | ||
|
|
5cbff8b842 | ||
|
|
cff37df829 | ||
|
|
5f584c2d75 | ||
|
|
31b3b2fa4e | ||
|
|
cb3b13cd31 | ||
|
|
aaf36912f0 | ||
|
|
f0ed3ca1be | ||
|
|
700d26f4ce | ||
|
|
a6ee121ed3 | ||
|
|
c2c7c00f5e | ||
|
|
4f8b227ef9 | ||
|
|
974061c0aa | ||
|
|
18563814bd | ||
|
|
2498149d22 | ||
|
|
19c4f8032b | ||
|
|
34115e7c03 | ||
|
|
5541904af7 | ||
|
|
1690c305e2 | ||
|
|
80b9a6e7e7 | ||
|
|
f875f65eed | ||
|
|
1387478fb6 | ||
|
|
641cbe7b61 | ||
|
|
eb57e7df5d | ||
|
|
a9db0ffda5 | ||
|
|
9e37fd386d | ||
|
|
ae5047017a | ||
|
|
d36c80b114 | ||
|
|
17e445b607 | ||
|
|
4173ab10aa | ||
|
|
dfdbfbe5f5 | ||
|
|
5b9827387d | ||
|
|
629650a283 | ||
|
|
afd4a8f425 | ||
|
|
8d5e8e7b90 | ||
|
|
3649791e5c | ||
|
|
86c6e149cb | ||
|
|
45409f5a95 | ||
|
|
279f05640c | ||
|
|
0de91af40e | ||
|
|
782f9b74da | ||
|
|
14242e8d19 | ||
|
|
e0fbf1dcd6 | ||
|
|
52c39ac891 | ||
|
|
b5c3fe2d92 | ||
|
|
a2f40c15c0 | ||
|
|
dfd04704fa | ||
|
|
56ae810afb | ||
|
|
58609feac1 | ||
|
|
824df740a1 | ||
|
|
7e3b90e979 | ||
|
|
c0a3a734d8 | ||
|
|
a19b2e44ce | ||
|
|
140624ea98 | ||
|
|
526d7fb481 | ||
|
|
5c00076890 | ||
|
|
437b8714a3 | ||
|
|
24eead8c34 | ||
|
|
3aa81edfc5 | ||
|
|
9bf2d1be14 | ||
|
|
92f1ca1554 | ||
|
|
ef48bbda7d | ||
|
|
ad2dba067e | ||
|
|
fbcdb74b6d | ||
|
|
087880c586 | ||
|
|
1399e37240 | ||
|
|
29dd7ee6ff | ||
|
|
528f2e2f55 | ||
|
|
11013b3a4f | ||
|
|
25883025a4 | ||
|
|
cd40129e42 | ||
|
|
72423547a6 | ||
|
|
5e7f7300aa | ||
|
|
b6f2c44d7f | ||
|
|
884e3d47be | ||
|
|
0c91cd9512 | ||
|
|
71f98a6b84 | ||
|
|
718a74b036 | ||
|
|
029cb2eee1 | ||
|
|
991787dcc6 | ||
|
|
ece588f4ef | ||
|
|
9f8135d5bc | ||
|
|
7e7d634f75 | ||
|
|
703712f38a | ||
|
|
705fecbd9f | ||
|
|
2f7870ab10 | ||
|
|
cd8c3369fc | ||
|
|
accdae149f | ||
|
|
0c58730d82 | ||
|
|
38b44dac4e | ||
|
|
2770f43242 | ||
|
|
d72823acdb | ||
|
|
5be7de0cf7 | ||
|
|
f881ca4930 | ||
|
|
d03b6947ca | ||
|
|
ae08158f3f | ||
|
|
da78b8febc | ||
|
|
19b6d9a016 | ||
|
|
c1b6245132 | ||
|
|
734a79665c | ||
|
|
489fd92ace | ||
|
|
bcf5eb1f7b | ||
|
|
12ae5edeb9 | ||
|
|
f72f49d5db | ||
|
|
58cada77d5 | ||
|
|
4076469962 | ||
|
|
f28efb0825 | ||
|
|
c08d0c7b48 | ||
|
|
d7476b30ad | ||
|
|
99fedba104 | ||
|
|
5b7bbffe90 | ||
|
|
c815dd1d06 | ||
|
|
a7ef32773e | ||
|
|
a9c8a84070 | ||
|
|
407cf8cb3d | ||
|
|
e6da096d4e | ||
|
|
9b9b704a9e | ||
|
|
6be1261551 | ||
|
|
1cb7614287 | ||
|
|
5fec3aeb0d | ||
|
|
7cf2128fb2 | ||
|
|
05b32b87f0 | ||
|
|
0a48bf79e4 | ||
|
|
e22fcdb253 | ||
|
|
a99cb7aec4 | ||
|
|
868d6e1aed | ||
|
|
5edafc2b38 | ||
|
|
220880fc82 | ||
|
|
b71b2594f3 | ||
|
|
7d9ce86f07 | ||
|
|
54e81e07f8 | ||
|
|
c50eb227ea | ||
|
|
97593cc3c0 | ||
|
|
352a959ef3 | ||
|
|
65135d4cab | ||
|
|
eb68e95f9f | ||
|
|
ae6046bfb8 | ||
|
|
eddb836123 | ||
|
|
59b6efc3f8 | ||
|
|
7e8fb3ca36 | ||
|
|
909d315a92 | ||
|
|
c99ec4022e | ||
|
|
09502e4f8c | ||
|
|
4297e3f411 | ||
|
|
ee6d8ea80f | ||
|
|
0fc4c9995b | ||
|
|
0430055c1c | ||
|
|
6c8728432f | ||
|
|
ebbb81ffb9 | ||
|
|
dafd251614 | ||
|
|
70ef17ed1d | ||
|
|
2ea9b06a0f | ||
|
|
39463bdcc1 | ||
|
|
79e85fc98e | ||
|
|
cdc4f9f9c5 | ||
|
|
36c2f91aad | ||
|
|
711cb7dd14 | ||
|
|
e1f80a9850 | ||
|
|
7e9f3ece13 | ||
|
|
ae4d91ab3f | ||
|
|
6f7af674e2 | ||
|
|
c6c40a15a8 | ||
|
|
c3189709e5 | ||
|
|
94a98f59e5 | ||
|
|
46b0f38967 | ||
|
|
88db893756 | ||
|
|
2fc7f99351 | ||
|
|
fc30840e80 | ||
|
|
9310719694 | ||
|
|
f8320e3be2 | ||
|
|
769f160fa0 | ||
|
|
87ec3ad1bc | ||
|
|
7b37d18ce4 | ||
|
|
779d82e524 | ||
|
|
9b5dd320ff | ||
|
|
0f44580c9d | ||
|
|
b1d5e6f6f1 | ||
|
|
ec5598baed | ||
|
|
73c0fda293 | ||
|
|
7651e2738c | ||
|
|
a2532198a5 | ||
|
|
b876bb2aaa | ||
|
|
6651c7482a | ||
|
|
674a34cb96 | ||
|
|
2c9e484173 | ||
|
|
5636af9fb3 | ||
|
|
9bb4b80af5 | ||
|
|
09977fdb60 | ||
|
|
59f5f8b084 | ||
|
|
1367e247cf | ||
|
|
3da7e5cfdb | ||
|
|
7427865517 | ||
|
|
e0fa607e7b | ||
|
|
c88798073b | ||
|
|
20234b5203 | ||
|
|
a63730344c | ||
|
|
1acd494a3a | ||
|
|
d1460bca67 | ||
|
|
b2760dc701 | ||
|
|
b497fbf5e3 | ||
|
|
98b8d6102a | ||
|
|
8ac1139fdc | ||
|
|
c54b89eb82 | ||
|
|
693927c580 | ||
|
|
db5307432f | ||
|
|
03a9585022 | ||
|
|
abb85ae30a | ||
|
|
01a639515b | ||
|
|
451871ade8 | ||
|
|
1ce3750996 | ||
|
|
2fdc35569b | ||
|
|
7194037380 | ||
|
|
5284fb8a84 | ||
|
|
986a297242 | ||
|
|
971bb8c2aa | ||
|
|
555f77b463 | ||
|
|
77d36c8423 | ||
|
|
2244b01cac | ||
|
|
d870fc6187 | ||
|
|
d4c7030328 | ||
|
|
21133e0296 | ||
|
|
c6a02f0a6a | ||
|
|
bdf90d984d | ||
|
|
e394d818be | ||
|
|
b30ff29b1a | ||
|
|
795e1b30e3 | ||
|
|
c8508c5978 | ||
|
|
1e50592f2c | ||
|
|
54632c3c7b | ||
|
|
b30fdcd1da | ||
|
|
2b217bfa3c | ||
|
|
4c56832253 | ||
|
|
c0edd5e08b | ||
|
|
7daadbc899 | ||
|
|
bce2d7c000 | ||
|
|
981edc182a | ||
|
|
36b52608b2 | ||
|
|
05139cf828 | ||
|
|
f588200119 | ||
|
|
5f3e2b2961 | ||
|
|
86dff2174a | ||
|
|
ca311f286f | ||
|
|
0525cabdb3 | ||
|
|
47e979a560 | ||
|
|
3f15bb34f8 | ||
|
|
c459ccd6f3 | ||
|
|
6e299917fb | ||
|
|
1ebf2b1163 | ||
|
|
b928840bb5 | ||
|
|
8268b42d84 | ||
|
|
a3e5b22805 | ||
|
|
c45d6aa638 | ||
|
|
e6a7969abc | ||
|
|
d332211244 | ||
|
|
9b19499d6f | ||
|
|
ce2fe0a04b | ||
|
|
b6eed3dd08 | ||
|
|
af2541cf2b | ||
|
|
7e44c37813 | ||
|
|
2fa479aad9 | ||
|
|
ba9211e253 | ||
|
|
33c7018cd0 | ||
|
|
4431319405 | ||
|
|
f7d38f7668 | ||
|
|
25b83de8d4 | ||
|
|
021b2cf26f | ||
|
|
072d66cbf3 | ||
|
|
112ff9541a | ||
|
|
7a3e900507 | ||
|
|
5f75ad1617 | ||
|
|
466fb2bf30 | ||
|
|
ec38b81ee6 | ||
|
|
dc3fb96d28 | ||
|
|
773502c6aa | ||
|
|
9518a46531 | ||
|
|
5335fc3beb | ||
|
|
a068902c9e | ||
|
|
fef39d78f7 | ||
|
|
8a7035a664 | ||
|
|
906bbec918 | ||
|
|
7c6eefd795 | ||
|
|
c82eb78117 | ||
|
|
70a254e157 | ||
|
|
c5abf88839 | ||
|
|
8d7cf8783a | ||
|
|
4d4fb77ead | ||
|
|
249f6e4841 | ||
|
|
8cb5ee086d | ||
|
|
d9d8f677bf | ||
|
|
f81402f518 | ||
|
|
a046e0e864 | ||
|
|
75304e1f98 | ||
|
|
2f422fe3af | ||
|
|
335c3baf65 | ||
|
|
4b2cb255cb | ||
|
|
4014273fb0 | ||
|
|
d45962c3ac | ||
|
|
0fcfb51b7f | ||
|
|
4db4999186 | ||
|
|
a10b228849 | ||
|
|
2d98ce0114 | ||
|
|
0d91781e88 | ||
|
|
5991aa6d4e | ||
|
|
3b40a3afb4 | ||
|
|
e3a2b0c693 | ||
|
|
b36026cdd3 | ||
|
|
153b095eea | ||
|
|
fe109267a1 | ||
|
|
43f7ff9122 | ||
|
|
ca1dea595c | ||
|
|
7e95794324 | ||
|
|
69e114099b | ||
|
|
2449a2e315 | ||
|
|
3fd821fb66 | ||
|
|
cf1414289a | ||
|
|
90db2f5037 | ||
|
|
f90c9af9f4 | ||
|
|
004a86d099 | ||
|
|
5d75d02a47 | ||
|
|
74d69c5d08 | ||
|
|
eab52d5150 | ||
|
|
273f9772ac | ||
|
|
c27be4f38d | ||
|
|
76d1e5e620 | ||
|
|
c7858e025f | ||
|
|
68cd4b6ad8 | ||
|
|
98cbdec9d2 | ||
|
|
5d0f92af4c | ||
|
|
cdc8e666b2 | ||
|
|
f89634a309 | ||
|
|
a9b84c2c7f | ||
|
|
25afb3a2e1 | ||
|
|
1d4da67d11 | ||
|
|
42128dc764 | ||
|
|
af21d42a47 | ||
|
|
a296683dd0 | ||
|
|
a44a20e044 | ||
|
|
a14a562cec | ||
|
|
3371e99c41 | ||
|
|
a2d1e3e2d8 | ||
|
|
2551ea07bf | ||
|
|
dda1939036 | ||
|
|
b160aa1abc | ||
|
|
3836386b2f | ||
|
|
c415103439 | ||
|
|
4599561f2e | ||
|
|
51a50f6ca2 | ||
|
|
2971895216 | ||
|
|
bd21328ab8 | ||
|
|
59fcc7ce40 | ||
|
|
0793169325 | ||
|
|
902f7f0940 | ||
|
|
e3c6402f88 | ||
|
|
66e28a35c6 | ||
|
|
2e9cc2c724 | ||
|
|
ff96d5f3dc | ||
|
|
ca016aee2c | ||
|
|
b4acbe8aab | ||
|
|
4bcc2deb3a | ||
|
|
a13db2dc5e | ||
|
|
4b79073b16 | ||
|
|
24b7ec9464 | ||
|
|
138f93cacd | ||
|
|
98b904561a | ||
|
|
29f9606aaa | ||
|
|
4772831527 | ||
|
|
05e86a8f03 | ||
|
|
6b8edfa4f9 | ||
|
|
e11dec246c | ||
|
|
768e35572b | ||
|
|
f076172c31 | ||
|
|
8b93500cda | ||
|
|
5f92c5e78d | ||
|
|
26456908ab | ||
|
|
10e5c061ae | ||
|
|
76f2f67aca | ||
|
|
733fc197d0 | ||
|
|
98bf92e1ce | ||
|
|
e4a4f54202 | ||
|
|
873cad803f | ||
|
|
4617760b66 | ||
|
|
956b6f4b5a | ||
|
|
6599e9b401 | ||
|
|
41ea84639c | ||
|
|
81e1b9f6ce | ||
|
|
7bdc6e457b | ||
|
|
7b72100085 | ||
|
|
d415cd4062 | ||
|
|
8dcc0805b3 | ||
|
|
d2c115d3c8 | ||
|
|
8691088e65 | ||
|
|
2241edd2e4 | ||
|
|
324c9b99fd | ||
|
|
7e5113f91d | ||
|
|
2c12a65f4e | ||
|
|
070a4902d5 | ||
|
|
30478bcde7 | ||
|
|
ec43be52a1 | ||
|
|
f42772c150 | ||
|
|
f9d59a6885 | ||
|
|
347f5356a1 | ||
|
|
ba400f05b3 | ||
|
|
46f4d0b4e7 | ||
|
|
89a66beaf7 | ||
|
|
b86e060dc2 | ||
|
|
c16675ad4e | ||
|
|
661336d864 | ||
|
|
6d4107b270 | ||
|
|
0bd5b1e851 | ||
|
|
d96ffbe613 | ||
|
|
58b2221151 | ||
|
|
42b3afc244 | ||
|
|
f3a4accdc4 | ||
|
|
b27abe3fac | ||
|
|
7a6dfeda2a | ||
|
|
ab1325e4d3 | ||
|
|
462620e074 | ||
|
|
90e6872eb4 | ||
|
|
d6e90065c2 | ||
|
|
4d74aa5316 | ||
|
|
2bbd06b0c2 | ||
|
|
40b5640381 | ||
|
|
dbac606afc | ||
|
|
dddf722aa3 | ||
|
|
6953f0ee7d | ||
|
|
5314f4b624 | ||
|
|
466ef97ade | ||
|
|
83b7fae8b2 | ||
|
|
f0ad52e219 | ||
|
|
a8418f8f75 | ||
|
|
d25bf44f52 | ||
|
|
9bf213f7fd | ||
|
|
c155c0a390 | ||
|
|
b0f6987cfa | ||
|
|
d9619c9f06 | ||
|
|
b53206634b | ||
|
|
686ae84909 | ||
|
|
07e927e3e4 | ||
|
|
ca535eb08e | ||
|
|
e93f456d07 | ||
|
|
ff4c40bc14 | ||
|
|
0c31234580 | ||
|
|
1d8a172c05 | ||
|
|
fcb82dac5e | ||
|
|
c4329985a0 | ||
|
|
10539ece4e | ||
|
|
f9ae39e624 | ||
|
|
d80838e1e7 | ||
|
|
62baccc55d | ||
|
|
a2b1ffda1f | ||
|
|
49c53ebe6f | ||
|
|
8acf7aa728 | ||
|
|
f0ff005fbb | ||
|
|
3da7acf0df | ||
|
|
035e826e3f | ||
|
|
6a44f04f95 | ||
|
|
889954aaa9 | ||
|
|
8b924362a5 | ||
|
|
377e8b7a34 | ||
|
|
824cd9be8e | ||
|
|
719514645c | ||
|
|
753d429629 | ||
|
|
7b2956b2c8 | ||
|
|
656c5d571a | ||
|
|
b1c262fada | ||
|
|
00170fe52c | ||
|
|
d66acde7ea | ||
|
|
a01add4747 | ||
|
|
9c8bed5eda | ||
|
|
f4d7ba0f1a | ||
|
|
798a26efb9 | ||
|
|
8b718e3b34 | ||
|
|
ec857cb59f | ||
|
|
1e673c4596 | ||
|
|
3c68d02fb0 | ||
|
|
74b7dbb70a | ||
|
|
e1a66fabf7 | ||
|
|
62d3a1070c | ||
|
|
721a88978a | ||
|
|
252b9d2064 | ||
|
|
298518afca | ||
|
|
5222d4c604 | ||
|
|
b6dd920c08 | ||
|
|
d24bd67c0b | ||
|
|
ee95b82895 | ||
|
|
20f3769963 | ||
|
|
726cf61aa3 | ||
|
|
76082e4133 | ||
|
|
f0f40826a8 | ||
|
|
e2e979d3c1 | ||
|
|
ab965e292b | ||
|
|
a92e3e817d | ||
|
|
cab61ec436 | ||
|
|
e02b8e3e68 | ||
|
|
c1bc321f8f | ||
|
|
e22a992db7 | ||
|
|
0261c4742e | ||
|
|
7ca1473a8e | ||
|
|
f2fe8e1ad7 | ||
|
|
0b98045ed5 | ||
|
|
e464296eb5 | ||
|
|
87ba014c7e | ||
|
|
5386ee1740 | ||
|
|
e7ec5f0a82 | ||
|
|
ca789b73fa | ||
|
|
7976679671 | ||
|
|
4cc1ed4be8 | ||
|
|
073eba8e66 | ||
|
|
216e73acfa | ||
|
|
78868da533 | ||
|
|
f73509a942 | ||
|
|
24e48f0022 | ||
|
|
7a490831d5 | ||
|
|
4a79cfd35c | ||
|
|
74a4a08ca0 | ||
|
|
6c93c7af7c | ||
|
|
63c0af441d | ||
|
|
18ffe94b32 | ||
|
|
171af100c9 | ||
|
|
b5cd81ae89 | ||
|
|
eeda291577 | ||
|
|
11f295b908 | ||
|
|
a793bd8aa3 | ||
|
|
9152ba8d82 | ||
|
|
0191c4ac5c | ||
|
|
a5dfc23894 | ||
|
|
7a893def7e | ||
|
|
7f5f493023 | ||
|
|
515930343d | ||
|
|
14aa7d0f60 | ||
|
|
8c24c71bb2 | ||
|
|
9c06550d07 | ||
|
|
2892baef91 | ||
|
|
5f0d828ac3 | ||
|
|
ae473724d7 | ||
|
|
c470a4c37c | ||
|
|
81b7eb618f | ||
|
|
d32b685dc3 | ||
|
|
08abc0dd16 | ||
|
|
39cad7e363 |
34
.github/workflows/check-formatting.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Formatting with black & isort
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install black flake8 isort # Testing packages
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e .[dev]
|
||||
- name: Check code format with black and isort
|
||||
run: |
|
||||
make lint
|
||||
38
.github/workflows/make-docs.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Build documentation with Sphinx
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo sed -i 's/azure\.//' /etc/apt/sources.list # workaround for flaky pandoc install
|
||||
sudo apt-get update # from here https://github.com/actions/virtual-environments/issues/675
|
||||
sudo apt-get install pandoc -o Acquire::Retries=3 # install pandoc
|
||||
python -m pip install --upgrade pip setuptools wheel # update python
|
||||
pip install ipython --upgrade # needed for Github for whatever reason
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e . ".[dev]" # This should install all packages for development
|
||||
pip install jupyter 'ipykernel<5.0.0' 'ipython<7.0.0' # ipykernel workaround: github.com/jupyter/notebook/issues/4050
|
||||
- name: Build docs with Sphinx and check for errors
|
||||
run: |
|
||||
sphinx-build -b html docs docs/_build/html -W
|
||||
31
.github/workflows/publish-to-pypi.yml
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
# This workflows will upload a Python Package using Twine when a release is created
|
||||
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
|
||||
|
||||
name: Upload Python Package to PyPI
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install setuptools wheel twine
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
python setup.py sdist bdist_wheel
|
||||
twine upload dist/*
|
||||
37
.github/workflows/run-pytest.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Test with PyTest
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6, 3.7, 3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install pytest pytest-xdist # Testing packages
|
||||
pip uninstall textattack --yes # Remove TA if it's already installed
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e .[dev]
|
||||
pip freeze
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
pytest tests -vx --dist=loadfile -n auto
|
||||
|
||||
9
.gitignore
vendored
@@ -9,6 +9,10 @@ outputs/
|
||||
|
||||
# IDE files
|
||||
.c9*
|
||||
.idea/
|
||||
|
||||
# Jupyter notebook files
|
||||
.ipynb_checkpoints/
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
@@ -19,9 +23,6 @@ docs/_build/
|
||||
# Files from IDES
|
||||
.*.py
|
||||
|
||||
# CSVs to upload to MTurk
|
||||
*.csv
|
||||
|
||||
# TF Hub modules
|
||||
tensorflow-hub
|
||||
|
||||
@@ -40,3 +41,5 @@ checkpoints/
|
||||
|
||||
# vim
|
||||
*.swp
|
||||
|
||||
.vscode
|
||||
@@ -1,9 +0,0 @@
|
||||
language: python
|
||||
python: '3.8'
|
||||
before_install:
|
||||
- python --version
|
||||
- pip install -U pip
|
||||
- pip install -U pytest
|
||||
install:
|
||||
- pip install -e .
|
||||
script: pytest tests # run tests
|
||||
222
CONTRIBUTING.md
Normal file
@@ -0,0 +1,222 @@
|
||||
# How can I contribute to TextAttack?
|
||||
|
||||
We welcome contributions from all members of the community — and there are lots
|
||||
of ways to help without editing the code! Answering questions, helping others,
|
||||
reaching out and improving the documentation are immensely valuable to the
|
||||
community.
|
||||
|
||||
It also helps us if you spread the word: reference the library from blog posts
|
||||
on the awesome projects it made possible, shout out on Twitter every time it has
|
||||
helped you, or simply star the repo to say "thank you".
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-ez3ts03b-Nr55tDiqgAvCkRbbz8zz9g)!
|
||||
|
||||
## Ways to contribute
|
||||
|
||||
There are lots of ways you can contribute to TextAttack:
|
||||
* Submitting issues on Github to report bugs or make feature requests
|
||||
* Fixing outstanding issues with the existing code
|
||||
* Implementing new features
|
||||
* Adding support for new models and datasets
|
||||
* Contributing to the examples or to the documentation
|
||||
|
||||
*All are equally valuable to the community.*
|
||||
|
||||
## Submitting a new issue or feature request
|
||||
|
||||
Do your best to follow these guidelines when submitting an issue or a feature
|
||||
request. It will make it easier for us to come back to you quickly and with good
|
||||
feedback.
|
||||
|
||||
### Found a bug?
|
||||
|
||||
TextAttack can remain robust and reliable thanks to users who notify us of
|
||||
the problems they encounter. So thank you for [reporting an issue](https://github.com/QData/TextAttack/issues).
|
||||
|
||||
We also have a suite of tests intended to detect bugs before they enter the
|
||||
codebase. That said, they still happen (Turing completeness and all) so it's up
|
||||
to you to report the bugs you find! We would really appreciate it if you could
|
||||
make sure the bug was not already reported (use the search bar on Github under
|
||||
Issues).
|
||||
|
||||
To help us fix your issue quickly, please follow these steps:
|
||||
|
||||
* Include your **OS type and version**, the versions of **Python**, **PyTorch** and
|
||||
**Tensorflow** when applicable;
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s;
|
||||
* Provide the *full* traceback if an exception is raised.
|
||||
|
||||
### Do you want to add your model?
|
||||
|
||||
Awesome! Please provide the following information:
|
||||
|
||||
* Short description of the model and link to the paper;
|
||||
* Link to the implementation if it is open-source;
|
||||
* Link to the model weights if they are available.
|
||||
|
||||
If you are willing to contribute the model yourself, let us know so we can best
|
||||
guide you. We can host your model on our S3 server, but if you trained your
|
||||
model using `transformers`, it's better if you host your model on their
|
||||
[model hub](https://huggingface.co/models).
|
||||
|
||||
### Do you want a new feature: a component, a recipe, or something else?
|
||||
|
||||
A world-class feature request addresses the following points:
|
||||
|
||||
1. Motivation first:
|
||||
* Is it related to a problem/frustration with the library? If so, please explain
|
||||
why. Providing a code snippet that demonstrates the problem is best.
|
||||
* Is it related to something you would need for a project? We'd love to hear
|
||||
about it!
|
||||
* Is it something you worked on and think could benefit the community?
|
||||
Awesome! Tell us what problem it solved for you.
|
||||
2. Write a *full paragraph* describing the feature;
|
||||
3. Provide a **code snippet** that demonstrates its future use;
|
||||
4. In case this is related to a paper, please attach a link;
|
||||
5. Attach any additional information (drawings, screenshots, etc.) you think may help.
|
||||
|
||||
|
||||
## Start contributing! (Pull Requests)
|
||||
|
||||
Before writing code, we strongly advise you to search through the existing PRs or
|
||||
issues to make sure that nobody is already working on the same thing. If you are
|
||||
unsure, it is always a good idea to open an issue to get some feedback.
|
||||
|
||||
You will need basic `git` proficiency to be able to contribute to
|
||||
`textattack`. `git` is not the easiest tool to use but it has the greatest
|
||||
manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
|
||||
Git](https://git-scm.com/book/en/v2) is a very good reference.
|
||||
|
||||
Follow these steps to start contributing:
|
||||
|
||||
1. Fork the [repository](https://github.com/QData/TextAttack) by
|
||||
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
|
||||
under your GitHub user account.
|
||||
|
||||
2. Clone your fork to your local disk, and add the base repository as a remote:
|
||||
|
||||
```bash
|
||||
$ git clone git@github.com:<your Github handle>/TextAttack.git
|
||||
$ cd TextAttack
|
||||
$ git remote add upstream https://github.com/QData/TextAttack
|
||||
```
|
||||
|
||||
3. Create a new branch to hold your development changes:
|
||||
|
||||
```bash
|
||||
$ git checkout -b a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
**do not** work on the `master` branch.
|
||||
|
||||
4. Set up a development environment by running the following commands in a virtual environment:
|
||||
|
||||
|
||||
```bash
|
||||
$ cd TextAttack
|
||||
$ pip install -e . ".[dev]"
|
||||
$ pip install black isort pytest pytest-xdist
|
||||
```
|
||||
|
||||
This will install `textattack` in editable mode and install `black` and
|
||||
`isort`, packages we use for code formatting.
|
||||
|
||||
(If TextAttack was already installed in the virtual environment, remove
|
||||
it with `pip uninstall textattack` before reinstalling it in editable
|
||||
mode with the `-e` flag.)
|
||||
|
||||
5. Develop the features on your branch.
|
||||
|
||||
As you work on the features, you should make sure that the test suite
|
||||
passes:
|
||||
|
||||
```bash
|
||||
$ make test
|
||||
```
|
||||
|
||||
(or just simply `pytest`.)
|
||||
|
||||
> **Tip:** if you're fixing just one or two tests, you can run only the last tests that failed using `pytest --lf`.
|
||||
|
||||
`textattack` relies on `black` and `isort` to format its source code
|
||||
consistently. After you make changes, format them with:
|
||||
|
||||
```bash
|
||||
$ make format
|
||||
```
|
||||
|
||||
You can run quality checks to make sure your code is formatted properly
|
||||
using this command:
|
||||
|
||||
```bash
|
||||
$ make lint
|
||||
```
|
||||
|
||||
Once you're happy with your changes, add changed files using `git add` and
|
||||
make a commit with `git commit` to record your changes locally:
|
||||
|
||||
```bash
|
||||
$ git add modified_file.py
|
||||
$ git commit
|
||||
```
|
||||
|
||||
Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
It is a good idea to sync your copy of the code with the original
|
||||
repository regularly. This way you can quickly account for changes:
|
||||
|
||||
```bash
|
||||
$ git fetch upstream
|
||||
$ git rebase upstream/master
|
||||
```
|
||||
|
||||
Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
$ git push -u origin a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
6. Add documentation.
|
||||
|
||||
Our docs are in the `docs/` folder. Thanks to `sphinx-automodule`, adding
|
||||
documentation for a new code file should just be two lines. Our docs will
|
||||
automatically generate from the comments you added to your code. If you're
|
||||
adding an attack recipe, add a reference in `attack_recipes.rst`.
|
||||
If you're adding a transformation, add a reference in `transformation.rst`, etc.
|
||||
|
||||
You can build the docs and view the updates using `make docs`. If you're
|
||||
adding a tutorial or something where you want to update the docs multiple
|
||||
times, you can run `make docs-auto`. This will run a server using
|
||||
`sphinx-autobuild` that should automatically reload whenever you change
|
||||
a file.
|
||||
|
||||
7. Once you are satisfied (**and the checklist below is happy too**), go to the
|
||||
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
|
||||
to the project maintainers for review.
|
||||
|
||||
8. It's ok if maintainers ask you for changes. It happens to core contributors
|
||||
too! So everyone can see the changes in the Pull request, work in your local
|
||||
branch and push the changes to your fork. They will automatically appear in
|
||||
the pull request.
|
||||
|
||||
|
||||
### Checklist
|
||||
|
||||
1. The title of your pull request should be a summary of its contribution.
|
||||
2. If your pull request addresses an issue, please mention the issue number in
|
||||
the pull request description to make sure they are linked (and people
|
||||
consulting the issue know you are working on it);
|
||||
3. To indicate a work in progress please mark it as a draft on Github.
|
||||
4. Make sure existing tests pass.
|
||||
5. Add relevant tests. No quality testing = no merge.
|
||||
6. All public methods must have informative docstrings that work nicely with sphinx.
|
||||
|
||||
### Tests
|
||||
|
||||
You can run TextAttack tests with `pytest`. Just type `make test`.
|
||||
|
||||
|
||||
#### This guide was heavily inspired by the awesome [transformers guide to contributing](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md)
|
||||
34
Makefile
Normal file
@@ -0,0 +1,34 @@
|
||||
PEP_IGNORE_ERRORS="C901 E501 W503 E203 E231 E266 F403"
|
||||
|
||||
format: FORCE ## Run black and isort (rewriting files)
|
||||
black .
|
||||
isort --atomic tests textattack
|
||||
docformatter --in-place --recursive textattack tests
|
||||
|
||||
lint: FORCE ## Run black, isort, flake8 (in check mode)
|
||||
black . --check
|
||||
isort --check-only tests textattack
|
||||
flake8 . --count --ignore=$(PEP_IGNORE_ERRORS) --show-source --statistics --exclude=./.*,build,dist
|
||||
|
||||
test: FORCE ## Run tests using pytest
|
||||
python -m pytest --dist=loadfile -n auto
|
||||
|
||||
docs: FORCE ## Build docs using Sphinx.
|
||||
sphinx-build -b html docs docs/_build/html
|
||||
|
||||
docs-check: FORCE ## Builds docs using Sphinx. If there is an error, exit with an error code (instead of warning & continuing).
|
||||
sphinx-build -b html docs docs/_build/html -W
|
||||
|
||||
docs-auto: FORCE ## Build docs using Sphinx and run hotreload server using Sphinx autobuild.
|
||||
sphinx-autobuild docs docs/_build/html -H 0.0.0.0 -p 8765
|
||||
|
||||
all: format lint docs-check test ## Format, lint, and test.
|
||||
|
||||
.PHONY: help
|
||||
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
help:
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
FORCE:
|
||||
345
README.md
@@ -1,28 +1,43 @@
|
||||
|
||||
|
||||
<h1 align="center">TextAttack 🐙</h1>
|
||||
|
||||
<p align="center">Generating adversarial examples for NLP models</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://textattack.readthedocs.io/">Docs</a> •
|
||||
<a href="https://textattack.readthedocs.io/">[TextAttack Documentation on ReadTheDocs]</a>
|
||||
<br> <br>
|
||||
<a href="#about">About</a> •
|
||||
<a href="#setup">Setup</a> •
|
||||
<a href="#usage">Usage</a> •
|
||||
<a href="#design">Design</a>
|
||||
<br> <br>
|
||||
<a target="_blank" href="https://travis-ci.org/QData/TextAttack">
|
||||
<img src="https://travis-ci.org/QData/TextAttack.svg?branch=master" alt="Coverage Status">
|
||||
<a target="_blank">
|
||||
<img src="https://github.com/QData/TextAttack/workflows/Github%20PyTest/badge.svg" alt="GitHub Runner Coverage Status">
|
||||
</a>
|
||||
<a href="https://badge.fury.io/py/textattack">
|
||||
<img src="https://badge.fury.io/py/textattack.svg" alt="PyPI version" height="18">
|
||||
</a>
|
||||
|
||||
</p>
|
||||
|
||||
<img src="http://jackxmorris.com/files/textattack.gif" alt="TextAttack Demo GIF" style="display: block; margin: 0 auto;" />
|
||||
|
||||
## About
|
||||
|
||||
TextAttack is a Python framework for running adversarial attacks against NLP models. TextAttack builds attacks from four components: a search method, goal function, transformation, and set of constraints. TextAttack's modular design makes it easily extensible to new NLP tasks, models, and attack strategies. TextAttack currently supports attacks on models trained for classification, entailment, and translation.
|
||||
TextAttack is a Python framework for adversarial attacks, data augmentation, and model training in NLP.
|
||||
|
||||
> If you're looking for information about TextAttack's menagerie of pre-trained models, you might want the [TextAttack Model Zoo](textattack/models/README.md) readme.
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-huomtd9z-KqdHBPPu2rOP~Z8q3~urgg)!
|
||||
|
||||
### *Why TextAttack?*
|
||||
|
||||
There are lots of reasons to use TextAttack:
|
||||
|
||||
1. **Understand NLP models better** by running different adversarial attacks on them and examining the output
|
||||
2. **Research and develop different NLP adversarial attacks** using the TextAttack framework and library of components
|
||||
3. **Augment your dataset** to increase model generalization and robustness downstream
|
||||
4. **Train NLP models** using just a single command (all downloads included!)
|
||||
|
||||
## Setup
|
||||
|
||||
@@ -30,116 +45,346 @@ TextAttack is a Python framework for running adversarial attacks against NLP mod
|
||||
|
||||
You should be running Python 3.6+ to use this package. A CUDA-compatible GPU is optional but will greatly improve code speed. TextAttack is available through pip:
|
||||
|
||||
```
|
||||
```bash
|
||||
pip install textattack
|
||||
```
|
||||
|
||||
### Configuration
|
||||
TextAttack downloads files to `~/.cache/textattack/` by default. This includes pretrained models,
|
||||
dataset samples, and the configuration file `config.yaml`. To change the cache path, set the
|
||||
environment variable `TA_CACHE_DIR`.
|
||||
Once TextAttack is installed, you can run it via command-line (`textattack ...`)
|
||||
or via python module (`python -m textattack ...`).
|
||||
|
||||
> **Tip**: TextAttack downloads files to `~/.cache/textattack/` by default. This includes pretrained models,
|
||||
> dataset samples, and the configuration file `config.yaml`. To change the cache path, set the
|
||||
> environment variable `TA_CACHE_DIR`. (for example: `TA_CACHE_DIR=/tmp/ textattack attack ...`).
|
||||
|
||||
## Usage
|
||||
|
||||
TextAttack's main features can all be accessed via the `textattack` command. Two very
|
||||
common commands are `textattack attack <args>`, and `textattack augment <args>`. You can see more
|
||||
information about all commands using
|
||||
```bash
|
||||
textattack --help
|
||||
```
|
||||
or a specific command using, for example,
|
||||
```bash
|
||||
textattack attack --help
|
||||
```
|
||||
|
||||
The [`examples/`](examples/) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file. The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.
|
||||
|
||||
### Running Attacks
|
||||
|
||||
The [`examples/`](docs/examples/) folder contains notebooks walking through examples of basic usage of TextAttack, including building a custom transformation and a custom constraint. These examples can also be viewed through the [documentation website](https://textattack.readthedocs.io/en/latest).
|
||||
The easiest way to try out an attack is via the command-line interface, `textattack attack`.
|
||||
|
||||
We also have a command-line interface for running attacks. See help info and list of arguments with `python -m textattack --help`.
|
||||
> **Tip:** If your machine has multiple GPUs, you can distribute the attack across them using the `--parallel` option. For some attacks, this can really help performance.
|
||||
|
||||
### Attack Recipes
|
||||
Here are some concrete examples:
|
||||
|
||||
We include attack recipes which build an attack such that only one command line argument has to be passed. To run an attack recipe, run `python -m textattack --recipe [recipe_name]`
|
||||
*TextFooler on BERT trained on the MR sentiment classification dataset*:
|
||||
```bash
|
||||
textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100
|
||||
```
|
||||
|
||||
The first are for classification and entailment attacks:
|
||||
- **textfooler**: Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932)).
|
||||
*DeepWordBug on DistilBERT trained on the Quora Question Pairs paraphrase identification dataset*:
|
||||
```bash
|
||||
textattack attack --model distilbert-base-uncased-qqp --recipe deepwordbug --num-examples 100
|
||||
```
|
||||
|
||||
*Beam search with beam width 4 and word embedding transformation and untargeted goal function on an LSTM*:
|
||||
```bash
|
||||
textattack attack --model lstm-mr --num-examples 20 \
|
||||
--search-method beam-search^beam_width=4 --transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
|
||||
> **Tip:** Instead of specifying a dataset and number of examples, you can pass `--interactive` to attack samples inputted by the user.
|
||||
|
||||
### Attacks and Papers Implemented ("Attack Recipes")
|
||||
|
||||
We include attack recipes which implement attacks from the literature. You can list attack recipes using `textattack list attack-recipes`.
|
||||
|
||||
To run an attack recipe: `textattack attack --recipe [recipe_name]`
|
||||
|
||||
Attacks on classification tasks, like sentiment classification and entailment:
|
||||
- **alzantot**: Genetic algorithm attack from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998)).
|
||||
- **tf-adjusted**: TextFooler attack with constraint thresholds adjusted based on human evaluation and grammaticality enforced.
|
||||
- **alz-adjusted**: Alzantot's attack adjusted to follow the same constraints as tf-adjusted such that the only difference is the search method.
|
||||
- **deepwordbug**: Replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)).
|
||||
- **bae**: BERT masked language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)).
|
||||
- **bert-attack**: BERT masked language model transformation attack with subword replacements (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984)).
|
||||
- **checklist**: Invariance testing implemented in CheckList that contracts, extends, and substitutes named entities. (["Beyond Accuracy: Behavioral
|
||||
Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118)).
|
||||
- **clare (*coming soon*)**: Greedy attack with word swap, insertion, and merge transformations using RoBERTa masked language model. (["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502)).
|
||||
- **faster-alzantot**: modified, faster version of the Alzantot et al. genetic algorithm, from (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986)).
|
||||
- **deepwordbug**: Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)).
|
||||
- **hotflip**: Beam search and gradient-based word swap (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751)).
|
||||
- **iga**: Improved genetic algorithm attack from (["Natural Language Adversarial Attacks and Defenses in Word Level (Wang et al., 2019)"](https://arxiv.org/abs/1909.06723)).
|
||||
- **input-reduction**: Reducing the input while maintaining the prediction through word importance ranking (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf)).
|
||||
- **kuleshov**: Greedy search and counterfitted embedding swap (["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)).
|
||||
- **pruthi**: Character-based attack that simulates common typos (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268)).
|
||||
- **pso**: Particle swarm optimization and HowNet synonym swap (["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)).
|
||||
- **pwws**: Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/)).
|
||||
- **textbugger**: Greedy attack with word importance ranking and a combination of synonym and character-based swaps (["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271)).
|
||||
- **textfooler**: Greedy attack with word importance ranking and counter-fitted embedding swap (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932)).
|
||||
|
||||
The final is for translation attacks:
|
||||
Attacks on sequence-to-sequence models:
|
||||
- **morpheus**: Greedy attack that replaces words with their inflections with the goal of minimizing BLEU score (["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf)).
|
||||
- **seq2sick**: Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)).
|
||||
|
||||
The following table illustrates the comparison of the attack models.
|
||||
|
||||
Attacks on classification tasks
|
||||
| Attack Recipe(s) | Accessibility | Perturbation | Main Idea |
|
||||
| :-------------------------------: | :-------------: | :----------: | :-----------------------------------------------------------------------------------------------: |
|
||||
| Alzantot Genetic Algorithm | Score | Word | Genetic algorithm-based word substitution |
|
||||
| BAE* | Score | Word | BERT masked language model transformation attack |
|
||||
| Faster Alzantot Genetic Algorithm | Score | Word | Genetic algorithm-based word substitution (faster version) |
|
||||
| Improved Genetic Algorithm | Score | Word | Improved genetic algorithm-based word substitution |
|
||||
| Input Reduction* | Gradient | Word | Reducing the input while maintaining the prediction through word importance ranking |
|
||||
| Kuleshov | Score | Word | Greedy search and counterfitted embedding swap |
|
||||
| Particle Swarm Optimization | Score | Word | Particle Swarm Optimization-based word substitution |
|
||||
| TextFooler | Score | Word | Greedy attack with word importance ranking and counter-fitted embedding swap |
|
||||
| PWWS | Score | Word | Greedy attack with word importance ranking based on word saliency and synonym swap scores |
|
||||
| TextBugger | Gradient, Score | Word+Char | Greedy attack with word importance ranking and a combination of synonym and character-based swaps |
|
||||
| HotFlip | Gradient | Word, Char | Beam search and gradient-based word swap |
|
||||
| BERT-Attack* | Score | Word, Char | BERT masked language model transformation attack with subword replacements |
|
||||
| CheckList* | Score | Word, Char | Invariance testing that contracts, extends, and substitutes named entities. |
|
||||
| DeepWordBug | Score | Char | Greedy replace-1 scoring and multi-transformation character-swap attack |
|
||||
| pruthi | Score | Char | Character-based attack that simulates common typos |
|
||||
|
||||
Attacks on sequence-to-sequence models:
|
||||
| Attack Recipe(s) | Accessibility | Perturbation | Main Idea |
|
||||
| :-------------------------------: | :-------------: | :----------: | :-----------------------------------------------------------------------------------------------: |
|
||||
| Seq2Sick | Score | Word | Greedy attack with goal of changing every word in the output translation. |
|
||||
| MORPHEUS | Score | Word | Greedy attack that replaces words with their inflections with the goal of minimizing BLEU score |
|
||||
|
||||
|
||||
#### Recipe Usage Examples
|
||||
|
||||
Here are some examples of testing attacks from the literature from the command-line:
|
||||
|
||||
*TextFooler against BERT fine-tuned on SST-2:*
|
||||
```bash
|
||||
textattack attack --model bert-base-uncased-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
*seq2sick (black-box) against T5 fine-tuned for English-German translation:*
|
||||
```bash
|
||||
textattack attack --model t5-en-de --recipe seq2sick --num-examples 100
|
||||
```
|
||||
|
||||
### Augmenting Text
|
||||
|
||||
Many of the components of TextAttack are useful for data augmentation. The `textattack.Augmenter` class
|
||||
uses a transformation and a list of constraints to augment data. We also offer three built-in recipes
|
||||
uses a transformation and a list of constraints to augment data. We also offer five built-in recipes
|
||||
for data augmentation:
|
||||
- `textattack.WordNetAugmenter` augments text by replacing words with WordNet synonyms
|
||||
- `textattack.EmbeddingAugmenter` augments text by replacing words with neighbors in the counter-fitted embedding space, with a constraint to ensure their cosine similarity is at least 0.8
|
||||
- `textattack.CharSwapAugmenter` augments text by substituting, deleting, inserting, and swapping adjacent characters
|
||||
- `textattack.EasyDataAugmenter` augments text with a combination of word insertions, substitutions and deletions.
|
||||
- `textattack.CheckListAugmenter` augments text by contraction/extension and by substituting names, locations, numbers.
|
||||
|
||||
All `Augmenter` objects implement `augment` and `augment_many` to generate augmentations
|
||||
of a string or a list of strings. Here's an example of how to use the `EmbeddingAugmenter`:
|
||||
#### Augmentation Command-Line Interface
|
||||
The easiest way to use our data augmentation tools is with `textattack augment <args>`. `textattack augment`
|
||||
takes an input CSV file and text column to augment, along with the number of words to change per augmentation
|
||||
and the number of augmentations per input example. It outputs a CSV in the same format with all the augmentation
|
||||
examples corresponding to the proper columns.
|
||||
|
||||
For example, given the following as `examples.csv`:
|
||||
|
||||
```csv
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
The command `textattack augment --csv examples.csv --input-column text --recipe embedding --pct-words-to-swap .1 --transformations-per-example 2 --exclude-original`
|
||||
will augment the `text` column by altering 10% of each example's words, generating twice as many augmentations as original inputs, and exclude the original inputs from the
|
||||
output CSV. (All of this will be saved to `augment.csv` by default.)
|
||||
|
||||
After augmentation, here are the contents of `augment.csv`:
|
||||
```csv
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's newest conan and that he's gonna to make a splashing even stronger than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the rock is destined to be the 21tk century's novel conan and that he's going to make a splat even greater than arnold schwarzenegger , jean- claud van damme or stevens segal.",1
|
||||
the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of expression significant adequately describe co-writer/director pedro jackson's expanded vision of j . rs . r . tolkien's middle-earth .,1
|
||||
the gorgeously elaborate continuation of 'the lordy of the piercings' trilogy is so huge that a column of mots cannot adequately describe co-novelist/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offerings a pleasantly several slice of asia cinema .,1
|
||||
taking care of my cat offers a pleasantly different slice of asiatic kino .,1
|
||||
a technically good-made suspenser . . . but its abrupt drop in iq points as it races to the finish bloodline proves straightforward too disheartening to let slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq dot as it races to the finish line demonstrates simply too disheartening to leave slide .,0
|
||||
it's a enigma how the film wo be releases in this condition .,0
|
||||
it's a enigma how the filmmaking wo be publicized in this condition .,0
|
||||
```
|
||||
|
||||
The 'embedding' augmentation recipe uses counterfitted embedding nearest-neighbors to augment data.
|
||||
|
||||
#### Augmentation Python Interface
|
||||
In addition to the command-line interface, you can augment text dynamically by importing the
|
||||
`Augmenter` in your own code. All `Augmenter` objects implement `augment` and `augment_many` to generate augmentations
|
||||
of a string or a list of strings. Here's an example of how to use the `EmbeddingAugmenter` in a python script:
|
||||
|
||||
```python
|
||||
>>> from textattack.augmentation import EmbeddingAugmenter
|
||||
>>> augmenter = EmbeddingAugmenter()
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I notable create, I do not understand.', 'What I significant create, I do not understand.', 'What I cannot engender, I do not understand.', 'What I cannot creating, I do not understand.', 'What I cannot creations, I do not understand.', 'What I cannot create, I do not comprehend.', 'What I cannot create, I do not fathom.', 'What I cannot create, I do not understanding.', 'What I cannot create, I do not understands.', 'What I cannot create, I do not understood.', 'What I cannot create, I do not realise.']
|
||||
```
|
||||
You can also create your own augmenter from scratch by importing transformations/constraints from `textattack.transformations` and `textattack.constraints`. Here's an example that generates augmentations of a string using `WordSwapRandomCharacterDeletion`:
|
||||
|
||||
```python
|
||||
>>> from textattack.transformations import WordSwapRandomCharacterDeletion
|
||||
>>> from textattack.transformations import CompositeTransformation
|
||||
>>> from textattack.augmentation import Augmenter
|
||||
>>> transformation = CompositeTransformation([WordSwapRandomCharacterDeletion()])
|
||||
>>> augmenter = Augmenter(transformation=transformation, transformations_per_example=5)
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I cannot creae, I do not understand.', 'What I cannot creat, I do not understand.', 'What I cannot create, I do not nderstand.', 'What I cannot create, I do nt understand.', 'Wht I cannot create, I do not understand.']
|
||||
```
|
||||
|
||||
### Training Models
|
||||
|
||||
Our model training code is available via `textattack train` to help you train LSTMs,
|
||||
CNNs, and `transformers` models using TextAttack out-of-the-box. Datasets are
|
||||
automatically loaded using the `datasets` package.
|
||||
|
||||
#### Training Examples
|
||||
*Train our default LSTM for 50 epochs on the Yelp Polarity dataset:*
|
||||
```bash
|
||||
textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 50 --learning-rate 1e-5
|
||||
```
|
||||
|
||||
The training process has data augmentation built-in:
|
||||
```bash
|
||||
textattack train --model lstm --dataset rotten_tomatoes --augment eda --pct-words-to-swap .1 --transformations-per-example 4
|
||||
```
|
||||
This uses the `EasyDataAugmenter` recipe to augment the `rotten_tomatoes` dataset before training.
|
||||
|
||||
*Fine-Tune `bert-base` on the `CoLA` dataset for 5 epochs*:
|
||||
```bash
|
||||
textattack train --model bert-base-uncased --dataset glue^cola --batch-size 32 --epochs 5
|
||||
```
|
||||
|
||||
### `textattack peek-dataset`
|
||||
|
||||
To take a closer look at a dataset, use `textattack peek-dataset`. TextAttack will print some cursory statistics about the inputs and outputs from the dataset. For example, `textattack peek-dataset --dataset-from-huggingface snli` will show information about the SNLI dataset from the NLP package.
|
||||
|
||||
|
||||
### `textattack list`
|
||||
|
||||
There are lots of pieces in TextAttack, and it can be difficult to keep track of all of them. You can use `textattack list` to list components, for example, pretrained models (`textattack list models`) or available search methods (`textattack list search-methods`).
|
||||
|
||||
## Design
|
||||
|
||||
### TokenizedText
|
||||
### AttackedText
|
||||
|
||||
To allow for word replacement after a sequence has been tokenized, we include a `TokenizedText` object which maintains both a list of tokens and the original text, with punctuation. We use this object in favor of a list of words or just raw text.
|
||||
To allow for word replacement after a sequence has been tokenized, we include an `AttackedText` object
|
||||
which maintains both a list of tokens and the original text, with punctuation. We use this object in favor of a list of words or just raw text.
|
||||
|
||||
### Models and Datasets
|
||||
|
||||
TextAttack is model-agnostic! Anything that overrides `__call__`, takes in `TokenizedText`, and correctly formats output works. However, TextAttack provides pre-trained models and samples for the following datasets:
|
||||
TextAttack is model-agnostic! You can use `TextAttack` to analyze any model that outputs IDs, tensors, or strings.
|
||||
|
||||
#### Classification:
|
||||
* AG News dataset topic classification
|
||||
* IMDB dataset sentiment classification
|
||||
* Movie Review dataset sentiment classification
|
||||
* Yelp dataset sentiment classification
|
||||
#### Built-in Models
|
||||
|
||||
#### Entailment:
|
||||
* SNLI dataset
|
||||
* MNLI dataset (matched & unmatched)
|
||||
TextAttack also comes built-in with models and datasets. Our command-line interface will automatically match the correct
|
||||
dataset to the correct model. We include various pre-trained models for each of the nine [GLUE](https://gluebenchmark.com/)
|
||||
tasks, as well as some common datasets for classification, translation, and summarization.
|
||||
|
||||
#### Translation:
|
||||
* newstest2013 English to German dataset
|
||||
A list of available pretrained models and their validation accuracies is available at
|
||||
[textattack/models/README.md](textattack/models/README.md). You can also view a full list of provided models
|
||||
& datasets via `textattack attack --help`.
|
||||
|
||||
Here's an example of using one of the built-in models (the SST-2 dataset is automatically loaded):
|
||||
|
||||
```bash
|
||||
textattack attack --model roberta-base-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
#### HuggingFace support: `transformers` models and `datasets` datasets
|
||||
|
||||
We also provide built-in support for [`transformers` pretrained models](https://huggingface.co/models)
|
||||
and datasets from the [`datasets` package](https://github.com/huggingface/datasets)! Here's an example of loading
|
||||
and attacking a pre-trained model and dataset:
|
||||
|
||||
```bash
|
||||
textattack attack --model-from-huggingface distilbert-base-uncased-finetuned-sst-2-english --dataset-from-huggingface glue^sst2 --recipe deepwordbug --num-examples 10
|
||||
```
|
||||
|
||||
You can explore other pre-trained models using the `--model-from-huggingface` argument, or other datasets by changing
|
||||
`--dataset-from-huggingface`.
|
||||
|
||||
|
||||
#### Loading a model or dataset from a file
|
||||
|
||||
You can easily try out an attack on a local model or dataset sample. To attack a pre-trained model,
|
||||
create a short file that loads them as variables `model` and `tokenizer`. The `tokenizer` must
|
||||
be able to transform string inputs to lists or tensors of IDs using a method called `encode()`. The
|
||||
model must take inputs via the `__call__` method.
|
||||
|
||||
##### Model from a file
|
||||
To experiment with a model you've trained, you could create the following file
|
||||
and name it `my_model.py`:
|
||||
|
||||
```python
|
||||
model = load_your_model_with_custom_code() # replace this line with your model loading code
|
||||
tokenizer = load_your_tokenizer_with_custom_code() # replace this line with your tokenizer loading code
|
||||
```
|
||||
|
||||
Then, run an attack with the argument `--model-from-file my_model.py`. The model and tokenizer will be loaded automatically.
|
||||
|
||||
#### Dataset from a file
|
||||
|
||||
Loading a dataset from a file is very similar to loading a model from a file. A 'dataset' is any iterable of `(input, output)` pairs.
|
||||
The following example would load a sentiment classification dataset from file `my_dataset.py`:
|
||||
|
||||
```python
|
||||
dataset = [('Today was....', 1), ('This movie is...', 0), ...]
|
||||
```
|
||||
|
||||
You can then run attacks on samples from this dataset by adding the argument `--dataset-from-file my_dataset.py`.
|
||||
|
||||
### Attacks
|
||||
|
||||
The `attack_one` method in an `Attack` takes as input a `TokenizedText`, and outputs either a `SuccessfulAttackResult` if it succeeds or a `FailedAttackResult` if it fails. We formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations.
|
||||
The `attack_one` method in an `Attack` takes as input an `AttackedText`, and outputs either a `SuccessfulAttackResult` if it succeeds or a `FailedAttackResult` if it fails. We formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations.
|
||||
|
||||
### Goal Functions
|
||||
|
||||
A `GoalFunction` takes as input a `TokenizedText` object and the ground truth output, and determines whether the attack has succeeded, returning a `GoalFunctionResult`.
|
||||
A `GoalFunction` takes as input an `AttackedText` object, scores it, and determines whether the attack has succeeded, returning a `GoalFunctionResult`.
|
||||
|
||||
### Constraints
|
||||
|
||||
A `Constraint` takes as input an original `TokenizedText`, and a list of transformed `TokenizedText`s. For each transformed option, it returns a boolean representing whether the constraint is met.
|
||||
A `Constraint` takes as input a current `AttackedText`, and a list of transformed `AttackedText`s. For each transformed option, it returns a boolean representing whether the constraint is met.
|
||||
|
||||
### Transformations
|
||||
|
||||
A `Transformation` takes as input a `TokenizedText` and returns a list of possible transformed `TokenizedText`s. For example, a transformation might return all possible synonym replacements.
|
||||
A `Transformation` takes as input an `AttackedText` and returns a list of possible transformed `AttackedText`s. For example, a transformation might return all possible synonym replacements.
|
||||
|
||||
### Search Methods
|
||||
|
||||
A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a final `GoalFunctionResult`. The search is given access to the `get_transformations` function, which takes as input a `TokenizedText` object and outputs a list of possible transformations filtered by meeting all of the attack’s constraints. A search consists of successive calls to `get_transformations` until the search succeeds (determined using `get_goal_results`) or is exhausted.
|
||||
A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a final `GoalFunctionResult`. The search is given access to the `get_transformations` function, which takes as input an `AttackedText` object and outputs a list of possible transformations filtered by meeting all of the attack’s constraints. A search consists of successive calls to `get_transformations` until the search succeeds (determined using `get_goal_results`) or is exhausted.
|
||||
|
||||
|
||||
## Contributing to TextAttack
|
||||
|
||||
We welcome contributions and suggestions! Submit a pull request or issue and we will do our best to respond in a timely manner.
|
||||
We welcome suggestions and contributions! Submit an issue or pull request and we will do our best to respond in a timely manner. TextAttack is currently in an "alpha" stage in which we are working to improve its capabilities and design.
|
||||
|
||||
See [CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md) for detailed information on contributing.
|
||||
|
||||
## Citing TextAttack
|
||||
|
||||
If you use TextAttack for your research, please cite [TextAttack: A Framework for Adversarial Attacks in Natural Language Processing](https://arxiv.org/abs/2005.05909).
|
||||
If you use TextAttack for your research, please cite [TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP](https://arxiv.org/abs/2005.05909).
|
||||
|
||||
```bibtex
|
||||
@misc{Morris2020TextAttack,
|
||||
Author = {John X. Morris and Eli Lifland and Jin Yong Yoo and Yanjun Qi},
|
||||
Title = {TextAttack: A Framework for Adversarial Attacks in Natural Language Processing},
|
||||
Year = {2020},
|
||||
Eprint = {arXiv:2005.05909},
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP},
|
||||
author={John X. Morris and Eli Lifland and Jin Yong Yoo and Jake Grigsby and Di Jin and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2005.05909},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
44
docs/1start/api-design-tips.md
Normal file
@@ -0,0 +1,44 @@
|
||||
Lessons learned in designing TextAttack
|
||||
=========================================
|
||||
|
||||
|
||||
*This documentation page was adapted from [Our Workshop Paper in EMNLP 2nd Workshop for Natural Language Processing Open Source Software (NLP-OSS)](https://arxiv.org/abs/2010.01724).*
|
||||
|
||||
|
||||
TextAttack is an open-source Python toolkit for adversarial attacks, adversarial training, and data augmentation in NLP. TextAttack unites 15+ papers from the NLP adversarial attack literature into a single shared framework, with many components reused across attacks. This framework allows both researchers and developers to test and study the weaknesses of their NLP models.
|
||||
|
||||
## Challenges in Design
|
||||
|
||||
|
||||
One of the challenges for building such tools is that the tool should be flexible enough to work with many different deep learning frameworks (e.g. PyTorch, Tensorflow, Scikit-learn). Also, the tool should be able to work with datasets from various sources and in various formats. Lastly, the tool needs to be compatible with different hardware setups.
|
||||
|
||||
|
||||
## Our design tips
|
||||
|
||||
We provide the following broad advice to help other future developers create user-friendly NLP libraries in Python:
|
||||
- To become model-agnostic, implement a model wrapper class: a model is anything that takes string input(s) and returns a prediction.
|
||||
- To become model-agnostic, implement a model wrapper class.
|
||||
- To become data-agnostic, take dataset inputs as (input, output) pairs, where each model input is represented as an OrderedDict.
|
||||
- Do not plan for inputs (tensors, lists, etc.) to be a certain size or shape unless explicitly necessary.
|
||||
- Centralize common text operations, like parsing and string-level operations, in one class.
|
||||
- Whenever possible, cache repeated computations, including model inferences.
|
||||
- If your program runs on a single GPU, but your system contains $N$ GPUs, you can obtain a performance boost proportional to $N$ through parallelism.
|
||||
- Dynamically choose between devices. (Do not require a GPU or TPU if one is not necessary.)
|
||||
|
||||
|
||||
Our modular and extendable design allows us to reuse many components to offer 15+ different adversarial attack methods proposed by literature. Our model-agnostic and dataset-agnostic design allows users to easily run adversarial attacks against their own models built using any deep learning framework. We hope that our lessons from developing TextAttack will help others create user-friendly open-source NLP libraries.
|
||||
|
||||
|
||||
## More Details in Reference
|
||||
|
||||
```
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: Lessons learned in designing Python frameworks for NLP},
|
||||
author={John X. Morris and Jin Yong Yoo and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2010.01724},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.SE}
|
||||
}
|
||||
```
|
||||
|
||||
63
docs/1start/basic-Intro.rst
Normal file
@@ -0,0 +1,63 @@
|
||||
TextAttack Basic Functions
|
||||
===========================
|
||||
|
||||
Welcome to the documentation for TextAttack!
|
||||
|
||||
What is TextAttack?
|
||||
----------------------
|
||||
`TextAttack <https://github.com/QData/TextAttack>`__ is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP.
|
||||
|
||||
TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation.
|
||||
|
||||
TextAttack provides components for common NLP tasks like sentence encoding, grammar-checking, and word replacement that can be used on their own.
|
||||
|
||||
Where should I start?
|
||||
----------------------
|
||||
|
||||
This is a great question, and one we get a lot. First of all, almost everything in TextAttack can be done in two ways: via the command-line or via the Python API. If you're looking to integrate TextAttack into an existing project, the Python API is likely for you. If you'd prefer to use built-in functionality end-to-end (training a model, running an adversarial attack, augmenting a CSV) then you can just use the command-line API.
|
||||
|
||||
|
||||
|
||||
|
||||
For future developers, visit the :ref:`Installation <installation>` page for more details about installing TextAttack onto your own computer. To start making contributions, read the detailed instructions `here <https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md>`__.
|
||||
|
||||
TextAttack does three things very well:
|
||||
|
||||
1. Adversarial attacks (Python: ``textattack.shared.Attack``, Bash: ``textattack attack``)
|
||||
2. Data augmentation (Python: ``textattack.augmentation.Augmenter``, Bash: ``textattack augment``)
|
||||
3. Model training (Python: ``textattack.commands.train.*``, Bash: ``textattack train``)
|
||||
|
||||
Adversarial training can be achieved as a combination of [1] and/or [2] with [3] (via ``textattack train --attack``). To see all this in action, see :ref:`the TextAttack End-to-End tutorial </2notebook/0_End_to_End.ipynb>`.
|
||||
|
||||
All of the other components: datasets, models & model wrappers, loggers, transformations, constraints, search methods, goal functions, etc., are developed to support one or more of these three functions. Feel free though to install textattack to include just one of those components! (For example, TextAttack provides a really easy Python interface for accessing and using word embeddings that will automatically download and save them on the first use.)
|
||||
|
||||
|
||||
NLP Attacks
|
||||
-----------
|
||||
|
||||
TextAttack provides a framework for constructing and thinking about generating inputs in NLP via perturbation attacks.
|
||||
|
||||
|
||||
TextAttack builds attacks from four components:
|
||||
|
||||
|
||||
|
||||
- :ref:`Goal Functions <goal_function>`: stipulate the goal of the attack, like to change the prediction score of a classification model, or to change all of the words in a translation output.
|
||||
- :ref:`Constraints <constraint>`: determine if a potential perturbation is valid with respect to the original input.
|
||||
- :ref:`Transformations <transformations>`: take a text input and transform it by inserting and deleting characters, words, and/or phrases.
|
||||
- :ref:`Search Methods <search_methods>`: explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
|
||||
|
||||
|
||||
TextAttack provides a set of :ref:`Attack Recipes <attack_recipes>` that assemble attacks from the literature from these four components. Take a look at these recipes (or our `paper on ArXiv <https://arxiv.org/abs/2005.05909>`__) to get a feel for how the four components work together to create an adversarial attack.
|
||||
|
||||
Data Augmentation
|
||||
--------------------
|
||||
Data augmentation is easy and extremely common in computer vision but harder and less common in NLP. We provide a :ref:`Data Augmentation <augmentation>` module using transformations and constraints.
|
||||
|
||||
Features
|
||||
------------
|
||||
TextAttack has some other features that make it a pleasure to use:
|
||||
|
||||
- :ref:`Pre-trained Models <models>` for testing attacks and evaluating constraints
|
||||
- :ref:`Visualization options <loggers>` like Weights & Biases and Visdom
|
||||
- :ref:`AttackedText <attacked_text>`, a utility class for strings that includes tools for tokenizing and editing text
|
||||
41
docs/1start/benchmark-search.md
Normal file
@@ -0,0 +1,41 @@
|
||||
|
||||
Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
=========================================================================
|
||||
|
||||
|
||||
*This documentation page was adapted from Our Paper in [EMNLP BlackNLP](https://arxiv.org/abs/2009.06368).*
|
||||
|
||||
|
||||
### Title: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
|
||||
|
||||
- Abstract: We study the behavior of several black-box search algorithms used for generating adversarial examples for natural language processing (NLP) tasks. We perform a fine-grained analysis of three elements relevant to search: search algorithm, search space, and search budget. When new search methods are proposed in past work, the attack search space is often modified alongside the search method. Without ablation studies benchmarking the search algorithm change with the search space held constant, an increase in attack success rate could result from an improved search method or a less restrictive search space. Additionally, many previous studies fail to properly consider the search algorithms' run-time cost, which is essential for downstream tasks like adversarial training. Our experiments provide a reproducible benchmark of search algorithms across a variety of search spaces and query budgets to guide future research in adversarial NLP. Based on our experiments, we recommend greedy attacks with word importance ranking when under a time constraint or attacking long inputs, and either beam search or particle swarm optimization otherwise.
|
||||
|
||||
|
||||
### Citations:
|
||||
```
|
||||
@misc{yoo2020searching,
|
||||
title={Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples},
|
||||
author={Jin Yong Yoo and John X. Morris and Eli Lifland and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2009.06368},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Our benchmarking results on comparing search methods used in the past attacks.
|
||||
|
||||
|
||||

|
||||
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
|
||||

|
||||
|
||||
136
docs/1start/command_line_usage.md
Normal file
@@ -0,0 +1,136 @@
|
||||
Command-Line Usage
|
||||
=======================================
|
||||
|
||||
The easiest way to use textattack is from the command-line. Installing textattack
|
||||
will provide you with the handy `textattack` command which will allow you to do
|
||||
just about anything TextAttack offers in a single bash command.
|
||||
|
||||
> *Tip*: If you are for some reason unable to use the `textattack` command, you
|
||||
> can access all the same functionality by prepending `python -m` to the command
|
||||
> (`python -m textattack ...`).
|
||||
|
||||
To see all available commands, type `textattack --help`. This page explains
|
||||
some of the most important functionalities of textattack: NLP data augmentation,
|
||||
adversarial attacks, and training and evaluating models.
|
||||
|
||||
## Data Augmentation with `textattack augment`
|
||||
|
||||
The easiest way to use our data augmentation tools is with `textattack augment <args>`. `textattack augment`
|
||||
takes an input CSV file and text column to augment, along with the percentage of words to change per augmentation
|
||||
and the number of augmentations per input example. It outputs a CSV in the same format with all the augmentation
|
||||
examples corresponding to the proper columns.
|
||||
|
||||
For example, given the following as `examples.csv`:
|
||||
|
||||
```
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
The command:
|
||||
```
|
||||
textattack augment --csv examples.csv --input-column text --recipe eda --pct-words-to-swap .1 \
|
||||
--transformations-per-example 2 --exclude-original
|
||||
```
|
||||
will augment the `text` column with 10% of words edited per augmentation, twice as many augmentations as original inputs, and exclude the original inputs from the
|
||||
output CSV. (All of this will be saved to `augment.csv` by default.)
|
||||
|
||||
After augmentation, here are the contents of `augment.csv`:
|
||||
```
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's new conan and that he's to make splash even greater arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the Arnold rock is destined to be the 21st vanguard century's new specify conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
the gorgeously continuation of 'the lord of the rings' trilogy is so huge that a column of cannot adequately describe co-writer/ peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
the splendidly elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of parole cannot adequately describe co-writer/director peter jackson's expanded vision of J . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offers a refreshingly slice different of asian cinema .,1
|
||||
take care of my cast offers a refreshingly different slice of asian cinema .,1
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to rush let IT slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves just too discouraging to let chute .,0
|
||||
it's a mystery how the movie could this released in be condition .,0
|
||||
it's a whodunit how the movie could be released in this condition .,0
|
||||
```
|
||||
|
||||
The 'eda' augmentation uses a combination of word swaps, insertions, and substitutions to generate new examples.
|
||||
|
||||
## Adversarial Attacks with `textattack attack`
|
||||
|
||||
The heart of textattack is running adversarial attacks on NLP models with
|
||||
`textattack attack`. You can build an attack from the command-line in several ways:
|
||||
1. Use an **attack recipe** to launch an attack from the literature: `textattack attack --recipe deepwordbug`
|
||||
2. Build your attack from components:
|
||||
```
|
||||
textattack attack --model lstm-mr --num-examples 20 --search-method beam-search^beam_width=4 \
|
||||
--transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
3. Create a python file that builds your attack and load it: `textattack attack --attack-from-file my_file.py^my_attack_name`
|
||||
|
||||
## Training Models with `textattack train`
|
||||
|
||||
With textattack, you can train models on any classification or regression task
|
||||
from [`datasets`](https://github.com/huggingface/datasets/) using a single line.
|
||||
|
||||
### Available Models
|
||||
#### TextAttack Models
|
||||
TextAttack has two built-in model types, a 1-layer bidirectional LSTM with a hidden
|
||||
state size of 150 (`lstm`), and a WordCNN with 3 window sizes
|
||||
(3, 4, 5) and 100 filters for the window size (`cnn`). Both models set dropout
|
||||
to 0.3 and use a base of the 200-dimensional GloVe embeddings.
|
||||
|
||||
#### `transformers` Models
|
||||
Along with the `lstm` and `cnn`, you can theoretically fine-tune any model based
|
||||
in the huggingface [transformers](https://github.com/huggingface/transformers/)
|
||||
repo. Just type the model name (like `bert-base-cased`) and it will be automatically
|
||||
loaded.
|
||||
|
||||
Here are some models from transformers that have worked well for us:
|
||||
- `bert-base-uncased` and `bert-base-cased`
|
||||
- `distilbert-base-uncased` and `distilbert-base-cased`
|
||||
- `albert-base-v2`
|
||||
- `roberta-base`
|
||||
- `xlnet-base-cased`
|
||||
|
||||
## Evaluating Models with `textattack eval-model`
|
||||
|
||||
Any TextAttack-compatible model can be evaluated using `textattack eval-model`. TextAttack-trained models can be evaluated using `textattack eval --num-examples <num-examples> --model /path/to/trained/model/`
|
||||
|
||||
## Other Commands
|
||||
|
||||
### Checkpoints and `textattack attack-resume`
|
||||
|
||||
Some attacks can take a very long time. Sometimes this is because they're using
|
||||
a very slow search method (like beam search with a high beam width) or sometimes
|
||||
they're just attacking a large number of samples. In these cases, it can be
|
||||
useful to save attack checkpoints throughout the course of the attack. Then,
|
||||
if the attack crashes for some reason, you can resume without restarting from
|
||||
scratch.
|
||||
|
||||
- To save checkpoints while running an attack, add the argument `--checkpoint-interval X`,
|
||||
where X is the number of attacks you want to run between checkpoints (for example `textattack attack <args> --checkpoint-interval 5`).
|
||||
- To load an attack from a checkpoint, use `textattack attack-resume --checkpoint-file <checkpoint-file>`.
|
||||
|
||||
### Listing features with `textattack list`
|
||||
|
||||
TextAttack has a lot of built-in features (models, search methods, constraints, etc.)
|
||||
and it can get overwhelming to keep track of all the options. To list all of the
|
||||
options within a given category, use `textattack list`.
|
||||
|
||||
For example:
|
||||
- list all the built-in models: `textattack list models`
|
||||
- list all constraints: `textattack list constraints`
|
||||
- list all search methods: `textattack list search-methods`
|
||||
|
||||
### Examining datasets with `textattack peek-dataset`
|
||||
It can be useful to take a cursory look at and compute some basic statistics of
|
||||
whatever dataset you're working with. Whether you're loading a dataset of your
|
||||
own from a file, or one from NLP, you can use `textattack peek-dataset` to
|
||||
see some basic information about the dataset.
|
||||
|
||||
For example, use `textattack peek-dataset --dataset-from-huggingface glue^mrpc` to see
|
||||
information about the MRPC dataset (from the GLUE set of datasets). This will
|
||||
print statistics like the number of labels, average number of words, etc.
|
||||
@@ -1,14 +1,17 @@
|
||||
==============
|
||||
|
||||
.. _installation:
|
||||
|
||||
|
||||
Installation
|
||||
==============
|
||||
|
||||
To use TextAttack, you must be running Python 3.6+. A CUDA-compatible GPU is optional but will greatly improve speed. To install, simply run::
|
||||
To use TextAttack, you must be running Python 3.6+. Tensorflow needs to be installed for users, and Java needs to be installed for developers. A CUDA-compatible GPU is optional but will greatly improve speed. To install, simply run::
|
||||
|
||||
pip install textattack
|
||||
|
||||
You're now all set to use TextAttack! Try running an attack from the command line::
|
||||
|
||||
python -m textattack --recipe textfooler --model bert-mr --num-examples 10
|
||||
textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 10
|
||||
|
||||
This will run an attack using the TextFooler_ recipe, attacking BERT fine-tuned on the MR dataset. It will attack the first 10 samples. Once everything downloads and starts running, you should see attack results print to ``stdout``.
|
||||
|
||||
62
docs/1start/references.md
Normal file
@@ -0,0 +1,62 @@
|
||||
How to Cite TextAttack
|
||||
===========================
|
||||
|
||||
## Main Paper: TextAttack: A Framework for Adversarial Attacks in Natural Language Processing
|
||||
|
||||
- Paper [EMNLP Demo](https://arxiv.org/abs/2005.05909)
|
||||
|
||||
|
||||
- Abstract: TextAttack is a library for generating natural language adversarial examples to fool natural language processing (NLP) models. TextAttack builds attacks from four components: a search method, goal function, transformation, and a set of constraints. Researchers can use these components to easily assemble new attacks. Individual components can be isolated and compared for easier ablation studies. TextAttack currently supports attacks on models trained for text classification and entailment across a variety of datasets. Additionally, TextAttack's modular design makes it easily extensible to new NLP tasks, models, and attack strategies.
|
||||
|
||||
- Citations
|
||||
|
||||
```
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks in Natural Language Processing},
|
||||
author={John X. Morris and Eli Lifland and Jin Yong Yoo and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2005.05909},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Our Analysis paper: Reevaluating Adversarial Examples in Natural Language
|
||||
|
||||
- Paper [EMNLP Findings](https://arxiv.org/abs/2004.14174)
|
||||
|
||||
- Abstract: State-of-the-art attacks on NLP models lack a shared definition of what constitutes a successful attack. We distill ideas from past work into a unified framework: a successful natural language adversarial example is a perturbation that fools the model and follows some linguistic constraints. We then analyze the outputs of two state-of-the-art synonym substitution attacks. We find that their perturbations often do not preserve semantics, and 38% introduce grammatical errors. Human surveys reveal that to successfully preserve semantics, we need to significantly increase the minimum cosine similarities between the embeddings of swapped words and between the sentence encodings of original and perturbed sentences. With constraints adjusted to better preserve semantics and grammaticality, the attack success rate drops by over 70 percentage points.
|
||||
|
||||
|
||||
- Citations
|
||||
```
|
||||
@misc{morris2020reevaluating,
|
||||
title={Reevaluating Adversarial Examples in Natural Language},
|
||||
author={John X. Morris and Eli Lifland and Jack Lanchantin and Yangfeng Ji and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2004.14174},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
## Our Analysis paper: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
|
||||
- Paper [EMNLP BlackNLP](https://arxiv.org/abs/2009.06368)
|
||||
|
||||
- Abstract: We study the behavior of several black-box search algorithms used for generating adversarial examples for natural language processing (NLP) tasks. We perform a fine-grained analysis of three elements relevant to search: search algorithm, search space, and search budget. When new search methods are proposed in past work, the attack search space is often modified alongside the search method. Without ablation studies benchmarking the search algorithm change with the search space held constant, an increase in attack success rate could result from an improved search method or a less restrictive search space. Additionally, many previous studies fail to properly consider the search algorithms' run-time cost, which is essential for downstream tasks like adversarial training. Our experiments provide a reproducible benchmark of search algorithms across a variety of search spaces and query budgets to guide future research in adversarial NLP. Based on our experiments, we recommend greedy attacks with word importance ranking when under a time constraint or attacking long inputs, and either beam search or particle swarm optimization otherwise.
|
||||
|
||||
|
||||
- Citations:
|
||||
```
|
||||
@misc{yoo2020searching,
|
||||
title={Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples},
|
||||
author={Jin Yong Yoo and John X. Morris and Eli Lifland and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2009.06368},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
226
docs/1start/support.md
Normal file
@@ -0,0 +1,226 @@
|
||||
How can I contribute to TextAttack?
|
||||
============================================
|
||||
|
||||
*This documentation page is adapted from the [TextAttack Github CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md); see that page for detailed information on contributing.*
|
||||
|
||||
|
||||
We welcome contributions from all members of the community — and there are lots
|
||||
of ways to help without editing the code! Answering questions, helping others,
|
||||
reaching out and improving the documentations are immensely valuable to the
|
||||
community.
|
||||
|
||||
It also helps us if you spread the word: reference the library from blog posts
|
||||
on the awesome projects it made possible, shout out on Twitter every time it has
|
||||
helped you, or simply star the repo to say "thank you".
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-ez3ts03b-Nr55tDiqgAvCkRbbz8zz9g)!
|
||||
|
||||
## Ways to contribute
|
||||
|
||||
There are lots of ways you can contribute to TextAttack:
|
||||
* Submitting issues on Github to report bugs or make feature requests
|
||||
* Fixing outstanding issues with the existing code
|
||||
* Implementing new features
|
||||
* Adding support for new models and datasets
|
||||
* Contributing to the examples or to the documentation
|
||||
|
||||
*All are equally valuable to the community.*
|
||||
|
||||
## Submitting a new issue or feature request
|
||||
|
||||
Do your best to follow these guidelines when submitting an issue or a feature
|
||||
request. It will make it easier for us to come back to you quickly and with good
|
||||
feedback.
|
||||
|
||||
### Found a bug?
|
||||
|
||||
TextAttack can remain robust and reliable thanks to users who notify us of
|
||||
the problems they encounter. So thank you for [reporting an issue](https://github.com/QData/TextAttack/issues).
|
||||
|
||||
We also have a suite of tests intended to detect bugs before they enter the
|
||||
codebase. That said, they still happen (Turing completeness and all) so it's up
|
||||
to you to report the bugs you find! We would really appreciate it if you could
|
||||
make sure the bug was not already reported (use the search bar on Github under
|
||||
Issues).
|
||||
|
||||
To help us fix your issue quickly, please follow these steps:
|
||||
|
||||
* Include your **OS type and version**, the versions of **Python**, **PyTorch** and
|
||||
**Tensorflow** when applicable;
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s;
|
||||
* Provide the *full* traceback if an exception is raised.
|
||||
|
||||
### Do you want to add your model?
|
||||
|
||||
Awesome! Please provide the following information:
|
||||
|
||||
* Short description of the model and link to the paper;
|
||||
* Link to the implementation if it is open-source;
|
||||
* Link to the model weights if they are available.
|
||||
|
||||
If you are willing to contribute the model yourself, let us know so we can best
|
||||
guide you. We can host your model on our S3 server, but if you trained your
|
||||
model using `transformers`, it's better if you host your model on their
|
||||
[model hub](https://huggingface.co/models).
|
||||
|
||||
### Do you want a new feature: a component, a recipe, or something else?
|
||||
|
||||
A world-class feature request addresses the following points:
|
||||
|
||||
1. Motivation first:
|
||||
* Is it related to a problem/frustration with the library? If so, please explain
|
||||
why. Providing a code snippet that demonstrates the problem is best.
|
||||
* Is it related to something you would need for a project? We'd love to hear
|
||||
about it!
|
||||
* Is it something you worked on and think could benefit the community?
|
||||
Awesome! Tell us what problem it solved for you.
|
||||
2. Write a *full paragraph* describing the feature;
|
||||
3. Provide a **code snippet** that demonstrates its future use;
|
||||
4. In case this is related to a paper, please attach a link;
|
||||
5. Attach any additional information (drawings, screenshots, etc.) you think may help.
|
||||
|
||||
|
||||
## Start contributing! (Pull Requests)
|
||||
|
||||
Before writing code, we strongly advise you to search through the existing PRs or
|
||||
issues to make sure that nobody is already working on the same thing. If you are
|
||||
unsure, it is always a good idea to open an issue to get some feedback.
|
||||
|
||||
You will need basic `git` proficiency to be able to contribute to
|
||||
`textattack`. `git` is not the easiest tool to use but it has the greatest
|
||||
manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
|
||||
Git](https://git-scm.com/book/en/v2) is a very good reference.
|
||||
|
||||
Follow these steps to start contributing:
|
||||
|
||||
1. Fork the [repository](https://github.com/QData/TextAttack) by
|
||||
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
|
||||
under your GitHub user account.
|
||||
|
||||
2. Clone your fork to your local disk, and add the base repository as a remote:
|
||||
|
||||
```bash
|
||||
$ git clone git@github.com:<your Github handle>/TextAttack.git
|
||||
$ cd TextAttack
|
||||
$ git remote add upstream https://github.com/QData/TextAttack
|
||||
```
|
||||
|
||||
3. Create a new branch to hold your development changes:
|
||||
|
||||
```bash
|
||||
$ git checkout -b a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
**do not** work on the `master` branch.
|
||||
|
||||
4. Set up a development environment by running the following commands in a virtual environment:
|
||||
|
||||
|
||||
```bash
|
||||
$ cd TextAttack
|
||||
$ pip install -e . ".[dev]"
|
||||
$ pip install black isort pytest pytest-xdist
|
||||
```
|
||||
|
||||
This will install `textattack` in editable mode and install `black` and
|
||||
`isort`, packages we use for code formatting.
|
||||
|
||||
(If TextAttack was already installed in the virtual environment, remove
|
||||
it with `pip uninstall textattack` before reinstalling it in editable
|
||||
mode with the `-e` flag.)
|
||||
|
||||
5. Develop the features on your branch.
|
||||
|
||||
As you work on the features, you should make sure that the test suite
|
||||
passes:
|
||||
|
||||
```bash
|
||||
$ make test
|
||||
```
|
||||
|
||||
(or just simply `pytest`.)
|
||||
|
||||
> **Tip:** if you're fixing just one or two tests, you can run only the last tests that failed using `pytest --lf`.
|
||||
|
||||
`textattack` relies on `black` and `isort` to format its source code
|
||||
consistently. After you make changes, format them with:
|
||||
|
||||
```bash
|
||||
$ make format
|
||||
```
|
||||
|
||||
You can run quality checks to make sure your code is formatted properly
|
||||
using this command:
|
||||
|
||||
```bash
|
||||
$ make lint
|
||||
```
|
||||
|
||||
Once you're happy with your changes, add changed files using `git add` and
|
||||
make a commit with `git commit` to record your changes locally:
|
||||
|
||||
```bash
|
||||
$ git add modified_file.py
|
||||
$ git commit
|
||||
```
|
||||
|
||||
Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
It is a good idea to sync your copy of the code with the original
|
||||
repository regularly. This way you can quickly account for changes:
|
||||
|
||||
```bash
|
||||
$ git fetch upstream
|
||||
$ git rebase upstream/master
|
||||
```
|
||||
|
||||
Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
$ git push -u origin a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
6. Add documentation.
|
||||
|
||||
Our docs are in the `docs/` folder. Thanks to `sphinx-automodule`, adding
|
||||
documentation for a new code file should just be two lines. Our docs will
|
||||
automatically generate from the comments you added to your code. If you're
|
||||
adding an attack recipe, add a reference in `attack_recipes.rst`.
|
||||
If you're adding a transformation, add a reference in `transformation.rst`, etc.
|
||||
|
||||
You can build the docs and view the updates using `make docs`. If you're
|
||||
adding a tutorial or something where you want to update the docs multiple
|
||||
times, you can run `make docs-auto`. This will run a server using
|
||||
`sphinx-autobuild` that should automatically reload whenever you change
|
||||
a file.
|
||||
|
||||
7. Once you are satisfied (**and the checklist below is happy too**), go to the
|
||||
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
|
||||
to the project maintainers for review.
|
||||
|
||||
8. It's ok if maintainers ask you for changes. It happens to core contributors
|
||||
too! So everyone can see the changes in the Pull request, work in your local
|
||||
branch and push the changes to your fork. They will automatically appear in
|
||||
the pull request.
|
||||
|
||||
|
||||
### Checklist
|
||||
|
||||
1. The title of your pull request should be a summary of its contribution.
|
||||
2. If your pull request addresses an issue, please mention the issue number in
|
||||
the pull request description to make sure they are linked (and people
|
||||
consulting the issue know you are working on it);
|
||||
3. To indicate a work in progress please mark it as a draft on Github.
|
||||
4. Make sure existing tests pass.
|
||||
5. Add relevant tests. No quality testing = no merge.
|
||||
6. All public methods must have informative docstrings that work nicely with sphinx.
|
||||
|
||||
### Tests
|
||||
|
||||
You can run TextAttack tests with `pytest`. Just type `make test`.
|
||||
|
||||
|
||||
#### This guide was heavily inspired by the awesome [transformers guide to contributing](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md)
|
||||
103
docs/1start/what_is_an_adversarial_attack.md
Normal file
@@ -0,0 +1,103 @@
|
||||
What is an adversarial attack in NLP?
|
||||
=======================================
|
||||
|
||||
*This documentation page was adapted from [a blog post we wrote about adversarial examples in NLP](https://towardsdatascience.com/what-are-adversarial-examples-in-nlp-f928c574478e).*
|
||||
|
||||
This page is intended to clear up some terminology for those unclear on the meaning of the term ‘adversarial attack’ in natural language processing. We'll try and give an intro to NLP adversarial attacks, try to clear up lots of the scholarly jargon, and give a high-level overview of the uses of TextAttack.
|
||||
|
||||
This article talks about the concept of adversarial examples as applied to NLP (natural language processing). The terminology can be confusing at times, so we’ll begin with an overview of the language used to talk about adversarial examples and adversarial attacks. Then, we’ll talk about TextAttack, an open-source Python library for adversarial examples, data augmentation, and adversarial training in NLP that’s changing the way people research the robustness of NLP models. We’ll conclude with some thoughts on the future of this area of research.
|
||||
|
||||
An adversarial example is an input designed to fool a machine learning model [1]. In TextAttack, we are concerned with adversarial perturbations, changes to benign inputs that cause them to be misclassified by models. ‘Adversarial perturbation’ is more specific than just ‘adversarial example’, as the class of all adversarial examples also includes inputs designed from scratch to fool machine learning models. TextAttack attacks generate a specific kind of adversarial examples, adversarial perturbations.
|
||||
|
||||
As alluded to above, an adversarial attack on a machine learning model is a process for generating adversarial perturbations. TextAttack attacks iterate through a dataset (list of inputs to a model), and for each correctly predicted sample, search for an adversarial perturbation (we’ll talk more about this later). If an example is incorrectly predicted to begin with, it is not attacked, since the input already fools the model. TextAttack breaks the attack process up into stages, and provides a [system of interchangeable components](/2notebook/1_Introduction_and_Transformations.ipynb) for managing each stage of the attack.
|
||||
|
||||
Adversarial robustness is a measurement of a model’s susceptibility to adversarial examples. TextAttack often measures robustness using attack success rate, the percentage of attack attempts that produce successful adversarial examples, or after-attack accuracy, the percentage of inputs that are both correctly classified and unsuccessfully attacked.
|
||||
|
||||
To improve our numeracy when talking about adversarial attacks, let’s take a look at a concrete example of some attack results:
|
||||
|
||||

|
||||
|
||||
*These results come from using TextAttack to run the DeepWordBug attack on an LSTM trained on the Rotten Tomatoes Movie Review sentiment classification dataset, using 200 total examples.*
|
||||
|
||||
This attack was run on 200 examples. Out of those 200, the model initially predicted 43 of them incorrectly; this leads to an accuracy of 157/200 or 78.5%. TextAttack ran the adversarial attack process on the remaining 157 examples to try to find a valid adversarial perturbation for each one. Out of those 157, 29 attacks failed, leading to a success rate of 128/157 or 81.5%. Another way to articulate this is that the model correctly predicted and resisted attacks for 29 out of 200 total samples, leading to an accuracy under attack (or “after-attack accuracy”) of 29/200 or 14.5%.
|
||||
|
||||
TextAttack also logged some other helpful statistics for this attack. Among the 157 successful attacks, on average, the attack changed 15.5% of words to alter the prediction, and made 32.7 queries to find a successful perturbation. Across all 200 inputs, the average number of words was 18.97.
|
||||
|
||||
Now that we have provided some terminology, let’s look at some concrete examples of proposed adversarial attacks. We will give some background on adversarial attacks in other domains and then examples of different attacks in NLP.
|
||||
|
||||
## Terminology
|
||||
|
||||
Research in 2013 [2] showed neural networks are vulnerable to adversarial examples. These original adversarial attacks apply a small, well-chosen perturbation to an image to fool an image classifier. In this example, the classifier correctly predicts the original image to be a pig. After a small perturbation, however, the classifier predicts the pig to be an airliner (with extremely high confidence!).
|
||||
|
||||

|
||||
|
||||
*An adversarial example for an ImageNet classifier. Superimposing a tiny (but deliberate) amount of noise causes the model to classify this pig as an airliner.*
|
||||
|
||||
|
||||
These adversarial examples exhibit a serious security flaw in deep neural networks. Therefore adversarial examples pose a security problem for downstream systems that include neural networks, including text-to-speech systems and self-driving cars. Adversarial examples are useful outside of security: researchers have used adversarial examples to improve and interpret deep learning models.
|
||||
|
||||
As you might imagine, adversarial examples in deep neural networks have caught the attention of many researchers around the world, and this 2013 paper spawned an explosion of research into the topic.
|
||||
|
||||
|
||||

|
||||
<br>
|
||||
*The number of papers related to ‘adversarial examples’ on arxiv.org between 2014 and 2020. [Graph from https://nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html]*
|
||||
|
||||
|
||||
Many new, more sophisticated adversarial attacks have been proposed, along with “defenses,” procedures for training neural networks that are resistant (“robust”) against adversarial attacks. Training deep neural networks that are highly accurate while remaining robust to adversarial attacks remains an open problem [3].
|
||||
|
||||
Naturally, many have wondered about what adversarial examples for NLP models might be. No natural analogy to the adversarial examples in computer vision (like the pig-to-airliner bamboozle above) exists for NLP. After all, two sequences of text cannot be truly indistinguishable without being the same. (In the above example, the pig-classified input and its airliner-classified perturbation are literally indistinguishable to the human eye.)
|
||||
|
||||
|
||||
## Adversarial Examples in NLP
|
||||
|
||||

|
||||
|
||||
*Two different ideas of adversarial examples in NLP. These results were generated using TextAttack on an LSTM trained on the Rotten Tomatoes Movie Review sentiment classification dataset. These are *real* adversarial examples, generated using the DeepWordBug and TextFooler attacks. To generate them yourself, after installing TextAttack, run ‘textattack attack — model lstm-mr — num-examples 1 — recipe RECIPE — num-examples-offset 19’ where RECIPE is ‘deepwordbug’ or ‘textfooler’.*
|
||||
|
||||
Because two text sequences are never indistinguishable, researchers have proposed various alternative definitions for adversarial examples in NLP. We find it useful to group adversarial attacks based on their chosen definitions of adversarial examples.
|
||||
|
||||
Although attacks in NLP cannot find an adversarial perturbation that is literally indistinguishable to the original input, they can find a perturbation that is very similar. Our mental model groups NLP adversarial attacks into two groups, based on their notions of ‘similarity’:
|
||||
|
||||
|
||||
**Visual similarity.** Some NLP attacks consider an adversarial example to be a text sequence that looks very similar to the original input -- perhaps just a few character changes away -- but receives a different prediction from the model. Some of these adversarial attacks try to change as few characters as possible to change the model’s prediction; others try to introduce realistic ‘typos’ similar to those that humans would make.
|
||||
|
||||
Some researchers have raised concern that these attacks can be defended against quite effectively, either by using a rule-based spellchecker or a sequence-to-sequence model trained to correct adversarial typos.
|
||||
TextAttack attack recipes that fall under this category: deepwordbug, hotflip, pruthi, textbugger\*, morpheus
|
||||
|
||||
|
||||
**Semantic similarity.** Other NLP attacks consider an adversarial example valid if it is semantically indistinguishable from the original input. In other words, if the perturbation is a paraphrase of the original input, but the input and perturbation receive different predictions, then the input is a valid adversarial example.
|
||||
|
||||
Some NLP models are trained to measure semantic similarity. Adversarial attacks based on the notion of semantic indistinguishability typically use another NLP model to enforce that perturbations are grammatically valid and semantically similar to the original input.
|
||||
|
||||
TextAttack attack recipes that fall under this category: alzantot, bae, bert-attack, faster-alzantot, iga, kuleshov, pso, pwws, textbugger\*, textfooler
|
||||
|
||||
\*The textbugger attack generates perturbations using both typo-like character edits and synonym substitutions. It could be considered to use both definitions of indistinguishability.
|
||||
|
||||
## Generating adversarial examples with TextAttack
|
||||
|
||||
TextAttack supports adversarial attacks based on both definitions of indistinguishability. Both types of attacks are useful for training more robust NLP models. Our goal is to enable research into adversarial examples in NLP by providing a set of intuitive, reusable components for building as many attacks from the literature as possible.
|
||||
|
||||
We define the adversarial attack process using four components: a goal function, constraints, transformation, and search method. (We’ll go into this in detail in a future post!) These components allow us to reuse many things between attacks from different research papers. They also make it easy to develop methods for NLP data augmentation.
|
||||
|
||||
TextAttack also includes code for loading popular NLP datasets and training models on them. By integrating this training code with adversarial attacks and data augmentation techniques, TextAttack provides an environment for researchers to test adversarial training in many different scenarios.
|
||||
|
||||
The following figure shows an overview of the main functionality of TextAttack:
|
||||
<br>
|
||||

|
||||
|
||||
|
||||
## The future of adversarial attacks in NLP
|
||||
|
||||
We are excited to see the impact that TextAttack has on the NLP research community! One thing we would like to see research in is the combination of components from various papers. TextAttack makes it easy to run ablation studies to compare the effects of swapping out, say, search method from paper A with the search method from paper B, without making any other changes. (And these tests can be run across dozens of pre-trained models and datasets with no downloads!)
|
||||
|
||||
We hope that use of TextAttack leads to more diversity in adversarial attacks. One thing that all current adversarial attacks have in common is that they make substitutions on the word or character level. We hope that future adversarial attacks in NLP can broaden scope to try different approaches to phrase-level replacements as well as full-sentence paraphrases. Additionally, there has been a focus on English in the adversarial attack literature; we look forward to seeing adversarial attacks applied to more languages.
|
||||
|
||||
To get started with TextAttack, you might want to start with one of our [introductory tutorials](/2notebook/0_End_to_End.ipynb).
|
||||
|
||||
|
||||
.. [1] “Attacking Machine Learning with Adversarial Examples”, Goodfellow, 2013. [https://openai.com/blog/adversarial-example-research/]
|
||||
|
||||
.. [2] “Intriguing properties of neural networks”, Szegedy, 2013. [https://arxiv.org/abs/1312.6199]
|
||||
|
||||
.. [3] “Robustness May Be at Odds with Accuracy”, Tsipras, 2018. [https://arxiv.org/abs/1805.12152]
|
||||
9949
docs/2notebook/0_End_to_End.ipynb
Normal file
578
docs/2notebook/1_Introduction_and_Transformations.ipynb
Normal file
@@ -0,0 +1,578 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The TextAttack ecosystem: search, transformations, and constraints"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/1_Introduction_and_Transformations.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/1_Introduction_and_Transformations.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"An attack in TextAttack consists of four parts.\n",
|
||||
"\n",
|
||||
"### Goal function\n",
|
||||
"\n",
|
||||
"The **goal function** determines if the attack is successful or not. One common goal function is **untargeted classification**, where the attack tries to perturb an input to change its classification. \n",
|
||||
"\n",
|
||||
"### Search method\n",
|
||||
"The **search method** explores the space of potential transformations and tries to locate a successful perturbation. Greedy search, beam search, and brute-force search are all examples of search methods.\n",
|
||||
"\n",
|
||||
"### Transformation\n",
|
||||
"A **transformation** takes a text input and transforms it, for example replacing words or phrases with similar ones, while trying not to change the meaning. Paraphrase and synonym substitution are two broad classes of transformations.\n",
|
||||
"\n",
|
||||
"### Constraints\n",
|
||||
"Finally, **constraints** determine whether or not a given transformation is valid. Transformations don't perfectly preserve syntax or semantics, so additional constraints can increase the probability that these qualities are preserved from the source to adversarial example. There are many types of constraints: overlap constraints that measure edit distance, syntactical constraints check part-of-speech and grammar errors, and semantic constraints like language models and sentence encoders."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A custom transformation\n",
|
||||
"\n",
|
||||
"This lesson explains how to create a custom transformation. In TextAttack, many transformations involve *word swaps*: they take a word and try and find suitable substitutes. Some attacks focus on replacing characters with neighboring characters to create \"typos\" (these don't intend to preserve the grammaticality of inputs). Other attacks rely on semantics: they take a word and try to replace it with semantic equivalents.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Banana word swap \n",
|
||||
"\n",
|
||||
"As an introduction to writing transformations for TextAttack, we're going to try a very simple transformation: one that replaces any given word with the word 'banana'. In TextAttack, there's an abstract `WordSwap` class that handles the heavy lifting of breaking sentences into words and avoiding replacement of stopwords. We can extend `WordSwap` and implement a single method, `_get_replacement_words`, to indicate to replace each word with 'banana'. 🍌"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.transformations import WordSwap\n",
|
||||
"\n",
|
||||
"class BananaWordSwap(WordSwap):\n",
|
||||
" \"\"\" Transforms an input by replacing any word with 'banana'.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" # We don't need a constructor, since our class doesn't require any parameters.\n",
|
||||
"\n",
|
||||
" def _get_replacement_words(self, word):\n",
|
||||
" \"\"\" Returns 'banana', no matter what 'word' was originally.\n",
|
||||
" \n",
|
||||
" Returns a list with one item, since `_get_replacement_words` is intended to\n",
|
||||
" return a list of candidate replacement words.\n",
|
||||
" \"\"\"\n",
|
||||
" return ['banana']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Using our transformation\n",
|
||||
"\n",
|
||||
"Now we have the transformation chosen, but we're missing a few other things. To complete the attack, we need to choose the **search method** and **constraints**. And to use the attack, we need a **goal function**, a **model** and a **dataset**. (The goal function indicates the task our model performs – in this case, classification – and the type of attack – in this case, we'll perform an untargeted attack.)\n",
|
||||
"\n",
|
||||
"### Creating the goal function, model, and dataset\n",
|
||||
"We are performing an untargeted attack on a classification model, so we'll use the `UntargetedClassification` class. For the model, let's use BERT trained for news classification on the AG News dataset. We've pretrained several models and uploaded them to the [HuggingFace Model Hub](https://huggingface.co/textattack). TextAttack integrates with any model from HuggingFace's Model Hub and any dataset from HuggingFace's `datasets`!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'> compatible with model BertForSequenceClassification.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b537c513e8b3410eb2f7e3ec5df851fc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3939.0, style=ProgressStyle(description…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4f3b600b1f1b4a4da538f43582846964",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2486.0, style=ProgressStyle(description…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using custom data configuration default\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Downloading and preparing dataset ag_news/default (download: 29.88 MiB, generated: 30.23 MiB, total: 60.10 MiB) to /u/edl9cy/.cache/huggingface/datasets/ag_news/default/0.0.0...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "df8846bd027a457891dd665e3fd4156f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=11045148.0, style=ProgressStyle(descrip…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "e3a3710421f6423ba77fb3276b3240af",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=751209.0, style=ProgressStyle(descripti…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94mag_news\u001b[0m, split \u001b[94mtest\u001b[0m.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset ag_news downloaded and prepared to /u/edl9cy/.cache/huggingface/datasets/ag_news/default/0.0.0. Subsequent calls will reuse this data.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the model\n",
|
||||
"import transformers\n",
|
||||
"from textattack.models.tokenizers import AutoTokenizer\n",
|
||||
"from textattack.models.wrappers import HuggingFaceModelWrapper\n",
|
||||
"\n",
|
||||
"model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n",
|
||||
"tokenizer = AutoTokenizer(\"textattack/bert-base-uncased-ag-news\")\n",
|
||||
"\n",
|
||||
"model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n",
|
||||
"\n",
|
||||
"# Create the goal function using the model\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"goal_function = UntargetedClassification(model_wrapper)\n",
|
||||
"\n",
|
||||
"# Import the dataset\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"dataset = HuggingFaceDataset(\"ag_news\", None, \"test\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating the attack\n",
|
||||
"Let's keep it simple: let's use a greedy search method, and let's not use any constraints for now. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"from textattack.shared import Attack\n",
|
||||
"\n",
|
||||
"# We're going to use our Banana word swap class as the attack transformation.\n",
|
||||
"transformation = BananaWordSwap() \n",
|
||||
"# We'll constrain modification of already modified indices and stopwords\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification()]\n",
|
||||
"# We'll use the Greedy search method\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"# Now, let's make the attack from the 4 components:\n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's print our attack to see all the parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): BananaWordSwap\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(attack)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using the attack\n",
|
||||
"\n",
|
||||
"Let's use our attack to successfully attack 10 samples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 of 10 successes complete.\n",
|
||||
"2 of 10 successes complete.\n",
|
||||
"3 of 10 successes complete.\n",
|
||||
"4 of 10 successes complete.\n",
|
||||
"5 of 10 successes complete.\n",
|
||||
"6 of 10 successes complete.\n",
|
||||
"7 of 10 successes complete.\n",
|
||||
"8 of 10 successes complete.\n",
|
||||
"9 of 10 successes complete.\n",
|
||||
"10 of 10 successes complete.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from tqdm import tqdm # tqdm provides us a nice progress bar.\n",
|
||||
"from textattack.loggers import CSVLogger # tracks a dataframe for us.\n",
|
||||
"from textattack.attack_results import SuccessfulAttackResult\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(dataset)\n",
|
||||
"\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"num_successes = 0\n",
|
||||
"while num_successes < 10:\n",
|
||||
" result = next(results_iterable)\n",
|
||||
" if isinstance(result, SuccessfulAttackResult):\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
" num_successes += 1\n",
|
||||
" print(f'{num_successes} of 10 successes complete.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Visualizing attack results\n",
|
||||
"\n",
|
||||
"We are logging `AttackResult` objects using a `CSVLogger`. This logger stores all attack results in a dataframe, which we can easily access and display. Since we set `color_method` to `'html'`, the attack results will display their differences, in color, in HTML. Using `IPython` utilities and `pandas`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Fears for T N <font color = blue>pension</font> after <font color = blue>talks</font> <font color = blue>Unions</font> representing <font color = blue>workers</font> at Turner Newall say they are '<font color = blue>disappointed'</font> after talks with stricken parent firm Federal <font color = blue>Mogul</font>.</td>\n",
|
||||
" <td>Fears for T N <font color = red>banana</font> after <font color = red>banana</font> <font color = red>banana</font> representing <font color = red>banana</font> at Turner Newall say they are '<font color = red>banana</font> after talks with stricken parent firm Federal <font color = red>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>The Race is On: Second Private <font color = purple>Team</font> Sets Launch <font color = purple>Date</font> for <font color = purple>Human</font> <font color = purple>Spaceflight</font> (<font color = purple>SPACE</font>.<font color = purple>com</font>) <font color = purple>SPACE</font>.<font color = purple>com</font> - <font color = purple>TORONTO</font>, <font color = purple>Canada</font> -- <font color = purple>A</font> <font color = purple>second</font>\\<font color = purple>team</font> of rocketeers <font color = purple>competing</font> for the #36;10 million Ansari X <font color = purple>Prize</font>, a <font color = purple>contest</font> for\\<font color = purple>privately</font> funded <font color = purple>suborbital</font> <font color = purple>space</font> <font color = purple>flight</font>, has officially <font color = purple>announced</font> the first\\<font color = purple>launch</font> date for its <font color = purple>manned</font> rocket.</td>\n",
|
||||
" <td>The Race is On: Second Private <font color = red>banana</font> Sets Launch <font color = red>banana</font> for <font color = red>banana</font> <font color = red>banana</font> (<font color = red>banana</font>.<font color = red>banana</font>) <font color = red>banana</font>.<font color = red>banana</font> - <font color = red>banana</font>, <font color = red>banana</font> -- <font color = red>banana</font> <font color = red>banana</font>\\<font color = red>banana</font> of rocketeers <font color = red>banana</font> for the #36;10 million Ansari X <font color = red>banana</font>, a <font color = red>banana</font> for\\<font color = red>banana</font> funded <font color = red>banana</font> <font color = red>banana</font> <font color = red>banana</font>, has officially <font color = red>banana</font> the first\\<font color = red>banana</font> date for its <font color = red>banana</font> rocket.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Ky. Company Wins Grant to <font color = purple>Study</font> <font color = purple>Peptides</font> (<font color = purple>AP</font>) <font color = purple>AP</font> - <font color = purple>A</font> company <font color = purple>founded</font> by a <font color = purple>chemistry</font> <font color = purple>researcher</font> at the <font color = purple>University</font> of Louisville won a grant to develop a method of producing better <font color = purple>peptides</font>, which are short chains of <font color = purple>amino</font> <font color = purple>acids</font>, the building blocks of <font color = purple>proteins</font>.</td>\n",
|
||||
" <td>Ky. Company Wins Grant to <font color = blue>banana</font> <font color = blue>banana</font> (<font color = blue>banana</font>) <font color = blue>banana</font> - <font color = blue>banana</font> company <font color = blue>banana</font> by a <font color = blue>banana</font> <font color = blue>banana</font> at the <font color = blue>banana</font> of Louisville won a grant to develop a method of producing better <font color = blue>banana</font>, which are short chains of <font color = blue>banana</font> <font color = blue>banana</font>, the building blocks of <font color = blue>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td><font color = purple>Prediction</font> Unit Helps <font color = purple>Forecast</font> Wildfires (AP) <font color = purple>AP</font> - It's barely dawn when Mike Fitzpatrick <font color = purple>starts</font> his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.</td>\n",
|
||||
" <td><font color = red>banana</font> Unit Helps <font color = red>banana</font> Wildfires (AP) <font color = red>banana</font> - It's barely dawn when Mike Fitzpatrick <font color = red>banana</font> his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Calif. Aims to Limit Farm-Related <font color = purple>Smog</font> (AP) AP - Southern California's <font color = purple>smog-fighting</font> agency went after <font color = purple>emissions</font> of the <font color = purple>bovine</font> variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.</td>\n",
|
||||
" <td>Calif. Aims to Limit Farm-Related <font color = red>banana</font> (AP) AP - Southern California's <font color = red>banana</font> agency went after <font color = red>banana</font> of the <font color = red>banana</font> variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>Open <font color = purple>Letter</font> Against <font color = purple>British</font> <font color = purple>Copyright</font> Indoctrination in Schools The <font color = purple>British</font> Department for Education and Skills (DfES) <font color = purple>recently</font> <font color = purple>launched</font> a \"<font color = purple>Music</font> <font color = purple>Manifesto</font>\" campaign, with the ostensible <font color = purple>intention</font> of <font color = purple>educating</font> the <font color = purple>next</font> <font color = purple>generation</font> of <font color = purple>British</font> <font color = purple>musicians</font>. <font color = purple>Unfortunately</font>, they also teamed up with the <font color = purple>music</font> industry (<font color = purple>EMI</font>, and <font color = purple>various</font> <font color = purple>artists</font>) to make this popular. <font color = purple>EMI</font> has <font color = purple>apparently</font> <font color = purple>negotiated</font> their end well, so that <font color = purple>children</font> in our schools will now be indoctrinated about the illegality of <font color = purple>downloading</font> music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.</td>\n",
|
||||
" <td>Open <font color = red>banana</font> Against <font color = red>banana</font> <font color = red>banana</font> Indoctrination in Schools The <font color = red>banana</font> Department for Education and Skills (DfES) <font color = red>banana</font> <font color = red>banana</font> a \"<font color = red>banana</font> <font color = red>banana</font>\" campaign, with the ostensible <font color = red>banana</font> of <font color = red>banana</font> the <font color = red>banana</font> <font color = red>banana</font> of <font color = red>banana</font> <font color = red>banana</font>. <font color = red>banana</font>, they also teamed up with the <font color = red>banana</font> industry (<font color = red>banana</font>, and <font color = red>banana</font> <font color = red>banana</font>) to make this popular. <font color = red>banana</font> has <font color = red>banana</font> <font color = red>banana</font> their end well, so that <font color = red>banana</font> in our schools will now be indoctrinated about the illegality of <font color = red>banana</font> music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td><font color = purple>E-mail</font> scam targets police chief Wiltshire Police warns about \"<font color = purple>phishing</font>\" after its fraud squad chief was targeted.</td>\n",
|
||||
" <td><font color = red>banana</font> scam targets police chief Wiltshire Police warns about \"<font color = red>banana</font>\" after its fraud squad chief was targeted.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated <font color = purple>card</font> fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.</td>\n",
|
||||
" <td>Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated <font color = red>banana</font> fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>Group to Propose <font color = purple>New</font> High-Speed <font color = purple>Wireless</font> <font color = purple>Format</font> LOS ANGELES (<font color = purple>Reuters</font>) - A <font color = purple>group</font> of <font color = purple>technology</font> companies including Texas <font color = purple>Instruments</font> <font color = purple>Inc</font>. <<font color = purple>TXN</font>.N>, STMicroelectronics <STM.PA> and Broadcom Corp. <<font color = purple>BRCM</font>.O&<font color = purple>gt</font>;, on Thursday said they will <font color = purple>propose</font> a new <font color = purple>wireless</font> <font color = purple>networking</font> standard up to 10 <font color = purple>times</font> the <font color = purple>speed</font> of the current generation.</td>\n",
|
||||
" <td>Group to Propose <font color = blue>banana</font> High-Speed <font color = blue>banana</font> <font color = blue>banana</font> LOS ANGELES (<font color = blue>banana</font>) - A <font color = blue>banana</font> of <font color = blue>banana</font> companies including Texas <font color = blue>banana</font> <font color = blue>banana</font>. <<font color = blue>banana</font>.N>, STMicroelectronics <STM.PA> and Broadcom Corp. <<font color = blue>banana</font>.O&<font color = blue>banana</font>;, on Thursday said they will <font color = blue>banana</font> a new <font color = blue>banana</font> <font color = blue>banana</font> standard up to 10 <font color = blue>banana</font> the <font color = blue>banana</font> of the current generation.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td>Apple Launches <font color = purple>Graphics</font> <font color = purple>Software</font>, <font color = purple>Video</font> <font color = purple>Bundle</font> LOS ANGELES (<font color = purple>Reuters</font>) - Apple <font color = purple>Computer</font> Inc.&<font color = purple>lt</font>;AAPL.O&<font color = purple>gt</font>; on Tuesday <font color = purple>began</font> shipping a new program designed to let <font color = purple>users</font> create <font color = purple>real-time</font> <font color = purple>motion</font> <font color = purple>graphics</font> and <font color = purple>unveiled</font> a discount <font color = purple>video-editing</font> <font color = purple>software</font> <font color = purple>bundle</font> featuring its flagship <font color = purple>Final</font> Cut Pro <font color = purple>software</font>.</td>\n",
|
||||
" <td>Apple Launches <font color = blue>banana</font> <font color = blue>banana</font>, <font color = blue>banana</font> <font color = blue>banana</font> LOS ANGELES (<font color = blue>banana</font>) - Apple <font color = blue>banana</font> Inc.&<font color = blue>banana</font>;AAPL.O&<font color = blue>banana</font>; on Tuesday <font color = blue>banana</font> shipping a new program designed to let <font color = blue>banana</font> create <font color = blue>banana</font> <font color = blue>banana</font> <font color = blue>banana</font> and <font color = blue>banana</font> a discount <font color = blue>banana</font> <font color = blue>banana</font> <font color = blue>banana</font> featuring its flagship <font color = blue>banana</font> Cut Pro <font color = blue>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pd.options.display.max_colwidth = 480 # increase colum width so we can actually read the examples\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"We can examine these examples for a good idea of how many words had to be changed to \"banana\" to change the prediction score from the correct class to another class. The examples without perturbed words were originally misclassified, so they were skipped by the attack. Looks like some examples needed only a couple \"banana\"s, while others needed up to 17 \"banana\" substitutions to change the class score. Wow! 🍌"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Bonus: Attacking Custom Samples\n",
|
||||
"\n",
|
||||
"We can also attack custom data samples, like these ones I just made up!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: CSVLogger exiting without calling flush().\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Malaria <font color = red>deaths</font> in Africa fall by 5% from last year</td>\n",
|
||||
" <td>Malaria <font color = purple>banana</font> in Africa fall by 5% from last year</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td><font color = green>Washington</font> <font color = green>Nationals</font> <font color = green>defeat</font> the Houston Astros to win the World Series</td>\n",
|
||||
" <td><font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> the Houston Astros to win the World Series</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td><font color = blue>Exxon</font> <font color = blue>Mobil</font> <font color = blue>hires</font> a new <font color = blue>CEO</font></td>\n",
|
||||
" <td><font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> a new <font color = purple>banana</font></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td><font color = purple>Microsoft</font> invests $1 billion in OpenAI</td>\n",
|
||||
" <td><font color = blue>banana</font> invests $1 billion in OpenAI</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# For AG News, labels are 0: World, 1: Sports, 2: Business, 3: Sci/Tech\n",
|
||||
"\n",
|
||||
"custom_dataset = [\n",
|
||||
" ('Malaria deaths in Africa fall by 5% from last year', 0),\n",
|
||||
" ('Washington Nationals defeat the Houston Astros to win the World Series', 1),\n",
|
||||
" ('Exxon Mobil hires a new CEO', 2),\n",
|
||||
" ('Microsoft invests $1 billion in OpenAI', 3),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(custom_dataset)\n",
|
||||
"\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"for result in results_iterable:\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
" \n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
769
docs/2notebook/2_Constraints.ipynb
Normal file
@@ -0,0 +1,769 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The importance of constraints\n",
|
||||
"\n",
|
||||
"Constraints determine which potential adversarial examples are valid inputs to the model. When determining the efficacy of an attack, constraints are everything. After all, an attack that looks very powerful may just be generating nonsense. Or, perhaps more nefariously, an attack may generate a real-looking example that changes the original label of the input. That's why you should always clearly define the *constraints* your adversarial examples must meet. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/2_Constraints.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/2_Constraints.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Classes of constraints\n",
|
||||
"\n",
|
||||
"TextAttack evaluates constraints using methods from three groups:\n",
|
||||
"\n",
|
||||
"- **Overlap constraints** determine if a perturbation is valid based on character-level analysis. For example, some attacks are constrained by edit distance: a perturbation is only valid if it perturbs some small number of characters (or fewer).\n",
|
||||
"\n",
|
||||
"- **Grammaticality constraints** filter inputs based on syntactical information. For example, an attack may require that adversarial perturbations do not introduce grammatical errors.\n",
|
||||
"\n",
|
||||
"- **Semantic constraints** try to ensure that the perturbation is semantically similar to the original input. For example, we may design a constraint that uses a sentence encoder to encode the original and perturbed inputs, and enforce that the sentence encodings be within some fixed distance of one another. (This is what happens in subclasses of `textattack.constraints.semantics.sentence_encoders`.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A new constraint\n",
|
||||
"\n",
|
||||
"To add our own constraint, we need to create a subclass of `textattack.constraints.Constraint`. We can implement one of two functions, either `_check_constraint` or `_check_constraint_many`:\n",
|
||||
"\n",
|
||||
"- `_check_constraint` determines whether candidate `TokenizedText` `transformed_text`, transformed from `current_text`, fulfills a desired constraint. It returns either `True` or `False`.\n",
|
||||
"- `_check_constraint_many` determines whether each of a list of candidates `transformed_texts` fulfill the constraint relative to `current_text`. This is here in case your constraint can be vectorized. If not, just implement `_check_constraint`, and `_check_constraint` will be executed for each `(transformed_text, current_text)` pair."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A custom constraint\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"For fun, we're going to see what happens when we constrain an attack to only allow perturbations that substitute out a named entity for another. In linguistics, a **named entity** is a proper noun, the name of a person, organization, location, product, etc. Named Entity Recognition is a popular NLP task (and one that state-of-the-art models can perform quite well). \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### NLTK and Named Entity Recognition\n",
|
||||
"\n",
|
||||
"**NLTK**, the Natural Language Toolkit, is a Python package that helps developers write programs that process natural language. NLTK comes with predefined algorithms for lots of linguistic tasks– including Named Entity Recognition.\n",
|
||||
"\n",
|
||||
"First, we're going to write a constraint class. In the `_check_constraints` method, we're going to use NLTK to find the named entities in both `current_text` and `transformed_text`. We will only return `True` (that is, our constraint is met) if `transformed_text` has substituted one named entity in `current_text` for another.\n",
|
||||
"\n",
|
||||
"Let's import NLTK and download the required modules:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package maxent_ne_chunker to\n",
|
||||
"[nltk_data] /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package maxent_ne_chunker is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package words to /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package words is already up-to-date!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"nltk.download('punkt') # The NLTK tokenizer\n",
|
||||
"nltk.download('maxent_ne_chunker') # NLTK named-entity chunker\n",
|
||||
"nltk.download('words') # NLTK list of words"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### NLTK NER Example\n",
|
||||
"\n",
|
||||
"Here's an example of using NLTK to find the named entities in a sentence:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(S\n",
|
||||
" In/IN\n",
|
||||
" 2017/CD\n",
|
||||
" ,/,\n",
|
||||
" star/NN\n",
|
||||
" quarterback/NN\n",
|
||||
" (PERSON Tom/NNP Brady/NNP)\n",
|
||||
" led/VBD\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Patriots/NNP)\n",
|
||||
" to/TO\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Super/NNP Bowl/NNP)\n",
|
||||
" ,/,\n",
|
||||
" but/CC\n",
|
||||
" lost/VBD\n",
|
||||
" to/TO\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Philadelphia/NNP Eagles/NNP)\n",
|
||||
" ./.)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentence = ('In 2017, star quarterback Tom Brady led the Patriots to the Super Bowl, '\n",
|
||||
" 'but lost to the Philadelphia Eagles.')\n",
|
||||
"\n",
|
||||
"# 1. Tokenize using the NLTK tokenizer.\n",
|
||||
"tokens = nltk.word_tokenize(sentence)\n",
|
||||
"\n",
|
||||
"# 2. Tag parts of speech using the NLTK part-of-speech tagger.\n",
|
||||
"tagged = nltk.pos_tag(tokens)\n",
|
||||
"\n",
|
||||
"# 3. Extract entities from tagged sentence.\n",
|
||||
"entities = nltk.chunk.ne_chunk(tagged)\n",
|
||||
"print(entities)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It looks like `nltk.chunk.ne_chunk` gives us an `nltk.tree.Tree` object where named entities are also `nltk.tree.Tree` objects within that tree. We can take this a step further and grab the named entities from the tree of entities:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Tree('PERSON', [('Tom', 'NNP'), ('Brady', 'NNP')]), Tree('ORGANIZATION', [('Patriots', 'NNP')]), Tree('ORGANIZATION', [('Super', 'NNP'), ('Bowl', 'NNP')]), Tree('ORGANIZATION', [('Philadelphia', 'NNP'), ('Eagles', 'NNP')])]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 4. Filter entities to just named entities.\n",
|
||||
"named_entities = [entity for entity in entities if isinstance(entity, nltk.tree.Tree)]\n",
|
||||
"print(named_entities)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Caching with `@functools.lru_cache`\n",
|
||||
"\n",
|
||||
"A little-known feature of Python 3 is `functools.lru_cache`, a decorator that allows users to easily cache the results of a function in an LRU cache. We're going to be using the NLTK library quite a bit to tokenize, parse, and detect named entities in sentences. These sentences might repeat themselves. As such, we'll use this decorator to cache named entities so that we don't have to perform this expensive computation multiple times."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Putting it all together: getting a list of Named Entity Labels from a sentence\n",
|
||||
"\n",
|
||||
"Now that we know how to tokenize, parse, and detect named entities using NLTK, let's put it all together into a single helper function. Later, when we implement our constraint, we can query this function to easily get the entity labels from a sentence. We can even use `@functools.lru_cache` to try and speed this process up."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import functools\n",
|
||||
"\n",
|
||||
"@functools.lru_cache(maxsize=2**14)\n",
|
||||
"def get_entities(sentence):\n",
|
||||
" tokens = nltk.word_tokenize(sentence)\n",
|
||||
" tagged = nltk.pos_tag(tokens)\n",
|
||||
" # Setting `binary=True` makes NLTK return all of the named\n",
|
||||
" # entities tagged as NNP instead of detailed tags like\n",
|
||||
" #'Organization', 'Geo-Political Entity', etc.\n",
|
||||
" entities = nltk.chunk.ne_chunk(tagged, binary=True)\n",
|
||||
" return entities.leaves()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And let's test our function to make sure it works:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('Jack', 'NNP'),\n",
|
||||
" ('Black', 'NNP'),\n",
|
||||
" ('starred', 'VBD'),\n",
|
||||
" ('in', 'IN'),\n",
|
||||
" ('the', 'DT'),\n",
|
||||
" ('2003', 'CD'),\n",
|
||||
" ('film', 'NN'),\n",
|
||||
" ('classic', 'JJ'),\n",
|
||||
" ('``', '``'),\n",
|
||||
" ('School', 'NNP'),\n",
|
||||
" ('of', 'IN'),\n",
|
||||
" ('Rock', 'NNP'),\n",
|
||||
" (\"''\", \"''\"),\n",
|
||||
" ('.', '.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentence = 'Jack Black starred in the 2003 film classic \"School of Rock\".'\n",
|
||||
"get_entities(sentence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We flattened the tree of entities, so the return format is a list of `(word, entity type)` tuples. For non-entities, the `entity_type` is just the part of speech of the word. `'NNP'` is the indicator of a named entity (a proper noun, according to NLTK). Looks like we identified four named-entity words here: 'Jack', 'Black', 'School', and 'Rock'. (Seems that the labeler thinks Rock is the name of a place, a city or something.) Whatever technique NLTK uses for named entity recognition may be a bit rough, but it did a pretty decent job here!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating our NamedEntityConstraint\n",
|
||||
"\n",
|
||||
"Now that we know how to detect named entities using NLTK, let's create our custom constraint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.constraints import Constraint\n",
|
||||
"\n",
|
||||
"class NamedEntityConstraint(Constraint):\n",
|
||||
" \"\"\" A constraint that ensures `transformed_text` only substitutes named entities from `current_text` with other named entities.\n",
|
||||
" \"\"\"\n",
|
||||
" def _check_constraint(self, transformed_text, current_text):\n",
|
||||
" transformed_entities = get_entities(transformed_text.text)\n",
|
||||
" current_entities = get_entities(current_text.text)\n",
|
||||
" # If there aren't named entities, let's return False (the attack\n",
|
||||
" # will eventually fail).\n",
|
||||
" if len(current_entities) == 0:\n",
|
||||
" return False\n",
|
||||
" if len(current_entities) != len(transformed_entities):\n",
|
||||
" # If the two sentences have a different number of entities, then \n",
|
||||
" # they definitely don't have the same labels. In this case, the \n",
|
||||
" # constraint is violated, and we return False.\n",
|
||||
" return False\n",
|
||||
" else:\n",
|
||||
" # Here we compare all of the words, in order, to make sure that they match.\n",
|
||||
" # If we find two words that don't match, this means a word was swapped \n",
|
||||
" # between `current_text` and `transformed_text`. That word must be a named entity to fulfill our\n",
|
||||
" # constraint.\n",
|
||||
" current_word_label = None\n",
|
||||
" transformed_word_label = None\n",
|
||||
" for (word_1, label_1), (word_2, label_2) in zip(current_entities, transformed_entities):\n",
|
||||
" if word_1 != word_2:\n",
|
||||
" # Finally, make sure that words swapped between `x` and `x_adv` are named entities. If \n",
|
||||
" # they're not, then we also return False.\n",
|
||||
" if (label_1 not in ['NNP', 'NE']) or (label_2 not in ['NNP', 'NE']):\n",
|
||||
" return False \n",
|
||||
" # If we get here, all of the labels match up. Return True!\n",
|
||||
" return True\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Testing our constraint\n",
|
||||
"\n",
|
||||
"We need to create an attack and a dataset to test our constraint on. We went over all of this in the transformations tutorial, so let's gloss over this part for now."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "83cfddb4681b46d38a7155ca4ebdbbda",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=736.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "934709ba69334a34b63e63555b50b72a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=46747112.0, style=ProgressStyle(descrip…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "00a1ee97f37e4dd79357731dfe21ed2d",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760289.0, style=ProgressStyle(descripti…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "d49c0092188640028cc5b36648794867",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=156.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "62d9b14ed049497f9f9ac56372681421",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=25.0, style=ProgressStyle(description_w…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'> compatible with model AlbertForSequenceClassification.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4a63bb6d448e4867965a00293f0c72ce",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5787.0, style=ProgressStyle(description…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "2b34e451729d47b1b407a88697439f44",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3419.0, style=ProgressStyle(description…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Downloading and preparing dataset yelp_polarity/plain_text (download: 158.67 MiB, generated: 421.28 MiB, total: 579.95 MiB) to /u/edl9cy/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "e586d9ff3e784be58e0cd6e6389444f7",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=166373201.0, style=ProgressStyle(descri…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94myelp_polarity\u001b[0m, split \u001b[94mtest\u001b[0m.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset yelp_polarity downloaded and prepared to /u/edl9cy/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0. Subsequent calls will reuse this data.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the model\n",
|
||||
"import transformers\n",
|
||||
"from textattack.models.tokenizers import AutoTokenizer\n",
|
||||
"from textattack.models.wrappers import HuggingFaceModelWrapper\n",
|
||||
"\n",
|
||||
"model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/albert-base-v2-yelp-polarity\")\n",
|
||||
"tokenizer = AutoTokenizer(\"textattack/albert-base-v2-yelp-polarity\")\n",
|
||||
"\n",
|
||||
"model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n",
|
||||
"\n",
|
||||
"# Create the goal function using the model\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"goal_function = UntargetedClassification(model_wrapper)\n",
|
||||
"\n",
|
||||
"# Import the dataset\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"dataset = HuggingFaceDataset(\"yelp_polarity\", None, \"test\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): WordSwapEmbedding(\n",
|
||||
" (max_candidates): 15\n",
|
||||
" (embedding_type): paragramcf\n",
|
||||
" )\n",
|
||||
" (constraints): \n",
|
||||
" (0): NamedEntityConstraint(\n",
|
||||
" (compare_against_original): False\n",
|
||||
" )\n",
|
||||
" (1): RepeatModification\n",
|
||||
" (2): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.transformations import WordSwapEmbedding\n",
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.shared import Attack\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"\n",
|
||||
"# We're going to use the `WordSwapEmbedding` transformation. Using the default settings, this\n",
|
||||
"# will try substituting words with their neighbors in the counter-fitted embedding space. \n",
|
||||
"transformation = WordSwapEmbedding(max_candidates=15) \n",
|
||||
"\n",
|
||||
"# We'll use the greedy search method again\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"\n",
|
||||
"# Our constraints will be the same as Tutorial 1, plus the named entity constraint\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification(),\n",
|
||||
" NamedEntityConstraint(False)]\n",
|
||||
"\n",
|
||||
"# Now, let's make the attack using these parameters. \n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)\n",
|
||||
"\n",
|
||||
"print(attack)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's use our attack. We're going to attack samples until we achieve 5 successes. (There's a lot to check here, and since we're using a greedy search over all potential word swap positions, each sample will take a few minutes. This will take a few hours to run on a single core.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 of 5 successes complete.\n",
|
||||
"2 of 5 successes complete.\n",
|
||||
"3 of 5 successes complete.\n",
|
||||
"4 of 5 successes complete.\n",
|
||||
"5 of 5 successes complete.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.loggers import CSVLogger # tracks a dataframe for us.\n",
|
||||
"from textattack.attack_results import SuccessfulAttackResult\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(dataset)\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"num_successes = 0\n",
|
||||
"while num_successes < 5:\n",
|
||||
" result = next(results_iterable)\n",
|
||||
" if isinstance(result, SuccessfulAttackResult):\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
" num_successes += 1\n",
|
||||
" print(f'{num_successes} of 5 successes complete.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's visualize our 5 successes in color:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Picture Billy Joel's \\\"\"Piano Man\\\"\" <font color = green>DOUBLED</font> mixed with beer, a rowdy crowd, and comedy - Welcome to Sing Sing! A unique musical experience found in <font color = green>Homestead</font>.\\<font color = green>n</font>\\nIf you're looking to grab a bite to eat or a beer, come on in! Serving food and brews from Rock <font color = green>Bottom</font> <font color = green>Brewery</font>, Sing Sing keeps your tummy full while you listen to two (or more) amazingly talented pianists take your musical requests. They'll play anything you'd like, for tips of course. Wanting to hear Britney Spears? Toto? Duran Duran? <font color = green>Yep</font>, they play that... new or old.\\n\\nThe crowd makes the show, so make sure you come ready for a good time. If the crowd is dead, it's harder for the Guys to get a reaction. If you're wanting to have some fun, it can be a GREAT time! It's the perfect place for Birthday parties - especially if you want to embarrass a friend. The guys will bring them up to the pianos and perform a little ditty. For being a good sport, you get the coveted <font color = green>Sing</font> <font color = green>Sing</font> bumper sticker. Now who wouldn't want that?\\n\\nDueling Pianos and brews... time to Shut Up & Sing Sing!</td>\n",
|
||||
" <td>Picture Billy Joel's \\\"\"Piano Man\\\"\" <font color = red>DOPPELGANGER</font> mixed with beer, a rowdy crowd, and comedy - Welcome to Sing Sing! A unique musical experience found in <font color = red>Fairview</font>.\\<font color = red>nope</font>\\nIf you're looking to grab a bite to eat or a beer, come on in! Serving food and brews from Rock <font color = red>Inferior</font> <font color = red>Stout</font>, Sing Sing keeps your tummy full while you listen to two (or more) amazingly talented pianists take your musical requests. They'll play anything you'd like, for tips of course. Wanting to hear Britney Spears? Toto? Duran Duran? <font color = red>Alrighty</font>, they play that... new or old.\\n\\nThe crowd makes the show, so make sure you come ready for a good time. If the crowd is dead, it's harder for the Guys to get a reaction. If you're wanting to have some fun, it can be a GREAT time! It's the perfect place for Birthday parties - especially if you want to embarrass a friend. The guys will bring them up to the pianos and perform a little ditty. For being a good sport, you get the coveted <font color = red>Blackmailing</font> <font color = red>Blackmailing</font> bumper sticker. Now who wouldn't want that?\\n\\nDueling Pianos and brews... time to Shut Up & Sing Sing!</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>When I think BBB... I think the days of simply bringing your bike in for a quick and relatively inexpensive tune-up and a few fixes are long gone. \\n\\nThis review is more for the repair end of BBB. In their defense BBB does appear to carry some amazing brands of bike (ie Colnago) that you just don't find anywhere else in Pittsburgh. \\n\\nAt BBB I was charged $250 for a tune up and a few other things. Granted this included installing a sew up tire (which I can understand would cost approx $50), Swapping out a left side (big ring) shifter on my down tube (this should have cost approx. $20 at most) and installing new bar tape (cost of tape $20 and $20 to install?).. SO WHAT\\\"\"S WITH $140 FOR A <font color = red>TUNE</font> <font color = red>UP</font>? <font color = red>Well</font> the story goes like this:\\n\\nI bring the bike into BBB prior to the nice weather hitting <font color = red>Pittsburgh</font> in hopes of trying what people have said is a great bike shop and getting my OCLV TREK 5900 ready for the season. Turns out I don't hear from these guys. A week goes by ...two weeks...I think that's ok I have two or three other bike I can turn to for a ride. Then I wind up going out of town for a week thinking for sure I'll get a call from them re: my bike is ready to roll...but no dice. So I call. Turns out a screw snapped when the mechanic was re-installing the down tube shifter and it had to be tapped out (is that my fault?). He says \\\"\"Should be ready in a few days\\\"\". So I come in a few days later to this mammoth bill. I ask if I am paying for the labor of taping out the screw? I don't think I ever got a straight answer? I look at the bill and can't see a good breakdown of the charges. Normally I would \\\"\"duke it over\\\"\" a bill like this but I figured...I had somewhere I to be 10 minutes ago and at least I finally have my bike. 
I would expect that for that money my bike could have been stripped down to the frame and totally gone over (overhauled). But it wasn't. Well BBB I'll give you a star because the mechanic did do a good job in that my cycle shifts well and the tape job on the bars looks great (nice wrap). Plus I'll toss in a star for your outstanding selection of high end cycles. Maybe I would have rated BBB higher if I was in the market for a purchase instead of a simple repair?</td>\n",
|
||||
" <td>When I think BBB... I think the days of simply bringing your bike in for a quick and relatively inexpensive tune-up and a few fixes are long gone. \\n\\nThis review is more for the repair end of BBB. In their defense BBB does appear to carry some amazing brands of bike (ie Colnago) that you just don't find anywhere else in Pittsburgh. \\n\\nAt BBB I was charged $250 for a tune up and a few other things. Granted this included installing a sew up tire (which I can understand would cost approx $50), Swapping out a left side (big ring) shifter on my down tube (this should have cost approx. $20 at most) and installing new bar tape (cost of tape $20 and $20 to install?).. SO WHAT\\\"\"S WITH $140 FOR A <font color = green>MELODIES</font> <font color = green>ARRIBA</font>? <font color = green>Too</font> the story goes like this:\\n\\nI bring the bike into BBB prior to the nice weather hitting <font color = green>Philly</font> in hopes of trying what people have said is a great bike shop and getting my OCLV TREK 5900 ready for the season. Turns out I don't hear from these guys. A week goes by ...two weeks...I think that's ok I have two or three other bike I can turn to for a ride. Then I wind up going out of town for a week thinking for sure I'll get a call from them re: my bike is ready to roll...but no dice. So I call. Turns out a screw snapped when the mechanic was re-installing the down tube shifter and it had to be tapped out (is that my fault?). He says \\\"\"Should be ready in a few days\\\"\". So I come in a few days later to this mammoth bill. I ask if I am paying for the labor of taping out the screw? I don't think I ever got a straight answer? I look at the bill and can't see a good breakdown of the charges. Normally I would \\\"\"duke it over\\\"\" a bill like this but I figured...I had somewhere I to be 10 minutes ago and at least I finally have my bike. 
I would expect that for that money my bike could have been stripped down to the frame and totally gone over (overhauled). But it wasn't. Well BBB I'll give you a star because the mechanic did do a good job in that my cycle shifts well and the tape job on the bars looks great (nice wrap). Plus I'll toss in a star for your outstanding selection of high end cycles. Maybe I would have rated BBB higher if I was in the market for a purchase instead of a simple repair?</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>The first time I came here, I waited in line for 20 minutes. When it was my turn, I realized I left my wallet in the car. It hurt so bad, I didn't come back for a year.\\n\\nI can walk to this place from my house- which is dangerous because those biscuits are just <font color = green>OH</font> <font color = green>SO</font> <font color = green>DREAMY</font>. I can't describe them. Just get some.\\n\\nDo I feel guilty about noshing on fabulous Strawberry <font color = green>Napoleons</font> and Jewish Pizza (kind of like a modified, yet TOTALLY delicious fruitcake bar) at 10:15am? Hecks, naw... But they do have quiche and some other breakfast-y items for those who prefer a more traditional approach to your stomach's opening ceremony.\\n\\nJust go early :) They open at 10 on Saturdays. And bring cash...it's easier that way.</td>\n",
|
||||
" <td>The first time I came here, I waited in line for 20 minutes. When it was my turn, I realized I left my wallet in the car. It hurt so bad, I didn't come back for a year.\\n\\nI can walk to this place from my house- which is dangerous because those biscuits are just <font color = red>OOOOH</font> <font color = red>EVEN</font> <font color = red>SULTRY</font>. I can't describe them. Just get some.\\n\\nDo I feel guilty about noshing on fabulous Strawberry <font color = red>Bolsheviks</font> and Jewish Pizza (kind of like a modified, yet TOTALLY delicious fruitcake bar) at 10:15am? Hecks, naw... But they do have quiche and some other breakfast-y items for those who prefer a more traditional approach to your stomach's opening ceremony.\\n\\nJust go early :) They open at 10 on Saturdays. And bring cash...it's easier that way.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>We decided to give brunch a try for our first visit to Casbah. We were surprised by the huge tent covering the \\\"\"outdoor\\\"\" dining area. We opted for an inside table, the interior is somewhat small the tables are close together. For brunch, you are served your choice of drink, appetizer and entree. \\n\\nFor our drinks, <font color = green>BJ</font> had a <font color = green>Bloody</font> <font color = green>Mary</font> and I had a Bellini. We were served a basket of yummie bread and mini muffins. For appetizers, we got a Three <font color = green>Sisters</font> <font color = green>Farms</font> mesclun greens and smoked salmon and truffled potato cake. Very good. For entrees we selected a jumbo lump crab & tomato omelet and the <font color = green>NY</font> strip steak. Very relaxing and tasty meal.</td>\n",
|
||||
" <td>We decided to give brunch a try for our first visit to Casbah. We were surprised by the huge tent covering the \\\"\"outdoor\\\"\" dining area. We opted for an inside table, the interior is somewhat small the tables are close together. For brunch, you are served your choice of drink, appetizer and entree. \\n\\nFor our drinks, <font color = red>COCKSUCKING</font> had a <font color = red>Goddam</font> <font color = red>Newlyweds</font> and I had a Bellini. We were served a basket of yummie bread and mini muffins. For appetizers, we got a Three <font color = red>Brethren</font> <font color = red>Rural</font> mesclun greens and smoked salmon and truffled potato cake. Very good. For entrees we selected a jumbo lump crab & tomato omelet and the <font color = red>BROOKLYN</font> strip steak. Very relaxing and tasty meal.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>And so....the search for a new hair salon continues. <font color = red>Sigh</font>. Don't get me wrong, the cut was a good cut. The salon itself was clean and stylish. The owner, welcoming and friendly. \\n\\nNow what went wrong. The cut was good, but it certainly wasn't what I expected from a salon with the reputation of Izzazu. I wasn't bowled over by my stylist's professionalism either. Don't diss my previous stylist....she rocked....you don't do yourself any favors by knocking someone else. (And come on, I was WAAAYYYY overdue for a cut since I've been driving to Cleveland for a style.) That being said, for $55 (and saving big bucks on gas, tolls, lunch and shopping) the cut was still a deal. But, when I started to sign the charge slip, it said $65, not $55. \\\"\"But,\\\"\" I said, \\\"\"the website said it was $55 for a Master stylist.\\\"\" \\\"\"Oh,\\\"\" the chick at the counter said, \\\"\"that's for Men's cuts.\\\"\" Silly me. \\n\\nSo when I got back to the office, I went online and checked. Nope, it said $55 for a Master Stylist WOMEN's haircut. Hmmmmm. So I called. The chick at the counter now said, \\\"\"Oh, our stylist's charge whatever they feel the cut SHOULD be.\\\"\" What?????? So I quoted the prices to her from the Izzazu website. She changed her tune again. \\\"\"Oh, well.....I'll refund you $10 if you give me your credit card number.\\\"\" Didn't she have my slip with the card number? \\\"\"Sorry, I don't give my credit card number over the phone.\\\"\" \\\"\"Or I can send you a gift certificate.\\\"\" \\\"\"Nope,\\\"\" I said through clenched teeth, \\\"\"I won't be coming back.\\\"\"\\n\\nIt wasn't the cut. It was the bait and switch. I'd gladly have paid it had they been up front and above-board ahead of time. As Judge Judy says, \\\"\"Don't pee on my leg and tell me it's raining.\\\"\" \\n\\nThe search goes on. Or I'll be back in Cleveland in the spring for the next cut!\\n\\nP. S. 
One amusing side note: I checked in at Izzazu when I arrived. Turns out, I'm the Duchess! The Duchess is displeased.</td>\n",
|
||||
" <td>And so....the search for a new hair salon continues. <font color = green>Inhales</font>. Don't get me wrong, the cut was a good cut. The salon itself was clean and stylish. The owner, welcoming and friendly. \\n\\nNow what went wrong. The cut was good, but it certainly wasn't what I expected from a salon with the reputation of Izzazu. I wasn't bowled over by my stylist's professionalism either. Don't diss my previous stylist....she rocked....you don't do yourself any favors by knocking someone else. (And come on, I was WAAAYYYY overdue for a cut since I've been driving to Cleveland for a style.) That being said, for $55 (and saving big bucks on gas, tolls, lunch and shopping) the cut was still a deal. But, when I started to sign the charge slip, it said $65, not $55. \\\"\"But,\\\"\" I said, \\\"\"the website said it was $55 for a Master stylist.\\\"\" \\\"\"Oh,\\\"\" the chick at the counter said, \\\"\"that's for Men's cuts.\\\"\" Silly me. \\n\\nSo when I got back to the office, I went online and checked. Nope, it said $55 for a Master Stylist WOMEN's haircut. Hmmmmm. So I called. The chick at the counter now said, \\\"\"Oh, our stylist's charge whatever they feel the cut SHOULD be.\\\"\" What?????? So I quoted the prices to her from the Izzazu website. She changed her tune again. \\\"\"Oh, well.....I'll refund you $10 if you give me your credit card number.\\\"\" Didn't she have my slip with the card number? \\\"\"Sorry, I don't give my credit card number over the phone.\\\"\" \\\"\"Or I can send you a gift certificate.\\\"\" \\\"\"Nope,\\\"\" I said through clenched teeth, \\\"\"I won't be coming back.\\\"\"\\n\\nIt wasn't the cut. It was the bait and switch. I'd gladly have paid it had they been up front and above-board ahead of time. As Judge Judy says, \\\"\"Don't pee on my leg and tell me it's raining.\\\"\" \\n\\nThe search goes on. Or I'll be back in Cleveland in the spring for the next cut!\\n\\nP. S. 
One amusing side note: I checked in at Izzazu when I arrived. Turns out, I'm the Duchess! The Duchess is displeased.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pd.options.display.max_colwidth = 480 # increase column width so we can actually read the examples\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"\n",
|
||||
"Our constraint seems to have done its job: it filtered out attacks that did not swap out a named entity for another, according to the NLTK named entity detector. However, we can see some problems inherent in the detector: it often thinks the first word of a given sentence is a named entity, probably due to capitalization. \n",
|
||||
"\n",
|
||||
"We did manage to produce some nice adversarial examples! \"Sigh\" became \"Inhales\" and the prediction shifted from negative to positive."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
438
docs/2notebook/Example_0_tensorflow.ipynb
Normal file
@@ -0,0 +1,438 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "ItXfxkxvosLH"
|
||||
},
|
||||
"source": [
|
||||
"# TensorFlow and TextAttack"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_0_tensorflow.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_0_tensorflow.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "WooZ9pGnNJbv"
|
||||
},
|
||||
"source": [
|
||||
"## Training\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The following is code for training a text classification model using TensorFlow (and on top of it, the Keras API). This comes from the Tensorflow documentation ([see here](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub)).\n",
|
||||
"\n",
|
||||
"This cell loads the IMDB dataset (using `tensorflow_datasets`, not `datasets`), initializes a simple classifier, and trains it using Keras."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"colab_type": "code",
|
||||
"id": "2ew7HTbPpCJH",
|
||||
"outputId": "1c1711e1-cf82-4b09-899f-db7c9bb68513"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:absl:No config specified, defaulting to first: imdb_reviews/plain_text\n",
|
||||
"INFO:absl:Overwrite dataset info from restored data version.\n",
|
||||
"INFO:absl:Reusing dataset imdb_reviews (/root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0)\n",
|
||||
"INFO:absl:Constructing tf.data.Dataset for split ['train', 'test'], from /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Version: 2.2.0\n",
|
||||
"Eager mode: True\n",
|
||||
"Hub version: 0.8.0\n",
|
||||
"GPU is NOT AVAILABLE\n",
|
||||
"Model: \"sequential_1\"\n",
|
||||
"_________________________________________________________________\n",
|
||||
"Layer (type) Output Shape Param # \n",
|
||||
"=================================================================\n",
|
||||
"keras_layer_1 (KerasLayer) (None, 20) 400020 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"dense_2 (Dense) (None, 16) 336 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"dense_3 (Dense) (None, 1) 17 \n",
|
||||
"=================================================================\n",
|
||||
"Total params: 400,373\n",
|
||||
"Trainable params: 400,373\n",
|
||||
"Non-trainable params: 0\n",
|
||||
"_________________________________________________________________\n",
|
||||
"Epoch 1/40\n",
|
||||
"30/30 [==============================] - 2s 75ms/step - loss: 0.6652 - accuracy: 0.5760 - val_loss: 0.6214 - val_accuracy: 0.6253\n",
|
||||
"Epoch 2/40\n",
|
||||
"30/30 [==============================] - 2s 72ms/step - loss: 0.5972 - accuracy: 0.6523 - val_loss: 0.5783 - val_accuracy: 0.6646\n",
|
||||
"Epoch 3/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.5533 - accuracy: 0.6951 - val_loss: 0.5424 - val_accuracy: 0.7026\n",
|
||||
"Epoch 4/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.5126 - accuracy: 0.7319 - val_loss: 0.5082 - val_accuracy: 0.7335\n",
|
||||
"Epoch 5/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.4739 - accuracy: 0.7641 - val_loss: 0.4763 - val_accuracy: 0.7590\n",
|
||||
"Epoch 6/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.4385 - accuracy: 0.7911 - val_loss: 0.4478 - val_accuracy: 0.7828\n",
|
||||
"Epoch 7/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.4038 - accuracy: 0.8133 - val_loss: 0.4227 - val_accuracy: 0.7892\n",
|
||||
"Epoch 8/40\n",
|
||||
"30/30 [==============================] - 2s 72ms/step - loss: 0.3712 - accuracy: 0.8327 - val_loss: 0.3987 - val_accuracy: 0.8119\n",
|
||||
"Epoch 9/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.3416 - accuracy: 0.8504 - val_loss: 0.3784 - val_accuracy: 0.8234\n",
|
||||
"Epoch 10/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.3162 - accuracy: 0.8623 - val_loss: 0.3619 - val_accuracy: 0.8410\n",
|
||||
"Epoch 11/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.2914 - accuracy: 0.8761 - val_loss: 0.3476 - val_accuracy: 0.8471\n",
|
||||
"Epoch 12/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.2705 - accuracy: 0.8869 - val_loss: 0.3367 - val_accuracy: 0.8512\n",
|
||||
"Epoch 13/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.2518 - accuracy: 0.8956 - val_loss: 0.3288 - val_accuracy: 0.8495\n",
|
||||
"Epoch 14/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.2351 - accuracy: 0.9043 - val_loss: 0.3208 - val_accuracy: 0.8591\n",
|
||||
"Epoch 15/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.2193 - accuracy: 0.9133 - val_loss: 0.3156 - val_accuracy: 0.8590\n",
|
||||
"Epoch 16/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.2050 - accuracy: 0.9202 - val_loss: 0.3112 - val_accuracy: 0.8651\n",
|
||||
"Epoch 17/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.1923 - accuracy: 0.9276 - val_loss: 0.3114 - val_accuracy: 0.8580\n",
|
||||
"Epoch 18/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.1814 - accuracy: 0.9303 - val_loss: 0.3069 - val_accuracy: 0.8677\n",
|
||||
"Epoch 19/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.1696 - accuracy: 0.9370 - val_loss: 0.3067 - val_accuracy: 0.8663\n",
|
||||
"Epoch 20/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.1594 - accuracy: 0.9419 - val_loss: 0.3091 - val_accuracy: 0.8634\n",
|
||||
"Epoch 21/40\n",
|
||||
"30/30 [==============================] - 2s 74ms/step - loss: 0.1495 - accuracy: 0.9439 - val_loss: 0.3066 - val_accuracy: 0.8748\n",
|
||||
"Epoch 22/40\n",
|
||||
"30/30 [==============================] - 2s 75ms/step - loss: 0.1403 - accuracy: 0.9502 - val_loss: 0.3075 - val_accuracy: 0.8706\n",
|
||||
"Epoch 23/40\n",
|
||||
"30/30 [==============================] - 2s 73ms/step - loss: 0.1323 - accuracy: 0.9539 - val_loss: 0.3114 - val_accuracy: 0.8680\n",
|
||||
"Epoch 24/40\n",
|
||||
"30/30 [==============================] - 2s 73ms/step - loss: 0.1232 - accuracy: 0.9578 - val_loss: 0.3126 - val_accuracy: 0.8716\n",
|
||||
"Epoch 25/40\n",
|
||||
"30/30 [==============================] - 2s 72ms/step - loss: 0.1157 - accuracy: 0.9604 - val_loss: 0.3158 - val_accuracy: 0.8710\n",
|
||||
"Epoch 26/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.1090 - accuracy: 0.9630 - val_loss: 0.3181 - val_accuracy: 0.8725\n",
|
||||
"Epoch 27/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.1017 - accuracy: 0.9665 - val_loss: 0.3234 - val_accuracy: 0.8697\n",
|
||||
"Epoch 28/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.0954 - accuracy: 0.9697 - val_loss: 0.3291 - val_accuracy: 0.8686\n",
|
||||
"Epoch 29/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.0894 - accuracy: 0.9720 - val_loss: 0.3305 - val_accuracy: 0.8717\n",
|
||||
"Epoch 30/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.0833 - accuracy: 0.9753 - val_loss: 0.3362 - val_accuracy: 0.8723\n",
|
||||
"Epoch 31/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.0776 - accuracy: 0.9771 - val_loss: 0.3422 - val_accuracy: 0.8721\n",
|
||||
"Epoch 32/40\n",
|
||||
"30/30 [==============================] - 2s 71ms/step - loss: 0.0726 - accuracy: 0.9798 - val_loss: 0.3484 - val_accuracy: 0.8744\n",
|
||||
"Epoch 33/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.0678 - accuracy: 0.9825 - val_loss: 0.3538 - val_accuracy: 0.8722\n",
|
||||
"Epoch 34/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.0631 - accuracy: 0.9837 - val_loss: 0.3616 - val_accuracy: 0.8736\n",
|
||||
"Epoch 35/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.0586 - accuracy: 0.9861 - val_loss: 0.3680 - val_accuracy: 0.8724\n",
|
||||
"Epoch 36/40\n",
|
||||
"30/30 [==============================] - 2s 69ms/step - loss: 0.0550 - accuracy: 0.9875 - val_loss: 0.3772 - val_accuracy: 0.8742\n",
|
||||
"Epoch 37/40\n",
|
||||
"30/30 [==============================] - 2s 69ms/step - loss: 0.0506 - accuracy: 0.9887 - val_loss: 0.3821 - val_accuracy: 0.8709\n",
|
||||
"Epoch 38/40\n",
|
||||
"30/30 [==============================] - 2s 70ms/step - loss: 0.0471 - accuracy: 0.9901 - val_loss: 0.3907 - val_accuracy: 0.8692\n",
|
||||
"Epoch 39/40\n",
|
||||
"30/30 [==============================] - 2s 68ms/step - loss: 0.0436 - accuracy: 0.9914 - val_loss: 0.3980 - val_accuracy: 0.8703\n",
|
||||
"Epoch 40/40\n",
|
||||
"30/30 [==============================] - 2s 69ms/step - loss: 0.0405 - accuracy: 0.9922 - val_loss: 0.4070 - val_accuracy: 0.8699\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import tensorflow as tf\n",
|
||||
"import tensorflow_hub as hub\n",
|
||||
"import tensorflow_datasets as tfds\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"print(\"Version: \", tf.__version__)\n",
|
||||
"print(\"Eager mode: \", tf.executing_eagerly())\n",
|
||||
"print(\"Hub version: \", hub.__version__)\n",
|
||||
"print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")\n",
|
||||
"\n",
|
||||
"train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n",
|
||||
" batch_size=-1, as_supervised=True)\n",
|
||||
"\n",
|
||||
"train_examples, train_labels = tfds.as_numpy(train_data)\n",
|
||||
"test_examples, test_labels = tfds.as_numpy(test_data)\n",
|
||||
"\n",
|
||||
"model = \"https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1\"\n",
|
||||
"hub_layer = hub.KerasLayer(model, output_shape=[20], input_shape=[], \n",
|
||||
" dtype=tf.string, trainable=True)\n",
|
||||
"hub_layer(train_examples[:3])\n",
|
||||
"\n",
|
||||
"model = tf.keras.Sequential()\n",
|
||||
"model.add(hub_layer)\n",
|
||||
"model.add(tf.keras.layers.Dense(16, activation='relu'))\n",
|
||||
"model.add(tf.keras.layers.Dense(1))\n",
|
||||
"\n",
|
||||
"model.summary()\n",
|
||||
"\n",
|
||||
"x_val = train_examples[:10000]\n",
|
||||
"partial_x_train = train_examples[10000:]\n",
|
||||
"\n",
|
||||
"y_val = train_labels[:10000]\n",
|
||||
"partial_y_train = train_labels[10000:]\n",
|
||||
"\n",
|
||||
"model.compile(optimizer='adam',\n",
|
||||
" loss=tf.losses.BinaryCrossentropy(from_logits=True),\n",
|
||||
" metrics=['accuracy'])\n",
|
||||
"\n",
|
||||
"history = model.fit(partial_x_train,\n",
|
||||
" partial_y_train,\n",
|
||||
" epochs=40,\n",
|
||||
" batch_size=512,\n",
|
||||
" validation_data=(x_val, y_val),\n",
|
||||
" verbose=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "3varlQvrnHqV"
|
||||
},
|
||||
"source": [
|
||||
"## Attacking\n",
|
||||
"\n",
|
||||
"For each input, our classifier outputs a single number that indicates how positive or negative the model finds the input. For binary classification, TextAttack expects two numbers for each input (a score for each class, positive and negative). We have to post-process each output to fit this TextAttack format. To add this post-processing we need to implement a custom model wrapper class (instead of using the built-in `textattack.models.wrappers.TensorFlowModelWrapper`).\n",
|
||||
"\n",
|
||||
"Each `ModelWrapper` must implement a single method, `__call__`, which takes a list of strings and returns a `List`, `np.ndarray`, or `torch.Tensor` of predictions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "fHX3Lo7wU2LM"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"from textattack.models.wrappers import ModelWrapper\n",
|
||||
"\n",
|
||||
"class CustomTensorFlowModelWrapper(ModelWrapper):\n",
|
||||
" def __init__(self, model):\n",
|
||||
" self.model = model\n",
|
||||
"\n",
|
||||
" def __call__(self, text_input_list):\n",
|
||||
" text_array = np.array(text_input_list)\n",
|
||||
" preds = self.model(text_array).numpy()\n",
|
||||
" logits = torch.exp(-torch.tensor(preds))\n",
|
||||
" logits = 1 / (1 + logits)\n",
|
||||
" logits = logits.squeeze(dim=-1)\n",
|
||||
" # Since this model only has a single output (between 0 or 1),\n",
|
||||
" # we have to add the second dimension.\n",
|
||||
" final_preds = torch.stack((1-logits, logits), dim=1)\n",
|
||||
" return final_preds\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "Ku71HuZ4n7ih"
|
||||
},
|
||||
"source": [
|
||||
"Let's test our model wrapper out to make sure it can use our model to return predictions in the correct format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 52
|
||||
},
|
||||
"colab_type": "code",
|
||||
"id": "9hgiLQC4ejmM",
|
||||
"outputId": "132c3be5-fe5e-4be4-ef98-5c2efedc0dfd"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tensor([[0.2745, 0.7255],\n",
|
||||
" [0.0072, 0.9928]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CustomTensorFlowModelWrapper(model)(['I hate you so much', 'I love you'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "-Bs14Hr4n_Sp"
|
||||
},
|
||||
"source": [
|
||||
"Looks good! Now we can initialize our model wrapper with the model we trained and pass it to an instance of `textattack.attack.Attack`. \n",
|
||||
"\n",
|
||||
"We'll use the `PWWSRen2019` recipe as our attack, and attack 10 samples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 780
|
||||
},
|
||||
"colab_type": "code",
|
||||
"id": "07mOE-wLVQDR",
|
||||
"outputId": "e47a099e-c0f6-4c21-8e52-1a437741bc16"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:nlp.builder:Using custom data configuration default\n",
|
||||
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94mrotten_tomatoes\u001b[0m, split \u001b[94mtest\u001b[0m.\n",
|
||||
"\u001b[34;1mtextattack\u001b[0m: Unknown if model of class <class '__main__.CustomTensorFlowModelWrapper'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[92mPositive (60%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"kaufman's script is never especially clever and often is rather pretentious .\n",
|
||||
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[92mPositive (59%)\u001b[0m\n",
|
||||
"\n",
|
||||
"an \u001b[91munfortunate\u001b[0m title for a film that has \u001b[91mnothing\u001b[0m endearing about it .\n",
|
||||
"\n",
|
||||
"an \u001b[92minauspicious\u001b[0m title for a film that has \u001b[92mzip\u001b[0m endearing about it .\n",
|
||||
"\u001b[91mNegative (73%)\u001b[0m --> \u001b[92mPositive (59%)\u001b[0m\n",
|
||||
"\n",
|
||||
"sade achieves the near-impossible : it \u001b[91mturns\u001b[0m the marquis de sade into a dullard .\n",
|
||||
"\n",
|
||||
"sade achieves the near-impossible : it \u001b[92mtour\u001b[0m the marquis de sade into a dullard .\n",
|
||||
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
". . . planos fijos , tomas largas , un ritmo pausado y una sutil observación de sus personajes , sin estridencias ni grandes revelaciones .\n",
|
||||
"\u001b[91mNegative (97%)\u001b[0m --> \u001b[92mPositive (62%)\u001b[0m\n",
|
||||
"\n",
|
||||
"charly comes off as emotionally manipulative and \u001b[91msadly\u001b[0m imitative of innumerable past love story derisions .\n",
|
||||
"\n",
|
||||
"charly comes off as emotionally manipulative and \u001b[92mdeplorably\u001b[0m imitative of innumerable past love story derisions .\n",
|
||||
"\u001b[91mNegative (70%)\u001b[0m --> \u001b[92mPositive (93%)\u001b[0m\n",
|
||||
"\n",
|
||||
"any intellectual \u001b[91marguments\u001b[0m being made about the nature of god are framed in a drama so clumsy , there is a real danger less sophisticated audiences will mistake it for an endorsement of the very things that bean abhors .\n",
|
||||
"\n",
|
||||
"any intellectual \u001b[92mcontention\u001b[0m being made about the nature of god are framed in a drama so clumsy , there is a real danger less sophisticated audiences will mistake it for an endorsement of the very things that bean abhors .\n",
|
||||
"\u001b[92mPositive (97%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"a handsome but unfulfilling suspense drama more suited to a quiet evening on pbs than a night out at an amc .\n",
|
||||
"\u001b[91mNegative (93%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"you will likely prefer to keep on watching .\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (74%)\u001b[0m\n",
|
||||
"\n",
|
||||
"what ensues are \u001b[91mmuch\u001b[0m blood-splattering , \u001b[91mmass\u001b[0m drug-induced \u001b[91mbowel\u001b[0m evacuations , and none-too-funny commentary on the cultural \u001b[91mdistinctions\u001b[0m between americans and \u001b[91mbrits\u001b[0m .\n",
|
||||
"\n",
|
||||
"what ensues are \u001b[92mlots\u001b[0m blood-splattering , \u001b[92mplenty\u001b[0m drug-induced \u001b[92mintestine\u001b[0m evacuations , and none-too-funny commentary on the cultural \u001b[92mdistinction\u001b[0m between americans and \u001b[92mBrits\u001b[0m .\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"a film without surprise geared toward maximum comfort and familiarity .\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_wrapper = CustomTensorFlowModelWrapper(model)\n",
|
||||
"\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.attack_recipes import PWWSRen2019\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset(\"rotten_tomatoes\", None, \"test\", shuffle=True)\n",
|
||||
"attack = PWWSRen2019.build(model_wrapper)\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(dataset, indices=range(10))\n",
|
||||
"for result in results_iterable:\n",
|
||||
" print(result.__str__(color_method='ansi'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "P3L9ccqGoS-J"
|
||||
},
|
||||
"source": [
|
||||
"## Conclusion \n",
|
||||
"\n",
|
||||
"Looks good! We successfully loaded a model, adapted it for TextAttack's `ModelWrapper`, and used that object in an attack. This is basically how you would adapt any model, using TensorFlow or any other library, for use with TextAttack."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "TPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "[TextAttack] tensorflow/keras example",
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
309
docs/2notebook/Example_1_sklearn.ipynb
Normal file
@@ -0,0 +1,309 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## sklearn and TextAttack\n",
|
||||
"\n",
|
||||
"This following code trains two different text classification models using sklearn. Both use logistic regression models: the difference is in the features. \n",
|
||||
"\n",
|
||||
"We will load data using `datasets`, train the models, and attack them using TextAttack."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_1_sklearn.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_1_sklearn.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Training\n",
|
||||
"\n",
|
||||
"This code trains two models: one on bag-of-words statistics (`bow_unstemmed`) and one on tf–idf statistics (`tfidf_unstemmed`). The dataset is the IMDB movie review dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nltk # the Natural Language Toolkit\n",
|
||||
"nltk.download('punkt') # The NLTK tokenizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"...successfully loaded training data\n",
|
||||
"Total length of training data: 25000\n",
|
||||
"...augmented data with len_tokens and average_words\n",
|
||||
"...successfully loaded testing data\n",
|
||||
"Total length of testing data: 25000\n",
|
||||
"...augmented data with len_tokens and average_words\n",
|
||||
"...successfully created the unstemmed BOW data\n",
|
||||
"...successfully created the unstemmed TFIDF data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/jxm/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
||||
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
|
||||
"\n",
|
||||
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
||||
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
||||
"Please also refer to the documentation for alternative solver options:\n",
|
||||
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
||||
" extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Training accuracy of BOW Unstemmed: 1.0\n",
|
||||
"Testing accuracy of BOW Unstemmed: 0.83864\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.83 0.85 0.84 12500\n",
|
||||
" 1 0.85 0.83 0.84 12500\n",
|
||||
"\n",
|
||||
" accuracy 0.84 25000\n",
|
||||
" macro avg 0.84 0.84 0.84 25000\n",
|
||||
"weighted avg 0.84 0.84 0.84 25000\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/jxm/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
||||
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
|
||||
"\n",
|
||||
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
||||
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
||||
"Please also refer to the documentation for alternative solver options:\n",
|
||||
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
||||
" extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Training accuracy of TFIDF Unstemmed: 0.98864\n",
|
||||
"Testing accuracy of TFIDF Unstemmed: 0.85672\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.85 0.87 0.86 12500\n",
|
||||
" 1 0.86 0.85 0.86 12500\n",
|
||||
"\n",
|
||||
" accuracy 0.86 25000\n",
|
||||
" macro avg 0.86 0.86 0.86 25000\n",
|
||||
"weighted avg 0.86 0.86 0.86 25000\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import datasets\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import re\n",
|
||||
"from nltk import word_tokenize\n",
|
||||
"from nltk.stem import PorterStemmer\n",
|
||||
"from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS\n",
|
||||
"from sklearn import preprocessing\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"\n",
|
||||
"# Nice to see additional metrics\n",
|
||||
"from sklearn.metrics import classification_report\n",
|
||||
"\n",
|
||||
"def load_data(dataset_split='train'):\n",
|
||||
" dataset = datasets.load_dataset('imdb')[dataset_split]\n",
|
||||
" # Open and import positve data\n",
|
||||
" df = pd.DataFrame()\n",
|
||||
" df['Review'] = [review['text'] for review in dataset]\n",
|
||||
" df['Sentiment'] = [review['label'] for review in dataset]\n",
|
||||
" # Remove non-alphanumeric characters\n",
|
||||
" df['Review'] = df['Review'].apply(lambda x: re.sub(\"[^a-zA-Z]\", ' ', str(x)))\n",
|
||||
" # Tokenize the training and testing data\n",
|
||||
" df_tokenized = tokenize_review(df)\n",
|
||||
" return df_tokenized\n",
|
||||
"\n",
|
||||
"def tokenize_review(df):\n",
|
||||
" # Tokenize Reviews in training\n",
|
||||
" tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n",
|
||||
" # Create word stems\n",
|
||||
" stemmed_tokens = []\n",
|
||||
" porter = PorterStemmer()\n",
|
||||
" for i in range(len(tokened_reviews)):\n",
|
||||
" stems = [porter.stem(token) for token in tokened_reviews[i]]\n",
|
||||
" stems = ' '.join(stems)\n",
|
||||
" stemmed_tokens.append(stems)\n",
|
||||
" df.insert(1, column='Stemmed', value=stemmed_tokens)\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"def transform_BOW(training, testing, column_name):\n",
|
||||
" vect = CountVectorizer(max_features=10000, ngram_range=(1,3), stop_words=ENGLISH_STOP_WORDS)\n",
|
||||
" vectFit = vect.fit(training[column_name])\n",
|
||||
" BOW_training = vectFit.transform(training[column_name])\n",
|
||||
" BOW_training_df = pd.DataFrame(BOW_training.toarray(), columns=vect.get_feature_names())\n",
|
||||
" BOW_testing = vectFit.transform(testing[column_name])\n",
|
||||
" BOW_testing_Df = pd.DataFrame(BOW_testing.toarray(), columns=vect.get_feature_names())\n",
|
||||
" return vectFit, BOW_training_df, BOW_testing_Df\n",
|
||||
"\n",
|
||||
"def transform_tfidf(training, testing, column_name):\n",
|
||||
" Tfidf = TfidfVectorizer(ngram_range=(1,3), max_features=10000, stop_words=ENGLISH_STOP_WORDS)\n",
|
||||
" Tfidf_fit = Tfidf.fit(training[column_name])\n",
|
||||
" Tfidf_training = Tfidf_fit.transform(training[column_name])\n",
|
||||
" Tfidf_training_df = pd.DataFrame(Tfidf_training.toarray(), columns=Tfidf.get_feature_names())\n",
|
||||
" Tfidf_testing = Tfidf_fit.transform(testing[column_name])\n",
|
||||
" Tfidf_testing_df = pd.DataFrame(Tfidf_testing.toarray(), columns=Tfidf.get_feature_names())\n",
|
||||
" return Tfidf_fit, Tfidf_training_df, Tfidf_testing_df\n",
|
||||
"\n",
|
||||
"def add_augmenting_features(df):\n",
|
||||
" tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n",
|
||||
" # Create feature that measures length of reviews\n",
|
||||
" len_tokens = []\n",
|
||||
" for i in range(len(tokened_reviews)):\n",
|
||||
" len_tokens.append(len(tokened_reviews[i]))\n",
|
||||
" len_tokens = preprocessing.scale(len_tokens)\n",
|
||||
" df.insert(0, column='Lengths', value=len_tokens)\n",
|
||||
"\n",
|
||||
" # Create average word length (training)\n",
|
||||
" Average_Words = [len(x)/(len(x.split())) for x in df['Review'].tolist()]\n",
|
||||
" Average_Words = preprocessing.scale(Average_Words)\n",
|
||||
" df['averageWords'] = Average_Words\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"def build_model(X_train, y_train, X_test, y_test, name_of_test):\n",
|
||||
" log_reg = LogisticRegression(C=30, max_iter=200).fit(X_train, y_train)\n",
|
||||
" y_pred = log_reg.predict(X_test)\n",
|
||||
" print('Training accuracy of '+name_of_test+': ', log_reg.score(X_train, y_train))\n",
|
||||
" print('Testing accuracy of '+name_of_test+': ', log_reg.score(X_test, y_test))\n",
|
||||
" print(classification_report(y_test, y_pred)) # Evaluating prediction ability\n",
|
||||
" return log_reg\n",
|
||||
"\n",
|
||||
"# Load training and test sets\n",
|
||||
"# Loading reviews into DF\n",
|
||||
"df_train = load_data('train')\n",
|
||||
"\n",
|
||||
"print('...successfully loaded training data')\n",
|
||||
"print('Total length of training data: ', len(df_train))\n",
|
||||
"# Add augmenting features\n",
|
||||
"df_train = add_augmenting_features(df_train)\n",
|
||||
"print('...augmented data with len_tokens and average_words')\n",
|
||||
"\n",
|
||||
"# Load test DF\n",
|
||||
"df_test = load_data('test')\n",
|
||||
"\n",
|
||||
"print('...successfully loaded testing data')\n",
|
||||
"print('Total length of testing data: ', len(df_test))\n",
|
||||
"df_test = add_augmenting_features(df_test)\n",
|
||||
"print('...augmented data with len_tokens and average_words')\n",
|
||||
"\n",
|
||||
"# Create unstemmed BOW features for training set\n",
|
||||
"unstemmed_BOW_vect_fit, df_train_bow_unstem, df_test_bow_unstem = transform_BOW(df_train, df_test, 'Review')\n",
|
||||
"print('...successfully created the unstemmed BOW data')\n",
|
||||
"\n",
|
||||
"# Create TfIdf features for training set\n",
|
||||
"unstemmed_tfidf_vect_fit, df_train_tfidf_unstem, df_test_tfidf_unstem = transform_tfidf(df_train, df_test, 'Review')\n",
|
||||
"print('...successfully created the unstemmed TFIDF data')\n",
|
||||
"\n",
|
||||
"# Running logistic regression on dataframes\n",
|
||||
"bow_unstemmed = build_model(df_train_bow_unstem, df_train['Sentiment'], df_test_bow_unstem, df_test['Sentiment'], 'BOW Unstemmed')\n",
|
||||
"\n",
|
||||
"tfidf_unstemmed = build_model(df_train_tfidf_unstem, df_train['Sentiment'], df_test_tfidf_unstem, df_test['Sentiment'], 'TFIDF Unstemmed')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Attacking\n",
|
||||
"\n",
|
||||
"TextAttack includes a built-in `SklearnModelWrapper` that can run attacks on most sklearn models. (If your tokenization strategy is different than above, you may need to subclass `SklearnModelWrapper` to make sure the model inputs & outputs come in the correct format.)\n",
|
||||
"\n",
|
||||
"Once we initialize the model wrapper, we load a few samples from the IMDB dataset and run the `TextFoolerJin2019` attack on our model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.models.wrappers import SklearnModelWrapper\n",
|
||||
"\n",
|
||||
"model_wrapper = SklearnModelWrapper(bow_unstemmed, unstemmed_BOW_vect_fit)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.attack_recipes import TextFoolerJin2019\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset(\"imdb\", None, \"train\")\n",
|
||||
"attack = TextFoolerJin2019.build(model_wrapper)\n",
|
||||
"\n",
|
||||
"results = attack.attack_dataset(dataset, indices=range(20))\n",
|
||||
"for idx, result in enumerate(results):\n",
|
||||
" print(f'Result {idx}:')\n",
|
||||
" print(result.__str__(color_method='ansi'))\n",
|
||||
" print('\\n\\n')\n",
|
||||
"print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"We were able to train a model on the IMDB dataset using `sklearn` and use it in TextAttack by initializing with the `SklearnModelWrapper`. It's that simple!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
322
docs/2notebook/Example_2_allennlp.ipynb
Normal file
@@ -0,0 +1,322 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "JPVBc5ndpFIX"
|
||||
},
|
||||
"source": [
|
||||
"# TextAttack & AllenNLP \n",
|
||||
"\n",
|
||||
"This is an example of testing adversarial attacks from TextAttack on pretrained models provided by AllenNLP. \n",
|
||||
"\n",
|
||||
"In a few lines of code, we load a sentiment analysis model trained on the Stanford Sentiment Treebank and configure it with a TextAttack model wrapper. Then, we initialize the TextBugger attack and run the attack on a few samples from the SST-2 train set.\n",
|
||||
"\n",
|
||||
"For more information on AllenNLP pre-trained models: https://docs.allennlp.org/v1.0.0rc3/tutorials/getting_started/using_pretrained_models/\n",
|
||||
"\n",
|
||||
"For more information about the TextBugger attack: https://arxiv.org/abs/1812.05271"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "AyPMGcz0qLfK"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "gNhZmYq-ek-2"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install allennlp allennlp_models textattack"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "RzOEn-6Shfxu"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install datasets pyarrow transformers --upgrade"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "_br6Xvsif9SA"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from allennlp.predictors import Predictor\n",
|
||||
"import allennlp_models.classification\n",
|
||||
"\n",
|
||||
"import textattack\n",
|
||||
"\n",
|
||||
"class AllenNLPModel(textattack.models.wrappers.ModelWrapper):\n",
|
||||
" def __init__(self):\n",
|
||||
" self.predictor = Predictor.from_path(\"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\")\n",
|
||||
"\n",
|
||||
" def __call__(self, text_input_list):\n",
|
||||
" outputs = []\n",
|
||||
" for text_input in text_input_list:\n",
|
||||
" outputs.append(self.predictor.predict(sentence=text_input))\n",
|
||||
" # For each output, outputs['logits'] contains the logits where\n",
|
||||
" # index 0 corresponds to the positive and index 1 corresponds \n",
|
||||
" # to the negative score. We reverse the outputs (by reverse slicing,\n",
|
||||
" # [::-1]) so that negative comes first and positive comes second.\n",
|
||||
" return [output['logits'][::-1] for output in outputs]\n",
|
||||
"\n",
|
||||
"model_wrapper = AllenNLPModel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"colab_type": "code",
|
||||
"id": "_vt74Gd2hqA6",
|
||||
"outputId": "c317d64d-9499-449a-ef93-f28be0c0d7a2"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94mglue\u001b[0m, subset \u001b[94msst2\u001b[0m, split \u001b[94mtrain\u001b[0m.\n",
|
||||
"\u001b[34;1mtextattack\u001b[0m: Unknown if model of class <class '__main__.AllenNLPModel'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n",
|
||||
"/usr/local/lib/python3.6/dist-packages/textattack/constraints/semantics/sentence_encoders/sentence_encoder.py:149: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
|
||||
" embeddings[len(transformed_texts) :]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result 0:\n",
|
||||
"\u001b[91mNegative (95%)\u001b[0m --> \u001b[92mPositive (93%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mhide\u001b[0m new secretions from the parental units \n",
|
||||
"\n",
|
||||
"\u001b[92mconcealing\u001b[0m new secretions from the parental units \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 1:\n",
|
||||
"\u001b[91mNegative (96%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"contains no wit , only labored gags \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 2:\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"that loves its characters and communicates something rather beautiful about human nature \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 3:\n",
|
||||
"\u001b[92mPositive (82%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"remains utterly satisfied to remain the same throughout \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 4:\n",
|
||||
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[92mPositive (52%)\u001b[0m\n",
|
||||
"\n",
|
||||
"on the \u001b[91mworst\u001b[0m \u001b[91mrevenge-of-the-nerds\u001b[0m clichés the filmmakers could \u001b[91mdredge\u001b[0m up \n",
|
||||
"\n",
|
||||
"on the \u001b[92mpire\u001b[0m \u001b[92mrеvenge-of-the-nerds\u001b[0m clichés the filmmakers could \u001b[92mdragging\u001b[0m up \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 5:\n",
|
||||
"\u001b[91mNegative (99%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"that 's far too tragic to merit such superficial treatment \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 6:\n",
|
||||
"\u001b[92mPositive (98%)\u001b[0m --> \u001b[91mNegative (50%)\u001b[0m\n",
|
||||
"\n",
|
||||
"demonstrates that the \u001b[92mdirector\u001b[0m of such \u001b[92mhollywood\u001b[0m blockbusters as patriot \u001b[92mgames\u001b[0m can still turn out a \u001b[92msmall\u001b[0m , personal \u001b[92mfilm\u001b[0m with an \u001b[92memotional\u001b[0m \u001b[92mwallop\u001b[0m . \n",
|
||||
"\n",
|
||||
"demonstrates that the \u001b[91mdirectors\u001b[0m of such \u001b[91mtinseltown\u001b[0m blockbusters as patriot \u001b[91mgame\u001b[0m can still turn out a \u001b[91mtiny\u001b[0m , personal \u001b[91mmovie\u001b[0m with an \u001b[91msentimental\u001b[0m \u001b[91mbatting\u001b[0m . \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 7:\n",
|
||||
"\u001b[92mPositive (90%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"of saucy \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 8:\n",
|
||||
"\u001b[91mNegative (99%)\u001b[0m --> \u001b[92mPositive (83%)\u001b[0m\n",
|
||||
"\n",
|
||||
"a \u001b[91mdepressed\u001b[0m \u001b[91mfifteen-year-old\u001b[0m 's suicidal poetry \n",
|
||||
"\n",
|
||||
"a \u001b[92mdepr\u001b[0m \u001b[92messed\u001b[0m \u001b[92mfifteeny-ear-old\u001b[0m 's suicidal poetry \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 9:\n",
|
||||
"\u001b[92mPositive (79%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"are more deeply thought through than in most ` right-thinking ' films \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 10:\n",
|
||||
"\u001b[91mNegative (97%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"goes to absurd lengths \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 11:\n",
|
||||
"\u001b[91mNegative (94%)\u001b[0m --> \u001b[92mPositive (51%)\u001b[0m\n",
|
||||
"\n",
|
||||
"for those \u001b[91mmoviegoers\u001b[0m who \u001b[91mcomplain\u001b[0m that ` they do \u001b[91mn't\u001b[0m make movies like they used to anymore \n",
|
||||
"\n",
|
||||
"for those \u001b[92mmovieg\u001b[0m \u001b[92moers\u001b[0m who \u001b[92mcompl\u001b[0m \u001b[92main\u001b[0m that ` they do \u001b[92mnt\u001b[0m make movies like they used to anymore \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 12:\n",
|
||||
"\u001b[91mNegative (92%)\u001b[0m --> \u001b[92mPositive (85%)\u001b[0m\n",
|
||||
"\n",
|
||||
"the part where \u001b[91mnothing\u001b[0m 's happening , \n",
|
||||
"\n",
|
||||
"the part where \u001b[92mnothin\u001b[0m 's happening , \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 13:\n",
|
||||
"\u001b[91mNegative (97%)\u001b[0m --> \u001b[92mPositive (90%)\u001b[0m\n",
|
||||
"\n",
|
||||
"saw how \u001b[91mbad\u001b[0m this movie was \n",
|
||||
"\n",
|
||||
"saw how \u001b[92minclement\u001b[0m this movie was \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 14:\n",
|
||||
"\u001b[91mNegative (73%)\u001b[0m --> \u001b[92mPositive (84%)\u001b[0m\n",
|
||||
"\n",
|
||||
"lend some dignity to a \u001b[91mdumb\u001b[0m story \n",
|
||||
"\n",
|
||||
"lend some dignity to a \u001b[92mdaft\u001b[0m story \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 15:\n",
|
||||
"\u001b[92mPositive (99%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"the greatest musicians \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 16:\n",
|
||||
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[92mPositive (99%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mcold\u001b[0m movie \n",
|
||||
"\n",
|
||||
"\u001b[92mcolder\u001b[0m movie \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 17:\n",
|
||||
"\u001b[92mPositive (87%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"with his usual intelligence and subtlety \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 18:\n",
|
||||
"\u001b[91mNegative (99%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"redundant concept \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Result 19:\n",
|
||||
"\u001b[92mPositive (93%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"swimming is above all about a young woman 's face , and by casting an actress whose face projects that woman 's doubts and yearnings , it succeeds . \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.attack_recipes import TextBuggerLi2018\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset(\"glue\", \"sst2\", \"train\")\n",
|
||||
"attack = TextBuggerLi2018(model_wrapper)\n",
|
||||
"\n",
|
||||
"results = list(attack.attack_dataset(dataset, indices=range(20)))\n",
|
||||
"for idx, result in enumerate(results):\n",
|
||||
" print(f'Result {idx}:')\n",
|
||||
" print(result.__str__(color_method='ansi'))\n",
|
||||
" print('\\n')\n",
|
||||
"print()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "[TextAttack] Model Example: AllenNLP",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
229
docs/2notebook/Example_4_CamemBERT.ipynb
Normal file
@@ -0,0 +1,229 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Multi-language attacks\n",
|
||||
"\n",
|
||||
"TextAttack's four-component framework makes it trivial to run attacks in other languages. In this tutorial, we:\n",
|
||||
"\n",
|
||||
"- Create a model wrapper around Transformers [pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) \n",
|
||||
"- Initialize a pre-trained [CamemBERT](https://camembert-model.fr/) model for sentiment classification\n",
|
||||
"- Load the AlloCiné movie review sentiment classification dataset (from [`datasets`](https://github.com/huggingface/datasets/))\n",
|
||||
"- Load the `pwws` recipe, but use French synonyms from multilingual WordNet (instead of English synonyms)\n",
|
||||
"- Run an adversarial attack on a French language model\n",
|
||||
"\n",
|
||||
"Voilà!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_4_CamemBERT.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_4_CamemBERT.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.attack_recipes import PWWSRen2019\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.models.wrappers import ModelWrapper\n",
|
||||
"from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Quiet TensorFlow.\n",
|
||||
"import os\n",
|
||||
"if \"TF_CPP_MIN_LOG_LEVEL\" not in os.environ:\n",
|
||||
" os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class HuggingFaceSentimentAnalysisPipelineWrapper(ModelWrapper):\n",
|
||||
" \"\"\" Transformers sentiment analysis pipeline returns a list of responses\n",
|
||||
" like \n",
|
||||
" \n",
|
||||
" [{'label': 'POSITIVE', 'score': 0.7817379832267761}]\n",
|
||||
" \n",
|
||||
" We need to convert that to a format TextAttack understands, like\n",
|
||||
" \n",
|
||||
" [[0.218262017, 0.7817379832267761]\n",
|
||||
" \"\"\"\n",
|
||||
" def __init__(self, pipeline):\n",
|
||||
" self.pipeline = pipeline\n",
|
||||
" def __call__(self, text_inputs):\n",
|
||||
" raw_outputs = self.pipeline(text_inputs)\n",
|
||||
" outputs = []\n",
|
||||
" for output in raw_outputs:\n",
|
||||
" score = output['score']\n",
|
||||
" if output['label'] == 'POSITIVE':\n",
|
||||
" outputs.append([1-score, score])\n",
|
||||
" else:\n",
|
||||
" outputs.append([score, 1-score])\n",
|
||||
" return np.array(outputs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"All model checkpoint weights were used when initializing TFCamembertForSequenceClassification.\n",
|
||||
"\n",
|
||||
"All the weights of TFCamembertForSequenceClassification were initialized from the model checkpoint at tblard/tf-allocine.\n",
|
||||
"If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFCamembertForSequenceClassification for predictions without further training.\n",
|
||||
"\u001b[34;1mtextattack\u001b[0m: Unknown if model of class <class '__main__.HuggingFaceSentimentAnalysisPipelineWrapper'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n",
|
||||
"\u001b[34;1mtextattack\u001b[0m: Loading \u001b[94mnlp\u001b[0m dataset \u001b[94mallocine\u001b[0m, split \u001b[94mtest\u001b[0m.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 1 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (53%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mMagnifique\u001b[0m épopée, une \u001b[92mbelle\u001b[0m \u001b[92mhistoire\u001b[0m, touchante avec des acteurs \u001b[92mqui\u001b[0m interprètent \u001b[92mtrès\u001b[0m \u001b[92mbien\u001b[0m leur rôles (Mel Gibson, Heath Ledger, Jason Isaacs...), le genre \u001b[92mde\u001b[0m \u001b[92mfilm\u001b[0m \u001b[92mqui\u001b[0m \u001b[92mse\u001b[0m savoure \u001b[92men\u001b[0m \u001b[92mfamille\u001b[0m! :)\n",
|
||||
"\n",
|
||||
"\u001b[91mbonnard\u001b[0m épopée, une \u001b[91mbeau\u001b[0m \u001b[91mbobard\u001b[0m, touchante avec des acteurs \u001b[91mlequel\u001b[0m interprètent \u001b[91mmême\u001b[0m \u001b[91macceptablement\u001b[0m leur rôles (Mel Gibson, Heath Ledger, Jason Isaacs...), le genre \u001b[91mgale\u001b[0m \u001b[91mpellicule\u001b[0m \u001b[91mOMS\u001b[0m \u001b[91mConcepteur\u001b[0m savoure \u001b[91mun\u001b[0m \u001b[91msyndicat\u001b[0m! :)\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 2 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[91mNegative (94%)\u001b[0m --> \u001b[92mPositive (91%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Je n'ai pas aimé mais pourtant je lui mets \u001b[91m2\u001b[0m étoiles car l'expérience est louable. Rien de conventionnel ici. Une visite E.T. mais jonchée d'idées /- originales. Le soucis, tout ceci avait-il vraiment sa place dans un film de S.F. tirant sur l'horreur ? Voici un film qui, à l'inverse de tant d'autres qui y ont droit, mériterait peut-être un remake.\n",
|
||||
"\n",
|
||||
"Je n'ai pas aimé mais pourtant je lui mets \u001b[92m4\u001b[0m étoiles car l'expérience est louable. Rien de conventionnel ici. Une visite E.T. mais jonchée d'idées /- originales. Le soucis, tout ceci avait-il vraiment sa place dans un film de S.F. tirant sur l'horreur ? Voici un film qui, à l'inverse de tant d'autres qui y ont droit, mériterait peut-être un remake.\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 3 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (85%)\u001b[0m --> \u001b[91mNegative (91%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Un \u001b[92mdessin\u001b[0m animé qui brille par sa féerie et ses chansons.\n",
|
||||
"\n",
|
||||
"Un \u001b[91mbrouillon\u001b[0m animé qui brille par sa féerie et ses chansons.\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 4 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (80%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mSi\u001b[0m c'est là le renouveau du cinéma français, c'est tout \u001b[91mde\u001b[0m même foutrement chiant. \u001b[91mSi\u001b[0m l'objet est \u001b[91mtrès\u001b[0m stylisé et la tension palpable, le film paraît \u001b[91mplutôt\u001b[0m \u001b[91mcreux\u001b[0m.\n",
|
||||
"\n",
|
||||
"\u001b[92maussi\u001b[0m c'est là le renouveau du cinéma français, c'est tout \u001b[92mabolir\u001b[0m même foutrement chiant. \u001b[92mtellement\u001b[0m l'objet est \u001b[92mprodigieusement\u001b[0m stylisé et la tension palpable, le film paraît \u001b[92mpeu\u001b[0m \u001b[92mtrou\u001b[0m.\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 5 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (51%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Et \u001b[91mpourtant\u001b[0m on s’\u001b[91men\u001b[0m Doutait !\u001b[91mSecond\u001b[0m \u001b[91mvolet\u001b[0m \u001b[91mtrès\u001b[0m \u001b[91mmauvais\u001b[0m, sans \u001b[91mfraîcheur\u001b[0m et particulièrement lourdingue. Quel \u001b[91mdommage\u001b[0m.\n",
|
||||
"\n",
|
||||
"Et \u001b[92mfin\u001b[0m on s’\u001b[92mpostérieurement\u001b[0m Doutait !\u001b[92mmoment\u001b[0m \u001b[92mchapitre\u001b[0m \u001b[92mincroyablement\u001b[0m \u001b[92mdifficile\u001b[0m, sans \u001b[92mimpudence\u001b[0m et particulièrement lourdingue. Quel \u001b[92mprix\u001b[0m.\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 6 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (50%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Vous reprendrez bien un peu d'été ? Ce film je le voyais comme un mélange de Rohmer et de Rozier, un film de vacances, j'adore ça, un truc beau et pur qui dit des choses sur la vie, l'amour, les filles, les vacances. Un film qui se regarde en sirotant une boisson fraîche en écoutant les grillons ! Sauf qu'en fait \u001b[92mnon\u001b[0m ! On a un film foutraque au \u001b[92mpossible\u001b[0m qui reprend les codes justement de Rohmer voir Godard, enfin la Nouvelle Vague en général dans sa première partie (jusqu'à même finir sur une partie qui ressemblerait à du Kusturica), mais en beaucoup plus léger et décalé. Le film n'en a rien à foutre de rien, il ose tout, n'a peur de rien et ça c'est \u001b[92mbon\u001b[0m. C'est sans doute le film le plus \u001b[92mdrôle\u001b[0m de 2013, mais tout \u001b[92msimplement\u001b[0m l'un des meilleurs tout \u001b[92mcourt\u001b[0m. Le film qui nous sort des dialogues qui pourraient sortir d'un mauvais Godard (oxymore) sur un ton what the fuckesque… raconte des anecdotes débiles au souhait face caméra… et pourtant, il y a quelque chose dans ce film survolté. Il y a du beau. Ces scènes dans la neige, c'est tendre, c'est beau, ça tranche avec le reste et ça donne du coeur à l'amourette, ça aide à le faire paraître comme une évidence. Et puis on a cette scène que je trouve sublime qui m'a profondément émue, cette scène où le docteur Placenta devient tout à coup sérieux et parle de cette date où chaque année il repense à cette fille et au fait qu'une année de plus le sépare d'elle. C'est horrible comme concept et pourtant tellement vrai et sincère. C'est vraiment \u001b[92mtroublant\u001b[0m. Et encore une fois la scène d'avant est très drôle et là, un petit moment de douceur avant de repartir sur le train effréné ! Et il y a ces fesses… Et le plus beau c'est qu'à la fin Vimala Pons a un petit air d'Anna Karina ! Film fout, étonnant, percutant, drôle, beau, triste ! C'est foutrement cool !\n",
|
||||
"\n",
|
||||
"Vous reprendrez bien un peu d'été ? Ce film je le voyais comme un mélange de Rohmer et de Rozier, un film de vacances, j'adore ça, un truc beau et pur qui dit des choses sur la vie, l'amour, les filles, les vacances. Un film qui se regarde en sirotant une boisson fraîche en écoutant les grillons ! Sauf qu'en fait \u001b[91mniet\u001b[0m ! On a un film foutraque au \u001b[91mexécutable\u001b[0m qui reprend les codes justement de Rohmer voir Godard, enfin la Nouvelle Vague en général dans sa première partie (jusqu'à même finir sur une partie qui ressemblerait à du Kusturica), mais en beaucoup plus léger et décalé. Le film n'en a rien à foutre de rien, il ose tout, n'a peur de rien et ça c'est \u001b[91mlisse\u001b[0m. C'est sans doute le film le plus \u001b[91mridicule\u001b[0m de 2013, mais tout \u001b[91msauf\u001b[0m l'un des meilleurs tout \u001b[91minsuffisant\u001b[0m. Le film qui nous sort des dialogues qui pourraient sortir d'un mauvais Godard (oxymore) sur un ton what the fuckesque… raconte des anecdotes débiles au souhait face caméra… et pourtant, il y a quelque chose dans ce film survolté. Il y a du beau. Ces scènes dans la neige, c'est tendre, c'est beau, ça tranche avec le reste et ça donne du coeur à l'amourette, ça aide à le faire paraître comme une évidence. Et puis on a cette scène que je trouve sublime qui m'a profondément émue, cette scène où le docteur Placenta devient tout à coup sérieux et parle de cette date où chaque année il repense à cette fille et au fait qu'une année de plus le sépare d'elle. C'est horrible comme concept et pourtant tellement vrai et sincère. C'est vraiment \u001b[91mennuyeux\u001b[0m. Et encore une fois la scène d'avant est très drôle et là, un petit moment de douceur avant de repartir sur le train effréné ! Et il y a ces fesses… Et le plus beau c'est qu'à la fin Vimala Pons a un petit air d'Anna Karina ! Film fout, étonnant, percutant, drôle, beau, triste ! C'est foutrement cool !\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 7 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (55%)\u001b[0m --> \u001b[91mNegative (88%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Bon c'est \u001b[92mpas\u001b[0m un grand film mais on passe un bon moment avec ses ado à la recherche de l'orgasme. Y'a que les Allemands pour faire des films aussi barge ! :-)\n",
|
||||
"\n",
|
||||
"Bon c'est \u001b[91mniet\u001b[0m un grand film mais on passe un bon moment avec ses ado à la recherche de l'orgasme. Y'a que les Allemands pour faire des films aussi barge ! :-)\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 8 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (97%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mTerrible\u001b[0m histoire que ces êtres sans amour, ces êtres lisses et frustres qui passent à côté de leur vie. Quelle leçon Monsieur Brizé! Vous avez tout dit, tout filmé jusqu'au moindre détail. \u001b[92mtout\u001b[0m est beau et terrifiant jusqu'à la scène finale qui nous liquéfie, un Vincent Lindon regardant la vie fixement sans oser la toucher ni la prendre dans ses bras, une Hélène Vincent qui attend, qui attend... Mon Dieu Monsieur Brizé, continuez....\n",
|
||||
"\n",
|
||||
"\u001b[91mméprisable\u001b[0m histoire que ces êtres sans amour, ces êtres lisses et frustres qui passent à côté de leur vie. Quelle leçon Monsieur Brizé! Vous avez tout dit, tout filmé jusqu'au moindre détail. \u001b[91mrien\u001b[0m est beau et terrifiant jusqu'à la scène finale qui nous liquéfie, un Vincent Lindon regardant la vie fixement sans oser la toucher ni la prendre dans ses bras, une Hélène Vincent qui attend, qui attend... Mon Dieu Monsieur Brizé, continuez....\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 9 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (54%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Un \u001b[92mtrès\u001b[0m joli \u001b[92mfilm\u001b[0m, qui ressemble à un téléfilm mais qui a le mérite d'être émouvant et proche de ses personnages. Magimel est \u001b[92mvraiment\u001b[0m très \u001b[92mbon\u001b[0m et l'histoire est touchante\n",
|
||||
"\n",
|
||||
"Un \u001b[91mplus\u001b[0m joli \u001b[91mfeuil\u001b[0m, qui ressemble à un téléfilm mais qui a le mérite d'être émouvant et proche de ses personnages. Magimel est \u001b[91mabsolument\u001b[0m très \u001b[91mlisse\u001b[0m et l'histoire est touchante\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 10 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (51%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Mais comment certaines personnes ont pus lui \u001b[91mmettre\u001b[0m 5/5 et \u001b[91mdonc\u001b[0m dire indirectement \u001b[91mque\u001b[0m c'est un chef-d'œuvre ??? Et comment a-t-il fait pour sortir au cinéma et non en DTV ??? C'est pas un film que l'on regarde dans une salle obscur ça, pour moi ça ressemble plus à un téléfilm que l'on visionne un dimanche pluvieux \u001b[91mpour\u001b[0m que les enfants arrête de nous casser les pieds ! \u001b[91mEt\u001b[0m puis, le \u001b[91mscénario\u001b[0m avec le chien que devient le meilleur ami du gosse, c'est du vu et revu (un cliché) ! L'acteur principal est quant à lui aussi agaçant que son personnage ! Les suites ont l'air \u001b[91maussi\u001b[0m mauvaises que Buddy Star des Paniers étant donné que l'histoire est quasiment la même (pour moi ça c'est pas des suites, c'est \u001b[91mplutôt\u001b[0m une succession \u001b[91mde\u001b[0m petits reboots inutiles). \u001b[91mReste\u001b[0m regardable pour les moins de 10 ans (et encore, même moi à 6 ans, je n'aurais pas aimé).\n",
|
||||
"\n",
|
||||
"Mais comment certaines personnes ont pus lui \u001b[92mformuler\u001b[0m 5/5 et \u001b[92md'où\u001b[0m dire indirectement \u001b[92mcar\u001b[0m c'est un chef-d'œuvre ??? Et comment a-t-il fait pour sortir au cinéma et non en DTV ??? C'est pas un film que l'on regarde dans une salle obscur ça, pour moi ça ressemble plus à un téléfilm que l'on visionne un dimanche pluvieux \u001b[92mat\u001b[0m que les enfants arrête de nous casser les pieds ! \u001b[92mpoids\u001b[0m puis, le \u001b[92mfigure\u001b[0m avec le chien que devient le meilleur ami du gosse, c'est du vu et revu (un cliché) ! L'acteur principal est quant à lui aussi agaçant que son personnage ! Les suites ont l'air \u001b[92mmaintenant\u001b[0m mauvaises que Buddy Star des Paniers étant donné que l'histoire est quasiment la même (pour moi ça c'est pas des suites, c'est \u001b[92mpeu\u001b[0m une succession \u001b[92mdu\u001b[0m petits reboots inutiles). \u001b[92mrelique\u001b[0m regardable pour les moins de 10 ans (et encore, même moi à 6 ans, je n'aurais pas aimé).\n",
|
||||
"\n",
|
||||
"xxxxxxxxxxxxxxxxxxxx Result 11 xxxxxxxxxxxxxxxxxxxx\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (53%)\u001b[0m\n",
|
||||
"\n",
|
||||
"LE film de mon enfance , il a un peu vieilli maintenant , mais l'ours reste toujours impressionnant, il est bien réel contrairement au film 'the Revenant\" . Ce n'est surement pas un chef-d'œuvre mais je le trouve bien réalise , captivant , beaux et accompagné d'une superbe musique. Le gros points noir c'est la facilité qu'ils ont a créer des peaux , des pièges , et rester longtemps sans manger....mais on oublie assez vite ces erreurs grâce a un casting sympathique et aux décors naturels. Un \u001b[92mvieux\u001b[0m film mais qui reste \u001b[92mtoujours\u001b[0m un \u001b[92mbon\u001b[0m \u001b[92mfilm\u001b[0m.\n",
|
||||
"\n",
|
||||
"LE film de mon enfance , il a un peu vieilli maintenant , mais l'ours reste toujours impressionnant, il est bien réel contrairement au film 'the Revenant\" . Ce n'est surement pas un chef-d'œuvre mais je le trouve bien réalise , captivant , beaux et accompagné d'une superbe musique. Le gros points noir c'est la facilité qu'ils ont a créer des peaux , des pièges , et rester longtemps sans manger....mais on oublie assez vite ces erreurs grâce a un casting sympathique et aux décors naturels. Un \u001b[91mbancal\u001b[0m film mais qui reste \u001b[91mdéfinitivement\u001b[0m un \u001b[91mpassable\u001b[0m \u001b[91mpellicule\u001b[0m.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create the model: a French sentiment analysis model.\n",
|
||||
"# see https://github.com/TheophileBlard/french-sentiment-analysis-with-bert\n",
|
||||
"model = TFAutoModelForSequenceClassification.from_pretrained(\"tblard/tf-allocine\")\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(\"tblard/tf-allocine\")\n",
|
||||
"pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)\n",
|
||||
"\n",
|
||||
"model_wrapper = HuggingFaceSentimentAnalysisPipelineWrapper(pipeline)\n",
|
||||
"\n",
|
||||
"# Create the recipe: PWWS uses a WordNet transformation.\n",
|
||||
"recipe = PWWSRen2019.build(model_wrapper)\n",
|
||||
"#\n",
|
||||
"# WordNet defaults to english. Set the default language to French ('fra')\n",
|
||||
"#\n",
|
||||
"# See \"Building a free French wordnet from multilingual resources\", \n",
|
||||
"# E. L. R. A. (ELRA) (ed.), \n",
|
||||
"# Proceedings of the Sixth International Language Resources and Evaluation (LREC’08).\n",
|
||||
"recipe.transformation.language = 'fra'\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset('allocine', split='test')\n",
|
||||
"for idx, result in enumerate(recipe.attack_dataset(dataset, indices=range(11))):\n",
|
||||
" print(('x' * 20), f'Result {idx+1}', ('x' * 20))\n",
|
||||
" print(result.__str__(color_method='ansi'))\n",
|
||||
" print()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "torch",
|
||||
"language": "python",
|
||||
"name": "build_central"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
144
docs/3recipes/attack_recipes.rst
Normal file
@@ -0,0 +1,144 @@
|
||||
Attack Recipes
|
||||
===============
|
||||
|
||||
We provide a number of pre-built attack recipes, which correspond to attacks from the literature. To run an attack recipe from the command line, run::
|
||||
|
||||
textattack attack --recipe [recipe_name]
|
||||
|
||||
To initialize an attack in Python script, use::
|
||||
|
||||
<recipe name>.build(model_wrapper)
|
||||
|
||||
For example, ``attack = InputReductionFeng2018.build(model)`` creates `attack`, an object of type ``Attack`` with the goal function, transformation, constraints, and search method specified in that paper. This object can then be used just like any other attack; for example, by calling ``attack.attack_dataset``.
|
||||
|
||||
TextAttack supports the following attack recipes (each recipe's documentation contains a link to the corresponding paper):
|
||||
|
||||
.. contents:: :local:
|
||||
|
||||
|
||||
Attacks on classification models
|
||||
#################################
|
||||
|
||||
|
||||
Alzantot Genetic Algorithm (Generating Natural Language Adversarial Examples)
|
||||
***********************************************************************************
|
||||
|
||||
.. warning::
|
||||
This attack uses a very slow language model. Consider using the ``fast-alzantot``
|
||||
recipe instead.
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Faster Alzantot Genetic Algorithm (Certified Robustness to Adversarial Word Substitutions)
|
||||
**********************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.faster_genetic_algorithm_jia_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
BAE (BAE: BERT-Based Adversarial Examples)
|
||||
*********************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bae_garg_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
BERT-Attack: (BERT-Attack: Adversarial Attack Against BERT Using BERT)
|
||||
*************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bert_attack_li_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
CheckList: (Beyond Accuracy: Behavioral Testing of NLP models with CheckList)
|
||||
*******************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.checklist_ribeiro_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
DeepWordBug (Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers)
|
||||
******************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
HotFlip (HotFlip: White-Box Adversarial Examples for Text Classification)
|
||||
******************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Improved Genetic Algorithm (Natural Language Adversarial Attacks and Defenses in Word Level)
|
||||
*************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.iga_wang_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Input Reduction (Pathologies of Neural Models Make Interpretations Difficult)
|
||||
************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.input_reduction_feng_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
Kuleshov (Adversarial Examples for Natural Language Classification Problems)
|
||||
******************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.kuleshov_2017
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Particle Swarm Optimization (Word-level Textual Adversarial Attacking as Combinatorial Optimization)
|
||||
*****************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pso_zang_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
PWWS (Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency)
|
||||
***************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pwws_ren_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
TextFooler (Is BERT Really Robust? A Strong Baseline for Natural Language Attack on Text Classification and Entailment)
|
||||
************************************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
TextBugger (TextBugger: Generating Adversarial Text Against Real-world Applications)
|
||||
****************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textbugger_li_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Attacks on sequence-to-sequence models
|
||||
############################################
|
||||
|
||||
MORPHEUS (It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations)
|
||||
*****************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.morpheus_tan_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
Seq2Sick (Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples)
|
||||
*********************************************************************************************************
|
||||
|
||||
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
|
||||
:members:
|
||||
:noindex:
|
||||
9
docs/3recipes/augmenter_recipes.rst
Normal file
@@ -0,0 +1,9 @@
|
||||
======================
|
||||
Augmenter Recipes
|
||||
======================
|
||||
|
||||
Transformations and constraints can be used for simple NLP data augmentations. Here is a list of recipes for NLP data augmentations
|
||||
|
||||
.. automodule:: textattack.augmentation.recipes
|
||||
:members:
|
||||
:noindex:
|
||||
@@ -1,6 +1,9 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# First run "sphinx-apidoc -f -o apidoc -d 6 -E -T -M ../textattack"
|
||||
# Then run "make html"
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
|
||||
26
docs/_static/css/custom.css
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
div.wy-side-nav-search .version {
|
||||
color: #404040;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
nav.wy-nav-top {
|
||||
background: #AA2396;
|
||||
}
|
||||
|
||||
div.wy-nav-content {
|
||||
max-width: 1000px;
|
||||
}
|
||||
|
||||
span.caption-text {
|
||||
color: #cc4878;
|
||||
}
|
||||
|
||||
/* Change header fonts to Cambria */
|
||||
.rst-content .toctree-wrapper>p.caption, h1, h2, h3, h4, h5, h6, legend {
|
||||
font-family: 'Cambria', serif;
|
||||
}
|
||||
|
||||
/* Change non-header default fonts to Helvetica */
|
||||
/** {
|
||||
font-family: 'Helvetica', sans-serif;
|
||||
}*/
|
||||
BIN
docs/_static/imgs/benchmark/search-example.pdf
vendored
Normal file
BIN
docs/_static/imgs/benchmark/search-fig1.png
vendored
Normal file
|
After Width: | Height: | Size: 807 KiB |
BIN
docs/_static/imgs/benchmark/search-fig2.png
vendored
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
docs/_static/imgs/benchmark/search-table1.png
vendored
Normal file
|
After Width: | Height: | Size: 220 KiB |
BIN
docs/_static/imgs/benchmark/search-table2.png
vendored
Normal file
|
After Width: | Height: | Size: 290 KiB |
BIN
docs/_static/imgs/benchmark/search-table31.png
vendored
Normal file
|
After Width: | Height: | Size: 427 KiB |
BIN
docs/_static/imgs/benchmark/search-table32.png
vendored
Normal file
|
After Width: | Height: | Size: 315 KiB |
BIN
docs/_static/imgs/intro/ae_papers.png
vendored
Normal file
|
After Width: | Height: | Size: 16 KiB |
BIN
docs/_static/imgs/intro/mr_aes.png
vendored
Normal file
|
After Width: | Height: | Size: 65 KiB |
BIN
docs/_static/imgs/intro/mr_aes_table.png
vendored
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
docs/_static/imgs/intro/pig_airliner.png
vendored
Normal file
|
After Width: | Height: | Size: 211 KiB |
BIN
docs/_static/imgs/intro/textattack_components.png
vendored
Normal file
|
After Width: | Height: | Size: 123 KiB |
115
docs/apidoc/textattack.attack_recipes.rst
Normal file
@@ -0,0 +1,115 @@
|
||||
textattack.attack\_recipes package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.attack_recipes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.attack_recipe
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bae_garg_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bert_attack_li_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.checklist_ribeiro_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.faster_genetic_algorithm_jia_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.iga_wang_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.input_reduction_feng_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.kuleshov_2017
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.morpheus_tan_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pruthi_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pso_zang_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pwws_ren_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textbugger_li_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
38
docs/apidoc/textattack.attack_results.rst
Normal file
@@ -0,0 +1,38 @@
|
||||
textattack.attack\_results package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.attack_results
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.failed_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.maximized_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.skipped_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.successful_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
20
docs/apidoc/textattack.augmentation.rst
Normal file
@@ -0,0 +1,20 @@
|
||||
textattack.augmentation package
|
||||
===============================
|
||||
|
||||
.. automodule:: textattack.augmentation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.augmentation.augmenter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.augmentation.recipes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
44
docs/apidoc/textattack.commands.attack.rst
Normal file
@@ -0,0 +1,44 @@
|
||||
textattack.commands.attack package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.commands.attack
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.attack_args
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.attack_args_helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.attack_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.attack_resume_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.run_attack_parallel
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack.run_attack_single_threaded
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
14
docs/apidoc/textattack.commands.eval_model.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
textattack.commands.eval\_model package
|
||||
=======================================
|
||||
|
||||
.. automodule:: textattack.commands.eval_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.eval_model.eval_model_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
54
docs/apidoc/textattack.commands.rst
Normal file
@@ -0,0 +1,54 @@
|
||||
textattack.commands package
|
||||
===========================
|
||||
|
||||
.. automodule:: textattack.commands
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.commands.attack
|
||||
textattack.commands.eval_model
|
||||
textattack.commands.train_model
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.augment
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.benchmark_recipe
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.list_things
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.peek_dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.textattack_cli
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.textattack_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
26
docs/apidoc/textattack.commands.train_model.rst
Normal file
@@ -0,0 +1,26 @@
|
||||
textattack.commands.train\_model package
|
||||
========================================
|
||||
|
||||
.. automodule:: textattack.commands.train_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.train_model.run_training
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.train_model.train_args_helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.train_model.train_model_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,32 @@
|
||||
textattack.constraints.grammaticality.language\_models.google\_language\_model package
|
||||
======================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.alzantot_goog_lm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.google_language_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_data_utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,32 @@
|
||||
textattack.constraints.grammaticality.language\_models.learning\_to\_write package
|
||||
==================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.adaptive_softmax
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.language_model_helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.learning_to_write
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.rnn_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,29 @@
|
||||
textattack.constraints.grammaticality.language\_models package
|
||||
==============================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality.language_models.google_language_model
|
||||
textattack.constraints.grammaticality.language_models.learning_to_write
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.gpt2
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.language_model_constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
34
docs/apidoc/textattack.constraints.grammaticality.rst
Normal file
@@ -0,0 +1,34 @@
|
||||
textattack.constraints.grammaticality package
|
||||
=============================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality.language_models
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.cola
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_tool
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.part_of_speech
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
38
docs/apidoc/textattack.constraints.overlap.rst
Normal file
@@ -0,0 +1,38 @@
|
||||
textattack.constraints.overlap package
|
||||
======================================
|
||||
|
||||
.. automodule:: textattack.constraints.overlap
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.bleu_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.chrf_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.levenshtein_edit_distance
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.max_words_perturbed
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.meteor_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
38
docs/apidoc/textattack.constraints.pre_transformation.rst
Normal file
@@ -0,0 +1,38 @@
|
||||
textattack.constraints.pre\_transformation package
|
||||
==================================================
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.input_column_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.max_word_index_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.min_word_length
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.repeat_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.stopword_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
31
docs/apidoc/textattack.constraints.rst
Normal file
@@ -0,0 +1,31 @@
|
||||
textattack.constraints package
|
||||
==============================
|
||||
|
||||
.. automodule:: textattack.constraints
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality
|
||||
textattack.constraints.overlap
|
||||
textattack.constraints.pre_transformation
|
||||
textattack.constraints.semantics
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation_constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
28
docs/apidoc/textattack.constraints.semantics.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
textattack.constraints.semantics package
|
||||
========================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.semantics.sentence_encoders
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.bert_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.word_embedding_distance
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,14 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.bert package
|
||||
================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.bert
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.bert.bert
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,20 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.infer\_sent package
|
||||
=======================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,30 @@
|
||||
textattack.constraints.semantics.sentence\_encoders package
|
||||
===========================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.semantics.sentence_encoders.bert
|
||||
textattack.constraints.semantics.sentence_encoders.infer_sent
|
||||
textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.thought_vector
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,20 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.universal\_sentence\_encoder package
|
||||
========================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.multilingual_universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
28
docs/apidoc/textattack.datasets.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
textattack.datasets package
|
||||
===========================
|
||||
|
||||
.. automodule:: textattack.datasets
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.datasets.translation
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.huggingface_dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
14
docs/apidoc/textattack.datasets.translation.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
textattack.datasets.translation package
|
||||
=======================================
|
||||
|
||||
.. automodule:: textattack.datasets.translation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.translation.ted_multi
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
26
docs/apidoc/textattack.goal_function_results.rst
Normal file
@@ -0,0 +1,26 @@
|
||||
textattack.goal\_function\_results package
|
||||
==========================================
|
||||
|
||||
.. automodule:: textattack.goal_function_results
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.classification_goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.text_to_text_goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
32
docs/apidoc/textattack.goal_functions.classification.rst
Normal file
@@ -0,0 +1,32 @@
|
||||
textattack.goal\_functions.classification package
|
||||
=================================================
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.classification_goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.input_reduction
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.targeted_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.untargeted_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
23
docs/apidoc/textattack.goal_functions.rst
Normal file
@@ -0,0 +1,23 @@
|
||||
textattack.goal\_functions package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.goal_functions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.goal_functions.classification
|
||||
textattack.goal_functions.text
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
26
docs/apidoc/textattack.goal_functions.text.rst
Normal file
@@ -0,0 +1,26 @@
|
||||
textattack.goal\_functions.text package
|
||||
=======================================
|
||||
|
||||
.. automodule:: textattack.goal_functions.text
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.minimize_bleu
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.non_overlapping_output
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.text_to_text_goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
44
docs/apidoc/textattack.loggers.rst
Normal file
@@ -0,0 +1,44 @@
|
||||
textattack.loggers package
|
||||
==========================
|
||||
|
||||
.. automodule:: textattack.loggers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.attack_log_manager
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.csv_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.file_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.visdom_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.weights_and_biases_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
43
docs/apidoc/textattack.models.helpers.rst
Normal file
@@ -0,0 +1,43 @@
|
||||
textattack.models.helpers package
|
||||
=================================
|
||||
|
||||
.. automodule:: textattack.models.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.bert_for_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.glove_embedding_layer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.lstm_for_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.t5_for_text_to_text
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.word_cnn_for_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
17
docs/apidoc/textattack.models.rst
Normal file
@@ -0,0 +1,17 @@
|
||||
textattack.models package
|
||||
=========================
|
||||
|
||||
.. automodule:: textattack.models
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.models.helpers
|
||||
textattack.models.tokenizers
|
||||
textattack.models.wrappers
|
||||
26
docs/apidoc/textattack.models.tokenizers.rst
Normal file
@@ -0,0 +1,26 @@
|
||||
textattack.models.tokenizers package
|
||||
====================================
|
||||
|
||||
.. automodule:: textattack.models.tokenizers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.models.tokenizers.auto_tokenizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.tokenizers.glove_tokenizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.tokenizers.t5_tokenizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
38
docs/apidoc/textattack.models.wrappers.rst
Normal file
@@ -0,0 +1,38 @@
|
||||
textattack.models.wrappers package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.models.wrappers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.models.wrappers.huggingface_model_wrapper
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.wrappers.model_wrapper
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.wrappers.pytorch_model_wrapper
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.wrappers.sklearn_model_wrapper
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.wrappers.tensorflow_model_wrapper
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
25
docs/apidoc/textattack.rst
Normal file
@@ -0,0 +1,25 @@
|
||||
Complete API Reference
|
||||
========================
|
||||
|
||||
.. automodule:: textattack
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.attack_recipes
|
||||
textattack.attack_results
|
||||
textattack.augmentation
|
||||
textattack.commands
|
||||
textattack.constraints
|
||||
textattack.datasets
|
||||
textattack.goal_function_results
|
||||
textattack.goal_functions
|
||||
textattack.loggers
|
||||
textattack.models
|
||||
textattack.search_methods
|
||||
textattack.shared
|
||||
textattack.transformations
|
||||
62
docs/apidoc/textattack.search_methods.rst
Normal file
@@ -0,0 +1,62 @@
|
||||
textattack.search\_methods package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.search_methods
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.alzantot_genetic_algorithm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.beam_search
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.genetic_algorithm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.greedy_search
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.greedy_word_swap_wir
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.improved_genetic_algorithm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.particle_swarm_optimization
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.population_based_search
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.search_methods.search_method
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
52
docs/apidoc/textattack.shared.rst
Normal file
@@ -0,0 +1,52 @@
|
||||
textattack.shared package
|
||||
=========================
|
||||
|
||||
.. automodule:: textattack.shared
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.shared.utils
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.attack
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.attacked_text
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.checkpoint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.data
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.validators
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.word_embedding
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
32
docs/apidoc/textattack.shared.utils.rst
Normal file
@@ -0,0 +1,32 @@
|
||||
textattack.shared.utils package
|
||||
===============================
|
||||
|
||||
.. automodule:: textattack.shared.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.utils.install
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.utils.misc
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.utils.strings
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.shared.utils.tensor
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
146
docs/apidoc/textattack.transformations.rst
Normal file
@@ -0,0 +1,146 @@
|
||||
textattack.transformations package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.transformations
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.composite_transformation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.random_synonym_insertion
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.transformation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_deletion
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_change_location
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_change_name
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_change_number
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_contract
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_embedding
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_extend
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_gradient_based
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_homoglyph_swap
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_hownet
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_inflections
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_masked_lm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_neighboring_character_swap
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_qwerty
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_deletion
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_insertion
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_substitution
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_word
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_wordnet
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -1,8 +0,0 @@
|
||||
========
|
||||
Attack
|
||||
========
|
||||
|
||||
The ``Attack`` class represents an adversarial attack composed of a goal function, search method, transformation, and constraints.
|
||||
|
||||
.. automodule:: textattack.shared.attack
|
||||
:members:
|
||||
@@ -1,54 +0,0 @@
|
||||
Attack Recipes
|
||||
===============
|
||||
|
||||
We provide a number of pre-built attack recipes. To run an attack recipe, run::
|
||||
|
||||
python -m textattack --recipe [recipe_name]
|
||||
|
||||
TextFooler
|
||||
###########
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
|
||||
:members:
|
||||
|
||||
TextFooler-adjusted
|
||||
#####################
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019_adjusted
|
||||
:members:
|
||||
|
||||
Alzantot
|
||||
###########
|
||||
|
||||
.. automodule:: textattack.attack_recipes.alzantot_2018
|
||||
:members:
|
||||
|
||||
Alzantot-adjusted
|
||||
###################
|
||||
|
||||
.. automodule:: textattack.attack_recipes.alzantot_2018_adjusted
|
||||
:members:
|
||||
|
||||
DeepWordBug
|
||||
############
|
||||
|
||||
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
|
||||
:members:
|
||||
|
||||
Hotflip
|
||||
###########
|
||||
|
||||
.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
|
||||
:members:
|
||||
|
||||
Kuleshov
|
||||
###########
|
||||
|
||||
.. automodule:: textattack.attack_recipes.kuleshov_2017
|
||||
:members:
|
||||
|
||||
Seq2Sick
|
||||
###########
|
||||
|
||||
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
|
||||
:members:
|
||||
@@ -1,23 +0,0 @@
|
||||
===================
|
||||
Attack Result
|
||||
===================
|
||||
|
||||
The result of an attack's attempt to find a successful adversarial perturbation.
|
||||
|
||||
.. automodule:: textattack.attack_results.attack_result
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.attack_results.successful_attack_result
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.attack_results.failed_attack_result
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.attack_results.skipped_attack_result
|
||||
:members:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,162 +0,0 @@
|
||||
.. _constraint:
|
||||
|
||||
=============
|
||||
Constraint
|
||||
=============
|
||||
|
||||
Constraints determine whether a given transformation is valid. Since transformations do not perfectly preserve semantics or grammaticality, constraints can increase the likelihood that the resulting transformation preserves these qualities. All constraints are subclasses of the ``Constraint`` abstract class, and must implement at least one of ``__call__`` or ``call_many``.
|
||||
|
||||
We split constraints into three main categories.
|
||||
|
||||
:ref:`Semantics`: Based on the meaning of the input and perturbation.
|
||||
|
||||
:ref:`Grammaticality`: Based on syntactic properties like part-of-speech and grammar.
|
||||
|
||||
:ref:`Overlap`: Based on character-based properties, like edit distance.
|
||||
|
||||
A fourth type of constraint restricts the search method from exploring certain parts of the search space:
|
||||
|
||||
:ref:`pre_transformation`: Based on the input and index of word replacement.
|
||||
|
||||
.. automodule:: textattack.constraints.constraint
|
||||
:special-members: __call__
|
||||
:private-members:
|
||||
:members:
|
||||
|
||||
.. _semantics:
|
||||
|
||||
Semantics
|
||||
----------
|
||||
|
||||
Semantic constraints determine if a transformation is valid based on similarity
|
||||
of the semantics of the original input and the transformed input.
|
||||
|
||||
Word Embedding Distance
|
||||
########################
|
||||
.. automodule:: textattack.constraints.semantics.word_embedding_distance
|
||||
:members:
|
||||
|
||||
Sentence Encoders
|
||||
##################
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_encoder
|
||||
:members:
|
||||
|
||||
Thought Vectors
|
||||
****************
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.thought_vector
|
||||
:members:
|
||||
|
||||
BERT
|
||||
*****
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.bert.bert
|
||||
:members:
|
||||
|
||||
InferSent
|
||||
***********
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent_model
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent
|
||||
:members:
|
||||
|
||||
Universal Sentence Encoder
|
||||
***************************
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.universal_sentence_encoder
|
||||
:members:
|
||||
|
||||
|
||||
.. _grammaticality:
|
||||
|
||||
Grammaticality
|
||||
-----------------
|
||||
|
||||
Grammaticality constraints determine if a transformation is valid based on
|
||||
syntactic properties of the perturbation.
|
||||
|
||||
Language Models
|
||||
################
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.language_model_constraint
|
||||
:members:
|
||||
|
||||
GPT-2
|
||||
*******
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.gpt2
|
||||
:members:
|
||||
|
||||
|
||||
Google 1-Billion Words Language Model
|
||||
**************************************
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.google_language_model
|
||||
:members:
|
||||
|
||||
LanguageTool Grammar Checker
|
||||
##############################
|
||||
.. automodule:: textattack.constraints.grammaticality.language_tool
|
||||
:members:
|
||||
|
||||
Part of Speech
|
||||
###############
|
||||
.. automodule:: textattack.constraints.grammaticality.part_of_speech
|
||||
:members:
|
||||
|
||||
.. _overlap:
|
||||
|
||||
Overlap
|
||||
-----------
|
||||
|
||||
Overlap constraints determine if a transformation is valid based on character-level analysis.
|
||||
|
||||
BLEU Score
|
||||
############
|
||||
.. automodule:: textattack.constraints.overlap.bleu_score
|
||||
:members:
|
||||
|
||||
chrF Score
|
||||
###########
|
||||
.. automodule:: textattack.constraints.overlap.chrf_score
|
||||
:members:
|
||||
|
||||
Levenshtein Edit Distance
|
||||
############################
|
||||
.. automodule:: textattack.constraints.overlap.levenshtein_edit_distance
|
||||
:members:
|
||||
|
||||
METEOR Score
|
||||
#############
|
||||
.. automodule:: textattack.constraints.overlap.meteor_score
|
||||
:members:
|
||||
|
||||
Maximum Words Perturbed
|
||||
###########################
|
||||
.. automodule:: textattack.constraints.overlap.max_words_perturbed
|
||||
:members:
|
||||
|
||||
.. _pre_transformation:
|
||||
|
||||
Pre-Transformation
|
||||
------------------
|
||||
|
||||
Pre-transformation constraints determine if a transformation is valid based on
|
||||
only the original input and the position of the replacement. These constraints
|
||||
are applied before the transformation is even called. For example, these
|
||||
constraints can prevent search methods from swapping words at the same index
|
||||
twice, or from replacing stopwords.
|
||||
|
||||
Pre-Transformation Constraint
|
||||
#############################
|
||||
.. automodule:: textattack.constraints.pre_transformation.pre_transformation_constraint
|
||||
:special-members: __call__
|
||||
:private-members:
|
||||
:members:
|
||||
|
||||
Stopword Modification
|
||||
########################
|
||||
.. automodule:: textattack.constraints.pre_transformation.stopword_modification
|
||||
:members:
|
||||
|
||||
Repeat Modification
|
||||
########################
|
||||
.. automodule:: textattack.constraints.pre_transformation.repeat_modification
|
||||
:members:
|
||||
@@ -1,30 +0,0 @@
|
||||
.. _goal_function:
|
||||
|
||||
================
|
||||
Goal Function
|
||||
================
|
||||
|
||||
Goal functions determine if an attack has been successful.
|
||||
|
||||
.. automodule:: textattack.goal_functions.goal_function
|
||||
:members:
|
||||
:private-members:
|
||||
|
||||
Classification
|
||||
################
|
||||
.. automodule:: textattack.goal_functions.classification.classification_goal_function
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.targeted_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.untargeted_classification
|
||||
:members:
|
||||
|
||||
Text to Text
|
||||
##############
|
||||
.. automodule:: textattack.goal_functions.text.text_to_text_goal_function
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.non_overlapping_output
|
||||
:members:
|
||||
@@ -1,14 +0,0 @@
|
||||
=======================
|
||||
Goal Function Result
|
||||
=======================
|
||||
|
||||
Goal function results report the result of a goal function evaluation, indicating whether an attack succeeded for a given example.
|
||||
|
||||
.. automodule:: textattack.goal_function_results.goal_function_result
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.goal_function_results.classification_goal_function_result
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.goal_function_results.text_to_text_goal_function_result
|
||||
:members:
|
||||
@@ -1,36 +0,0 @@
|
||||
=====================
|
||||
Search Method
|
||||
=====================
|
||||
|
||||
Search methods explore the transformation space in an attempt to find a successful attack as determined by a :ref:`goal_function` and list of :ref:`constraint`\s.
|
||||
|
||||
.. automodule:: textattack.search_methods.search_method
|
||||
:special-members: __call__
|
||||
:private-members:
|
||||
:members:
|
||||
|
||||
Greedy Search
|
||||
####################
|
||||
|
||||
.. automodule:: textattack.search_methods.greedy_search
|
||||
:members:
|
||||
|
||||
Beam Search
|
||||
############
|
||||
|
||||
.. automodule:: textattack.search_methods.beam_search
|
||||
:members:
|
||||
|
||||
|
||||
Greedy Word Swap with Word Importance Ranking
|
||||
##############################################
|
||||
|
||||
.. automodule:: textattack.search_methods.greedy_word_swap_wir
|
||||
:members:
|
||||
|
||||
Genetic Algorithm Word Swap
|
||||
###########################
|
||||
|
||||
.. automodule:: textattack.search_methods.genetic_algorithm
|
||||
:members:
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
==========================
|
||||
Transformation
|
||||
==========================
|
||||
|
||||
A transformation is a method which perturbs a text input through the insertion, deletion and substitution of words, characters, and phrases. All transformations take a ``TokenizedText`` as input and return a list of ``TokenizedText``\s that contains possible transformations. Every transformation is a subclass of the abstract ``Transformation`` class.
|
||||
|
||||
.. automodule:: textattack.transformations.transformation
|
||||
:special-members: __call__
|
||||
:private-members:
|
||||
:members:
|
||||
|
||||
|
||||
Composite Transformation
|
||||
--------------------------
|
||||
Multiple transformations can be used by providing a list of ``Transformation``\s to ``CompositeTransformation``
|
||||
|
||||
.. automodule:: textattack.transformations.composite_transformation
|
||||
:members:
|
||||
|
||||
Word Swap
|
||||
-----------------
|
||||
Word swap transformations act by replacing some words in the input. Subclasses can implement the abstract ``WordSwap`` class by overriding ``self._get_replacement_words``
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap
|
||||
:private-members:
|
||||
:members:
|
||||
|
||||
|
||||
Word Swap by Embedding
|
||||
----------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_embedding
|
||||
:members:
|
||||
|
||||
Word Swap by WordNet Word Replacement
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_wordnet
|
||||
:members:
|
||||
|
||||
Word Swap by Gradient
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_gradient_based
|
||||
:members:
|
||||
|
||||
Word Swap by Homoglyph
|
||||
----------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_homoglyph
|
||||
:members:
|
||||
|
||||
Word Swap by Neighboring Character Swap
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_neighboring_character_swap
|
||||
:members:
|
||||
|
||||
Word Swap by Random Character Deletion
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_deletion
|
||||
:members:
|
||||
|
||||
Word Swap by Random Character Insertion
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_insertion
|
||||
:members:
|
||||
|
||||
Word Swap by Random Character Substitution
|
||||
------------------------------------------
|
||||
|
||||
.. automodule:: textattack.transformations.word_swap_random_character_substitution
|
||||
:members:
|
||||
@@ -1,13 +0,0 @@
|
||||
======================
|
||||
Augmenter
|
||||
======================
|
||||
|
||||
Transformations and constraints can be used outside of an attack for simple NLP data augmentation with the ``Augmenter`` module.
|
||||
|
||||
|
||||
.. automodule:: textattack.augmentation.augmenter
|
||||
:members:
|
||||
:exclude-members: DummyTokenizer
|
||||
|
||||
.. automodule:: textattack.augmentation.recipes
|
||||
:members:
|
||||
62
docs/conf.py
@@ -12,19 +12,20 @@
|
||||
#
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.abspath('..'))
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../"))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = 'TextAttack'
|
||||
copyright = '2020, UVA QData Lab'
|
||||
author = 'UVA QData Lab'
|
||||
project = "TextAttack"
|
||||
copyright = "2020, UVA QData Lab"
|
||||
author = "UVA QData Lab"
|
||||
|
||||
# The full version, including alpha/beta/rc tags
|
||||
release = '0.0.1.9'
|
||||
release = "0.2.12"
|
||||
|
||||
# Set master doc to `index.rst`.
|
||||
master_doc = 'index'
|
||||
master_doc = "index"
|
||||
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
@@ -32,12 +33,18 @@ master_doc = 'index'
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx_rtd_theme',
|
||||
'nbsphinx'
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.inheritance_diagram",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx_rtd_theme",
|
||||
# Enable .ipynb doc files
|
||||
"nbsphinx",
|
||||
# Enable .md doc files
|
||||
"recommonmark",
|
||||
]
|
||||
autosummary_generate = True
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = []
|
||||
@@ -45,28 +52,39 @@ templates_path = []
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ['_build', '**.ipynb_checkpoints']
|
||||
exclude_patterns = ["_build", "**.ipynb_checkpoints"]
|
||||
|
||||
# Mock expensive textattack imports. Docs imports are in `docs/requirements.txt`.
|
||||
autodoc_mock_imports = open('../requirements.txt').readlines()
|
||||
autodoc_mock_imports = []
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = "textattack_doc"
|
||||
html_theme_options = {
|
||||
"logo_only": False,
|
||||
"style_nav_header_background": "transparent",
|
||||
"analytics_id": "UA-88637452-2",
|
||||
}
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_theme_options = {
|
||||
"collapse_navigation": False,
|
||||
}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = []
|
||||
html_static_path = ["_static"]
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
|
||||
# Path to favicon.
|
||||
html_favicon = 'favicon.png'
|
||||
html_favicon = "favicon.png"
|
||||
|
||||
# Don't show module names in front of class names.
|
||||
add_module_names = False
|
||||
add_module_names = True
|
||||
|
||||
# Sort members by group
|
||||
autodoc_member_order = 'groupwise'
|
||||
autodoc_member_order = "groupwise"
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
=================
|
||||
Datasets
|
||||
=================
|
||||
|
||||
.. automodule:: textattack.datasets.dataset
|
||||
:members:
|
||||
:private-members:
|
||||
|
||||
Classification
|
||||
###############
|
||||
.. automodule:: textattack.datasets.classification.classification_dataset
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.classification.ag_news
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.classification.imdb_sentiment
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.classification.kaggle_fake_news
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.classification.movie_review_sentiment
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.classification.yelp_sentiment
|
||||
:members:
|
||||
|
||||
Entailment
|
||||
############
|
||||
.. automodule:: textattack.datasets.entailment.entailment_dataset
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.entailment.mnli
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.datasets.entailment.snli
|
||||
:members:
|
||||
|
||||
|
||||
Translation
|
||||
#############
|
||||
.. automodule:: textattack.datasets.translation.translation_datasets
|
||||
:members:
|
||||
|
||||
@@ -1,126 +0,0 @@
|
||||
Models
|
||||
===============
|
||||
|
||||
TextAttack provides different pre-trained models for testing NLP attacks.
|
||||
|
||||
We split models up into two broad categories:
|
||||
|
||||
- **Classification**: models that output probability scores for some number of classes. These include models for sentiment classification, topic classification, and entailment.
|
||||
- **Text-to-text**: models that output a sequence of text. These include models that do translation and summarization.
|
||||
|
||||
|
||||
**Classification models:**
|
||||
|
||||
:ref:`BERT`: ``bert-base-uncased`` fine-tuned on various datasets using transformers_.
|
||||
|
||||
:ref:`LSTM`: a standard LSTM fine-tuned on various datasets.
|
||||
|
||||
:ref:`CNN`: a Word-CNN fine-tuned on various datasets.
|
||||
|
||||
|
||||
**Text-to-text models:**
|
||||
|
||||
:ref:`T5`: ``T5`` fine-tuned on various datasets using transformers_.
|
||||
|
||||
|
||||
|
||||
BERT
|
||||
********
|
||||
.. _BERT:
|
||||
|
||||
.. automodule:: textattack.models.helpers.bert_for_classification
|
||||
:members:
|
||||
|
||||
|
||||
We provide pre-trained BERT models on the following datasets:
|
||||
|
||||
.. automodule:: textattack.models.classification.bert.bert_for_ag_news_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.bert.bert_for_imdb_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.bert.bert_for_mr_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.bert.bert_for_yelp_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.entailment.bert.bert_for_mnli
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.entailment.bert.bert_for_snli
|
||||
:members:
|
||||
|
||||
LSTM
|
||||
*******
|
||||
.. _LSTM:
|
||||
|
||||
.. automodule:: textattack.models.helpers.lstm_for_classification
|
||||
:members:
|
||||
|
||||
|
||||
We provide pre-trained LSTM models on the following datasets:
|
||||
|
||||
.. automodule:: textattack.models.classification.lstm.lstm_for_ag_news_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.lstm.lstm_for_imdb_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.lstm.lstm_for_mr_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.lstm.lstm_for_yelp_sentiment_classification
|
||||
:members:
|
||||
|
||||
|
||||
|
||||
Word-CNN
|
||||
************
|
||||
.. _CNN:
|
||||
|
||||
.. automodule:: textattack.models.helpers.word_cnn_for_classification
|
||||
:members:
|
||||
|
||||
|
||||
We provide pre-trained CNN models on the following datasets:
|
||||
|
||||
.. automodule:: textattack.models.classification.cnn.word_cnn_for_ag_news_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.cnn.word_cnn_for_imdb_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.cnn.word_cnn_for_mr_sentiment_classification
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.models.classification.cnn.word_cnn_for_yelp_sentiment_classification
|
||||
:members:
|
||||
|
||||
|
||||
.. _T5:
|
||||
|
||||
T5
|
||||
*****************
|
||||
|
||||
.. automodule:: textattack.models.helpers.t5_for_text_to_text
|
||||
:members:
|
||||
|
||||
|
||||
We provide pre-trained T5 models on the following tasks & datasets:
|
||||
|
||||
Translation
|
||||
##############
|
||||
|
||||
.. automodule:: textattack.models.translation.t5.t5_models
|
||||
:members:
|
||||
|
||||
Summarization
|
||||
##############
|
||||
|
||||
.. automodule:: textattack.models.summarization.t5_summarization
|
||||
:members:
|
||||
|
||||
|
||||
.. _transformers: https://github.com/huggingface/transformers
|
||||
@@ -1,21 +0,0 @@
|
||||
===========
|
||||
Tokenizers
|
||||
===========
|
||||
|
||||
.. automodule:: textattack.tokenizers.tokenizer
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.tokenizers.auto_tokenizer
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.tokenizers.spacy_tokenizer
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.tokenizers.t5_tokenizer
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.tokenizers.bert_tokenizer
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.tokenizers.bert_entailment_tokenizer
|
||||
:members:
|
||||
@@ -1,334 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The TextAttack🐙 ecosystem: search, transformations, and constraints\n",
|
||||
"\n",
|
||||
"An attack in TextAttack consists of four parts.\n",
|
||||
"\n",
|
||||
"### Goal function\n",
|
||||
"\n",
|
||||
"The **goal function** determines if the attack is successful or not. One common goal function is **untargeted classification**, where the attack tries to perturb an input to change its classification. \n",
|
||||
"\n",
|
||||
"### Search method\n",
|
||||
"The **search method** explores the space of potential transformations and tries to locate a successful perturbation. Greedy search, beam search, and brute-force search are all examples of search methods.\n",
|
||||
"\n",
|
||||
"### Transformation\n",
|
||||
"A **transformation** takes a text input and transforms it, replacing words or phrases with similar ones, while trying not to change the meaning. Paraphrase and synonym substitution are two broad classes of transformations.\n",
|
||||
"\n",
|
||||
"### Constraints\n",
|
||||
"Finally, **constraints** determine whether or not a given transformation is valid. Transformations don't perfectly preserve syntax or semantics, so additional constraints can increase the probability that these qualities are preserved from the source to adversarial example. There are many types of constraints: overlap constraints that measure edit distance, syntactical constraints check part-of-speech and grammar errors, and semantic constraints like language models and sentence encoders."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A custom transformation\n",
|
||||
"\n",
|
||||
"This lesson explains how to create a custom transformation. In TextAttack, many transformations involve *word swaps*: they take a word and try and find suitable substitutes. Some attacks focus on replacing characters with neighboring characters to create \"typos\" (these don't intend to preserve the grammaticality of inputs). Other attacks rely on semantics: they take a word and try to replace it with semantic equivalents.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Banana word swap 🍌\n",
|
||||
"\n",
|
||||
"As an introduction to writing transformations for TextAttack, we're going to try a very simple transformation: one that replaces any given word with the word 'banana'. In TextAttack, there's an abstract `WordSwap` class that handles the heavy lifting of breaking sentences into words and avoiding replacement of stopwords. We can extend `WordSwap` and implement a single method, `_get_replacement_words`, to indicate to replace each word with 'banana'."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.transformations import WordSwap\n",
|
||||
"\n",
|
||||
"class BananaWordSwap(WordSwap):\n",
|
||||
" \"\"\" Transforms an input by replacing any word with 'banana'.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" # We don't need a constructor, since our class doesn't require any parameters.\n",
|
||||
"\n",
|
||||
" def _get_replacement_words(self, word):\n",
|
||||
" \"\"\" Returns 'banana', no matter what 'word' was originally.\n",
|
||||
" \n",
|
||||
" Returns a list with one item, since `_get_replacement_words` is intended to\n",
|
||||
" return a list of candidate replacement words.\n",
|
||||
" \"\"\"\n",
|
||||
" return ['banana']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Using our transformation\n",
|
||||
"\n",
|
||||
"Now we have the transformation chosen, but we're missing a few other things. To complete the attack, we need to choose the **search method** and **constraints**. And to use the attack, we need a **goal function**, a **model** and a **dataset**. (The goal function indicates the task our model performs – in this case, classification – and the type of attack – in this case, we'll perform an untargeted attack.)\n",
|
||||
"\n",
|
||||
"### Creating the goal function, model, and dataset\n",
|
||||
"We are performing an untargeted attack on a classification model, so we'll use the `UntargetedClassification` class. For the model, let's use an LSTM trained for news classification on the AG News dataset. Luckily, TextAttack comes with 1000 text samples from some popular datasets, as well as pretrained models for those datasets. So we don't have to train our own model, or procure someone else's. We can just use the built-in datasets and models for this."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'> matches model LSTMForAGNewsClassification.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the dataset.\n",
|
||||
"from textattack.datasets.classification import AGNews\n",
|
||||
"# Create the model.\n",
|
||||
"from textattack.models.classification.lstm import LSTMForAGNewsClassification\n",
|
||||
"model = LSTMForAGNewsClassification()\n",
|
||||
"# Create the goal function using the model.\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"goal_function = UntargetedClassification(model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating the attack\n",
|
||||
"Let's keep it simple: let's use a greedy search method, and let's not use any constraints for now. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"from textattack.shared import Attack\n",
|
||||
"\n",
|
||||
"# We're going to use our Banana word swap class as the attack transformation.\n",
|
||||
"transformation = BananaWordSwap() \n",
|
||||
"# We'll constrain modification of already modified indices and stopwords\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification()]\n",
|
||||
"# We'll use the Greedy search method\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"# Now, let's make the attack from the 4 components:\n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's print our attack to see all the parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): BananaWordSwap\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(attack)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using the attack\n",
|
||||
"\n",
|
||||
"Let's use our attack to attack 10 samples (by setting `num_examples` to 10). Additionally, we set `attack_n` to `True`, which indicates that we should attack 10 samples, no matter what. If the model mispredicts a sample already, it isn't attacked; since `attack_n` is `True`, if a sample is mispredicted, we'll take try the next thing in the dataset, and continue until `num_examples` attacks have been completed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"12it [00:00, 19.61it/s] \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from tqdm import tqdm # tqdm provides us a nice progress bar.\n",
|
||||
"from textattack.loggers import CSVLogger # tracks a dataframe for us.\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(AGNews(), num_examples=10, attack_n=True)\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"for result in tqdm(results_iterable, total=10):\n",
|
||||
" logger.log_attack_result(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Visualizing attack results\n",
|
||||
"\n",
|
||||
"We are logging `AttackResult` objects using a `CSVLogger`. This logger stores all attack results in a dataframe, which we can easily access and display. Since we set `color_method` to `'html'`, the attack results will display their differences, in color, in HTML. Using `IPython` utilities and `pandas`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Thirst, Fear and Bribes on <font color = red>Desert</font> Escape from Africa <font color = red>AGADEZ</font>, <font color = red>Niger</font> (Reuters) - Customs officers in this dusty Saharan town turned a blind eye as yet another creaking truck piled with grain, smuggled cigarettes and dozens of migrants heading for Europe rumbled off into the desert.</td>\n",
|
||||
" <td>Thirst, Fear and Bribes on <font color = blue>banana</font> Escape from Africa <font color = blue>banana</font>, <font color = blue>banana</font> (Reuters) - Customs officers in this dusty Saharan town turned a blind eye as yet another creaking truck piled with grain, smuggled cigarettes and dozens of migrants heading for Europe rumbled off into the desert.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Toshiba 20 TV freaks out, sends distress signal See what happens when your warranty runs out?. In this case, a 20 Toshiba owned by Chris van Rossman started sending out the international distress signal at 121.</td>\n",
|
||||
" <td>Toshiba 20 TV freaks out, sends distress signal See what happens when your warranty runs out?. In this case, a 20 Toshiba owned by Chris van Rossman started sending out the international distress signal at 121.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>British hostage tried fleeing before death: report The portrait of Ken Bigley, who was murdered in Iraq October 7, stands in front of the congregation during a service at Liverpool #39;s Roman Catholic Cathedral on October 10.</td>\n",
|
||||
" <td>British hostage tried fleeing before death: report The portrait of Ken Bigley, who was murdered in Iraq October 7, stands in front of the congregation during a service at Liverpool #39;s Roman Catholic Cathedral on October 10.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td><font color = purple>Keychain</font> <font color = purple>clicker</font> kills TVs Discrete device turns off televisions, creating a little peace and quiet. Until the yelling starts.</td>\n",
|
||||
" <td><font color = blue>banana</font> <font color = blue>banana</font> kills TVs Discrete device turns off televisions, creating a little peace and quiet. Until the yelling starts.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td><font color = red>Cleric</font> returns to <font color = red>Iraq</font>, orders march on Najaf <font color = red>Powerful</font> Shiite leader says he plans to <font color = red>lead</font> a mass demonstration today to end fighting. <font color = red>Iraqi</font> <font color = red>hostage</font>: <font color = red>Militants</font> <font color = red>said</font> Wednesday they had <font color = red>kidnapped</font> the brother-in-law of <font color = red>Iraqi</font> Defense <font color = red>Minister</font> Hazem Shaalan</td>\n",
|
||||
" <td><font color = blue>banana</font> returns to <font color = blue>banana</font>, orders march on Najaf <font color = blue>banana</font> Shiite leader says he plans to <font color = blue>banana</font> a mass demonstration today to end fighting. <font color = blue>banana</font> <font color = blue>banana</font>: <font color = blue>banana</font> <font color = blue>banana</font> Wednesday they had <font color = blue>banana</font> the brother-in-law of <font color = blue>banana</font> Defense <font color = blue>banana</font> Hazem Shaalan</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td><font color = green>Hewitt</font> Beats Roddick to <font color = green>Reach</font> Masters Final HOUSTON (Reuters) - <font color = green>A</font> fired-up Lleyton Hewitt defused hard-hitting Andy Roddick 6-3, 6-2 on Saturday, scurrying into the final of the <font color = green>Masters</font> Cup for the third time in four years.</td>\n",
|
||||
" <td><font color = blue>banana</font> Beats Roddick to <font color = blue>banana</font> Masters Final HOUSTON (Reuters) - <font color = blue>banana</font> fired-up Lleyton Hewitt defused hard-hitting Andy Roddick 6-3, 6-2 on Saturday, scurrying into the final of the <font color = blue>banana</font> Cup for the third time in four years.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td><font color = blue>Despite</font> <font color = blue>booming</font> economy, no concrete move on <font color = blue>debt</font> relief (AFP) AFP - Senior finance officials have hailed a robust global <font color = blue>economic</font> recovery, albeit one threatened by surging <font color = blue>oil</font> prices, but made little headway pushing China toward currency reform and took no firm <font color = blue>steps</font> to ease the debt of the world's poorest nations.</td>\n",
|
||||
" <td><font color = red>banana</font> <font color = red>banana</font> economy, no concrete move on <font color = red>banana</font> relief (AFP) AFP - Senior finance officials have hailed a robust global <font color = red>banana</font> recovery, albeit one threatened by surging <font color = red>banana</font> prices, but made little headway pushing China toward currency reform and took no firm <font color = red>banana</font> to ease the debt of the world's poorest nations.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td><font color = red>Ethiopian</font> court sentences 3 former <font color = red>rebels</font> to death for mass murders (Canadian <font color = red>Press</font>) Canadian <font color = red>Press</font> - ADDIS ABABA, <font color = red>Ethiopia</font> (AP) - A court has sentenced three former <font color = red>rebels</font> to death for <font color = red>killing</font> dozens of people while rebel factions jockeyed for power more than a decade ago, a government spokesman said Thursday.</td>\n",
|
||||
" <td><font color = blue>banana</font> court sentences 3 former <font color = blue>banana</font> to death for mass murders (Canadian <font color = blue>banana</font>) Canadian <font color = blue>banana</font> - ADDIS ABABA, <font color = blue>banana</font> (AP) - A court has sentenced three former <font color = blue>banana</font> to death for <font color = blue>banana</font> dozens of people while rebel factions jockeyed for power more than a decade ago, a government spokesman said Thursday.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>Just a close call for closer No need to check with your primary care <font color = green>provider</font> about the origin of that <font color = green>nervous</font> <font color = green>tic</font> you woke up with this morning, on the first full day of autumn. Ninth-inning home runs allowed in three consecutive <font color = green>games</font> by Sox closer <font color = green>Keith</font> Foulke, who also was tagged with blown saves in each of the last two <font color = green>games</font>, were enough to leave ...</td>\n",
|
||||
" <td>Just a close call for closer No need to check with your primary care <font color = blue>banana</font> about the origin of that <font color = blue>banana</font> <font color = blue>banana</font> you woke up with this morning, on the first full day of autumn. Ninth-inning home runs allowed in three consecutive <font color = blue>banana</font> by Sox closer <font color = blue>banana</font> Foulke, who also was tagged with blown saves in each of the last two <font color = blue>banana</font>, were enough to leave ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td><font color = purple>Study</font>: Wild <font color = purple>Monkeys</font> Resort to Use of Tools WASHINGTON - Wild South American monkeys routinely use fist-sized rocks to crack open seeds and to dig in dry Brazilian soil for grubs and edible tubers, researchers report in the journal Science.</td>\n",
|
||||
" <td><font color = blue>banana</font>: Wild <font color = blue>banana</font> Resort to Use of Tools WASHINGTON - Wild South American monkeys routinely use fist-sized rocks to crack open seeds and to dig in dry Brazilian soil for grubs and edible tubers, researchers report in the journal Science.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10</th>\n",
|
||||
" <td>Bryant's Request to Seal Evidence Opposed (AP) AP - <font color = green>The</font> prosecutor who charged Kobe Bryant with felony sexual assault has joined news organizations in opposing an attempt by the <font color = green>NBA</font> star's attorney to permanently seal evidence and documents in the case.</td>\n",
|
||||
" <td>Bryant's Request to Seal Evidence Opposed (AP) AP - <font color = blue>banana</font> prosecutor who charged Kobe Bryant with felony sexual assault has joined news organizations in opposing an attempt by the <font color = blue>banana</font> star's attorney to permanently seal evidence and documents in the case.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11</th>\n",
|
||||
" <td>Eagles have lift off Crystal Palace were triumphant last night (Oct 4) over 10-<font color = green>man</font> Fulham at Selhurst Park, lifting themselves off the <font color = green>bottom</font> of the <font color = green>Premiership</font>.</td>\n",
|
||||
" <td>Eagles have lift off Crystal Palace were triumphant last night (Oct 4) over 10-<font color = purple>banana</font> Fulham at Selhurst Park, lifting themselves off the <font color = purple>banana</font> of the <font color = purple>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pd.options.display.max_colwidth = 480 # increase colum width so we can actually read the examples\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Conclusion 🍌\n",
|
||||
"\n",
|
||||
"We can examine these examples for a good idea of how many words had to be changed to \"banana\" to change the prediction score from the correct class to another class. The examples without perturbed words were originally misclassified, so they were skipped by the attack. Looks like some examples needed only a single \"banana\", while others needed up to 17 \"banana\" substitutions to change the class score. Wow!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,567 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The importance of constraints\n",
|
||||
"\n",
|
||||
"Constraints determine which potential adversarial examples are valid inputs to the model. When determining the efficacy of an attack, constraints are everything. After all, an attack that looks very powerful may just be generating nonsense. Or, perhaps more nefariously, an attack may generate a real-looking example that changes the original label of the input. That's why you should always clearly define the *constraints* your adversarial examples must meet. \n",
|
||||
"\n",
|
||||
"### Classes of constraints\n",
|
||||
"\n",
|
||||
"TextAttack evaluates constraints using methods from three groups:\n",
|
||||
"\n",
|
||||
"- **Overlap constraints** determine if a perturbation is valid based on character-level analysis. For example, some attacks are constrained by edit distance: a perturbation is only valid if it perturbs some small number of characters (or fewer).\n",
|
||||
"\n",
|
||||
"- **Grammaticality constraints** filter inputs based on syntactical information. For example, an attack may require that adversarial perturbations do not introduce grammatical errors.\n",
|
||||
"\n",
|
||||
"- **Semantic constraints** try to ensure that the perturbation is semantically similar to the original input. For example, we may design a constraint that uses a sentence encoder to encode the original and perturbed inputs, and enforce that the sentence encodings be within some fixed distance of one another. (This is what happens in subclasses of `textattack.constraints.semantics.sentence_encoders`.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A new constraint\n",
|
||||
"\n",
|
||||
"To add our own constraint, we need to create a subclass of `textattack.constraints.Constraint`. We can implement one of two functions, either `__call__` or `call_many`:\n",
|
||||
"\n",
|
||||
"- `__call__` determines if original input `x` and perturbation `x_adv` fulfill a desired constraint. It returns either `True` or `False`.\n",
|
||||
"- `call_many` determines if a list of perturbations `x_adv` fulfill the constraint from original input `x`. This is here in case your constraint can be vectorized. If not, just implement `__call__`, and `__call__` will be executed for each `(x, x_adv)` pair."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### A custom constraint\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"For fun, we're going to see what happens when we constrain an attack to only allow perturbations that substitute out a named entity for another. In linguistics, a **named entity** is a proper noun, the name of a person, organization, location, product, etc. Named Entity Recognition is a popular NLP task (and one that state-of-the-art models can perform quite well). \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### NLTK and Named Entity Recognition\n",
|
||||
"\n",
|
||||
"**NLTK**, the Natural Language Toolkit, is a Python package that helps developers write programs that process natural language. NLTK comes with predefined algorithms for lots of linguistic tasks– including Named Entity Recognition.\n",
|
||||
"\n",
|
||||
"First, we're going to write a constraint class. In the `__call__` method, we're going to use NLTK to find the named entities in both `x` and `x_adv`. We will only return `True` (that is, our constraint is met) if `x_adv` has substituted one named entity in `x` for another.\n",
|
||||
"\n",
|
||||
"Let's import NLTK and download the required modules:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[nltk_data] Downloading package punkt to /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package maxent_ne_chunker to\n",
|
||||
"[nltk_data] /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package maxent_ne_chunker is already up-to-date!\n",
|
||||
"[nltk_data] Downloading package words to /u/edl9cy/nltk_data...\n",
|
||||
"[nltk_data] Package words is already up-to-date!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"nltk.download('punkt') # The NLTK tokenizer\n",
|
||||
"nltk.download('maxent_ne_chunker') # NLTK named-entity chunker\n",
|
||||
"nltk.download('words') # NLTK list of words"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### NLTK NER Example\n",
|
||||
"\n",
|
||||
"Here's an example of using NLTK to find the named entities in a sentence:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(S\n",
|
||||
" In/IN\n",
|
||||
" 2017/CD\n",
|
||||
" ,/,\n",
|
||||
" star/NN\n",
|
||||
" quarterback/NN\n",
|
||||
" (PERSON Tom/NNP Brady/NNP)\n",
|
||||
" led/VBD\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Patriots/NNP)\n",
|
||||
" to/TO\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Super/NNP Bowl/NNP)\n",
|
||||
" ,/,\n",
|
||||
" but/CC\n",
|
||||
" lost/VBD\n",
|
||||
" to/TO\n",
|
||||
" the/DT\n",
|
||||
" (ORGANIZATION Philadelphia/NNP Eagles/NNP)\n",
|
||||
" ./.)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentence = ('In 2017, star quarterback Tom Brady led the Patriots to the Super Bowl, '\n",
|
||||
" 'but lost to the Philadelphia Eagles.')\n",
|
||||
"\n",
|
||||
"# 1. Tokenize using the NLTK tokenizer.\n",
|
||||
"tokens = nltk.word_tokenize(sentence)\n",
|
||||
"\n",
|
||||
"# 2. Tag parts of speech using the NLTK part-of-speech tagger.\n",
|
||||
"tagged = nltk.pos_tag(tokens)\n",
|
||||
"\n",
|
||||
"# 3. Extract entities from tagged sentence.\n",
|
||||
"entities = nltk.chunk.ne_chunk(tagged)\n",
|
||||
"print(entities)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It looks like `nltk.chunk.ne_chunk` gives us an `nltk.tree.Tree` object where named entities are also `nltk.tree.Tree` objects within that tree. We can take this a step further and grab the named entities from the tree of entities:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Tree('PERSON', [('Tom', 'NNP'), ('Brady', 'NNP')]), Tree('ORGANIZATION', [('Patriots', 'NNP')]), Tree('ORGANIZATION', [('Super', 'NNP'), ('Bowl', 'NNP')]), Tree('ORGANIZATION', [('Philadelphia', 'NNP'), ('Eagles', 'NNP')])]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 4. Filter entities to just named entities.\n",
|
||||
"named_entities = [entity for entity in entities if isinstance(entity, nltk.tree.Tree)]\n",
|
||||
"print(named_entities)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Caching with `@functools.lru_cache`\n",
|
||||
"\n",
|
||||
"A little-known feature of Python 3 is `functools.lru_cache`, a decorator that allows users to easily cache the results of a function in an LRU cache. We're going to be using the NLTK library quite a bit to tokenize, parse, and detect named entities in sentences. These sentences might repeat themselves. As such, we'll use this decorator to cache named entities so that we don't have to perform this expensive computation multiple times."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Putting it all together: getting a list of Named Entity Labels from a sentence\n",
|
||||
"\n",
|
||||
"Now that we know how to tokenize, parse, and detect named entities using NLTK, let's put it all together into a single helper function. Later, when we implement our constraint, we can query this function to easily get the entity labels from a sentence. We can even use `@functools.lru_cache` to try and speed this process up."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import functools\n",
|
||||
"\n",
|
||||
"@functools.lru_cache(maxsize=2**14)\n",
|
||||
"def get_entities(sentence):\n",
|
||||
" tokens = nltk.word_tokenize(sentence)\n",
|
||||
" tagged = nltk.pos_tag(tokens)\n",
|
||||
" # Setting `binary=True` makes NLTK return all of the named\n",
|
||||
" # entities tagged as NNP instead of detailed tags like\n",
|
||||
    " # 'Organization', 'Geo-Political Entity', etc.\n",
|
||||
" entities = nltk.chunk.ne_chunk(tagged, binary=True)\n",
|
||||
" return entities.leaves()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And let's test our function to make sure it works:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('Jack', 'NNP'),\n",
|
||||
" ('Black', 'NNP'),\n",
|
||||
" ('starred', 'VBD'),\n",
|
||||
" ('in', 'IN'),\n",
|
||||
" ('the', 'DT'),\n",
|
||||
" ('2003', 'CD'),\n",
|
||||
" ('film', 'NN'),\n",
|
||||
" ('classic', 'JJ'),\n",
|
||||
" ('``', '``'),\n",
|
||||
" ('School', 'NNP'),\n",
|
||||
" ('of', 'IN'),\n",
|
||||
" ('Rock', 'NNP'),\n",
|
||||
" (\"''\", \"''\"),\n",
|
||||
" ('.', '.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sentence = 'Jack Black starred in the 2003 film classic \"School of Rock\".'\n",
|
||||
"get_entities(sentence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
    "We flattened the tree of entities, so the return format is a list of `(word, entity type)` tuples. For non-entities, the `entity_type` is just the part of speech of the word. `'NNP'` is the indicator of a named entity (a proper noun, according to NLTK). Looks like we identified four named-entity words here: 'Jack', 'Black', 'School', and 'Rock'. (Seems that the labeler thinks 'Rock' is the name of a place, a city or something.) Whatever technique NLTK uses for named entity recognition may be a bit rough, but it did a pretty decent job here!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating our NamedEntityConstraint\n",
|
||||
"\n",
|
||||
"Now that we know how to detect named entities using NLTK, let's create our custom constraint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.constraints import Constraint\n",
|
||||
"\n",
|
||||
"class NamedEntityConstraint(Constraint):\n",
|
||||
" \"\"\" A constraint that ensures `x_adv` only substitutes named entities from `x` with other named entities.\n",
|
||||
" \"\"\"\n",
|
||||
" def _check_constraint(self, x, x_adv, original_text=None):\n",
|
||||
" x_entities = get_entities(x.text)\n",
|
||||
" x_adv_entities = get_entities(x_adv.text)\n",
|
||||
" # If there aren't named entities, let's return False (the attack\n",
|
||||
" # will eventually fail).\n",
|
||||
" if len(x_entities) == 0:\n",
|
||||
" return False\n",
|
||||
" if len(x_entities) != len(x_adv_entities):\n",
|
||||
" # If the two sentences have a different number of entities, then \n",
|
||||
" # they definitely don't have the same labels. In this case, the \n",
|
||||
    "            # constraint is violated, and we return False.\n",
|
||||
" return False\n",
|
||||
" else:\n",
|
||||
" # Here we compare all of the words, in order, to make sure that they match.\n",
|
||||
" # If we find two words that don't match, this means a word was swapped \n",
|
||||
" # between `x` and `x_adv`. That word must be a named entity to fulfill our\n",
|
||||
" # constraint.\n",
|
||||
" x_word_label = None\n",
|
||||
" x_adv_word_label = None\n",
|
||||
" for (word_1, label_1), (word_2, label_2) in zip(x_entities, x_adv_entities):\n",
|
||||
" if word_1 != word_2:\n",
|
||||
" # Finally, make sure that words swapped between `x` and `x_adv` are named entities. If \n",
|
||||
" # they're not, then we also return False.\n",
|
||||
" if (label_1 not in ['NNP', 'NE']) or (label_2 not in ['NNP', 'NE']):\n",
|
||||
" return False \n",
|
||||
" # If we get here, all of the labels match up. Return True!\n",
|
||||
" return True\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"### Testing our constraint\n",
|
||||
"\n",
|
||||
"We need to create an attack and a dataset to test our constraint on. We went over all of this in the first tutorial, so let's gloss over this part for now."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[34;1mtextattack\u001b[0m: Goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'> matches model LSTMForYelpSentimentClassification.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the dataset.\n",
|
||||
"from textattack.datasets.classification import YelpSentiment\n",
|
||||
"# Create the model.\n",
|
||||
"from textattack.models.classification.lstm import LSTMForYelpSentimentClassification\n",
|
||||
"model = LSTMForYelpSentimentClassification()\n",
|
||||
"# Create the goal function using the model.\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"goal_function = UntargetedClassification(model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): WordSwapEmbedding(\n",
|
||||
" (max_candidates): 15\n",
|
||||
" (embedding_type): paragramcf\n",
|
||||
" )\n",
|
||||
" (constraints): \n",
|
||||
" (0): NamedEntityConstraint\n",
|
||||
" (1): RepeatModification\n",
|
||||
" (2): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.transformations import WordSwapEmbedding\n",
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"from textattack.shared import Attack\n",
|
||||
"\n",
|
||||
"# We're going to the `WordSwapEmbedding` transformation. Using the default settings, this\n",
|
||||
"# will try substituting words with their neighbors in the counter-fitted embedding space. \n",
|
||||
"transformation = WordSwapEmbedding(max_candidates=15) \n",
|
||||
"\n",
|
||||
"# We'll use the greedy search method again\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"\n",
|
||||
"# Our constraints will be the same as Tutorial 1, plus the named entity constraint\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification(),\n",
|
||||
" NamedEntityConstraint()]\n",
|
||||
"\n",
|
||||
"# Now, let's make the attack using these parameters. \n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)\n",
|
||||
"\n",
|
||||
"print(attack)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"torch.cuda.is_available()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's use our attack. We're going to iterate through the `YelpSentiment` dataset and attack samples until we achieve 10 successes. (There's a lot to check here, and since we're using a greedy search over all potential word swap positions, each sample will take a few minutes. This will take a few hours to run on a single core.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.loggers import CSVLogger # tracks a dataframe for us.\n",
|
||||
"from textattack.attack_results import SuccessfulAttackResult\n",
|
||||
"\n",
|
||||
"results_iterable = attack.attack_dataset(YelpSentiment(), attack_n=True)\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"num_successes = 0\n",
|
||||
"while num_successes < 10:\n",
|
||||
" result = next(results_iterable)\n",
|
||||
" if isinstance(result, SuccessfulAttackResult):\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
" num_successes += 1\n",
|
||||
" print(num_successes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's visualize our 10 successes in color:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td><font color = red>Quick</font> attentive bartenders. No Convenient parking. No Patrons above the age of 24. No Hot waitresses. No. Slow Waitresses. Yes. Everlasting brodeo. Yes. Dirty Bathrooms. Yes. Douchebaggery all around. Yes. Unfresh oysters. Yes. $3 kiltlifter. Yes. Asshole cops that follow you home afterwards. Yes. I decided that I pretty much hate this place. Sorry, I just do.</td>\n",
|
||||
" <td><font color = green>Speedily</font> attentive bartenders. No Convenient parking. No Patrons above the age of 24. No Hot waitresses. No. Slow Waitresses. Yes. Everlasting brodeo. Yes. Dirty Bathrooms. Yes. Douchebaggery all around. Yes. Unfresh oysters. Yes. $3 kiltlifter. Yes. Asshole cops that follow you home afterwards. Yes. I decided that I pretty much hate this place. Sorry, I just do.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>On Yelp there are 5-6 Paradise restaurants in Vegas. Which one is the original? Who cares?! :) I came to this one as the only place you pass from the Monorail to the trade show conference down the hall is the Foruna coffee and wine place. The only thing fortunate was to avoid its yucky looking pastries. U-<font color = red>turn</font> back and keep right instead of heading back to the Monorail and you will hit <font color = red>Paradise</font>. I sat at the bar and stared at the hazy looking brownish restaurant. It felt like <font color = red>Canter</font>'s in L.A. (which is one of the oldest in the city!?) <font color = red>Service</font> wasn't great here. One lady gave me a menu; then she gave me the takeout menu (clearly marked with the gouging 18% gratuity added.) Then another lady came and took my order. I ordered the healthy option. <font color = red>Water</font>/ <font color = red>Joke</font>. Oatmeal with dried cherries and coffee. It came in a neat recycled cardboard holder with 2 cups (coffee and oatmeal) and 3 little containers. Milk for the oatmeal but I used it for the coffee; 2 brown sugars. Stirring the oatmeal there was no sign of the cherries so I checked the gouging menu and then asked waitress #1 where they were. It seems #2 forgot. Anyway the dried cherries transformed into tiny prunes. That was even better on the healthy quota. Overall, not thrilled with my dining experience here, but the food was ok. Of course I paid $11 for the pleasure. $3 for coffee - cheaper than the Starbucks in the Harrah's - $6 for oatmeal. $1.50 for tip.</td>\n",
|
||||
" <td>On Yelp there are 5-6 Paradise restaurants in Vegas. Which one is the original? Who cares?! :) I came to this one as the only place you pass from the Monorail to the trade show conference down the hall is the Foruna coffee and wine place. The only thing fortunate was to avoid its yucky looking pastries. U-<font color = green>transforming</font> back and keep right instead of heading back to the Monorail and you will hit <font color = green>Heavens</font>. I sat at the bar and stared at the hazy looking brownish restaurant. It felt like <font color = green>Pirouette</font>'s in L.A. (which is one of the oldest in the city!?) <font color = green>Department</font> wasn't great here. One lady gave me a menu; then she gave me the takeout menu (clearly marked with the gouging 18% gratuity added.) Then another lady came and took my order. I ordered the healthy option. <font color = green>Eau</font>/ <font color = green>Giggle</font>. Oatmeal with dried cherries and coffee. It came in a neat recycled cardboard holder with 2 cups (coffee and oatmeal) and 3 little containers. Milk for the oatmeal but I used it for the coffee; 2 brown sugars. Stirring the oatmeal there was no sign of the cherries so I checked the gouging menu and then asked waitress #1 where they were. It seems #2 forgot. Anyway the dried cherries transformed into tiny prunes. That was even better on the healthy quota. Overall, not thrilled with my dining experience here, but the food was ok. Of course I paid $11 for the pleasure. $3 for coffee - cheaper than the Starbucks in the Harrah's - $6 for oatmeal. $1.50 for tip.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Dining in <font color = green>Red</font> Lobster is dining in my comfort zone. Going back to 1977, I have been a Red Lobster fan. In my early forties, I continue to be a Red Lobster fan. In 35 years I never got tired of it. So, it is with little surprise that when we visited Phoenix that we would dine in a Red Lobster. For whatever reason, the Cactus Road Red Lobster (near Paradise Valley Mall) was dead on a Friday night. It was the first time in a while that when I visited a Red Lobster that a wait to be seated was nonexistent. If there was a wait, I'm sure that the usual lobster tank would have occupied me for a few minutes. As soon as I saw the \"All You Can Eat Shrimp special\" on the menu, I was decided. The shrimp special is no joke. There is a choice of four shrimps including shrimp scampi, fried shrimp, and shrimp with linguini. I'll be the first to admit that Red Lobster seafood isn't Wolfgang Puck Gang quality at the same time that I'll admit that it is far better than frozen foods seafood or a burger served in a casual restaurant chain. It wasn't just the all you can eat shrimp and bottomless soda that makes this a good deal for approximately $20. It was also the salad with dressing and house bread. Red Lobster's menu features many entrees that are affordable. I just happened to hit Red Lobster when they featured the \"All You Can Eat Shrimp.\" If it had been another time of the year, I would have ordered shrimp linguini, which comes with a salad and bread. I had zero complaints about the service. Our Waitress was prompt with the refills at the exact time that she did not rush us. Dining in Red Lobster is dining in my comfort zone. As long as I am living and have some green in my pocket, I will return to a Red Lobster throughout the year.</td>\n",
|
||||
" <td>Dining in <font color = red>Flushed</font> Lobster is dining in my comfort zone. Going back to 1977, I have been a Red Lobster fan. In my early forties, I continue to be a Red Lobster fan. In 35 years I never got tired of it. So, it is with little surprise that when we visited Phoenix that we would dine in a Red Lobster. For whatever reason, the Cactus Road Red Lobster (near Paradise Valley Mall) was dead on a Friday night. It was the first time in a while that when I visited a Red Lobster that a wait to be seated was nonexistent. If there was a wait, I'm sure that the usual lobster tank would have occupied me for a few minutes. As soon as I saw the \"All You Can Eat Shrimp special\" on the menu, I was decided. The shrimp special is no joke. There is a choice of four shrimps including shrimp scampi, fried shrimp, and shrimp with linguini. I'll be the first to admit that Red Lobster seafood isn't Wolfgang Puck Gang quality at the same time that I'll admit that it is far better than frozen foods seafood or a burger served in a casual restaurant chain. It wasn't just the all you can eat shrimp and bottomless soda that makes this a good deal for approximately $20. It was also the salad with dressing and house bread. Red Lobster's menu features many entrees that are affordable. I just happened to hit Red Lobster when they featured the \"All You Can Eat Shrimp.\" If it had been another time of the year, I would have ordered shrimp linguini, which comes with a salad and bread. I had zero complaints about the service. Our Waitress was prompt with the refills at the exact time that she did not rush us. Dining in Red Lobster is dining in my comfort zone. As long as I am living and have some green in my pocket, I will return to a Red Lobster throughout the year.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Lots of great reviews for a reason. There is an undeserved 1-star on here because of lack of 'authenticity' and the fact that they gave the user a fork. <font color = green>Well</font>, guess what--I've been given forks at sushi restaurants that were fantastic. Some people don't know how to eat Ethiopian and that's fine. One thing you should definitely do is read the blurb on the front of the menu. Not only is it really interesting (the restaurant is actually named after an ancient church that was excavated) but it gives you helpful background on what you're about to get in to. Like other users note, injera (en-jee-rah) is weird. Looks like a sponge, feels like rubber, tastes like a sour sock. So, you get why Ethiopians eat it smothered in stuffs. Luckily, the stuffs are hella tasty, and vegetarian/vegan friendly. I'd recommend the spicy stuff (No. 9 was my favorite) but anything cold can get kind of odd. Lots of lentils are served here, and you can even buy the Lalibela brand at Whole Foods. What else can I say, the service was awesome, the food was unique and reasonably priced. If it's between you and one other person I'd probably only get three entrees, they are pretty big despite what the server will probably say. I didn't give five stars because I've only been once. Try it out soon.</td>\n",
|
||||
" <td>Lots of great reviews for a reason. There is an undeserved 1-star on here because of lack of 'authenticity' and the fact that they gave the user a fork. <font color = red>Alright</font>, guess what--I've been given forks at sushi restaurants that were fantastic. Some people don't know how to eat Ethiopian and that's fine. One thing you should definitely do is read the blurb on the front of the menu. Not only is it really interesting (the restaurant is actually named after an ancient church that was excavated) but it gives you helpful background on what you're about to get in to. Like other users note, injera (en-jee-rah) is weird. Looks like a sponge, feels like rubber, tastes like a sour sock. So, you get why Ethiopians eat it smothered in stuffs. Luckily, the stuffs are hella tasty, and vegetarian/vegan friendly. I'd recommend the spicy stuff (No. 9 was my favorite) but anything cold can get kind of odd. Lots of lentils are served here, and you can even buy the Lalibela brand at Whole Foods. What else can I say, the service was awesome, the food was unique and reasonably priced. If it's between you and one other person I'd probably only get three entrees, they are pretty big despite what the server will probably say. I didn't give five stars because I've only been once. Try it out soon.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td><font color = red>Worst</font> hot dog ever.</td>\n",
|
||||
" <td><font color = green>Grandest</font> hot dog ever.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>For those parents who need to take kids in for their first haircut, or one of the first few haircuts, I strongly recommend a place like this. Cool <font color = green>Cuts</font> has a nice play area for kids while they wait including video game stations for the older kids. A basic cut is $16.95 which is average. You can find cheaper prices at Stupidcuts and Hateclips, but you will make up the difference by waiting at least an hour for a moron to cut your child's hair badly, and your child being scared to death in the process. Spend a few bucks more and go to a shop that specializes in kids. The kids get to sit in a regular chair or a special car or airplane chair and watch a video from a decent selection while the snippers do their work. My little boy just had his third haircut ever, and Maryam (sic) did a great job. I think she set the land speed record for cutting hair. He was seated and done within 5 minutes, and all that while having a screaming fit the way only an 18 month old boy from my family can do. They will use scissors if the child can stand it, and clippers if she has a squirmer. I recommend calling ahead for a reservation or you will have to wait (or shop with your wife while waiting!).</td>\n",
|
||||
" <td>For those parents who need to take kids in for their first haircut, or one of the first few haircuts, I strongly recommend a place like this. Cool <font color = red>Cutback</font> has a nice play area for kids while they wait including video game stations for the older kids. A basic cut is $16.95 which is average. You can find cheaper prices at Stupidcuts and Hateclips, but you will make up the difference by waiting at least an hour for a moron to cut your child's hair badly, and your child being scared to death in the process. Spend a few bucks more and go to a shop that specializes in kids. The kids get to sit in a regular chair or a special car or airplane chair and watch a video from a decent selection while the snippers do their work. My little boy just had his third haircut ever, and Maryam (sic) did a great job. I think she set the land speed record for cutting hair. He was seated and done within 5 minutes, and all that while having a screaming fit the way only an 18 month old boy from my family can do. They will use scissors if the child can stand it, and clippers if she has a squirmer. I recommend calling ahead for a reservation or you will have to wait (or shop with your wife while waiting!).</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>I went to Ramsay's steak at the Paris for my birthday and we loved it. This time in Vegas we were looking for a light lunch and wanted to try Ramsay's pub and grill. We watch Hells Kitchen so we knew it existed. It is within the Cesar's palace. <font color = red>Nice</font> decor. I ordered a burger, nothing fancy. It took more than one hour for my girlfriend and I to get our food. THe waitress disappeared for more than 45 min, when she finally came back she was worried that we did not get our food yet, eventually the manager came to apologize as well and to offer us a round of drink assuring that food was on its way. Another 15-20 min later, we finally got our order, it looked great however my patty was not cooked... simply grilled on the outside and raw on the inside, it was funny to see that considering the show that Gordon puts on in Hells Kitchen over uncooked meat... I mentioned to the waitress and the manager who did not charge us for my dish. They offered to order a new one but at this point the quick lunch turned into a 2 hours bad experience and we wanted to get out. I think the staff handled the situation properly, some might have yelled a little more and maybe ask for more comps... This was certainly a bad experience overall, I am sure however it does not reflect the standards of the restaurant... I hope. but with so many other places in Vegas I doubt I will give it another shot.... I give two stars for the staff's reaction, even slow, at least they tried to do something... and for the beer I was drinking that I thought was great !</td>\n",
|
||||
" <td>I went to Ramsay's steak at the Paris for my birthday and we loved it. This time in Vegas we were looking for a light lunch and wanted to try Ramsay's pub and grill. We watch Hells Kitchen so we knew it existed. It is within the Cesar's palace. <font color = green>Delightful</font> decor. I ordered a burger, nothing fancy. It took more than one hour for my girlfriend and I to get our food. THe waitress disappeared for more than 45 min, when she finally came back she was worried that we did not get our food yet, eventually the manager came to apologize as well and to offer us a round of drink assuring that food was on its way. Another 15-20 min later, we finally got our order, it looked great however my patty was not cooked... simply grilled on the outside and raw on the inside, it was funny to see that considering the show that Gordon puts on in Hells Kitchen over uncooked meat... I mentioned to the waitress and the manager who did not charge us for my dish. They offered to order a new one but at this point the quick lunch turned into a 2 hours bad experience and we wanted to get out. I think the staff handled the situation properly, some might have yelled a little more and maybe ask for more comps... This was certainly a bad experience overall, I am sure however it does not reflect the standards of the restaurant... I hope. but with so many other places in Vegas I doubt I will give it another shot.... I give two stars for the staff's reaction, even slow, at least they tried to do something... and for the beer I was drinking that I thought was great !</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td><font color = red>Great</font> name, but the beer is insipid, bland, and served in plastic pitchers. Yuck. College greeksters might dig it, but anyone with an actual palate will be repelled. Costly, boring, feh.</td>\n",
|
||||
" <td><font color = green>Phenomenal</font> name, but the beer is insipid, bland, and served in plastic pitchers. Yuck. College greeksters might dig it, but anyone with an actual palate will be repelled. Costly, boring, feh.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td><font color = green>Lauren</font> R. did a great job explaining how these bad boys are served! She's right on the money and so are these! Seriously, some of the best hot dogs in town are here.. not topped with bacon but wrapped in bacon.. the hot dogs are not cooked on site but at a local commissary where they can legally cook bacon wrapped hot dogs. It is very much illegal not only in Arizona but in California to cook hot dogs on the street in this manor. In fact there was a lady in LA who was actually arrested for cooking and serving them this way. Anyway, enough with the education.. stop by and eat one, keep this cart going, amazing hot dogs and friendly people running this one!! Oh and they have real Mexican coke, with sugar, no HFCS!</td>\n",
|
||||
" <td><font color = red>Lorraine</font> R. did a great job explaining how these bad boys are served! She's right on the money and so are these! Seriously, some of the best hot dogs in town are here.. not topped with bacon but wrapped in bacon.. the hot dogs are not cooked on site but at a local commissary where they can legally cook bacon wrapped hot dogs. It is very much illegal not only in Arizona but in California to cook hot dogs on the street in this manor. In fact there was a lady in LA who was actually arrested for cooking and serving them this way. Anyway, enough with the education.. stop by and eat one, keep this cart going, amazing hot dogs and friendly people running this one!! Oh and they have real Mexican coke, with sugar, no HFCS!</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td><font color = red>Awesome</font> atmosphere and design. Unfortunately the food doesn't keep up with the ambiance. Prices were high for pre-made food. Throw a chef in here and it could be an amazing spot.</td>\n",
|
||||
" <td><font color = green>Sublime</font> atmosphere and design. Unfortunately the food doesn't keep up with the ambiance. Prices were high for pre-made food. Throw a chef in here and it could be an amazing spot.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pd.options.display.max_colwidth = 480 # increase column width so we can actually read the examples\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
"display(HTML(logger.df[['passage_1', 'passage_2']].to_html(escape=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"\n",
|
||||
"Our constraint seems to have done its job: it filtered out attacks that did not swap out a named entity for another, according to the NLTK named entity detector. However, we can see some problems inherent in the detector: it often thinks the first word of a given sentence is a named entity, probably due to capitalization. (This is why \"Awesome atmosphere\" can be replaced by \"Sublime atmosphere\" and still fulfill our constraint; NLTK is telling us that both of those are proper nouns, some specific named type of atmosphere.) \n",
|
||||
"\n",
|
||||
"We did manage to produce some nice adversarial examples! \"Cool Cuts\" hair cuttery became \"Cool Cutback\" and the entire prediction (of 298 words) flipped from positive to negative. \"Red Lobster\" became \"Flushed Lobster\" and the prediction (of 337 words) also shifted from positive to negative."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
104
docs/index.rst
@@ -1,78 +1,44 @@
|
||||
TextAttack
|
||||
======================================
|
||||
TextAttack Documentation
|
||||
=======================================
|
||||
|
||||
`TextAttack <https://github.com/QData/TextAttack>`__ is a Python framework for adversarial attacks and data augmentation in NLP.
|
||||
|
||||
NLP Attacks
|
||||
-----------
|
||||
|
||||
TextAttack provides a framework for constructing and thinking about attacks via perturbation in NLP. TextAttack builds attacks from four components:
|
||||
|
||||
- `Goal Functions <attacks/goal_function.html>`__ stipulate the goal of the attack, like to change the prediction score of a classification model, or to change all of the words in a translation output.
|
||||
- `Constraints <attacks/constraint.html>`__ determine if a potential perturbation is valid with respect to the original input.
|
||||
- `Transformations <attacks/transformation.html>`__ take a text input and transform it by inserting and deleting characters, words, and/or phrases.
|
||||
- `Search Methods <attacks/search_method.html>`__ explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
|
||||
|
||||
TextAttack provides a set of `Attack Recipes <attacks/attack_recipes.html>`__ that assemble attacks from the literature from these four components.
|
||||
|
||||
Data Augmentation
|
||||
-----------------
|
||||
Data augmentation is easy and extremely common in computer vision but harder and less common in NLP. We provide a `Data Augmentation <augmentation/augmenter.html>`__ module using transformations and constraints.
|
||||
|
||||
Features
|
||||
------------
|
||||
TextAttack has some other features that make it a pleasure to use:
|
||||
|
||||
- `Built-in Datasets <datasets_models/datasets.html>`__ for running attacks without supplying your own data
|
||||
- `Pre-trained Models <datasets_models/models.html>`__ for testing attacks and evaluating constraints
|
||||
- `Built-in Tokenizers <datasets_models/tokenizers.html>`__ so you don't have to worry about tokenizing the inputs
|
||||
- `Visualization options <misc/loggers.html>`__ like Weights & Biases and Visdom
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
:caption: Quickstart
|
||||
:maxdepth: 6
|
||||
:caption: About
|
||||
|
||||
quickstart/installation
|
||||
quickstart/overview
|
||||
Example 1: Transformations <examples/1_Introduction_and_Transformations.ipynb>
|
||||
Example 2: Constraints <examples/2_Constraints.ipynb>
|
||||
1start/basic-Intro.rst
|
||||
1start/what_is_an_adversarial_attack.md
|
||||
1start/references.md
|
||||
1start/benchmark-search.md
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
:caption: NLP Attacks
|
||||
:maxdepth: 6
|
||||
:caption: Get Started
|
||||
|
||||
attacks/attack
|
||||
attacks/attack_result
|
||||
attacks/goal_function
|
||||
attacks/goal_function_result
|
||||
attacks/constraint
|
||||
attacks/transformation
|
||||
attacks/search_method
|
||||
attacks/attack_recipes
|
||||
Installation <1start/installation>
|
||||
Command-Line Usage <1start/command_line_usage.md>
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
:caption: Notebook Tutorials
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
:caption: Data Augmentation
|
||||
|
||||
augmentation/augmenter
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
:caption: Models, Datasets and Tokenizers
|
||||
|
||||
datasets_models/models
|
||||
datasets_models/datasets
|
||||
datasets_models/tokenizers
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
:caption: Miscellaneous
|
||||
Tutorial 0: TextAttack End-To-End (Train, Eval, Attack) <2notebook/0_End_to_End.ipynb>
|
||||
Tutorial 1: Transformations <2notebook/1_Introduction_and_Transformations.ipynb>
|
||||
Tutorial 2: Constraints <2notebook/2_Constraints.ipynb>
|
||||
Tutorial 3: Attacking TensorFlow models <2notebook/Example_0_tensorflow.ipynb>
|
||||
Tutorial 4: Attacking scikit-learn models <2notebook/Example_1_sklearn.ipynb>
|
||||
Tutorial 5: Attacking AllenNLP models <2notebook/Example_2_allennlp.ipynb>
|
||||
Tutorial 6: Attacking multilingual models <2notebook/Example_4_CamemBERT.ipynb>
|
||||
|
||||
misc/loggers
|
||||
misc/validators
|
||||
misc/tokenized_text
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
:glob:
|
||||
:caption: Developer Guide
|
||||
|
||||
1start/support.md
|
||||
1start/api-design-tips.md
|
||||
3recipes/attack_recipes
|
||||
3recipes/augmenter_recipes
|
||||
apidoc/textattack
|
||||
@@ -1,29 +0,0 @@
|
||||
======================
|
||||
Loggers
|
||||
======================
|
||||
|
||||
Loggers track, visualize, and export attack results.
|
||||
|
||||
.. automodule:: textattack.loggers.logger
|
||||
:members:
|
||||
|
||||
Loggers
|
||||
########
|
||||
|
||||
.. automodule:: textattack.loggers.file_logger
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.loggers.csv_logger
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.loggers.visdom_logger
|
||||
:members:
|
||||
|
||||
.. automodule:: textattack.loggers.weights_and_biases_logger
|
||||
:members:
|
||||
|
||||
|
||||
Log Manager
|
||||
############
|
||||
.. automodule:: textattack.loggers.attack_log_manager
|
||||
:members:
|
||||
@@ -1,6 +0,0 @@
|
||||
===================
|
||||
Tokenized Text
|
||||
===================
|
||||
|
||||
.. automodule:: textattack.shared.tokenized_text
|
||||
:members:
|
||||
@@ -1,8 +0,0 @@
|
||||
======================
|
||||
Validators
|
||||
======================
|
||||
|
||||
Validators ensure compatibility between search methods, transformations, constraints, and goal functions.
|
||||
|
||||
.. automodule:: textattack.shared.validators
|
||||
:members:
|
||||
@@ -1,52 +0,0 @@
|
||||
===========
|
||||
Overview
|
||||
===========
|
||||
TextAttack builds attacks from four components:
|
||||
|
||||
- `Goal Functions <../attacks/goal_function.html>`__ stipulate the goal of the attack, like to change the prediction score of a classification model, or to change all of the words in a translation output.
|
||||
- `Constraints <../attacks/constraint.html>`__ determine if a potential perturbation is valid with respect to the original input.
|
||||
- `Transformations <../attacks/transformation.html>`__ take a text input and transform it by inserting and deleting characters, words, and/or phrases.
|
||||
- `Search Methods <../attacks/search_method.html>`__ explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
|
||||
|
||||
Any model that overrides ``__call__``, takes ``TokenizedText`` as input, and formats output correctly can be used with TextAttack. TextAttack also has built-in datasets and pre-trained models on these datasets. Below is an example of attacking a pre-trained model on the AGNews dataset::
|
||||
|
||||
from tqdm import tqdm
|
||||
from textattack.loggers import FileLogger
|
||||
|
||||
from textattack.datasets.classification import AGNews
|
||||
from textattack.models.classification.lstm import LSTMForAGNewsClassification
|
||||
from textattack.goal_functions import UntargetedClassification
|
||||
|
||||
from textattack.shared import Attack
|
||||
from textattack.search_methods import GreedySearch
|
||||
from textattack.transformations import WordSwapEmbedding
|
||||
from textattack.constraints.grammaticality import PartOfSpeech
|
||||
from textattack.constraints.semantics import RepeatModification, StopwordModification
|
||||
|
||||
# Create the model and goal function
|
||||
model = LSTMForAGNewsClassification()
|
||||
goal_function = UntargetedClassification(model)
|
||||
|
||||
# Use the default WordSwapEmbedding transformation
|
||||
transformation = WordSwapEmbedding()
|
||||
|
||||
# Add a constraint, note that an empty list can be used if no constraints are wanted
|
||||
constraints = [
|
||||
RepeatModification(),
|
||||
StopwordModification(),
|
||||
PartOfSpeech()
|
||||
]
|
||||
|
||||
# Choose a search method
|
||||
search = GreedySearch()
|
||||
|
||||
# Make an attack with the above parameters
|
||||
attack = Attack(goal_function, constraints, transformation, search)
|
||||
|
||||
# Run the attack on 5 examples and see the results using a logger to output to stdout
|
||||
results = attack.attack_dataset(AGNews(), num_examples=5, attack_n=True)
|
||||
|
||||
logger = FileLogger(stdout=True)
|
||||
|
||||
for result in tqdm(results, total=5):
|
||||
logger.log_attack_result(result)
|
||||
@@ -1,2 +1,4 @@
|
||||
recommonmark
|
||||
nbsphinx
|
||||
sphinx-rtd-theme
|
||||
sphinx-autobuild
|
||||
sphinx-rtd-theme
|
||||
|
||||
61
examples/attack/attack_camembert.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# Quiet TensorFlow.
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline
|
||||
|
||||
from textattack.attack_recipes import PWWSRen2019
|
||||
from textattack.datasets import HuggingFaceDataset
|
||||
from textattack.models.wrappers import ModelWrapper
|
||||
|
||||
if "TF_CPP_MIN_LOG_LEVEL" not in os.environ:
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
|
||||
|
||||
class HuggingFaceSentimentAnalysisPipelineWrapper(ModelWrapper):
    """Adapts a ``transformers`` sentiment-analysis pipeline for TextAttack.

    The pipeline returns one dict per input, e.g.
    ``{'label': 'POSITIVE', 'score': 0.7817379832267761}``, while TextAttack
    expects a ``(batch, 2)`` array of ``[negative, positive]`` probabilities,
    e.g. ``[[0.218262017, 0.7817379832267761]]``. This wrapper performs that
    conversion.
    """

    def __init__(self, pipeline):
        # The underlying HuggingFace pipeline that produces raw predictions.
        self.pipeline = pipeline

    def __call__(self, text_inputs):
        """Run the pipeline and return ``[negative, positive]`` probability rows."""
        rows = [
            [1 - pred["score"], pred["score"]]
            if pred["label"] == "POSITIVE"
            else [pred["score"], 1 - pred["score"]]
            for pred in self.pipeline(text_inputs)
        ]
        return np.array(rows)
|
||||
|
||||
|
||||
# Build the victim model: a pre-trained French sentiment classifier.
# See https://github.com/TheophileBlard/french-sentiment-analysis-with-bert
model = TFAutoModelForSequenceClassification.from_pretrained("tblard/tf-allocine")
tokenizer = AutoTokenizer.from_pretrained("tblard/tf-allocine")
# Use a distinct name so the imported `pipeline` factory is not shadowed.
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

model_wrapper = HuggingFaceSentimentAnalysisPipelineWrapper(sentiment_pipeline)

# Build the attack recipe: PWWS relies on a WordNet-based word swap.
recipe = PWWSRen2019.build(model_wrapper)
# WordNet defaults to English; switch the transformation to French ('fra').
#
# See
# "Building a free French wordnet from multilingual resources",
# E. L. R. A. (ELRA) (ed.),
# Proceedings of the Sixth International Language Resources and Evaluation (LREC’08).
recipe.transformation.language = "fra"

# Attack the French Allociné review dataset and print each result.
dataset = HuggingFaceDataset("allocine", split="test")
separator = "-" * 20
for idx, result in enumerate(recipe.attack_dataset(dataset)):
    print(separator, f"Result {idx+1}", separator)
    print(result.__str__(color_method="ansi"))
    print()
|
||||
7
examples/attack/attack_from_components.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Builds an attack from individual components (goal function, transformation,
# constraints, search method) and runs it against a pre-trained BERT model on
# the Yelp sentiment dataset.
textattack attack \
    --attack-n \
    --goal-function untargeted-classification \
    --model bert-base-uncased-yelp \
    --num-examples 8 \
    --transformation word-swap-wordnet \
    --constraints edit-distance^12 max-words-perturbed:max_percent=0.75 repeat stopword \
    --search greedy
|
||||