Compare commits
1335 Commits
mcts-jeff
...
flair-vers
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e86e563c37 | ||
|
|
7855b9e474 | ||
|
|
9a7f142e34 | ||
|
|
f855d15108 | ||
|
|
d93682d8bd | ||
|
|
9d9ecaed06 | ||
|
|
8637d20c2b | ||
|
|
4ece786aee | ||
|
|
5b6663273f | ||
|
|
4d3accf290 | ||
|
|
f7a476c467 | ||
|
|
254c98473d | ||
|
|
501e443328 | ||
|
|
40268e9c63 | ||
|
|
bede5f1c50 | ||
|
|
262ca05307 | ||
|
|
9540ee457f | ||
|
|
b537b1dae8 | ||
|
|
9a67b9eff0 | ||
|
|
7a315ecfe2 | ||
|
|
6295e633ce | ||
|
|
17eb7fc846 | ||
|
|
120025d8c0 | ||
|
|
da8d5d06a0 | ||
|
|
819f5c0e4a | ||
|
|
abdf349256 | ||
|
|
d75a27c9d1 | ||
|
|
c2b11f386b | ||
|
|
ae5ee0672c | ||
|
|
f208318e35 | ||
|
|
c9cf6d7394 | ||
|
|
b430d76d25 | ||
|
|
1d09830091 | ||
|
|
70362230e5 | ||
|
|
2c471f55f5 | ||
|
|
0eaa5883f8 | ||
|
|
1d8b72b85d | ||
|
|
72409a0dd9 | ||
|
|
9310492974 | ||
|
|
0f39de49f7 | ||
|
|
3854f8f770 | ||
|
|
3a375a3d8e | ||
|
|
4fbb09c9ac | ||
|
|
429254d873 | ||
|
|
1f709ff0fa | ||
|
|
25d3296979 | ||
|
|
21054c99c2 | ||
|
|
cc5ff46226 | ||
|
|
3c40ce8605 | ||
|
|
a32263a20e | ||
|
|
fccf60dfd4 | ||
|
|
58d6ea6652 | ||
|
|
de88a5ee23 | ||
|
|
d394d048ac | ||
|
|
65010cf562 | ||
|
|
2744fd3ca8 | ||
|
|
10f54a16f4 | ||
|
|
4015ad23b4 | ||
|
|
3f224bce50 | ||
|
|
937ba0d6b8 | ||
|
|
d5e9048a48 | ||
|
|
5030d52997 | ||
|
|
c7e15279a0 | ||
|
|
723bf6c191 | ||
|
|
6950621a95 | ||
|
|
e9fde9912e | ||
|
|
fe8bd660e4 | ||
|
|
facad249d8 | ||
|
|
ed303a58a1 | ||
|
|
b8a5830c7f | ||
|
|
10309a32de | ||
|
|
1021c5643e | ||
|
|
dc45bb346d | ||
|
|
5453e1e05f | ||
|
|
ef72936a83 | ||
|
|
4da3a257ab | ||
|
|
12b76d3c3b | ||
|
|
ae68c81411 | ||
|
|
8e18c3c850 | ||
|
|
aa736d26b4 | ||
|
|
6c3359a33d | ||
|
|
ac306c7973 | ||
|
|
3b6de0b28a | ||
|
|
62a99ae7de | ||
|
|
e6b3709b31 | ||
|
|
9f84732045 | ||
|
|
d04c740f11 | ||
|
|
678cf4ba93 | ||
|
|
749c023c61 | ||
|
|
9b7fb83ab4 | ||
|
|
0c9356d316 | ||
|
|
32c25bbb58 | ||
|
|
055505955e | ||
|
|
aa0d39bdfb | ||
|
|
77a244d5cc | ||
|
|
67e1af6c02 | ||
|
|
3172bf0bb9 | ||
|
|
022c69fb5e | ||
|
|
211abc5a67 | ||
|
|
1855169799 | ||
|
|
b1dd4dfbc7 | ||
|
|
96cf4e42b8 | ||
|
|
db58b2f567 | ||
|
|
3a26b143b8 | ||
|
|
a1d8c4d347 | ||
|
|
0e7a831325 | ||
|
|
153e21171b | ||
|
|
e343f6dbfd | ||
|
|
93a8c5cd42 | ||
|
|
6fa44c1270 | ||
|
|
54cbfbcafc | ||
|
|
ba8a755024 | ||
|
|
30e21cd85d | ||
|
|
ca8b990c40 | ||
|
|
9c01755aa2 | ||
|
|
eebf2071b9 | ||
|
|
ad2cf4a49c | ||
|
|
693d8dd6eb | ||
|
|
494caeaf59 | ||
|
|
203dba9b4d | ||
|
|
86a3d3116e | ||
|
|
5780b177ef | ||
|
|
c2bc09f528 | ||
|
|
91e364aa64 | ||
|
|
2884cbf65f | ||
|
|
2009a77f7d | ||
|
|
47a03e3e40 | ||
|
|
20f79e1af5 | ||
|
|
e32476a8af | ||
|
|
29bf26f326 | ||
|
|
4019eb1479 | ||
|
|
aa2ccb5734 | ||
|
|
261c02640f | ||
|
|
50daaa91e7 | ||
|
|
08302ea336 | ||
|
|
4316c02110 | ||
|
|
3828c745f3 | ||
|
|
1092c2f479 | ||
|
|
b0a9c97be9 | ||
|
|
b7b036aa36 | ||
|
|
4509e9aa57 | ||
|
|
6713f087d8 | ||
|
|
9cd54c2625 | ||
|
|
3a5351a349 | ||
|
|
2ef1905ce5 | ||
|
|
870200cc96 | ||
|
|
fa68f522c6 | ||
|
|
026e20c6b9 | ||
|
|
fb745638c1 | ||
|
|
ff37cded04 | ||
|
|
06189a40f9 | ||
|
|
0acd8e42dc | ||
|
|
62a1a15717 | ||
|
|
f6a8d264fd | ||
|
|
2db7ebd533 | ||
|
|
ef42e02417 | ||
|
|
e618838fa8 | ||
|
|
0603d7c132 | ||
|
|
e2396ab2dc | ||
|
|
994a56caf2 | ||
|
|
748ec1c7e7 | ||
|
|
a17f702e01 | ||
|
|
bdc29925a5 | ||
|
|
ebf1620a59 | ||
|
|
eb8172a64f | ||
|
|
83416735c4 | ||
|
|
6a98570c66 | ||
|
|
1adc909d74 | ||
|
|
4222393d03 | ||
|
|
d6cdae4091 | ||
|
|
88fb9853bf | ||
|
|
c5863a020c | ||
|
|
b47bf6051a | ||
|
|
89d47b9def | ||
|
|
d66215d334 | ||
|
|
3a27cb0d36 | ||
|
|
15ad4298a0 | ||
|
|
d7422ba61c | ||
|
|
a029964dc7 | ||
|
|
c40e7f83c1 | ||
|
|
04437d7193 | ||
|
|
47c17586cd | ||
|
|
7408bf0199 | ||
|
|
fb039c6b7b | ||
|
|
ffd75ce3e5 | ||
|
|
86f3294125 | ||
|
|
eb51ca31ba | ||
|
|
b619607255 | ||
|
|
cb2237e9b0 | ||
|
|
1909a874ee | ||
|
|
1565374241 | ||
|
|
a6b3d8c16d | ||
|
|
8ddac0eb88 | ||
|
|
48a04b4969 | ||
|
|
77ae894957 | ||
|
|
adae04ac1a | ||
|
|
ca5f15ce90 | ||
|
|
03f64b022e | ||
|
|
cef0bb02e2 | ||
|
|
4bfed05499 | ||
|
|
953878dcaa | ||
|
|
b4cecad98c | ||
|
|
2773cdd1d1 | ||
|
|
0e38e8dc9b | ||
|
|
8a0acc24d2 | ||
|
|
9fda56c685 | ||
|
|
c739959ff6 | ||
|
|
c8253ff520 | ||
|
|
7670b8ea24 | ||
|
|
451978b88e | ||
|
|
4d19b60c89 | ||
|
|
5f4a0eb379 | ||
|
|
b8cf9ba646 | ||
|
|
e49f3c0da9 | ||
|
|
544a36c0bc | ||
|
|
a47e90c868 | ||
|
|
82e8f51c90 | ||
|
|
d6e4dab61a | ||
|
|
04b7c6f79b | ||
|
|
bdbd370499 | ||
|
|
5f978edb2a | ||
|
|
b120fb5159 | ||
|
|
3426940895 | ||
|
|
68246ea202 | ||
|
|
7047f44829 | ||
|
|
e2012aa2e4 | ||
|
|
8814393093 | ||
|
|
d44b2ccf75 | ||
|
|
aef7af6c9a | ||
|
|
da1b22627a | ||
|
|
0dbbcd5c9b | ||
|
|
850bc31388 | ||
|
|
15a11c4a4d | ||
|
|
27c52e2cdf | ||
|
|
3bfb42ccdf | ||
|
|
7f5ea106e8 | ||
|
|
973e4d5fe6 | ||
|
|
0d69fd2733 | ||
|
|
9931b90594 | ||
|
|
cb5a123fac | ||
|
|
4ab2b96d68 | ||
|
|
2e8d6170c7 | ||
|
|
94f0208b1c | ||
|
|
0cc049a8c7 | ||
|
|
4fcdc4be10 | ||
|
|
2df9a3ba43 | ||
|
|
7f7e10831d | ||
|
|
efe3ac7704 | ||
|
|
7635c7be35 | ||
|
|
c55dd28f5d | ||
|
|
50baac6102 | ||
|
|
b29a09a75e | ||
|
|
54e01ff80b | ||
|
|
754e06af13 | ||
|
|
419586841a | ||
|
|
43e7577eac | ||
|
|
e1951815ae | ||
|
|
6a383bfa08 | ||
|
|
6f12980302 | ||
|
|
fdfaf6cd2e | ||
|
|
8cfb885d78 | ||
|
|
8dc04acf0f | ||
|
|
f617feb535 | ||
|
|
8c8b68fe89 | ||
|
|
318d7abd96 | ||
|
|
c5139c2d91 | ||
|
|
f819d8ac98 | ||
|
|
30a2bc9c28 | ||
|
|
2266394f45 | ||
|
|
ba44ea9fe0 | ||
|
|
0a224123ad | ||
|
|
167e0bc99e | ||
|
|
3997933c5d | ||
|
|
5dcd2a09ac | ||
|
|
f503eb1f3b | ||
|
|
8580d0e201 | ||
|
|
3f3b3907ed | ||
|
|
d89c81c091 | ||
|
|
8f403d46e4 | ||
|
|
28601ebd40 | ||
|
|
987a0a21c9 | ||
|
|
d8b2c9d1f4 | ||
|
|
5b7ddc2f67 | ||
|
|
c9451e3663 | ||
|
|
5a345e66a2 | ||
|
|
d0a6abf89e | ||
|
|
9098f890b5 | ||
|
|
e10efaf4d8 | ||
|
|
b3b4896a26 | ||
|
|
b71461b20c | ||
|
|
45a1375e5d | ||
|
|
0f8abe71f8 | ||
|
|
a7759a0102 | ||
|
|
81951945a8 | ||
|
|
9b1898f423 | ||
|
|
eb0b837439 | ||
|
|
278bb33b58 | ||
|
|
545687dfa5 | ||
|
|
46ab64f915 | ||
|
|
2fa6544e20 | ||
|
|
9d7c9ba129 | ||
|
|
695d00a3e0 | ||
|
|
ec26dbf1b8 | ||
|
|
d37c5ab88e | ||
|
|
fe5aa5cf41 | ||
|
|
2719e20962 | ||
|
|
10965415a5 | ||
|
|
a53aec9001 | ||
|
|
ede35d40ff | ||
|
|
ef3c2e2bca | ||
|
|
993d49e90b | ||
|
|
02c653df1b | ||
|
|
168aeda0a6 | ||
|
|
e00253e256 | ||
|
|
71185a01c7 | ||
|
|
3bdd0a8481 | ||
|
|
61b4d564dc | ||
|
|
5cd97aa904 | ||
|
|
9ead8533b9 | ||
|
|
d320b8d3c7 | ||
|
|
061bbd998b | ||
|
|
f375f87a19 | ||
|
|
2215c56682 | ||
|
|
c77f26665b | ||
|
|
1e65a93c8c | ||
|
|
c214210c2c | ||
|
|
04e98ba8ba | ||
|
|
b9e0c1da74 | ||
|
|
0adaa2bdba | ||
|
|
1295e8fa94 | ||
|
|
a1781a0c4f | ||
|
|
7f2621bc87 | ||
|
|
b14f7534b8 | ||
|
|
f1c322119d | ||
|
|
5214d3b317 | ||
|
|
a7ffdd283f | ||
|
|
0fad7f1c2a | ||
|
|
48d89d6f17 | ||
|
|
bb4539a566 | ||
|
|
0ee2f68f4a | ||
|
|
9ee0ba169d | ||
|
|
e7484d7f98 | ||
|
|
7a27040a47 | ||
|
|
c2924a49fb | ||
|
|
84ab6bcdf5 | ||
|
|
3abaae4255 | ||
|
|
ea191caec5 | ||
|
|
783213ffc5 | ||
|
|
c8047d6a4f | ||
|
|
18ecc5dd03 | ||
|
|
cff1d2dcb9 | ||
|
|
56d5bc5816 | ||
|
|
aeacb556fe | ||
|
|
8bf85fdbc2 | ||
|
|
bbd5a7e8db | ||
|
|
c0a60b3224 | ||
|
|
152a8d843f | ||
|
|
5c1579e709 | ||
|
|
9bacdfcbd8 | ||
|
|
a0193ba263 | ||
|
|
0401591371 | ||
|
|
6bf376f8f9 | ||
|
|
155dd490d7 | ||
|
|
d810489bf6 | ||
|
|
e583188b91 | ||
|
|
5f04781809 | ||
|
|
a03cbb1d27 | ||
|
|
b0892ce7fe | ||
|
|
e33bfc2bb6 | ||
|
|
4dc804a8f7 | ||
|
|
6838209255 | ||
|
|
305eb48cb6 | ||
|
|
b21bdc96c4 | ||
|
|
24669a90b9 | ||
|
|
be6cd8801f | ||
|
|
3e9cbcbd05 | ||
|
|
f0e27d129c | ||
|
|
e04c3777b3 | ||
|
|
170e45b714 | ||
|
|
7caab13a88 | ||
|
|
a9ab5d4502 | ||
|
|
2df8833965 | ||
|
|
f44947d0d0 | ||
|
|
ad68251221 | ||
|
|
b148bd2818 | ||
|
|
2328b7f76c | ||
|
|
0f0649e29b | ||
|
|
de0b5b8b8b | ||
|
|
f950128ff4 | ||
|
|
16986b421e | ||
|
|
fb6088f39a | ||
|
|
03e85b39c2 | ||
|
|
acbc6c0276 | ||
|
|
16c3b0d0ed | ||
|
|
6ccfaeaa63 | ||
|
|
c04f8f4b2a | ||
|
|
740c333838 | ||
|
|
e6f07eb4de | ||
|
|
5325ef633e | ||
|
|
d9ae5f6abb | ||
|
|
07abaf5b1b | ||
|
|
a0e94792f5 | ||
|
|
8dad54a145 | ||
|
|
866301ba08 | ||
|
|
84c1681f34 | ||
|
|
dcf408be95 | ||
|
|
4893d47e6c | ||
|
|
79f3f4f8f0 | ||
|
|
bdbeae80d2 | ||
|
|
6273b19c19 | ||
|
|
3ffd776b63 | ||
|
|
21496460fe | ||
|
|
e2dd75001c | ||
|
|
4f16bb097b | ||
|
|
3c65e9d0a1 | ||
|
|
46ef3bcac5 | ||
|
|
dcce7b3434 | ||
|
|
7a89aba559 | ||
|
|
e6786b6bd7 | ||
|
|
3458a2b6c2 | ||
|
|
4478ad910b | ||
|
|
8d64e89a1f | ||
|
|
7dcf27d7bd | ||
|
|
41e747215b | ||
|
|
af029bff8b | ||
|
|
58f522b30c | ||
|
|
2509a14331 | ||
|
|
f79463903e | ||
|
|
230e39b590 | ||
|
|
6d83b2c9cb | ||
|
|
96a0360567 | ||
|
|
4c69e8656a | ||
|
|
77ff8f35e2 | ||
|
|
36dfce6bda | ||
|
|
887dca1307 | ||
|
|
b998e0f654 | ||
|
|
69a415f80b | ||
|
|
a4bd404d12 | ||
|
|
c78aa66980 | ||
|
|
c41a13e6cd | ||
|
|
23d0de6b43 | ||
|
|
95603fb73c | ||
|
|
00c80ee514 | ||
|
|
2ea690bbc6 | ||
|
|
fc7d5294d9 | ||
|
|
821c350c37 | ||
|
|
1e36c63c61 | ||
|
|
688d997694 | ||
|
|
d326796c60 | ||
|
|
e91a652715 | ||
|
|
dfe26b63c4 | ||
|
|
0c95966716 | ||
|
|
74bc5c0013 | ||
|
|
beac247faa | ||
|
|
8c726d1e83 | ||
|
|
13bc18aa41 | ||
|
|
c863095613 | ||
|
|
b8d94ca4d5 | ||
|
|
cc6f9af25d | ||
|
|
09d5f1c2e7 | ||
|
|
ecc010de65 | ||
|
|
6737a4ee18 | ||
|
|
5a2f7b3bd5 | ||
|
|
9b120b6253 | ||
|
|
0e10c10a9f | ||
|
|
4420daf15e | ||
|
|
6b40544a5b | ||
|
|
9b2fc8df24 | ||
|
|
f3557c4180 | ||
|
|
7abd27cec7 | ||
|
|
2cb37c9f60 | ||
|
|
0728a779a6 | ||
|
|
a07171a6d9 | ||
|
|
63ad70cf17 | ||
|
|
352ab99e6f | ||
|
|
7e38ee28fd | ||
|
|
5d892ce54c | ||
|
|
c9b1bcabdd | ||
|
|
4ffa618be3 | ||
|
|
67e03ac3c0 | ||
|
|
c8e7da9213 | ||
|
|
55cb0710bf | ||
|
|
4627f210c6 | ||
|
|
1505015ff5 | ||
|
|
e7a8dec4b5 | ||
|
|
aacd8b675f | ||
|
|
d5b3e4b7c1 | ||
|
|
e257ed8146 | ||
|
|
d715a3abcb | ||
|
|
c6b9b9c280 | ||
|
|
8c777d60dc | ||
|
|
a55fa39dc2 | ||
|
|
97b67b0d21 | ||
|
|
fcc066b089 | ||
|
|
0e0c25e887 | ||
|
|
1aa8bac7ba | ||
|
|
98dc6eef60 | ||
|
|
7004f96132 | ||
|
|
8351959c3e | ||
|
|
bb009c70f9 | ||
|
|
1846a875e9 | ||
|
|
90bfe90e83 | ||
|
|
22f36b594c | ||
|
|
1ca633dc2f | ||
|
|
0c52fb6066 | ||
|
|
f817a2bb16 | ||
|
|
a131988988 | ||
|
|
224d788d3e | ||
|
|
a07deef834 | ||
|
|
38e3e917f5 | ||
|
|
69bdd25028 | ||
|
|
c341d9e04a | ||
|
|
f8a0cfe266 | ||
|
|
3b87ccedf7 | ||
|
|
e6223182d6 | ||
|
|
0e276cdc36 | ||
|
|
3390a18c52 | ||
|
|
275c77905e | ||
|
|
685a8ac38a | ||
|
|
d701d19437 | ||
|
|
4d794d3d53 | ||
|
|
8d038e1b6c | ||
|
|
753438a9f1 | ||
|
|
36c3e69db6 | ||
|
|
e53ab4653f | ||
|
|
f56f68c42d | ||
|
|
94ea52460e | ||
|
|
61d907c50b | ||
|
|
f079faa61e | ||
|
|
8c575f2006 | ||
|
|
e57634cdb0 | ||
|
|
d878843eb4 | ||
|
|
55f274b55e | ||
|
|
47d15acea9 | ||
|
|
4fb3850072 | ||
|
|
e496280286 | ||
|
|
88024e1f49 | ||
|
|
6ae5dd5065 | ||
|
|
3dae361054 | ||
|
|
5da0f427da | ||
|
|
a2f770aaf0 | ||
|
|
b44bcd0f61 | ||
|
|
5ce3fcb793 | ||
|
|
d61adba949 | ||
|
|
43e5418846 | ||
|
|
2cdf624b67 | ||
|
|
428b19a511 | ||
|
|
bbd1587e5f | ||
|
|
ee3f476888 | ||
|
|
23c06a5c74 | ||
|
|
2c67aa6106 | ||
|
|
356e6b2a97 | ||
|
|
4d28076efb | ||
|
|
2223f70c0d | ||
|
|
4f9db82164 | ||
|
|
4a6ab23e53 | ||
|
|
0f5a3f33fc | ||
|
|
4180b99af3 | ||
|
|
32b8468d80 | ||
|
|
17e1b3cd0f | ||
|
|
cd611887dc | ||
|
|
84e88304ea | ||
|
|
e2d91f0597 | ||
|
|
8f2809e313 | ||
|
|
663f951bec | ||
|
|
cc02abd4dc | ||
|
|
e10b903677 | ||
|
|
273605a5f7 | ||
|
|
5c53dfad21 | ||
|
|
9fa5874144 | ||
|
|
ae3a28a759 | ||
|
|
4269029b86 | ||
|
|
1575566804 | ||
|
|
548d751613 | ||
|
|
f97475b98d | ||
|
|
7e50816135 | ||
|
|
1d8d772928 | ||
|
|
4b5178a761 | ||
|
|
7062355a3a | ||
|
|
9d0a668210 | ||
|
|
4fc83907c6 | ||
|
|
44efdfad05 | ||
|
|
32beeb7bbf | ||
|
|
64575735d5 | ||
|
|
a9ec9ac276 | ||
|
|
419975abdd | ||
|
|
e654dde2af | ||
|
|
fc655cb2e3 | ||
|
|
2461c7be80 | ||
|
|
f1f14de651 | ||
|
|
e76b745187 | ||
|
|
be16974b3b | ||
|
|
44d08d55f4 | ||
|
|
8ea872245a | ||
|
|
4d2585e608 | ||
|
|
d6b036e8bf | ||
|
|
96c5feac64 | ||
|
|
0ece7ca4e9 | ||
|
|
02c9d76cad | ||
|
|
e47e34ebe1 | ||
|
|
c9931daf43 | ||
|
|
c1a801e81d | ||
|
|
e055f17790 | ||
|
|
c2ec986a1d | ||
|
|
d027dc07c0 | ||
|
|
af7f31d265 | ||
|
|
e66429cf80 | ||
|
|
b014a75f9d | ||
|
|
253af191d9 | ||
|
|
20116b483d | ||
|
|
63077c06d3 | ||
|
|
c0cc8552b1 | ||
|
|
5afe88a290 | ||
|
|
005336c8e4 | ||
|
|
aef56dcbbf | ||
|
|
f2e3786e3b | ||
|
|
8c28c555c2 | ||
|
|
4bdaff94cb | ||
|
|
e2217914b6 | ||
|
|
042a371c86 | ||
|
|
3f6df81c71 | ||
|
|
8af151880d | ||
|
|
4baf44607e | ||
|
|
70b035aeb3 | ||
|
|
3139e9a369 | ||
|
|
8f87b26994 | ||
|
|
d292f3ee13 | ||
|
|
65483f6aec | ||
|
|
f33dcc1630 | ||
|
|
c5d0f5702e | ||
|
|
31a69a839e | ||
|
|
5c1d03bdfa | ||
|
|
579a81d7ae | ||
|
|
b6deca335c | ||
|
|
c542479f42 | ||
|
|
0b9a022eed | ||
|
|
0df48891fa | ||
|
|
1f985a27d0 | ||
|
|
cd8326e6ea | ||
|
|
37138a6183 | ||
|
|
17415b6820 | ||
|
|
e658b202a6 | ||
|
|
0511da8366 | ||
|
|
7a2fde1520 | ||
|
|
d13971813e | ||
|
|
9697442ab2 | ||
|
|
f691ede779 | ||
|
|
e16a26d283 | ||
|
|
ec791d1d7b | ||
|
|
a78db7377e | ||
|
|
436b79de66 | ||
|
|
50f642ff37 | ||
|
|
4e9965d016 | ||
|
|
77bfdcc28c | ||
|
|
3416cd3828 | ||
|
|
916989c361 | ||
|
|
0291a6fbdb | ||
|
|
ae575cfeb5 | ||
|
|
adbda1ea3b | ||
|
|
279c8afb40 | ||
|
|
6cd3a2d9a5 | ||
|
|
505ddfee10 | ||
|
|
e979dba481 | ||
|
|
2cc7610cca | ||
|
|
15bd6bd3e0 | ||
|
|
f9ead09a82 | ||
|
|
b34af9fbb0 | ||
|
|
1c9dc3630d | ||
|
|
b0f473685d | ||
|
|
f2eadb894d | ||
|
|
739664f0ff | ||
|
|
3519a3006f | ||
|
|
97b4aa2886 | ||
|
|
4ba7fac789 | ||
|
|
d9ad73b8ff | ||
|
|
aacbed4009 | ||
|
|
cb9e16e127 | ||
|
|
81ceab3352 | ||
|
|
d849b626d4 | ||
|
|
4e45ec4da3 | ||
|
|
b589baf985 | ||
|
|
ea3911fb4c | ||
|
|
5a1dd3e3e9 | ||
|
|
f504cdc724 | ||
|
|
fe90ceaf9a | ||
|
|
89f1724ed4 | ||
|
|
f578375d6c | ||
|
|
0b99512193 | ||
|
|
5d5e1b5857 | ||
|
|
00573d24fa | ||
|
|
e2b89f076f | ||
|
|
a54a3c4e57 | ||
|
|
a04a6a847a | ||
|
|
d66d374a19 | ||
|
|
d82dd4e127 | ||
|
|
024369f5f4 | ||
|
|
c7cfa42bcf | ||
|
|
6271fe9edc | ||
|
|
ee05ca0f17 | ||
|
|
0560d4f80f | ||
|
|
7acb17aa27 | ||
|
|
46b0830508 | ||
|
|
5cce3918bf | ||
|
|
618e815eea | ||
|
|
29432d88c8 | ||
|
|
e6d671e918 | ||
|
|
a06f221443 | ||
|
|
bfd512f63c | ||
|
|
a68caf3df5 | ||
|
|
1a92d70457 | ||
|
|
9d3c068db4 | ||
|
|
5a8d74d288 | ||
|
|
7aa407496f | ||
|
|
f5e1a731d2 | ||
|
|
ac95db5611 | ||
|
|
7255a9ac28 | ||
|
|
589113f82f | ||
|
|
2e64ac8aed | ||
|
|
e6e16a3fee | ||
|
|
c50a0d8f9d | ||
|
|
289c84a48a | ||
|
|
5de2efacc1 | ||
|
|
792fc5b11d | ||
|
|
cdd9061cb4 | ||
|
|
28952235dc | ||
|
|
5db7e1c45b | ||
|
|
9c8b9b6b8a | ||
|
|
fb6b0b1511 | ||
|
|
6f4dc29d00 | ||
|
|
519a06617e | ||
|
|
32ed762a3e | ||
|
|
8094ae5f7f | ||
|
|
94a519f31f | ||
|
|
8b3b549ce8 | ||
|
|
92797cb643 | ||
|
|
7a4b83c979 | ||
|
|
e43108ef9b | ||
|
|
5ed22b52c1 | ||
|
|
a7fdef9875 | ||
|
|
055a1f6829 | ||
|
|
36ebda0452 | ||
|
|
410c77fea0 | ||
|
|
613bbf0b88 | ||
|
|
995c2d7c25 | ||
|
|
ebe050c864 | ||
|
|
d65b5963ed | ||
|
|
8f0c443e66 | ||
|
|
277fb908b2 | ||
|
|
0c26e0ae72 | ||
|
|
c46c3d06ea | ||
|
|
b9564088d8 | ||
|
|
f63a9ec83c | ||
|
|
3a48ad3e7b | ||
|
|
4d388a6cf9 | ||
|
|
c3823aca93 | ||
|
|
334671c89b | ||
|
|
073b258aa5 | ||
|
|
9cebf97c82 | ||
|
|
f9eff53df4 | ||
|
|
a0c6b78ad9 | ||
|
|
3f0fdbe227 | ||
|
|
1c6be3079a | ||
|
|
5038c1790d | ||
|
|
7382c7a1b5 | ||
|
|
50d8ffe507 | ||
|
|
22ef1bd459 | ||
|
|
60bd380417 | ||
|
|
5ac5dc4d28 | ||
|
|
8866b67650 | ||
|
|
11eec566a8 | ||
|
|
c351f537cd | ||
|
|
39e4a8e2c3 | ||
|
|
5827b6a859 | ||
|
|
ce789411e2 | ||
|
|
15bdf7a5c5 | ||
|
|
5a237c2e16 | ||
|
|
09a2e30ddb | ||
|
|
39e2a3663a | ||
|
|
9b48469f1a | ||
|
|
e83f4a0091 | ||
|
|
e63ffa921b | ||
|
|
576876257f | ||
|
|
3853928a44 | ||
|
|
ba3b412658 | ||
|
|
4dd95303a6 | ||
|
|
e937e01531 | ||
|
|
185c17eb77 | ||
|
|
d6a480abc9 | ||
|
|
c2fa2c5051 | ||
|
|
3a4b8b2f7e | ||
|
|
09ea975796 | ||
|
|
1394770562 | ||
|
|
77877daa44 | ||
|
|
3ad73f7dc3 | ||
|
|
da7273ac6e | ||
|
|
eb742affad | ||
|
|
5cbff8b842 | ||
|
|
cff37df829 | ||
|
|
5f584c2d75 | ||
|
|
31b3b2fa4e | ||
|
|
cb3b13cd31 | ||
|
|
aaf36912f0 | ||
|
|
f0ed3ca1be | ||
|
|
700d26f4ce | ||
|
|
a6ee121ed3 | ||
|
|
c2c7c00f5e | ||
|
|
4f8b227ef9 | ||
|
|
974061c0aa | ||
|
|
18563814bd | ||
|
|
2498149d22 | ||
|
|
19c4f8032b | ||
|
|
34115e7c03 | ||
|
|
5541904af7 | ||
|
|
1690c305e2 | ||
|
|
80b9a6e7e7 | ||
|
|
f875f65eed | ||
|
|
1387478fb6 | ||
|
|
641cbe7b61 | ||
|
|
eb57e7df5d | ||
|
|
a9db0ffda5 | ||
|
|
9e37fd386d | ||
|
|
ae5047017a | ||
|
|
d36c80b114 | ||
|
|
17e445b607 | ||
|
|
4173ab10aa | ||
|
|
dfdbfbe5f5 | ||
|
|
5b9827387d | ||
|
|
629650a283 | ||
|
|
afd4a8f425 | ||
|
|
8d5e8e7b90 | ||
|
|
3649791e5c | ||
|
|
86c6e149cb | ||
|
|
45409f5a95 | ||
|
|
279f05640c | ||
|
|
0de91af40e | ||
|
|
782f9b74da | ||
|
|
14242e8d19 | ||
|
|
e0fbf1dcd6 | ||
|
|
52c39ac891 | ||
|
|
b5c3fe2d92 | ||
|
|
a2f40c15c0 | ||
|
|
dfd04704fa | ||
|
|
56ae810afb | ||
|
|
58609feac1 | ||
|
|
824df740a1 | ||
|
|
7e3b90e979 | ||
|
|
c0a3a734d8 | ||
|
|
a19b2e44ce | ||
|
|
140624ea98 | ||
|
|
526d7fb481 | ||
|
|
5c00076890 | ||
|
|
437b8714a3 | ||
|
|
24eead8c34 | ||
|
|
3aa81edfc5 | ||
|
|
9bf2d1be14 | ||
|
|
92f1ca1554 | ||
|
|
ef48bbda7d | ||
|
|
ad2dba067e | ||
|
|
fbcdb74b6d | ||
|
|
087880c586 | ||
|
|
1399e37240 | ||
|
|
29dd7ee6ff | ||
|
|
528f2e2f55 | ||
|
|
11013b3a4f | ||
|
|
25883025a4 | ||
|
|
cd40129e42 | ||
|
|
72423547a6 | ||
|
|
5e7f7300aa | ||
|
|
b6f2c44d7f | ||
|
|
884e3d47be | ||
|
|
0c91cd9512 | ||
|
|
71f98a6b84 | ||
|
|
718a74b036 | ||
|
|
029cb2eee1 | ||
|
|
991787dcc6 | ||
|
|
ece588f4ef | ||
|
|
9f8135d5bc | ||
|
|
7e7d634f75 | ||
|
|
703712f38a | ||
|
|
705fecbd9f | ||
|
|
2f7870ab10 | ||
|
|
cd8c3369fc | ||
|
|
accdae149f | ||
|
|
0c58730d82 | ||
|
|
38b44dac4e | ||
|
|
2770f43242 | ||
|
|
d72823acdb | ||
|
|
5be7de0cf7 | ||
|
|
f881ca4930 | ||
|
|
d03b6947ca | ||
|
|
ae08158f3f | ||
|
|
da78b8febc | ||
|
|
19b6d9a016 | ||
|
|
c1b6245132 | ||
|
|
734a79665c | ||
|
|
489fd92ace | ||
|
|
bcf5eb1f7b | ||
|
|
12ae5edeb9 | ||
|
|
f72f49d5db | ||
|
|
58cada77d5 | ||
|
|
4076469962 | ||
|
|
f28efb0825 | ||
|
|
c08d0c7b48 | ||
|
|
d7476b30ad | ||
|
|
99fedba104 | ||
|
|
5b7bbffe90 | ||
|
|
c815dd1d06 | ||
|
|
a7ef32773e | ||
|
|
a9c8a84070 | ||
|
|
407cf8cb3d | ||
|
|
e6da096d4e | ||
|
|
9b9b704a9e | ||
|
|
6be1261551 | ||
|
|
1cb7614287 | ||
|
|
5fec3aeb0d | ||
|
|
7cf2128fb2 | ||
|
|
05b32b87f0 | ||
|
|
0a48bf79e4 | ||
|
|
e22fcdb253 | ||
|
|
a99cb7aec4 | ||
|
|
868d6e1aed | ||
|
|
5edafc2b38 | ||
|
|
220880fc82 | ||
|
|
b71b2594f3 | ||
|
|
7d9ce86f07 | ||
|
|
54e81e07f8 | ||
|
|
c50eb227ea | ||
|
|
97593cc3c0 | ||
|
|
352a959ef3 | ||
|
|
65135d4cab | ||
|
|
eb68e95f9f | ||
|
|
ae6046bfb8 | ||
|
|
eddb836123 | ||
|
|
59b6efc3f8 | ||
|
|
7e8fb3ca36 | ||
|
|
909d315a92 | ||
|
|
c99ec4022e | ||
|
|
09502e4f8c | ||
|
|
4297e3f411 | ||
|
|
ee6d8ea80f | ||
|
|
0fc4c9995b | ||
|
|
0430055c1c | ||
|
|
6c8728432f | ||
|
|
ebbb81ffb9 | ||
|
|
dafd251614 | ||
|
|
70ef17ed1d | ||
|
|
2ea9b06a0f | ||
|
|
39463bdcc1 | ||
|
|
79e85fc98e | ||
|
|
cdc4f9f9c5 | ||
|
|
36c2f91aad | ||
|
|
711cb7dd14 | ||
|
|
e1f80a9850 | ||
|
|
7e9f3ece13 | ||
|
|
ae4d91ab3f | ||
|
|
6f7af674e2 | ||
|
|
c6c40a15a8 | ||
|
|
c3189709e5 | ||
|
|
94a98f59e5 | ||
|
|
46b0f38967 | ||
|
|
88db893756 | ||
|
|
2fc7f99351 | ||
|
|
fc30840e80 | ||
|
|
9310719694 | ||
|
|
f8320e3be2 | ||
|
|
769f160fa0 | ||
|
|
87ec3ad1bc | ||
|
|
7b37d18ce4 | ||
|
|
779d82e524 | ||
|
|
9b5dd320ff | ||
|
|
0f44580c9d | ||
|
|
b1d5e6f6f1 | ||
|
|
ec5598baed | ||
|
|
73c0fda293 | ||
|
|
7651e2738c | ||
|
|
a2532198a5 | ||
|
|
b876bb2aaa | ||
|
|
6651c7482a | ||
|
|
674a34cb96 | ||
|
|
2c9e484173 | ||
|
|
5636af9fb3 | ||
|
|
9bb4b80af5 | ||
|
|
09977fdb60 | ||
|
|
59f5f8b084 | ||
|
|
1367e247cf | ||
|
|
3da7e5cfdb | ||
|
|
7427865517 | ||
|
|
e0fa607e7b | ||
|
|
c88798073b | ||
|
|
20234b5203 | ||
|
|
a63730344c | ||
|
|
1acd494a3a | ||
|
|
d1460bca67 | ||
|
|
b2760dc701 | ||
|
|
b497fbf5e3 | ||
|
|
98b8d6102a | ||
|
|
8ac1139fdc | ||
|
|
c54b89eb82 | ||
|
|
693927c580 | ||
|
|
db5307432f | ||
|
|
03a9585022 | ||
|
|
abb85ae30a | ||
|
|
01a639515b | ||
|
|
451871ade8 | ||
|
|
1ce3750996 | ||
|
|
2fdc35569b | ||
|
|
7194037380 | ||
|
|
5284fb8a84 | ||
|
|
986a297242 | ||
|
|
971bb8c2aa | ||
|
|
555f77b463 | ||
|
|
77d36c8423 | ||
|
|
2244b01cac | ||
|
|
d870fc6187 | ||
|
|
d4c7030328 | ||
|
|
21133e0296 | ||
|
|
c6a02f0a6a | ||
|
|
bdf90d984d | ||
|
|
e394d818be | ||
|
|
b30ff29b1a | ||
|
|
795e1b30e3 | ||
|
|
c8508c5978 | ||
|
|
1e50592f2c | ||
|
|
54632c3c7b | ||
|
|
b30fdcd1da | ||
|
|
2b217bfa3c | ||
|
|
4c56832253 | ||
|
|
c0edd5e08b | ||
|
|
7daadbc899 | ||
|
|
bce2d7c000 | ||
|
|
981edc182a | ||
|
|
36b52608b2 | ||
|
|
05139cf828 | ||
|
|
f588200119 | ||
|
|
5f3e2b2961 | ||
|
|
86dff2174a | ||
|
|
ca311f286f | ||
|
|
0525cabdb3 | ||
|
|
47e979a560 | ||
|
|
3f15bb34f8 | ||
|
|
c459ccd6f3 | ||
|
|
6e299917fb | ||
|
|
1ebf2b1163 | ||
|
|
b928840bb5 | ||
|
|
8268b42d84 | ||
|
|
a3e5b22805 | ||
|
|
c45d6aa638 | ||
|
|
e6a7969abc | ||
|
|
d332211244 | ||
|
|
9b19499d6f | ||
|
|
ce2fe0a04b | ||
|
|
b6eed3dd08 | ||
|
|
af2541cf2b | ||
|
|
7e44c37813 | ||
|
|
2fa479aad9 | ||
|
|
ba9211e253 | ||
|
|
33c7018cd0 | ||
|
|
4431319405 | ||
|
|
f7d38f7668 | ||
|
|
25b83de8d4 | ||
|
|
021b2cf26f | ||
|
|
072d66cbf3 | ||
|
|
112ff9541a | ||
|
|
7a3e900507 | ||
|
|
5f75ad1617 | ||
|
|
466fb2bf30 | ||
|
|
ec38b81ee6 | ||
|
|
dc3fb96d28 | ||
|
|
773502c6aa | ||
|
|
9518a46531 | ||
|
|
5335fc3beb | ||
|
|
a068902c9e | ||
|
|
fef39d78f7 | ||
|
|
8a7035a664 | ||
|
|
906bbec918 | ||
|
|
7c6eefd795 | ||
|
|
c82eb78117 | ||
|
|
70a254e157 | ||
|
|
c5abf88839 | ||
|
|
8d7cf8783a | ||
|
|
4d4fb77ead | ||
|
|
249f6e4841 | ||
|
|
8cb5ee086d | ||
|
|
d9d8f677bf | ||
|
|
f81402f518 | ||
|
|
a046e0e864 | ||
|
|
75304e1f98 | ||
|
|
2f422fe3af | ||
|
|
335c3baf65 | ||
|
|
4b2cb255cb | ||
|
|
4014273fb0 | ||
|
|
d45962c3ac | ||
|
|
0fcfb51b7f | ||
|
|
4db4999186 | ||
|
|
a10b228849 | ||
|
|
2d98ce0114 | ||
|
|
0d91781e88 | ||
|
|
5991aa6d4e | ||
|
|
3b40a3afb4 | ||
|
|
e3a2b0c693 | ||
|
|
b36026cdd3 | ||
|
|
153b095eea | ||
|
|
fe109267a1 | ||
|
|
43f7ff9122 | ||
|
|
ca1dea595c | ||
|
|
7e95794324 | ||
|
|
69e114099b | ||
|
|
2449a2e315 | ||
|
|
3fd821fb66 | ||
|
|
cf1414289a | ||
|
|
90db2f5037 | ||
|
|
f90c9af9f4 | ||
|
|
004a86d099 | ||
|
|
5d75d02a47 | ||
|
|
74d69c5d08 | ||
|
|
eab52d5150 | ||
|
|
273f9772ac | ||
|
|
c27be4f38d | ||
|
|
76d1e5e620 | ||
|
|
c7858e025f | ||
|
|
68cd4b6ad8 | ||
|
|
98cbdec9d2 | ||
|
|
5d0f92af4c | ||
|
|
cdc8e666b2 | ||
|
|
f89634a309 | ||
|
|
a9b84c2c7f | ||
|
|
25afb3a2e1 | ||
|
|
1d4da67d11 | ||
|
|
42128dc764 | ||
|
|
af21d42a47 | ||
|
|
a296683dd0 | ||
|
|
a44a20e044 | ||
|
|
a14a562cec | ||
|
|
3371e99c41 | ||
|
|
a2d1e3e2d8 | ||
|
|
2551ea07bf | ||
|
|
dda1939036 | ||
|
|
b160aa1abc | ||
|
|
3836386b2f | ||
|
|
c415103439 | ||
|
|
4599561f2e | ||
|
|
51a50f6ca2 | ||
|
|
2971895216 | ||
|
|
bd21328ab8 | ||
|
|
59fcc7ce40 | ||
|
|
0793169325 | ||
|
|
902f7f0940 | ||
|
|
e3c6402f88 | ||
|
|
66e28a35c6 | ||
|
|
2e9cc2c724 | ||
|
|
ff96d5f3dc | ||
|
|
ca016aee2c | ||
|
|
b4acbe8aab | ||
|
|
4bcc2deb3a | ||
|
|
a13db2dc5e | ||
|
|
4b79073b16 | ||
|
|
24b7ec9464 | ||
|
|
138f93cacd | ||
|
|
98b904561a | ||
|
|
29f9606aaa | ||
|
|
4772831527 | ||
|
|
05e86a8f03 | ||
|
|
6b8edfa4f9 | ||
|
|
e11dec246c | ||
|
|
768e35572b | ||
|
|
f076172c31 | ||
|
|
8b93500cda | ||
|
|
5f92c5e78d | ||
|
|
26456908ab | ||
|
|
10e5c061ae | ||
|
|
76f2f67aca | ||
|
|
733fc197d0 | ||
|
|
98bf92e1ce | ||
|
|
e4a4f54202 | ||
|
|
873cad803f | ||
|
|
4617760b66 | ||
|
|
956b6f4b5a | ||
|
|
6599e9b401 | ||
|
|
41ea84639c | ||
|
|
81e1b9f6ce | ||
|
|
7bdc6e457b | ||
|
|
7b72100085 | ||
|
|
d415cd4062 | ||
|
|
8dcc0805b3 | ||
|
|
d2c115d3c8 | ||
|
|
8691088e65 | ||
|
|
2241edd2e4 | ||
|
|
324c9b99fd | ||
|
|
7e5113f91d | ||
|
|
2c12a65f4e | ||
|
|
070a4902d5 | ||
|
|
30478bcde7 | ||
|
|
ec43be52a1 | ||
|
|
f42772c150 | ||
|
|
f9d59a6885 | ||
|
|
347f5356a1 | ||
|
|
ba400f05b3 | ||
|
|
46f4d0b4e7 | ||
|
|
89a66beaf7 | ||
|
|
b86e060dc2 | ||
|
|
c16675ad4e | ||
|
|
661336d864 | ||
|
|
6d4107b270 | ||
|
|
0bd5b1e851 | ||
|
|
d96ffbe613 | ||
|
|
58b2221151 | ||
|
|
42b3afc244 | ||
|
|
f3a4accdc4 | ||
|
|
b27abe3fac | ||
|
|
7a6dfeda2a | ||
|
|
ab1325e4d3 | ||
|
|
462620e074 | ||
|
|
90e6872eb4 | ||
|
|
d6e90065c2 | ||
|
|
4d74aa5316 | ||
|
|
2bbd06b0c2 | ||
|
|
40b5640381 | ||
|
|
dbac606afc | ||
|
|
dddf722aa3 | ||
|
|
6953f0ee7d | ||
|
|
5314f4b624 | ||
|
|
466ef97ade | ||
|
|
83b7fae8b2 | ||
|
|
f0ad52e219 | ||
|
|
a8418f8f75 | ||
|
|
d25bf44f52 | ||
|
|
9bf213f7fd | ||
|
|
c155c0a390 | ||
|
|
b0f6987cfa | ||
|
|
d9619c9f06 | ||
|
|
b53206634b | ||
|
|
686ae84909 | ||
|
|
07e927e3e4 | ||
|
|
ca535eb08e | ||
|
|
e93f456d07 | ||
|
|
ff4c40bc14 | ||
|
|
0c31234580 | ||
|
|
1d8a172c05 | ||
|
|
fcb82dac5e | ||
|
|
c4329985a0 | ||
|
|
10539ece4e | ||
|
|
f9ae39e624 | ||
|
|
d80838e1e7 | ||
|
|
62baccc55d | ||
|
|
a2b1ffda1f | ||
|
|
49c53ebe6f | ||
|
|
8acf7aa728 | ||
|
|
f0ff005fbb | ||
|
|
3da7acf0df | ||
|
|
035e826e3f | ||
|
|
6a44f04f95 | ||
|
|
889954aaa9 | ||
|
|
8b924362a5 | ||
|
|
377e8b7a34 | ||
|
|
824cd9be8e | ||
|
|
719514645c | ||
|
|
753d429629 | ||
|
|
7b2956b2c8 | ||
|
|
656c5d571a | ||
|
|
b1c262fada | ||
|
|
00170fe52c | ||
|
|
d66acde7ea | ||
|
|
a01add4747 | ||
|
|
9c8bed5eda | ||
|
|
f4d7ba0f1a | ||
|
|
798a26efb9 | ||
|
|
8b718e3b34 | ||
|
|
ec857cb59f | ||
|
|
1e673c4596 | ||
|
|
3c68d02fb0 | ||
|
|
74b7dbb70a | ||
|
|
e1a66fabf7 | ||
|
|
62d3a1070c | ||
|
|
721a88978a | ||
|
|
252b9d2064 | ||
|
|
298518afca | ||
|
|
5222d4c604 | ||
|
|
b6dd920c08 | ||
|
|
d24bd67c0b | ||
|
|
ee95b82895 | ||
|
|
20f3769963 | ||
|
|
726cf61aa3 | ||
|
|
76082e4133 | ||
|
|
f0f40826a8 | ||
|
|
e2e979d3c1 | ||
|
|
ab965e292b | ||
|
|
a92e3e817d | ||
|
|
cab61ec436 | ||
|
|
e02b8e3e68 | ||
|
|
c1bc321f8f | ||
|
|
e22a992db7 | ||
|
|
0261c4742e | ||
|
|
7ca1473a8e | ||
|
|
f2fe8e1ad7 | ||
|
|
0b98045ed5 | ||
|
|
e464296eb5 | ||
|
|
87ba014c7e | ||
|
|
5386ee1740 | ||
|
|
e7ec5f0a82 | ||
|
|
ca789b73fa | ||
|
|
7976679671 | ||
|
|
4cc1ed4be8 | ||
|
|
073eba8e66 | ||
|
|
216e73acfa | ||
|
|
78868da533 | ||
|
|
f73509a942 | ||
|
|
24e48f0022 | ||
|
|
7a490831d5 | ||
|
|
4a79cfd35c | ||
|
|
74a4a08ca0 | ||
|
|
6c93c7af7c | ||
|
|
63c0af441d | ||
|
|
18ffe94b32 | ||
|
|
171af100c9 | ||
|
|
b5cd81ae89 | ||
|
|
eeda291577 | ||
|
|
11f295b908 | ||
|
|
a793bd8aa3 | ||
|
|
9152ba8d82 | ||
|
|
0191c4ac5c | ||
|
|
a5dfc23894 | ||
|
|
7a893def7e | ||
|
|
7f5f493023 | ||
|
|
515930343d | ||
|
|
14aa7d0f60 | ||
|
|
8c24c71bb2 | ||
|
|
9c06550d07 | ||
|
|
2892baef91 | ||
|
|
5f0d828ac3 | ||
|
|
ae473724d7 | ||
|
|
c470a4c37c | ||
|
|
81b7eb618f | ||
|
|
d32b685dc3 | ||
|
|
08abc0dd16 | ||
|
|
39cad7e363 |
31
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Run following command `textattack ...`
|
||||
2. Run following code ...
|
||||
4. See error
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Screenshots or Traceback**
|
||||
If applicable, add screenshots to help explain your problem. Also, copy and paste tracebacks produced by the bug.
|
||||
|
||||
**System Information (please complete the following information):**
|
||||
- OS: [e.g. MacOS, Linux, Windows]
|
||||
- Library versions (e.g. `torch==1.7.0, transformers==3.3.0`)
|
||||
- Textattack version
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
10
.github/ISSUE_TEMPLATE/custom.md
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
---
|
||||
name: Custom issue template
|
||||
about: Describe this issue template's purpose here.
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
22
.github/pull_request_template.md
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
# What does this PR do?
|
||||
|
||||
## Summary
|
||||
*Example: This PR adds [CLARE](https://arxiv.org/abs/2009.07502) attack, which uses distilled RoBERTa masked language model to perform word swaps, word insertions, word merges (which is where we combine two adjacent words and replace it with another word) in a greedy manner. s*
|
||||
|
||||
## Additions
|
||||
- *Example: Added `clare` recipe as `textattack.attack_recipes.CLARE2020`.*
|
||||
|
||||
## Changes
|
||||
- *Example: `WordSwapMaskedLM` has been updated to have a minimum confidence score cutoff and batching has been added for faster performance.*
|
||||
|
||||
## Deletions
|
||||
- *Example: Remove unnecessary files under `textattack.models...`*
|
||||
|
||||
## Checklist
|
||||
- [ ] The title of your pull request should be a summary of its contribution.
|
||||
- [ ] Please write detailed description of what parts have been newly added and what parts have been modified. Please also explain why certain changes were made.
|
||||
- [ ] If your pull request addresses an issue, please mention the issue number in the pull request description to make sure they are linked (and people consulting the issue know you are working on it)
|
||||
- [ ] To indicate a work in progress please mark it as a draft on Github.
|
||||
- [ ] Make sure existing tests pass.
|
||||
- [ ] Add relevant tests. No quality testing = no merge.
|
||||
- [ ] All public methods must have informative docstrings that work nicely with sphinx. For new modules/files, please add/modify the appropriate `.rst` file in `TextAttack/docs/apidoc`.'
|
||||
34
.github/workflows/check-formatting.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Formatting with black & isort
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install black flake8 isort # Testing packages
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e .[dev]
|
||||
- name: Check code format with black and isort
|
||||
run: |
|
||||
make lint
|
||||
67
.github/workflows/codeql-analysis.yml
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
# For most projects, this workflow file will not need changing; you simply need
|
||||
# to commit it to your repository.
|
||||
#
|
||||
# You may wish to alter this file to override the set of languages analyzed,
|
||||
# or to provide custom queries or build logic.
|
||||
#
|
||||
# ******** NOTE ********
|
||||
# We have attempted to detect the languages in your repository. Please check
|
||||
# the `language` matrix defined below to confirm you have the correct set of
|
||||
# supported CodeQL languages.
|
||||
#
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master, master* ]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ master ]
|
||||
schedule:
|
||||
- cron: '24 1 * * 0'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
|
||||
# Learn more:
|
||||
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v1
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
||||
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
||||
# and modify them (or add more) to build your code if your project
|
||||
# uses a compiled language
|
||||
|
||||
#- run: |
|
||||
# make bootstrap
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v1
|
||||
38
.github/workflows/make-docs.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Build documentation with Sphinx
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo sed -i 's/azure\.//' /etc/apt/sources.list # workaround for flaky pandoc install
|
||||
sudo apt-get update # from here https://github.com/actions/virtual-environments/issues/675
|
||||
sudo apt-get install pandoc -o Acquire::Retries=3 # install pandoc
|
||||
python -m pip install --upgrade pip setuptools wheel # update python
|
||||
pip install ipython --upgrade # needed for Github for whatever reason
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e .[dev]
|
||||
pip install jupyter 'ipykernel<5.0.0' 'ipython<7.0.0' # ipykernel workaround: github.com/jupyter/notebook/issues/4050
|
||||
- name: Build docs with Sphinx and check for errors
|
||||
run: |
|
||||
sphinx-build -b html docs docs/_build/html -W
|
||||
31
.github/workflows/publish-to-pypi.yml
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
# This workflows will upload a Python Package using Twine when a release is created
|
||||
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
|
||||
|
||||
name: Upload Python Package to PyPI
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install setuptools wheel twine
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
python setup.py sdist bdist_wheel
|
||||
twine upload dist/*
|
||||
45
.github/workflows/run-pytest.yml
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Test with PyTest
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6, 3.7, 3.8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install pytest pytest-xdist # Testing packages
|
||||
pip uninstall textattack --yes # Remove TA if it's already installed
|
||||
python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
|
||||
pip install -e .[dev]
|
||||
pip freeze
|
||||
- name: Free disk space
|
||||
run: |
|
||||
sudo apt-get remove mysql-client libmysqlclient-dev -y >/dev/null 2>&1
|
||||
sudo apt-get remove php* -y >/dev/null 2>&1
|
||||
sudo apt-get autoremove -y >/dev/null 2>&1
|
||||
sudo apt-get autoclean -y >/dev/null 2>&1
|
||||
sudo rm -rf /usr/local/lib/android >/dev/null 2>&1
|
||||
docker rmi $(docker image ls -aq) >/dev/null 2>&1
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
pytest tests -v
|
||||
|
||||
12
.gitignore
vendored
@@ -9,6 +9,10 @@ outputs/
|
||||
|
||||
# IDE files
|
||||
.c9*
|
||||
.idea/
|
||||
|
||||
# Jupyter notebook files
|
||||
.ipynb_checkpoints/
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
@@ -19,9 +23,6 @@ docs/_build/
|
||||
# Files from IDES
|
||||
.*.py
|
||||
|
||||
# CSVs to upload to MTurk
|
||||
*.csv
|
||||
|
||||
# TF Hub modules
|
||||
tensorflow-hub
|
||||
|
||||
@@ -35,8 +36,13 @@ dist/
|
||||
# Weights & Biases outputs
|
||||
wandb/
|
||||
|
||||
# Tensorboard logs
|
||||
runs/
|
||||
|
||||
# checkpoints
|
||||
checkpoints/
|
||||
|
||||
# vim
|
||||
*.swp
|
||||
|
||||
.vscode
|
||||
@@ -17,3 +17,5 @@ python:
|
||||
- requirements: requirements.txt
|
||||
- method: pip
|
||||
path: .
|
||||
extra_requirements:
|
||||
- docs
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
language: python
|
||||
python: '3.8'
|
||||
before_install:
|
||||
- python --version
|
||||
- pip install -U pip
|
||||
- pip install -U pytest
|
||||
install:
|
||||
- pip install -e .
|
||||
script: pytest tests # run tests
|
||||
222
CONTRIBUTING.md
Normal file
@@ -0,0 +1,222 @@
|
||||
# How can I contribute to TextAttack?
|
||||
|
||||
We welcome contributions from all members of the community– and there are lots
|
||||
of ways to help without editing the code! Answering questions, helping others,
|
||||
reaching out and improving the documentation are immensely valuable to the
|
||||
community.
|
||||
|
||||
It also helps us if you spread the word: reference the library from blog posts
|
||||
on the awesome projects it made possible, shout out on Twitter every time it has
|
||||
helped you, or simply star the repo to say "thank you".
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-ez3ts03b-Nr55tDiqgAvCkRbbz8zz9g)!
|
||||
|
||||
## Ways to contribute
|
||||
|
||||
There are lots of ways you can contribute to TextAttack:
|
||||
* Submitting issues on Github to report bugs or make feature requests
|
||||
* Fixing outstanding issues with the existing code
|
||||
* Implementing new features
|
||||
* Adding support for new models and datasets
|
||||
* Contributing to the examples or to the documentation
|
||||
|
||||
*All are equally valuable to the community.*
|
||||
|
||||
## Submitting a new issue or feature request
|
||||
|
||||
Do your best to follow these guidelines when submitting an issue or a feature
|
||||
request. It will make it easier for us to come back to you quickly and with good
|
||||
feedback.
|
||||
|
||||
### Found a bug?
|
||||
|
||||
TextAttack can remain robust and reliable thanks to users who notify us of
|
||||
the problems they encounter. So thank you for [reporting an issue](https://github.com/QData/TextAttack/issues).
|
||||
|
||||
We also have a suite of tests intended to detect bugs before they enter the
|
||||
codebase. That said, they still happen (Turing completeness and all) so it's up
|
||||
to you to report the bugs you find! We would really appreciate it if you could
|
||||
make sure the bug was not already reported (use the search bar on Github under
|
||||
Issues).
|
||||
|
||||
To help us fix your issue quickly, please follow these steps:
|
||||
|
||||
* Include your **OS type and version**, the versions of **Python**, **PyTorch** and
|
||||
**Tensorflow** when applicable;
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s;
|
||||
* Provide the *full* traceback if an exception is raised.
|
||||
|
||||
### Do you want to add your model?
|
||||
|
||||
Awesome! Please provide the following information:
|
||||
|
||||
* Short description of the model and link to the paper;
|
||||
* Link to the implementation if it is open-source;
|
||||
* Link to the model weights if they are available.
|
||||
|
||||
If you are willing to contribute the model yourself, let us know so we can best
|
||||
guide you. We can host your model on our S3 server, but if you trained your
|
||||
model using `transformers`, it's better if you host your model on their
|
||||
[model hub](https://huggingface.co/models).
|
||||
|
||||
### Do you want a new feature: a component, a recipe, or something else?
|
||||
|
||||
A world-class feature request addresses the following points:
|
||||
|
||||
1. Motivation first:
|
||||
* Is it related to a problem/frustration with the library? If so, please explain
|
||||
why. Providing a code snippet that demonstrates the problem is best.
|
||||
* Is it related to something you would need for a project? We'd love to hear
|
||||
about it!
|
||||
* Is it something you worked on and think could benefit the community?
|
||||
Awesome! Tell us what problem it solved for you.
|
||||
2. Write a *full paragraph* describing the feature;
|
||||
3. Provide a **code snippet** that demonstrates its future use;
|
||||
4. In case this is related to a paper, please attach a link;
|
||||
5. Attach any additional information (drawings, screenshots, etc.) you think may help.
|
||||
|
||||
|
||||
## Start contributing! (Pull Requests)
|
||||
|
||||
Before writing code, we strongly advise you to search through the existing PRs or
|
||||
issues to make sure that nobody is already working on the same thing. If you are
|
||||
unsure, it is always a good idea to open an issue to get some feedback.
|
||||
|
||||
You will need basic `git` proficiency to be able to contribute to
|
||||
`textattack`. `git` is not the easiest tool to use but it has the greatest
|
||||
manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
|
||||
Git](https://git-scm.com/book/en/v2) is a very good reference.
|
||||
|
||||
Follow these steps to start contributing:
|
||||
|
||||
1. Fork the [repository](https://github.com/QData/TextAttack) by
|
||||
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
|
||||
under your GitHub user account.
|
||||
|
||||
2. Clone your fork to your local disk, and add the base repository as a remote:
|
||||
|
||||
```bash
|
||||
$ git clone git@github.com:<your Github handle>/TextAttack.git
|
||||
$ cd TextAttack
|
||||
$ git remote add upstream https://github.com/QData/TextAttack
|
||||
```
|
||||
|
||||
3. Create a new branch to hold your development changes:
|
||||
|
||||
```bash
|
||||
$ git checkout -b a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
**do not** work on the `master` branch.
|
||||
|
||||
4. Set up a development environment by running the following commands in a virtual environment:
|
||||
|
||||
|
||||
```bash
|
||||
$ cd TextAttack
|
||||
$ pip install -e . ".[dev]"
|
||||
$ pip install black isort pytest pytest-xdist
|
||||
```
|
||||
|
||||
This will install `textattack` in editable mode and install `black` and
|
||||
`isort`, packages we use for code formatting.
|
||||
|
||||
(If TextAttack was already installed in the virtual environment, remove
|
||||
it with `pip uninstall textattack` before reinstalling it in editable
|
||||
mode with the `-e` flag.)
|
||||
|
||||
5. Develop the features on your branch.
|
||||
|
||||
As you work on the features, you should make sure that the test suite
|
||||
passes:
|
||||
|
||||
```bash
|
||||
$ make test
|
||||
```
|
||||
|
||||
(or just simply `pytest`.)
|
||||
|
||||
> **Tip:** if you're fixing just one or two tests, you can run only the last tests that failed using `pytest --lf`.
|
||||
|
||||
`textattack` relies on `black` and `isort` to format its source code
|
||||
consistently. After you make changes, format them with:
|
||||
|
||||
```bash
|
||||
$ make format
|
||||
```
|
||||
|
||||
You can run quality checks to make sure your code is formatted properly
|
||||
using this command:
|
||||
|
||||
```bash
|
||||
$ make lint
|
||||
```
|
||||
|
||||
Once you're happy with your changes, add changed files using `git add` and
|
||||
make a commit with `git commit` to record your changes locally:
|
||||
|
||||
```bash
|
||||
$ git add modified_file.py
|
||||
$ git commit
|
||||
```
|
||||
|
||||
Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
It is a good idea to sync your copy of the code with the original
|
||||
repository regularly. This way you can quickly account for changes:
|
||||
|
||||
```bash
|
||||
$ git fetch upstream
|
||||
$ git rebase upstream/master
|
||||
```
|
||||
|
||||
Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
$ git push -u origin a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
6. Add documentation.
|
||||
|
||||
Our docs are in the `docs/` folder. Thanks to `sphinx-automodule`, adding
|
||||
documentation for a new code file should just be two lines. Our docs will
|
||||
automatically generate from the comments you added to your code. If you're
|
||||
adding an attack recipe, add a reference in `attack_recipes.rst`.
|
||||
If you're adding a transformation, add a reference in `transformation.rst`, etc.
|
||||
|
||||
You can build the docs and view the updates using `make docs`. If you're
|
||||
adding a tutorial or something where you want to update the docs multiple
|
||||
times, you can run `make docs-auto`. This will run a server using
|
||||
`sphinx-autobuild` that should automatically reload whenever you change
|
||||
a file.
|
||||
|
||||
7. Once you are satisfied (**and the checklist below is happy too**), go to the
|
||||
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
|
||||
to the project maintainers for review.
|
||||
|
||||
8. It's ok if maintainers ask you for changes. It happens to core contributors
|
||||
too! So everyone can see the changes in the Pull request, work in your local
|
||||
branch and push the changes to your fork. They will automatically appear in
|
||||
the pull request.
|
||||
|
||||
|
||||
### Checklist
|
||||
|
||||
1. The title of your pull request should be a summary of its contribution.
|
||||
2. If your pull request addresses an issue, please mention the issue number in
|
||||
the pull request description to make sure they are linked (and people
|
||||
consulting the issue know you are working on it);
|
||||
3. To indicate a work in progress please mark it as a draft on Github.
|
||||
4. Make sure existing tests pass.
|
||||
5. Add relevant tests. No quality testing = no merge.
|
||||
6. All public methods must have informative docstrings that work nicely with sphinx.
|
||||
|
||||
### Tests
|
||||
|
||||
You can run TextAttack tests with `pytest`. Just type `make test`.
|
||||
|
||||
|
||||
#### This guide was heavily inspired by the awesome [transformers guide to contributing](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md)
|
||||
34
Makefile
Normal file
@@ -0,0 +1,34 @@
|
||||
PEP_IGNORE_ERRORS="C901 E501 W503 E203 E231 E266 F403"
|
||||
|
||||
format: FORCE ## Run black and isort (rewriting files)
|
||||
black .
|
||||
isort --atomic tests textattack
|
||||
docformatter --in-place --recursive textattack tests
|
||||
|
||||
lint: FORCE ## Run black, isort, flake8 (in check mode)
|
||||
black . --check
|
||||
isort --check-only tests textattack
|
||||
flake8 . --count --ignore=$(PEP_IGNORE_ERRORS) --show-source --statistics --exclude=./.*,build,dist
|
||||
|
||||
test: FORCE ## Run tests using pytest
|
||||
python -m pytest --dist=loadfile -n auto
|
||||
|
||||
docs: FORCE ## Build docs using Sphinx.
|
||||
sphinx-build -b html docs docs/_build/html
|
||||
|
||||
docs-check: FORCE ## Builds docs using Sphinx. If there is an error, exit with an error code (instead of warning & continuing).
|
||||
sphinx-build -b html docs docs/_build/html -W
|
||||
|
||||
docs-auto: FORCE ## Build docs using Sphinx and run hotreload server using Sphinx autobuild.
|
||||
sphinx-autobuild docs docs/_build/html --port 8765
|
||||
|
||||
all: format lint docs-check test ## Format, lint, and test.
|
||||
|
||||
.PHONY: help
|
||||
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
help:
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
FORCE:
|
||||
552
README.md
@@ -1,28 +1,43 @@
|
||||
|
||||
|
||||
<h1 align="center">TextAttack 🐙</h1>
|
||||
|
||||
<p align="center">Generating adversarial examples for NLP models</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://textattack.readthedocs.io/">Docs</a> •
|
||||
<a href="https://textattack.readthedocs.io/">[TextAttack Documentation on ReadTheDocs]</a>
|
||||
<br> <br>
|
||||
<a href="#about">About</a> •
|
||||
<a href="#setup">Setup</a> •
|
||||
<a href="#usage">Usage</a> •
|
||||
<a href="#design">Design</a>
|
||||
<br> <br>
|
||||
<a target="_blank" href="https://travis-ci.org/QData/TextAttack">
|
||||
<img src="https://travis-ci.org/QData/TextAttack.svg?branch=master" alt="Coverage Status">
|
||||
<a target="_blank">
|
||||
<img src="https://github.com/QData/TextAttack/workflows/Github%20PyTest/badge.svg" alt="Github Runner Covergae Status">
|
||||
</a>
|
||||
<a href="https://badge.fury.io/py/textattack">
|
||||
<img src="https://badge.fury.io/py/textattack.svg" alt="PyPI version" height="18">
|
||||
</a>
|
||||
|
||||
</p>
|
||||
|
||||
<img src="http://jackxmorris.com/files/textattack.gif" alt="TextAttack Demo GIF" style="display: block; margin: 0 auto;" />
|
||||
|
||||
## About
|
||||
|
||||
TextAttack is a Python framework for running adversarial attacks against NLP models. TextAttack builds attacks from four components: a search method, goal function, transformation, and set of constraints. TextAttack's modular design makes it easily extensible to new NLP tasks, models, and attack strategies. TextAttack currently supports attacks on models trained for classification, entailment, and translation.
|
||||
TextAttack is a Python framework for adversarial attacks, data augmentation, and model training in NLP.
|
||||
|
||||
> If you're looking for information about TextAttack's menagerie of pre-trained models, you might want the [TextAttack Model Zoo](https://textattack.readthedocs.io/en/latest/3recipes/models.html) page.
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-huomtd9z-KqdHBPPu2rOP~Z8q3~urgg)!
|
||||
|
||||
### *Why TextAttack?*
|
||||
|
||||
There are lots of reasons to use TextAttack:
|
||||
|
||||
1. **Understand NLP models better** by running different adversarial attacks on them and examining the output
|
||||
2. **Research and develop different NLP adversarial attacks** using the TextAttack framework and library of components
|
||||
3. **Augment your dataset** to increase model generalization and robustness downstream
|
||||
3. **Train NLP models** using just a single command (all downloads included!)
|
||||
|
||||
## Setup
|
||||
|
||||
@@ -30,116 +45,529 @@ TextAttack is a Python framework for running adversarial attacks against NLP mod
|
||||
|
||||
You should be running Python 3.6+ to use this package. A CUDA-compatible GPU is optional but will greatly improve code speed. TextAttack is available through pip:
|
||||
|
||||
```
|
||||
```bash
|
||||
pip install textattack
|
||||
```
|
||||
|
||||
### Configuration
|
||||
TextAttack downloads files to `~/.cache/textattack/` by default. This includes pretrained models,
|
||||
dataset samples, and the configuration file `config.yaml`. To change the cache path, set the
|
||||
environment variable `TA_CACHE_DIR`.
|
||||
Once TextAttack is installed, you can run it via command-line (`textattack ...`)
|
||||
or via python module (`python -m textattack ...`).
|
||||
|
||||
> **Tip**: TextAttack downloads files to `~/.cache/textattack/` by default. This includes pretrained models,
|
||||
> dataset samples, and the configuration file `config.yaml`. To change the cache path, set the
|
||||
> environment variable `TA_CACHE_DIR`. (for example: `TA_CACHE_DIR=/tmp/ textattack attack ...`).
|
||||
|
||||
## Usage
|
||||
|
||||
### Running Attacks
|
||||
### Help: `textattack --help`
|
||||
|
||||
The [`examples/`](docs/examples/) folder contains notebooks walking through examples of basic usage of TextAttack, including building a custom transformation and a custom constraint. These examples can also be viewed through the [documentation website](https://textattack.readthedocs.io/en/latest).
|
||||
TextAttack's main features can all be accessed via the `textattack` command. Two very
|
||||
common commands are `textattack attack <args>`, and `textattack augment <args>`. You can see more
|
||||
information about all commands using
|
||||
```bash
|
||||
textattack --help
|
||||
```
|
||||
or a specific command using, for example,
|
||||
```bash
|
||||
textattack attack --help
|
||||
```
|
||||
|
||||
We also have a command-line interface for running attacks. See help info and list of arguments with `python -m textattack --help`.
|
||||
The [`examples/`](examples/) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file. The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.
|
||||
|
||||
### Attack Recipes
|
||||
### Running Attacks: `textattack attack --help`
|
||||
|
||||
We include attack recipes which build an attack such that only one command line argument has to be passed. To run an attack recipes, run `python -m textattack --recipe [recipe_name]`
|
||||
The easiest way to try out an attack is via the command-line interface, `textattack attack`.
|
||||
|
||||
The first are for classification and entailment attacks:
|
||||
- **textfooler**: Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932)).
|
||||
- **alzantot**: Genetic algorithm attack from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998)).
|
||||
- **tf-adjusted**: TextFooler attack with constraint thresholds adjusted based on human evaluation and grammaticality enforced.
|
||||
- **alz-adjusted**: Alzantot's attack adjusted to follow the same constraints as tf-adjusted such that the only difference is the search method.
|
||||
- **deepwordbug**: Replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)).
|
||||
- **hotflip**: Beam search and gradient-based word swap (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751)).
|
||||
- **kuleshov**: Greedy search and counterfitted embedding swap (["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)).
|
||||
> **Tip:** If your machine has multiple GPUs, you can distribute the attack across them using the `--parallel` option. For some attacks, this can really help performance.
|
||||
|
||||
The final is for translation attacks:
|
||||
- **seq2sick**: Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)).
|
||||
Here are some concrete examples:
|
||||
|
||||
### Augmenting Text
|
||||
*TextFooler on BERT trained on the MR sentiment classification dataset*:
|
||||
```bash
|
||||
textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100
|
||||
```
|
||||
|
||||
*DeepWordBug on DistilBERT trained on the Quora Question Pairs paraphrase identification dataset*:
|
||||
```bash
|
||||
textattack attack --model distilbert-base-uncased-qqp --recipe deepwordbug --num-examples 100
|
||||
```
|
||||
|
||||
*Beam search with beam width 4 and word embedding transformation and untargeted goal function on an LSTM*:
|
||||
```bash
|
||||
textattack attack --model lstm-mr --num-examples 20 \
|
||||
--search-method beam-search^beam_width=4 --transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
|
||||
> **Tip:** Instead of specifying a dataset and number of examples, you can pass `--interactive` to attack samples inputted by the user.
|
||||
|
||||
### Attacks and Papers Implemented ("Attack Recipes"): `textattack attack --recipe [recipe_name]`
|
||||
|
||||
We include attack recipes which implement attacks from the literature. You can list attack recipes using `textattack list attack-recipes`.
|
||||
|
||||
To run an attack recipe: `textattack attack --recipe [recipe_name]`
|
||||
|
||||
|
||||
<table style="width:100%" border="1">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th><strong>Attack Recipe Name</strong></th>
|
||||
<th><strong>Goal Function</strong></th>
|
||||
<th><strong>ConstraintsEnforced</strong></th>
|
||||
<th><strong>Transformation</strong></th>
|
||||
<th><strong>Search Method</strong></th>
|
||||
<th><strong>Main Idea</strong></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on classification tasks, like sentiment classification and entailment:<br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bae</code> <span class="citation" data-cites="garg2020bae"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>BERT Masked Token Prediction</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>BERT masked language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)). </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bert-attack</code> <span class="citation" data-cites="li2020bertattack"></span></td>
|
||||
<td><sub>Untargeted Classification</td>
|
||||
<td><sub>USE sentence encoding cosine similarity, Maximum number of words perturbed</td>
|
||||
<td><sub>BERT Masked Token Prediction (with subword expansion)</td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub> (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>checklist</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td><sub>checklist distance</sub></td>
|
||||
<td><sub>contract, extend, and substitutes name entities</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Invariance testing implemented in CheckList . (["Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>clare</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>RoBERTa Masked Prediction for token swap, insert and merge</sub></td>
|
||||
<td><sub>Greedy</sub></td>
|
||||
<td ><sub>["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>deepwordbug</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td><sub>Levenshtein edit distance</sub></td>
|
||||
<td><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>fast-alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Modified, faster version of the Alzantot et al. genetic algorithm, from (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hotflip</code> (word swap) <span class="citation" data-cites="Ebrahimi2017HotFlipWA"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Word Embedding Cosine Similarity, Part-of-speech match, Number of words perturbed</sub></td>
|
||||
<td><sub>Gradient-Based Word Swap</sub></td>
|
||||
<td><sub>Beam search</sub></td>
|
||||
<td ><sub> (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>iga</code> <span class="citation" data-cites="iga-wang2019natural"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Improved genetic algorithm-based word substitution from (["Natural Language Adversarial Attacks and Defenses in Word Level" (Wang et al., 2019)](https://arxiv.org/abs/1909.06723))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>input-reduction</code> <span class="citation" data-cites="feng2018pathologies"></span></td>
|
||||
<td><sub>Input Reduction</sub></td>
|
||||
<td></td>
|
||||
<td><sub>Word deletion</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking, reducing the input while maintaining the prediction through word importance ranking (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>kuleshov</code> <span class="citation" data-cites="Kuleshov2018AdversarialEF"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Thought vector encoding cosine similarity, Language model similarity probability</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Greedy word swap</sub></td>
|
||||
<td ><sub>(["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pruthi</code> <span class="citation" data-cites="pruthi2019combating"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Minimum word length, Maximum number of words perturbed</sub></td>
|
||||
<td><sub>{Neighboring Character Swap, Character Deletion, Character Insertion, Keyboard-Based Character Swap}</sub></td>
|
||||
<td><sub>Greedy search</sub></td>
|
||||
<td ><sub>Simulates common typos (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pso</code> <span class="citation" data-cites="pso-zang-etal-2020-word"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td></td>
|
||||
<td><sub>HowNet Word Swap</sub></td>
|
||||
<td><sub>Particle Swarm Optimization</sub></td>
|
||||
<td ><sub>(["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pwws</code> <span class="citation" data-cites="pwws-ren-etal-2019-generating"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td></td>
|
||||
<td><sub>WordNet-based synonym swap</sub></td>
|
||||
<td><sub>Greedy-WIR (saliency)</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/))</sub> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textbugger</code> (black-box) <span class="citation" data-cites="Li2019TextBuggerGA"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>(["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textfooler</code> <span class="citation" data-cites="Jin2019TextFooler"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Word Embedding Distance, Part-of-speech match, USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932))</sub> </td>
|
||||
</tr>
|
||||
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on sequence-to-sequence models: <br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>morpheus</code> <span class="citation" data-cites="morpheus-tan-etal-2020-morphin"></span></td>
|
||||
<td><sub>Minimum BLEU Score</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>Inflection Word Swap</sub> </td>
|
||||
<td><sub>Greedy search</sub> </td>
|
||||
<td ><sub>Greedy to replace words with their inflections with the goal of minimizing BLEU score (["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf))</sub> </td>
|
||||
</tr>
|
||||
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>seq2sick</code> (black-box) <span class="citation" data-cites="cheng2018seq2sick"></span></td>
|
||||
<td><sub>Non-overlapping output</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub> </td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)) </sub> </td>
|
||||
</tr>
|
||||
|
||||
|
||||
</tbody>
|
||||
</font>
|
||||
</table>
|
||||
|
||||
|
||||
|
||||
#### Recipe Usage Examples
|
||||
|
||||
Here are some examples of testing attacks from the literature from the command-line:
|
||||
|
||||
*TextFooler against BERT fine-tuned on SST-2:*
|
||||
```bash
|
||||
textattack attack --model bert-base-uncased-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
*seq2sick (black-box) against T5 fine-tuned for English-German translation:*
|
||||
```bash
|
||||
textattack attack --model t5-en-de --recipe seq2sick --num-examples 100
|
||||
```
|
||||
|
||||
### Augmenting Text: `textattack augment`
|
||||
|
||||
Many of the components of TextAttack are useful for data augmentation. The `textattack.Augmenter` class
|
||||
uses a transformation and a list of constraints to augment data. We also offer three built-in recipes
|
||||
uses a transformation and a list of constraints to augment data. We also offer six built-in recipes
|
||||
for data augmentation:
|
||||
- `textattack.WordNetAugmenter` augments text by replacing words with WordNet synonyms
|
||||
- `textattack.EmbeddingAugmenter` augments text by replacing words with neighbors in the counter-fitted embedding space, with a constraint to ensure their cosine similarity is at least 0.8
|
||||
- `textattack.CharSwapAugmenter` augments text by substituting, deleting, inserting, and swapping adjacent characters
|
||||
- `textattack.EasyDataAugmenter` augments text with a combination of word insertions, substitutions and deletions.
|
||||
- `textattack.CheckListAugmenter` augments text by contraction/extension and by substituting names, locations, numbers.
|
||||
- `textattack.CLAREAugmenter` augments text by replacing, inserting, and merging with a pre-trained masked language model.
|
||||
|
||||
All `Augmenter` objects implement `augment` and `augment_many` to generate augmentations
|
||||
of a string or a list of strings. Here's an example of how to use the `EmbeddingAugmenter`:
|
||||
#### Augmentation Command-Line Interface
|
||||
The easiest way to use our data augmentation tools is with `textattack augment <args>`. `textattack augment`
|
||||
takes an input CSV file and text column to augment, along with the number of words to change per augmentation
|
||||
and the number of augmentations per input example. It outputs a CSV in the same format with all the augmentation
|
||||
examples corresponding to the proper columns.
|
||||
|
||||
For example, given the following as `examples.csv`:
|
||||
|
||||
```csv
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
The command `textattack augment --csv examples.csv --input-column text --recipe embedding --pct-words-to-swap .1 --transformations-per-example 2 --exclude-original`
|
||||
will augment the `text` column by altering 10% of each example's words, generating twice as many augmentations as original inputs, and exclude the original inputs from the
|
||||
output CSV. (All of this will be saved to `augment.csv` by default.)
|
||||
|
||||
> **Tip:** Just as running attacks interactively, you can also pass `--interactive` to augment samples inputted by the user to quickly try out different augmentation recipes!
|
||||
|
||||
|
||||
After augmentation, here are the contents of `augment.csv`:
|
||||
```csv
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's newest conan and that he's gonna to make a splashing even stronger than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the rock is destined to be the 21tk century's novel conan and that he's going to make a splat even greater than arnold schwarzenegger , jean- claud van damme or stevens segal.",1
|
||||
the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of expression significant adequately describe co-writer/director pedro jackson's expanded vision of j . rs . r . tolkien's middle-earth .,1
|
||||
the gorgeously elaborate continuation of 'the lordy of the piercings' trilogy is so huge that a column of mots cannot adequately describe co-novelist/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offerings a pleasantly several slice of asia cinema .,1
|
||||
taking care of my cat offers a pleasantly different slice of asiatic kino .,1
|
||||
a technically good-made suspenser . . . but its abrupt drop in iq points as it races to the finish bloodline proves straightforward too disheartening to let slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq dot as it races to the finish line demonstrates simply too disheartening to leave slide .,0
|
||||
it's a enigma how the film wo be releases in this condition .,0
|
||||
it's a enigma how the filmmaking wo be publicized in this condition .,0
|
||||
```
|
||||
|
||||
The 'embedding' augmentation recipe uses counterfitted embedding nearest-neighbors to augment data.
|
||||
|
||||
#### Augmentation Python Interface
|
||||
In addition to the command-line interface, you can augment text dynamically by importing the
|
||||
`Augmenter` in your own code. All `Augmenter` objects implement `augment` and `augment_many` to generate augmentations
|
||||
of a string or a list of strings. Here's an example of how to use the `EmbeddingAugmenter` in a python script:
|
||||
|
||||
```python
|
||||
>>> from textattack.augmentation import EmbeddingAugmenter
|
||||
>>> augmenter = EmbeddingAugmenter()
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I notable create, I do not understand.', 'What I significant create, I do not understand.', 'What I cannot engender, I do not understand.', 'What I cannot creating, I do not understand.', 'What I cannot creations, I do not understand.', 'What I cannot create, I do not comprehend.', 'What I cannot create, I do not fathom.', 'What I cannot create, I do not understanding.', 'What I cannot create, I do not understands.', 'What I cannot create, I do not understood.', 'What I cannot create, I do not realise.']
|
||||
```
|
||||
You can also create your own augmenter from scratch by importing transformations/constraints from `textattack.transformations` and `textattack.constraints`. Here's an example that generates augmentations of a string using `WordSwapRandomCharacterDeletion`:
|
||||
|
||||
```python
|
||||
>>> from textattack.transformations import WordSwapRandomCharacterDeletion
|
||||
>>> from textattack.transformations import CompositeTransformation
|
||||
>>> from textattack.augmentation import Augmenter
|
||||
>>> transformation = CompositeTransformation([WordSwapRandomCharacterDeletion()])
|
||||
>>> augmenter = Augmenter(transformation=transformation, transformations_per_example=5)
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I cannot creae, I do not understand.', 'What I cannot creat, I do not understand.', 'What I cannot create, I do not nderstand.', 'What I cannot create, I do nt understand.', 'Wht I cannot create, I do not understand.']
|
||||
```
|
||||
|
||||
### Training Models: `textattack train`
|
||||
|
||||
Our model training code is available via `textattack train` to help you train LSTMs,
|
||||
CNNs, and `transformers` models using TextAttack out-of-the-box. Datasets are
|
||||
automatically loaded using the `datasets` package.
|
||||
|
||||
#### Training Examples
|
||||
*Train our default LSTM for 50 epochs on the Yelp Polarity dataset:*
|
||||
```bash
|
||||
textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 50 --learning-rate 1e-5
|
||||
```
|
||||
|
||||
The training process has data augmentation built-in:
|
||||
```bash
|
||||
textattack train --model lstm --dataset rotten_tomatoes --augment eda --pct-words-to-swap .1 --transformations-per-example 4
|
||||
```
|
||||
This uses the `EasyDataAugmenter` recipe to augment the `rotten_tomatoes` dataset before training.
|
||||
|
||||
*Fine-Tune `bert-base` on the `CoLA` dataset for 5 epochs:*
|
||||
```bash
|
||||
textattack train --model bert-base-uncased --dataset glue^cola --batch-size 32 --epochs 5
|
||||
```
|
||||
|
||||
|
||||
### To check datasets: `textattack peek-dataset`
|
||||
|
||||
To take a closer look at a dataset, use `textattack peek-dataset`. TextAttack will print some cursory statistics about the inputs and outputs from the dataset. For example, `textattack peek-dataset --dataset-from-huggingface snli` will show information about the SNLI dataset from the NLP package.
|
||||
|
||||
|
||||
### To list functional components: `textattack list`
|
||||
|
||||
There are lots of pieces in TextAttack, and it can be difficult to keep track of all of them. You can use `textattack list` to list components, for example, pretrained models (`textattack list models`) or available search methods (`textattack list search-methods`).
|
||||
|
||||
|
||||
## Design
|
||||
|
||||
### TokenizedText
|
||||
|
||||
To allow for word replacement after a sequence has been tokenized, we include a `TokenizedText` object which maintains both a list of tokens and the original text, with punctuation. We use this object in favor of a list of words or just raw text.
|
||||
### Models
|
||||
|
||||
### Models and Datasets
|
||||
TextAttack is model-agnostic! You can use `TextAttack` to analyze any model that outputs IDs, tensors, or strings. To help users, TextAttack includes pre-trained models for different common NLP tasks. This makes it easier for
|
||||
users to get started with TextAttack. It also enables a more fair comparison of attacks from
|
||||
the literature.
|
||||
|
||||
TextAttack is model-agnostic! Anything that overrides `__call__`, takes in `TokenizedText`, and correctly formats output works. However, TextAttack provides pre-trained models and samples for the following datasets:
|
||||
|
||||
#### Classification:
|
||||
* AG News dataset topic classification
|
||||
* IMDB dataset sentiment classification
|
||||
* Movie Review dataset sentiment classification
|
||||
* Yelp dataset sentiment classification
|
||||
|
||||
#### Entailment:
|
||||
* SNLI dataset
|
||||
* MNLI dataset (matched & unmatched)
|
||||
#### Built-in Models and Datasets
|
||||
|
||||
#### Translation:
|
||||
* newstest2013 English to German dataset
|
||||
TextAttack also comes built-in with models and datasets. Our command-line interface will automatically match the correct
|
||||
dataset to the correct model. We include 82 different pre-trained models (as of Oct 2020) for each of the nine [GLUE](https://gluebenchmark.com/)
|
||||
tasks, as well as some common datasets for classification, translation, and summarization.
|
||||
|
||||
### Attacks
|
||||
A list of available pretrained models and their validation accuracies is available at
|
||||
[textattack/models/README.md](textattack/models/README.md). You can also view a full list of provided models
|
||||
& datasets via `textattack attack --help`.
|
||||
|
||||
The `attack_one` method in an `Attack` takes as input a `TokenizedText`, and outputs either a `SuccessfulAttackResult` if it succeeds or a `FailedAttackResult` if it fails. We formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations.
|
||||
Here's an example of using one of the built-in models (the SST-2 dataset is automatically loaded):
|
||||
|
||||
### Goal Functions
|
||||
```bash
|
||||
textattack attack --model roberta-base-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
A `GoalFunction` takes as input a `TokenizedText` object and the ground truth output, and determines whether the attack has succeeded, returning a `GoalFunctionResult`.
|
||||
#### HuggingFace support: `transformers` models and `datasets` datasets
|
||||
|
||||
### Constraints
|
||||
We also provide built-in support for [`transformers` pretrained models](https://huggingface.co/models)
|
||||
and datasets from the [`datasets` package](https://github.com/huggingface/datasets)! Here's an example of loading
|
||||
and attacking a pre-trained model and dataset:
|
||||
|
||||
A `Constraint` takes as input an original `TokenizedText`, and a list of transformed `TokenizedText`s. For each transformed option, it returns a boolean representing whether the constraint is met.
|
||||
```bash
|
||||
textattack attack --model-from-huggingface distilbert-base-uncased-finetuned-sst-2-english --dataset-from-huggingface glue^sst2 --recipe deepwordbug --num-examples 10
|
||||
```
|
||||
|
||||
### Transformations
|
||||
You can explore other pre-trained models using the `--model-from-huggingface` argument, or other datasets by changing
|
||||
`--dataset-from-huggingface`.
|
||||
|
||||
A `Transformation` takes as input a `TokenizedText` and returns a list of possible transformed `TokenizedText`s. For example, a transformation might return all possible synonym replacements.
|
||||
|
||||
### Search Methods
|
||||
#### Loading a model or dataset from a file
|
||||
|
||||
You can easily try out an attack on a local model or dataset sample. To attack a pre-trained model,
|
||||
create a short file that loads them as variables `model` and `tokenizer`. The `tokenizer` must
|
||||
be able to transform string inputs to lists or tensors of IDs using a method called `encode()`. The
|
||||
model must take inputs via the `__call__` method.
|
||||
|
||||
##### Model from a file
|
||||
To experiment with a model you've trained, you could create the following file
|
||||
and name it `my_model.py`:
|
||||
|
||||
```python
|
||||
model = load_your_model_with_custom_code() # replace this line with your model loading code
|
||||
tokenizer = load_your_tokenizer_with_custom_code() # replace this line with your tokenizer loading code
|
||||
```
|
||||
|
||||
Then, run an attack with the argument `--model-from-file my_model.py`. The model and tokenizer will be loaded automatically.
|
||||
|
||||
|
||||
|
||||
### Custom Datasets
|
||||
|
||||
|
||||
#### Dataset from a file
|
||||
|
||||
Loading a dataset from a file is very similar to loading a model from a file. A 'dataset' is any iterable of `(input, output)` pairs.
|
||||
The following example would load a sentiment classification dataset from file `my_dataset.py`:
|
||||
|
||||
```python
|
||||
dataset = [('Today was....', 1), ('This movie is...', 0), ...]
|
||||
```
|
||||
|
||||
You can then run attacks on samples from this dataset by adding the argument `--dataset-from-file my_dataset.py`.
|
||||
|
||||
|
||||
#### Dataset via AttackedText class
|
||||
|
||||
To allow for word replacement after a sequence has been tokenized, we include an `AttackedText` object
|
||||
which maintains both a list of tokens and the original text, with punctuation. We use this object in favor of a list of words or just raw text.
|
||||
|
||||
|
||||
|
||||
#### Dataset via Data Frames (*coming soon*)
|
||||
|
||||
|
||||
|
||||
### Attacks and how to design a new attack
|
||||
|
||||
The `attack_one` method in an `Attack` takes as input an `AttackedText`, and outputs either a `SuccessfulAttackResult` if it succeeds or a `FailedAttackResult` if it fails.
|
||||
|
||||
|
||||
We formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations. The attack attempts to perturb an input text such that the model output fulfills the goal function (i.e., indicating whether the attack is successful) and the perturbation adheres to the set of constraints (e.g., grammar constraint, semantic similarity constraint). A search method is used to find a sequence of transformations that produce a successful adversarial example.
|
||||
|
||||
|
||||
This modular design unifies adversarial attack methods into one system, enabling us to easily assemble attacks from the literature while re-using components that are shared across attacks. We provide clean, readable implementations of 16 adversarial attack recipes from the literature (see the table above). For the first time, these attacks can be benchmarked, compared, and analyzed in a standardized setting.
|
||||
|
||||
|
||||
TextAttack is model-agnostic - meaning it can run attacks on models implemented in any deep learning framework. Model objects must be able to take a string (or list of strings) and return an output that can be processed by the goal function. For example, machine translation models take a list of strings as input and produce a list of strings as output. Classification and entailment models return an array of scores. As long as the user's model meets this specification, the model is fit to use with TextAttack.
|
||||
|
||||
|
||||
|
||||
#### Goal Functions
|
||||
|
||||
A `GoalFunction` takes as input an `AttackedText` object, scores it, and determines whether the attack has succeeded, returning a `GoalFunctionResult`.
|
||||
|
||||
#### Constraints
|
||||
|
||||
A `Constraint` takes as input a current `AttackedText`, and a list of transformed `AttackedText`s. For each transformed option, it returns a boolean representing whether the constraint is met.
|
||||
|
||||
#### Transformations
|
||||
|
||||
A `Transformation` takes as input an `AttackedText` and returns a list of possible transformed `AttackedText`s. For example, a transformation might return all possible synonym replacements.
|
||||
|
||||
#### Search Methods
|
||||
|
||||
A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a final `GoalFunctionResult`. The search is given access to the `get_transformations` function, which takes as input an `AttackedText` object and outputs a list of possible transformations filtered by meeting all of the attack’s constraints. A search consists of successive calls to `get_transformations` until the search succeeds (determined using `get_goal_results`) or is exhausted.
|
||||
|
||||
|
||||
## On Benchmarking Attacks
|
||||
|
||||
- See our analysis paper: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples at [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368).
|
||||
|
||||
- As we emphasized in the above paper, we don't recommend directly comparing Attack Recipes out of the box.
|
||||
|
||||
- This caveat is because attack recipes in the recent literature use different ways or thresholds to set up their constraints. Without the constraint space held constant, an increase in attack success rate could come from an improved search or transformation method or a less restrictive search space.
|
||||
|
||||
- Our Github on benchmarking scripts and results: [TextAttack-Search-Benchmark Github](https://github.com/QData/TextAttack-Search-Benchmark)
|
||||
|
||||
|
||||
## On Quality of Generated Adversarial Examples in Natural Language
|
||||
|
||||
- Our analysis Paper in [EMNLP Findings](https://arxiv.org/abs/2004.14174)
|
||||
- We analyze the generated adversarial examples of two state-of-the-art synonym substitution attacks. We find that their perturbations often do not preserve semantics, and 38% introduce grammatical errors. Human surveys reveal that to successfully preserve semantics, we need to significantly increase the minimum cosine similarities between the embeddings of swapped words and between the sentence encodings of original and perturbed sentences. With constraints adjusted to better preserve semantics and grammaticality, the attack success rate drops by over 70 percentage points.
|
||||
- Our Github on Reevaluation results: [Reevaluating-NLP-Adversarial-Examples Github](https://github.com/QData/Reevaluating-NLP-Adversarial-Examples)
|
||||
- As we have emphasized in this analysis paper, we recommend researchers and users to be EXTREMELY mindful of the quality of generated adversarial examples in natural language.
|
||||
- We recommend the field to use human-evaluation derived thresholds for setting up constraints
|
||||
|
||||
|
||||
|
||||
## Multi-lingual Support
|
||||
|
||||
- See [README_ZH.md](https://github.com/QData/TextAttack/blob/master/README_ZH.md) for our README in Chinese
|
||||
|
||||
|
||||
A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a final `GoalFunctionResult`. The search is given access to the `get_transformations` function, which takes as input a `TokenizedText` object and outputs a list of possible transformations filtered by meeting all of the attack’s constraints. A search consists of successive calls to `get_transformations` until the search succeeds (determined using `get_goal_results`) or is exhausted.
|
||||
|
||||
## Contributing to TextAttack
|
||||
|
||||
We welcome contributions and suggestions! Submit a pull request or issue and we will do our best to respond in a timely manner.
|
||||
We welcome suggestions and contributions! Submit an issue or pull request and we will do our best to respond in a timely manner. TextAttack is currently in an "alpha" stage in which we are working to improve its capabilities and design.
|
||||
|
||||
See [CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md) for detailed information on contributing.
|
||||
|
||||
## Citing TextAttack
|
||||
|
||||
If you use TextAttack for your research, please cite [TextAttack: A Framework for Adversarial Attacks in Natural Language Processing](https://arxiv.org/abs/2005.05909).
|
||||
If you use TextAttack for your research, please cite [TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP](https://arxiv.org/abs/2005.05909).
|
||||
|
||||
```bibtex
|
||||
@misc{Morris2020TextAttack,
|
||||
Author = {John X. Morris and Eli Lifland and Jin Yong Yoo and Yanjun Qi},
|
||||
Title = {TextAttack: A Framework for Adversarial Attacks in Natural Language Processing},
|
||||
Year = {2020},
|
||||
Eprint = {arXiv:2005.05909},
}
|
||||
@inproceedings{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP},
|
||||
author={Morris, John and Lifland, Eli and Yoo, Jin Yong and Grigsby, Jake and Jin, Di and Qi, Yanjun},
|
||||
booktitle={Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
|
||||
pages={119--126},
|
||||
year={2020}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
517
README_ZH.md
Normal file
@@ -0,0 +1,517 @@
|
||||
<h1 align="center">TextAttack 🐙</h1>
|
||||
|
||||
<p align="center">为 NLP 模型生成对抗样本</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://textattack.readthedocs.io/">[TextAttack 的 ReadTheDocs 文档]</a>
|
||||
<br> <br>
|
||||
<a href="#简介">简介</a> •
|
||||
<a href="#环境配置">环境配置</a> •
|
||||
<a href="#使用方法textattack---help">使用方法</a> •
|
||||
<a href="#设计模式">设计模式</a>
|
||||
<br> <br>
|
||||
<a target="_blank">
|
||||
  <img src="https://github.com/QData/TextAttack/workflows/Github%20PyTest/badge.svg" alt="Github Runner Coverage Status">
|
||||
</a>
|
||||
<a href="https://badge.fury.io/py/textattack">
|
||||
<img src="https://badge.fury.io/py/textattack.svg" alt="PyPI version" height="18">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<img src="http://jackxmorris.com/files/textattack.gif" alt="TextAttack Demo GIF" style="display: block; margin: 0 auto;" />
|
||||
|
||||
## 简介
|
||||
|
||||
TextAttack 是一个可以实行自然语言处理的 Python 框架,用于方便快捷地进行对抗攻击,增强数据,以及训练模型。
|
||||
|
||||
> 如果你在寻找 TextAttacks 支持的预训练模型,请访问 [TextAttack Model Zoo](https://textattack.readthedocs.io/en/latest/3recipes/models.html)。
|
||||
|
||||
## Slack 频道
|
||||
|
||||
加入[TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-huomtd9z-KqdHBPPu2rOP~Z8q3~urgg) 频道,获取在线帮助与更新提示!
|
||||
|
||||
### *选择 TextAttack 的原因*
|
||||
|
||||
1. **深入理解 NLP 模型**: 通过使用各种对抗攻击,观察模型的表现
|
||||
2. **研究与开发 NLP 对抗攻击**: 在你的项目中使用 TextAttack 的框架与组件库
|
||||
3. **进行数据增强**: 提升模型的泛化性与鲁棒性
|
||||
3. **训练 NLP 模型**: 只需一行命令,轻松训练模型 (包括下载所有的依赖资源!)
|
||||
|
||||
## 环境配置
|
||||
|
||||
### 安装
|
||||
|
||||
支持 Python 3.6 及以上。TextAttack 可以在 CPU 上运行;若使用兼容 CUDA 的 GPU,还可以大幅提高代码运行速度。使用 pip 轻松安装 TextAttack:
|
||||
|
||||
```bash
|
||||
pip install textattack
|
||||
```
|
||||
|
||||
当 TextAttack 安装完成,可以通过命令行 (`textattack ...`)
|
||||
或者通过 python 模块 (`python -m textattack ...`) 运行 TextAttack。
|
||||
|
||||
> **小提醒**:TextAttack 默认将文件下载保存在 `~/.cache/textattack/` 路径。这些文件包括预训练模型,数据集,以及配置文件 `config.yaml`。若需更改缓存路径,可以通过设置环境变量 `TA_CACHE_DIR`。(例如: `TA_CACHE_DIR=/tmp/ textattack attack ...`).
|
||||
|
||||
## 使用方法:`textattack --help`
|
||||
|
||||
TextAttack 的主要功能均可通过 `textattack` 命令运行。常用的两个命令为 `textattack attack <args>` 和 `textattack augment <args>`。你可以通过如下命令获取关于所有命令的介绍:
|
||||
```bash
|
||||
textattack --help
|
||||
```
|
||||
或者获取具体命令的用法,例如:
|
||||
```bash
|
||||
textattack attack --help
|
||||
```
|
||||
|
||||
文件夹 [`examples/`](examples/) 里是一些示例脚本,展示了 TextAttack 的常用方法,包括训练模型,对抗攻击,以及数据增强。[文档网站](https://textattack.readthedocs.io/en/latest) 中有 TextAttack 基本用法的详尽说明与示例,包括自定义攻击的变换与约束。
|
||||
|
||||
### 运行对抗攻击:`textattack attack --help`
|
||||
|
||||
尝试运行对抗攻击,最快捷的方法是通过命令行接口:`textattack attack`
|
||||
|
||||
> **小提醒**:如果你的机器有多个 GPU,可以通过 `--parallel` 参数将对抗攻击分布在多个 GPU 上。这对一些攻击策略的性能提升巨大。
|
||||
|
||||
下面是几个具体的例子:
|
||||
|
||||
*对 MR 情感分类数据集上训练的 BERT 模型进行 TextFooler 攻击*:
|
||||
|
||||
```bash
|
||||
textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100
|
||||
```
|
||||
|
||||
*对 Quora 问句对数据集上训练的 DistilBERT 模型进行 DeepWordBug 攻击*:
|
||||
|
||||
```bash
|
||||
textattack attack --model distilbert-base-uncased-qqp --recipe deepwordbug --num-examples 100
|
||||
```
|
||||
|
||||
*对 MR 数据集上训练的 LSTM 模型:设置束搜索宽度为 4,使用词嵌入转换进行无目标攻击*:
|
||||
|
||||
```bash
|
||||
textattack attack --model lstm-mr --num-examples 20 \
|
||||
--search-method beam-search^beam_width=4 --transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
|
||||
> **小提醒**:除了设置具体的数据集与样本数量,你还可以通过传入 `--interactive` 参数,对用户输入的文本进行攻击。
|
||||
|
||||
### 攻击策略:`textattack attack --recipe [recipe_name]`
|
||||
|
||||
我们实现了一些文献中的攻击策略(Attack recipe)。使用 `textattack list attack-recipes` 命令可以列出所有内置的攻击策略。
|
||||
|
||||
运行攻击策略:`textattack attack --recipe [recipe_name]`
|
||||
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th><strong>—————— 攻击策略 ——————</strong></th>
|
||||
<th><strong>—————— 目标函数 ——————</strong></th>
|
||||
<th><strong>—————— 约束条件 ——————</strong></th>
|
||||
<th><strong>—————— 变换方式 ——————</strong></th>
|
||||
<th><strong>——————— 搜索方法 ———————</strong></th>
|
||||
<th><strong>主要思想</strong></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td colspan="6"><strong><br>对于分类任务的攻击策略,例如情感分类和文本蕴含任务:<br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>无目标<br/>{分类,蕴含}</sub></td>
|
||||
<td><sub>被扰动词的比例,语言模型的困惑度,词嵌入的距离</sub></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub></td>
|
||||
<td><sub>遗传算法</sub></td>
|
||||
<td ><sub>来自 (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bae</code> <span class="citation" data-cites="garg2020bae"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>USE 通用句子编码向量的 cosine 相似度</sub></td>
|
||||
<td><sub>BERT 遮罩词预测</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td><sub>使用 BERT 语言模型作为变换的攻击方法,来自 (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)). </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bert-attack</code> <span class="citation" data-cites="li2020bertattack"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>USE 通用句子编码向量的 cosine 相似度, 被扰动词的最大数量</sub></td>
|
||||
<td><sub>BERT 遮罩词预测 (包括对 subword 的扩充)</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub> (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>checklist</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{无目标,有目标}<br/>分类</sub></td>
|
||||
<td><sub>checklist 距离</sub></td>
|
||||
<td><sub>简写,扩写,以及命名实体替换</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>CheckList 中实现的不变性检验(["Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>clare (*coming soon*)</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>无目标<br/>{分类,蕴含}</sub></td>
|
||||
<td><sub>RoBERTa 掩码语言模型</sub></td>
|
||||
<td><sub>词的替换,插入,合并</sub></td>
|
||||
<td><sub>贪心搜索</sub></td>
|
||||
<td ><sub>["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>deepwordbug</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{无目标,有目标}<br/>分类</sub></td>
|
||||
<td><sub>Levenshtein 编辑距离</sub></td>
|
||||
<td><sub>{字符的插入,删除,替换,以及临近字符交换}</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>贪心搜索 replace-1 分数,多种变换的字符交换式的攻击 (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354)</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>fast-alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>无目标<br/>{分类,蕴含}</sub></td>
|
||||
<td><sub>被扰动词的比例,语言模型的困惑度,词嵌入的距离</sub></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub></td>
|
||||
<td><sub>遗传算法</sub></td>
|
||||
<td ><sub>改进过的更快的 Alzantot et al. 遗传算法, 来自 (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hotflip</code> (word swap) <span class="citation" data-cites="Ebrahimi2017HotFlipWA"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>词嵌入的 cosine 相似度,词性的匹配,被扰动词的数量</sub></td>
|
||||
<td><sub>基于梯度的词的交换</sub></td>
|
||||
<td><sub>束搜索</sub></td>
|
||||
<td ><sub> (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>iga</code> <span class="citation" data-cites="iga-wang2019natural"></span></td>
|
||||
<td><sub>无目标<br/>{分类,蕴含}</sub></td>
|
||||
<td><sub>被扰动词的比例,词嵌入的距离</sub></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub></td>
|
||||
<td><sub>遗传算法</sub></td>
|
||||
<td ><sub>改进的基于遗传算法的词替换,来自 (["Natural Language Adversarial Attacks and Defenses in Word Level (Wang et al., 2019)"](https://arxiv.org/abs/1909.06723)</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>input-reduction</code> <span class="citation" data-cites="feng2018pathologies"></span></td>
|
||||
<td><sub>输入归约</sub></td>
|
||||
<td></td>
|
||||
<td><sub>词的删除</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>基于词重要性排序的贪心攻击方法,在缩减输入词的同时保持预测结果不变 (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>kuleshov</code> <span class="citation" data-cites="Kuleshov2018AdversarialEF"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>Thought vector 编码的 cosine 相似度, 语言模型给出的相似度概率</sub></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub></td>
|
||||
<td><sub>贪心的词的替换</sub></td>
|
||||
<td ><sub>(["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pruthi</code> <span class="citation" data-cites="pruthi2019combating"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>词的最短长度,被扰动词的最大数量</sub></td>
|
||||
<td><sub>{临近字符替换,字符的插入与删除,基于键盘字符位置的字符替换}</sub></td>
|
||||
<td><sub>贪心搜索</sub></td>
|
||||
<td ><sub>模拟常见的打字错误 (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pso</code> <span class="citation" data-cites="pso-zang-etal-2020-word"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td></td>
|
||||
<td><sub>基于 HowNet 的词替换</sub></td>
|
||||
<td><sub>粒子群优化算法</sub></td>
|
||||
<td ><sub>(["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pwws</code> <span class="citation" data-cites="pwws-ren-etal-2019-generating"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td></td>
|
||||
<td><sub>基于 WordNet 的同义词替换</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>贪心的攻击方法,基于词重要性排序,词的显著性,以及同义词替换分数(["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/))</sub> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textbugger</code> : (black-box) <span class="citation" data-cites="Li2019TextBuggerGA"></span></td>
|
||||
<td><sub>无目标<br/>分类</sub></td>
|
||||
<td><sub>USE 通用句子编码向量的 cosine 相似度</sub></td>
|
||||
<td><sub>{字符的插入、删除、替换,以及临近字符交换}</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>(["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textfooler</code> <span class="citation" data-cites="Jin2019TextFooler"></span></td>
|
||||
<td><sub>无目标<br/>{分类,蕴含}</sub></td>
|
||||
<td><sub>词嵌入的距离,词性的匹配,USE 通用句子编码向量的 cosine 相似度</sub></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub></td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>对词重要性排序的贪心攻击方法(["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932))</sub> </td>
|
||||
</tr>
|
||||
|
||||
<tr><td colspan="6"><strong><br>对 seq2seq 模型的攻击策略:<br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>morpheus</code> <span class="citation" data-cites="morpheus-tan-etal-2020-morphin"></span></td>
|
||||
<td><sub>最小 BLEU 分数</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>词的屈折变化</sub> </td>
|
||||
<td><sub>贪心搜索</sub> </td>
|
||||
<td ><sub>贪心的用词的屈折变化进行替换,来最小化 BLEU 分数(["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf)</sub> </td>
|
||||
</tr>
|
||||
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>seq2sick</code> :(black-box) <span class="citation" data-cites="cheng2018seq2sick"></span></td>
|
||||
<td><sub>翻译结果无重叠</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>Counter-fitted 词嵌入替换</sub> </td>
|
||||
<td><sub>对 WIR 的贪心搜索</sub></td>
|
||||
<td ><sub>贪心攻击方法,以改变全部的翻译结果为目标。目前实现的是黑盒攻击,计划改为与论文中一样的白盒攻击(["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)) </sub> </td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
</font>
|
||||
</table>
|
||||
|
||||
> WIR 为 word importance ranking 的缩写,即词重要性排序。
|
||||
|
||||
|
||||
|
||||
#### 运行攻击的例子
|
||||
|
||||
下面是几个样例,在命令行中验证上述实现的攻击方法:
|
||||
|
||||
*对在 SST-2 上精调的 BERT 模型进行 TextFooler 攻击:*
|
||||
```bash
|
||||
textattack attack --model bert-base-uncased-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
*对用于英语-德语翻译的 T5 模型进行 seq2sick (黑盒) 攻击:*
|
||||
```bash
|
||||
textattack attack --model t5-en-de --recipe seq2sick --num-examples 100
|
||||
```
|
||||
|
||||
### 增强文本数据:`textattack augment`
|
||||
|
||||
TextAttack 的组件中,有很多易用的数据增强工具。`textattack.Augmenter` 类使用 *变换* 与一系列的 *约束* 进行数据增强。我们提供了 5 种内置的数据增强策略:
|
||||
- `textattack.WordNetAugmenter` 通过基于 WordNet 同义词替换的方式增强文本
|
||||
- `textattack.EmbeddingAugmenter` 通过邻近词替换的方式增强文本,使用 counter-fitted 词嵌入空间中的邻近词进行替换,约束二者的 cosine 相似度不低于 0.8
|
||||
- `textattack.CharSwapAugmenter` 通过字符的增删改,以及临近字符交换的方式增强文本
|
||||
- `textattack.EasyDataAugmenter` 通过对词的增删改来增强文本
|
||||
- `textattack.CheckListAugmenter` 通过简写,扩写以及对实体、地点、数字的替换来增强文本
|
||||
|
||||
#### 数据增强的命令行接口
|
||||
使用 textattack 来进行数据增强,最快捷的方法是通过 `textattack augment <args>` 命令行接口。 `textattack augment` 使用 CSV 文件作为输入,在参数中设置需要增强的文本列,每个样本允许改变的比例,以及对于每个输入样本生成多少个增强样本。输出的结果保存为与输入文件格式一致的 CSV 文件,结果文件中为对指定的文本列生成的增强样本。
|
||||
|
||||
比如,对于下面这个 `examples.csv` 文件:
|
||||
|
||||
```csv
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
使用命令 `textattack augment --csv examples.csv --input-column text --recipe embedding --pct-words-to-swap .1 --transformations-per-example 2 --exclude-original`
|
||||
会增强 `text` 列,约束对样本中 10% 的词进行修改,生成输入数据两倍的样本,同时结果文件中不保存 csv 文件的原始输入。(默认所有结果将会保存在 `augment.csv` 文件中)
|
||||
|
||||
数据增强后,下面是 `augment.csv` 文件的内容:
|
||||
```csv
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's newest conan and that he's gonna to make a splashing even stronger than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the rock is destined to be the 21tk century's novel conan and that he's going to make a splat even greater than arnold schwarzenegger , jean- claud van damme or stevens segal.",1
|
||||
the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of expression significant adequately describe co-writer/director pedro jackson's expanded vision of j . rs . r . tolkien's middle-earth .,1
|
||||
the gorgeously elaborate continuation of 'the lordy of the piercings' trilogy is so huge that a column of mots cannot adequately describe co-novelist/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offerings a pleasantly several slice of asia cinema .,1
|
||||
taking care of my cat offers a pleasantly different slice of asiatic kino .,1
|
||||
a technically good-made suspenser . . . but its abrupt drop in iq points as it races to the finish bloodline proves straightforward too disheartening to let slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq dot as it races to the finish line demonstrates simply too disheartening to leave slide .,0
|
||||
it's a enigma how the film wo be releases in this condition .,0
|
||||
it's a enigma how the filmmaking wo be publicized in this condition .,0
|
||||
```
|
||||
|
||||
在 'embedding' 增强策略中,使用 counterfitted 词嵌入空间的最近邻来增强数据。
|
||||
|
||||
#### 数据增强的 Python 接口
|
||||
除了使用命令行接口,你还可以在自己的代码中导入 `Augmenter` 来进行动态的数据增强。所有的 `Augmenter` 对象都实现了 `augment` 和 `augment_many` 方法,用于对单个 string 和一个 list 的 string 进行数据增强。下面是在 python 脚本中使用 `EmbeddingAugmenter` 的例子:
|
||||
|
||||
```python
|
||||
>>> from textattack.augmentation import EmbeddingAugmenter
|
||||
>>> augmenter = EmbeddingAugmenter()
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I notable create, I do not understand.', 'What I significant create, I do not understand.', 'What I cannot engender, I do not understand.', 'What I cannot creating, I do not understand.', 'What I cannot creations, I do not understand.', 'What I cannot create, I do not comprehend.', 'What I cannot create, I do not fathom.', 'What I cannot create, I do not understanding.', 'What I cannot create, I do not understands.', 'What I cannot create, I do not understood.', 'What I cannot create, I do not realise.']
|
||||
```
|
||||
你还可以通过从 `textattack.transformations` 和 `textattack.constraints` 导入 *变换* 与 *约束* 来从头创建自己的数据增强方法。下面是一个使用 `WordSwapRandomCharacterDeletion` *变换* 进行数据增强的例子:
|
||||
|
||||
```python
|
||||
>>> from textattack.transformations import WordSwapRandomCharacterDeletion
|
||||
>>> from textattack.transformations import CompositeTransformation
|
||||
>>> from textattack.augmentation import Augmenter
|
||||
>>> transformation = CompositeTransformation([WordSwapRandomCharacterDeletion()])
|
||||
>>> augmenter = Augmenter(transformation=transformation, transformations_per_example=5)
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I cannot creae, I do not understand.', 'What I cannot creat, I do not understand.', 'What I cannot create, I do not nderstand.', 'What I cannot create, I do nt understand.', 'Wht I cannot create, I do not understand.']
|
||||
```
|
||||
|
||||
### 训练模型:`textattack train`
|
||||
|
||||
通过 `textattack train` 可以便捷地使用 TextAttack 框架来训练 LSTM,CNN,以及 `transformers` 模型。数据集会通过 `datasets` 包自动加载。
|
||||
|
||||
#### 运行训练的例子
|
||||
*在 Yelp 分类数据集上对 TextAttack 中默认的 LSTM 模型训练 50 个 epoch:*
|
||||
```bash
|
||||
textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 50 --learning-rate 1e-5
|
||||
```
|
||||
|
||||
训练接口中同样内置了数据增强功能:
|
||||
```bash
|
||||
textattack train --model lstm --dataset rotten_tomatoes --augment eda --pct-words-to-swap .1 --transformations-per-example 4
|
||||
```
|
||||
上面这个例子在训练之前使用 `EasyDataAugmenter` 策略对 `rotten_tomatoes` 数据集进行数据增强。
|
||||
|
||||
*在 `CoLA` 数据集上对 `bert-base` 模型精调 5 个 epoch:*
|
||||
```bash
|
||||
textattack train --model bert-base-uncased --dataset glue^cola --batch-size 32 --epochs 5
|
||||
```
|
||||
|
||||
|
||||
### 检查数据集:`textattack peek-dataset`
|
||||
|
||||
使用 `textattack peek-dataset` 可以进一步的观察数据。TextAttack 会打印出数据集粗略的统计信息,包括数据样例,输入文本的统计信息以及标签分布。比如,运行 `textattack peek-dataset --dataset-from-huggingface snli` 命令,会打印指定 NLP 包中 SNLI 数据集的统计信息。
|
||||
|
||||
|
||||
### 列出功能组件:`textattack list`
|
||||
|
||||
TextAttack 中有很多组件,有时很难跟进所有组件的情况。你可以使用 `textattack list` 列出所有的组件。比如,列出预训练模型 (`textattack list models`),或是列出可用的搜索方法 (`textattack list search-methods`)。
|
||||
|
||||
|
||||
## 设计模式
|
||||
|
||||
|
||||
### 模型
|
||||
|
||||
TextAttack 不依赖具体模型!你可以使用 TextAttack 来分析任何模型,只要模型的输出是 ID,张量,或者字符串。为了方便使用,TextAttack 内置了常见 NLP 任务的各种预训练模型。你可以轻松愉悦地上手 TextAttack。同时还可以更公平的比较不同文献的 attack 策略。
|
||||
|
||||
|
||||
|
||||
#### 内置的模型
|
||||
|
||||
TextAttack 提供了各种内置模型和数据集。使用 TextAttack 命令行接口,可以自动匹配模型和数据集。
|
||||
我们为 [GLUE](https://gluebenchmark.com/) 中的九个任务内置了多种预训练模型,并且还内置了很多常见的分类任务、翻译任务和摘要任务的数据集。
|
||||
|
||||
[textattack/models/README.md](textattack/models/README.md) 这个列表包含可用的预训练模型以及这些模型的准确率。你还可以通过 `textattack attack --help` 查看完整列表,包括所有的内置模型与数据集。
|
||||
|
||||
下面是一个使用内置模型的例子(SST-2 数据集会自动的加载):
|
||||
```bash
|
||||
textattack attack --model roberta-base-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
#### HuggingFace 支持 :`transformers` 模型和 `datasets` 数据集
|
||||
|
||||
TextAttack 兼容 [`transformers` 预训练模型](https://huggingface.co/models)
|
||||
和 [`datasets` 数据集](https://github.com/huggingface/datasets)! 下面是一个例子,加载数据集并攻击相应预训练模型:
|
||||
|
||||
```bash
|
||||
textattack attack --model-from-huggingface distilbert-base-uncased-finetuned-sst-2-english --dataset-from-huggingface glue^sst2 --recipe deepwordbug --num-examples 10
|
||||
```
|
||||
|
||||
你还可以通过 `--model-from-huggingface` 参数探索更多支持的预训练模型,或是通过
|
||||
`--dataset-from-huggingface` 参数指定其他数据集。
|
||||
|
||||
|
||||
#### 加载本地文件中的模型与数据集
|
||||
|
||||
你可以快捷地对本地模型或数据样本进行攻击:创建一个简单的文件来加载预训练模型,在该文件中通过 `model` 与 `tokenizer` 两个对象提供模型。`tokenizer` 对象必须实现 `encode()` 方法,该方法将输入字符串转为一个列表或一个 ID 张量。`model` 对象必须实现 `__call__` 方法,以接受输入并返回模型输出。
|
||||
|
||||
##### 使用本地模型
|
||||
对于你已经训练完成的模型,可以通过创建下面这样的文件,将其命名为 `my_model.py`:
|
||||
|
||||
```python
|
||||
model = load_your_model_with_custom_code() # replace this line with your model loading code
|
||||
tokenizer = load_your_tokenizer_with_custom_code() # replace this line with your tokenizer loading code
|
||||
```
|
||||
|
||||
然后,在运行攻击时指定参数 `--model-from-file my_model.py`,就可以自动载入你的模型与分词器。
|
||||
|
||||
### 数据集
|
||||
|
||||
#### 使用本地数据集
|
||||
|
||||
加载本地数据集与加载本地预训练模型的方法相似。`dataset` 对象可以是任意可迭代的`(input, output)` 对。下面这个例子演示了如何在 `my_dataset.py` 脚本中加载一个情感分类数据集:
|
||||
|
||||
```python
|
||||
dataset = [('Today was....', 1), ('This movie is...', 0), ...]
|
||||
```
|
||||
|
||||
然后,在运行攻击时指定参数 `--dataset-from-file my_dataset.py`,就可以对这个本地数据集进行攻击。
|
||||
|
||||
#### 通过 AttackedText 类调用数据集
|
||||
|
||||
为了对分词后的句子运行攻击方法,我们设计了 `AttackedText` 对象。它同时维护 token 列表与含有标点符号的原始文本。我们使用这个对象来处理原始的与分词后的文本。
|
||||
|
||||
#### 通过 Data Frames 调用数据集(*即将上线*)
|
||||
|
||||
|
||||
### 何为攻击 & 如何设计新的攻击
|
||||
|
||||
`Attack` 中的 `attack_one` 方法以 `AttackedText` 对象作为输入,若攻击成功,返回 `SuccessfulAttackResult`,若攻击失败,返回 `FailedAttackResult`。
|
||||
|
||||
|
||||
我们将攻击划分并定义为四个组成部分:**目标函数** 定义怎样的攻击是一次成功的攻击,**约束条件** 定义怎样的扰动是可行的,**变换规则** 对输入文本生成一系列可行的扰动结果,**搜索方法** 在搜索空间中遍历所有可行的扰动结果。每一次攻击都尝试对输入的文本添加扰动,使其通过目标函数(即判断攻击是否成功),并且扰动要符合约束(如语法约束,语义相似性约束)。最后用搜索方法在所有可行的变换结果中,挑选出优质的对抗样本。
|
||||
|
||||
|
||||
这种模块化的设计可以将各种对抗攻击策略整合在一个系统里。这使得我们可以方便地将文献中的方法集成在一起,同时复用攻击策略之间相同的部分。我们已经实现了 16 种简明易读的攻击策略(见上表)。史上首次!各种攻击方法终于可以在标准的设置下作为基准方法,进行比较与分析。
|
||||
|
||||
|
||||
TextAttack 是不依赖具体模型的,这意味着可以对任何深度学习框架训练的模型进行攻击。只要被攻击的模型可以读取字符串(或一组字符串),并根据目标函数返回一个结果。比如说,机器翻译模型读取一句话,返回一句对应的翻译结果。分类或蕴含任务的模型输入字符串,返回一组分数。只要你的模型满足这两点,就可以使用 TextAttack 进行攻击。
|
||||
|
||||
|
||||
|
||||
### 目标函数
|
||||
|
||||
目标函数 `GoalFunction` 以 `AttackedText` 对象作为输入,为输入对象打分,并且判别这次攻击是否满足目标函数定义的成功条件,返回一个 `GoalFunctionResult` 对象。
|
||||
|
||||
### 约束条件
|
||||
|
||||
约束条件 `Constraint` 以 `AttackedText` 对象作为输入,返回一个变换后的 `AttackedText` 列表。对于每条变换,返回一个布尔值表示这条变换是否满足约束条件。
|
||||
|
||||
### 变换规则
|
||||
|
||||
变换规则 `Transformation` 以 `AttackedText` 对象作为输入,返回对于 `AttackedText` 所有可行变换的列表。例如,一个变换规则可以是返回所有可能的同义词替换结果。
|
||||
|
||||
### 搜索方法
|
||||
|
||||
搜索方法 `SearchMethod` 以初始的 `GoalFunctionResult` 作为输入,返回最终的 `GoalFunctionResult`。`get_transformations` 方法以一个 `AttackedText` 对象作为输入,返回所有符合约束条件的变换结果。搜索方法不断地调用 `get_transformations` 函数,直到攻击成功 (由 `get_goal_results` 决定) 或搜索结束。
|
||||
|
||||
### 公平比较攻击策略(Benchmarking Attacks)
|
||||
|
||||
- 详细情况参见我们的分析文章:Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples at [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368).
|
||||
|
||||
- 正如我们在上面的文章中所强调的,我们不推荐在对攻击策略没有约束的情况下直接进行比较。
|
||||
|
||||
- 对这点进行强调,是由于最近的文献中在设置约束时使用了不同的方法或者阈值。在不固定约束空间时,攻击成功率的增加可能是源于改进的搜索方法或变换方式,又或是降低了对搜索空间的约束。
|
||||
|
||||
## 帮助改进 TextAttack
|
||||
|
||||
我们欢迎任何建议与改进!请提交 Issues(议题)和 Pull requests(拉取请求),我们会竭尽所能的做出即时反馈。TextAttack 当前处于 "alpha" 版本,我们仍在完善它的设计与功能。
|
||||
|
||||
关于提交建议与改进的详细指引,查看 [CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md) 。
|
||||
|
||||
## 引用 TextAttack
|
||||
|
||||
如果 TextAttack 对你的研究工作有所帮助,欢迎在论文中引用 [TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP](https://arxiv.org/abs/2005.05909)。
|
||||
|
||||
```bibtex
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP},
|
||||
author={John X. Morris and Eli Lifland and Jin Yong Yoo and Jake Grigsby and Di Jin and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2005.05909},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
60
docs/0_get_started/basic-Intro.rst
Normal file
@@ -0,0 +1,60 @@
|
||||
TextAttack Basic Functions
|
||||
===========================
|
||||
|
||||
Welcome to the documentation for TextAttack!
|
||||
|
||||
What is TextAttack?
|
||||
----------------------
|
||||
`TextAttack <https://github.com/QData/TextAttack>`__ is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP.
|
||||
|
||||
TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation.
|
||||
|
||||
TextAttack provides components for common NLP tasks like sentence encoding, grammar-checking, and word replacement that can be used on their own.
|
||||
|
||||
Where should I start?
|
||||
----------------------
|
||||
|
||||
This is a great question, and one we get a lot. First of all, almost everything in TextAttack can be done in two ways: via the command-line or via the Python API. If you're looking to integrate TextAttack into an existing project, the Python API is likely for you. If you'd prefer to use built-in functionality end-to-end (training a model, running an adversarial attack, augmenting a CSV) then you can just use the command-line API.
|
||||
|
||||
|
||||
TextAttack does three things very well:
|
||||
|
||||
1. Adversarial attacks (Python: ``textattack.shared.Attack``, Bash: ``textattack attack``)
|
||||
2. Data augmentation (Python: ``textattack.augmentation.Augmenter``, Bash: ``textattack augment``)
|
||||
3. Model training (Python: ``textattack.commands.train.*``, Bash: ``textattack train``)
|
||||
|
||||
Adversarial training can be achieved as a combination of [1] and/or [2] with [3] (via ``textattack train --attack``). To see all this in action, see :ref:`the TextAttack End-to-End tutorial </2notebook/0_End_to_End.ipynb>`.
|
||||
|
||||
All of the other components: datasets, models & model wrappers, loggers, transformations, constraints, search methods, goal functions, etc., are developed to support one or more of these three functions. Feel free though to install textattack to include just one of those components! (For example, TextAttack provides a really easy Python interface for accessing and using word embeddings that will automatically download and save them on the first use.)
|
||||
|
||||
|
||||
NLP Attacks
|
||||
-----------
|
||||
|
||||
TextAttack provides a framework for constructing and thinking about generating inputs in NLP via perturbation attacks.
|
||||
|
||||
|
||||
TextAttack builds attacks from four components:
|
||||
|
||||
|
||||
|
||||
- :ref:`Goal Functions <goal_function>`: stipulate the goal of the attack, like to change the prediction score of a classification model, or to change all of the words in a translation output.
|
||||
- :ref:`Constraints <constraint>`: determine if a potential perturbation is valid with respect to the original input.
|
||||
- :ref:`Transformations <transformations>`: take a text input and transform it by inserting and deleting characters, words, and/or phrases.
|
||||
- :ref:`Search Methods <search_methods>`: explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
|
||||
|
||||
|
||||
TextAttack provides a set of :ref:`Attack Recipes <attack_recipes>` that assemble attacks from the literature from these four components. Take a look at these recipes (or our `paper on ArXiv <https://arxiv.org/abs/2005.05909>`__) to get a feel for how the four components work together to create an adversarial attack.
|
||||
|
||||
Data Augmentation
|
||||
--------------------
|
||||
Data augmentation is easy and extremely common in computer vision but harder and less common in NLP. We provide a :ref:`Data Augmentation <augmentation>` module using transformations and constraints.
|
||||
|
||||
Features
|
||||
------------
|
||||
TextAttack has some other features that make it a pleasure to use:
|
||||
|
||||
- :ref:`Pre-trained Models <models>` for testing attacks and evaluating constraints
|
||||
- :ref:`Visualization options <loggers>` like Weights & Biases and Visdom
|
||||
- :ref:`AttackedText <attacked_text>`, a utility class for strings that includes tools for tokenizing and editing text
|
||||
|
||||
145
docs/0_get_started/command_line_usage.md
Normal file
@@ -0,0 +1,145 @@
|
||||
Command-Line Usage
|
||||
=======================================
|
||||
|
||||
The easiest way to use textattack is from the command-line. Installing textattack
|
||||
will provide you with the handy `textattack` command which will allow you to do
|
||||
just about anything TextAttack offers in a single bash command.
|
||||
|
||||
> *Tip*: If you are for some reason unable to use the `textattack` command, you
|
||||
> can access all the same functionality by prepending `python -m` to the command
|
||||
> (`python -m textattack ...`).
|
||||
|
||||
|
||||
> The [`examples/`](https://github.com/QData/TextAttack/tree/master/examples) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file.
|
||||
|
||||
|
||||
> The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.
|
||||
|
||||
|
||||
To see all available commands, type `textattack --help`. This page explains
|
||||
some of the most important functionalities of textattack: NLP data augmentation,
|
||||
adversarial attacks, and training and evaluating models.
|
||||
|
||||
## Data Augmentation with `textattack augment`
|
||||
|
||||
The easiest way to use our data augmentation tools is with `textattack augment <args>`. `textattack augment`
|
||||
takes an input CSV file and text column to augment, along with the percentage of words to change per augmentation
|
||||
and the number of augmentations per input example. It outputs a CSV in the same format with all the augmentation
|
||||
examples corresponding to the proper columns.
|
||||
|
||||
For example, given the following as `examples.csv`:
|
||||
|
||||
```
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
The command:
|
||||
```
|
||||
textattack augment --csv examples.csv --input-column text --recipe eda --pct-words-to-swap .1 \
|
||||
--transformations-per-example 2 --exclude-original
|
||||
```
|
||||
will augment the `text` column with 10% of words edited per augmentation, twice as many augmentations as original inputs, and exclude the original inputs from the
|
||||
output CSV. (All of this will be saved to `augment.csv` by default.)
|
||||
|
||||
After augmentation, here are the contents of `augment.csv`:
|
||||
```
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's new conan and that he's to make splash even greater arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the Arnold rock is destined to be the 21st vanguard century's new specify conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
the gorgeously continuation of 'the lord of the rings' trilogy is so huge that a column of cannot adequately describe co-writer/ peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
the splendidly elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of parole cannot adequately describe co-writer/director peter jackson's expanded vision of J . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offers a refreshingly slice different of asian cinema .,1
|
||||
take care of my cast offers a refreshingly different slice of asian cinema .,1
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to rush let IT slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves just too discouraging to let chute .,0
|
||||
it's a mystery how the movie could this released in be condition .,0
|
||||
it's a whodunit how the movie could be released in this condition .,0
|
||||
```
|
||||
|
||||
The 'eda' augmentation uses a combination of word swaps, insertions, and substitutions to generate new examples.
|
||||
|
||||
## Adversarial Attacks with `textattack attack`
|
||||
|
||||
The heart of textattack is running adversarial attacks on NLP models with
|
||||
`textattack attack`. You can build an attack from the command-line in several ways:
|
||||
1. Use an **attack recipe** to launch an attack from the literature: `textattack attack --recipe deepwordbug`
|
||||
2. Build your attack from components:
|
||||
```
|
||||
textattack attack --model lstm-mr --num-examples 20 --search-method beam-search^beam_width=4 \
|
||||
--transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
3. Create a python file that builds your attack and load it: `textattack attack --attack-from-file my_file.py^my_attack_name`
|
||||
|
||||
## Training Models with `textattack train`
|
||||
|
||||
With textattack, you can train models on any classification or regression task
|
||||
from [`datasets`](https://github.com/huggingface/datasets/) using a single line.
|
||||
|
||||
### Available Models
|
||||
#### TextAttack Models
|
||||
TextAttack has two built-in model types, a 1-layer bidirectional LSTM with a hidden
|
||||
state size of 150 (`lstm`), and a WordCNN with 3 window sizes
|
||||
(3, 4, 5) and 100 filters for the window size (`cnn`). Both models set dropout
|
||||
to 0.3 and use the 200-dimensional GloVe embeddings as a base.
|
||||
|
||||
#### `transformers` Models
|
||||
Along with the `lstm` and `cnn`, you can theoretically fine-tune any model based
|
||||
in the huggingface [transformers](https://github.com/huggingface/transformers/)
|
||||
repo. Just type the model name (like `bert-base-cased`) and it will be automatically
|
||||
loaded.
|
||||
|
||||
Here are some models from transformers that have worked well for us:
|
||||
- `bert-base-uncased` and `bert-base-cased`
|
||||
- `distilbert-base-uncased` and `distilbert-base-cased`
|
||||
- `albert-base-v2`
|
||||
- `roberta-base`
|
||||
- `xlnet-base-cased`
|
||||
|
||||
## Evaluating Models with `textattack eval-model`
|
||||
|
||||
Any TextAttack-compatible model can be evaluated using `textattack eval-model`. TextAttack-trained models can be evaluated using `textattack eval-model --num-examples <num-examples> --model /path/to/trained/model/`
|
||||
|
||||
## Other Commands
|
||||
|
||||
### Checkpoints and `textattack attack-resume`
|
||||
|
||||
Some attacks can take a very long time. Sometimes this is because they're using
|
||||
a very slow search method (like beam search with a high beam width) or sometimes
|
||||
they're just attacking a large number of samples. In these cases, it can be
|
||||
useful to save attack checkpoints throughout the course of the attack. Then,
|
||||
if the attack crashes for some reason, you can resume without restarting from
|
||||
scratch.
|
||||
|
||||
- To save checkpoints while running an attack, add the argument `--checkpoint-interval X`,
|
||||
where X is the number of attacks you want to run between checkpoints (for example `textattack attack <args> --checkpoint-interval 5`).
|
||||
- To load an attack from a checkpoint, use `textattack attack-resume --checkpoint-file <checkpoint-file>`.
|
||||
|
||||
### Listing features with `textattack list`
|
||||
|
||||
TextAttack has a lot of built-in features (models, search methods, constraints, etc.)
|
||||
and it can get overwhelming to keep track of all the options. To list all of the
|
||||
options within a given category, use `textattack list`.
|
||||
|
||||
For example:
|
||||
- list all the built-in models: `textattack list models`
|
||||
- list all constraints: `textattack list constraints`
|
||||
- list all search methods: `textattack list search-methods`
|
||||
|
||||
### Examining datasets with `textattack peek-dataset`
|
||||
It can be useful to take a cursory look at and compute some basic statistics of
|
||||
whatever dataset you're working with. Whether you're loading a dataset of your
|
||||
own from a file, or one from NLP, you can use `textattack peek-dataset` to
|
||||
see some basic information about the dataset.
|
||||
|
||||
For example, use `textattack peek-dataset --dataset-from-huggingface glue^mrpc` to see
|
||||
information about the MRPC dataset (from the GLUE set of datasets). This will
|
||||
print statistics like the number of labels, average number of words, etc.
|
||||
|
||||
|
||||
49
docs/0_get_started/installation.md
Normal file
@@ -0,0 +1,49 @@
|
||||
Installation
|
||||
==============
|
||||
|
||||
To use TextAttack, you must be running Python 3.6 or above. A CUDA-compatible GPU is optional but will greatly improve speed.
|
||||
|
||||
We recommend installing TextAttack in a virtual environment (check out this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/)).
|
||||
|
||||
There are two ways to install TextAttack. If you want to simply use it as it is, install via `pip`. If you want to make any changes and play around, install it from source.
|
||||
|
||||
## Install with pip
|
||||
Simply run
|
||||
|
||||
pip install textattack
|
||||
|
||||
## Install from Source
|
||||
To install TextAttack from source, first clone the repo by running
|
||||
|
||||
git clone https://github.com/QData/TextAttack.git
|
||||
cd TextAttack
|
||||
|
||||
Then, install it using `pip`.
|
||||
|
||||
pip install -e .
|
||||
|
||||
To install TextAttack for further development, please run this instead.
|
||||
|
||||
pip install -e .[dev]
|
||||
|
||||
This installs additional dependencies required for development.
|
||||
|
||||
|
||||
## Optional Dependencies
|
||||
For quick installation, TextAttack only installs essential packages as dependencies (e.g. Transformers, PyTorch). However, you might need to install additional packages to run certain attacks or features.
|
||||
For example, Tensorflow and Tensorflow Hub are required to use the TextFooler attack, which was proposed in [Is BERT Really Robust? A Strong Baseline for Natural Language Attack on Text Classification and Entailment](https://arxiv.org/abs/1907.11932) by Di Jin, Zhijing Jin, Joey Tianyi Zhou, and Peter Szolovits.
|
||||
|
||||
If you are attempting to use a feature that requires additional dependencies, TextAttack will let you know which ones you need to install.
|
||||
|
||||
However, during the installation step, you can also install them together with TextAttack.
|
||||
You can install Tensorflow and its related packages by running
|
||||
|
||||
pip install textattack[tensorflow]
|
||||
|
||||
You can also install other miscellaneous optional dependencies by running
|
||||
|
||||
pip install textattack[optional]
|
||||
|
||||
To install both groups of packages, run
|
||||
|
||||
pip install textattack[tensorflow, optional]
|
||||
33
docs/0_get_started/quick_api_tour.rst
Normal file
@@ -0,0 +1,33 @@
|
||||
Quick Tour
|
||||
==========================
|
||||
|
||||
Let us have a quick look at how TextAttack can be used to carry out an adversarial attack.
|
||||
|
||||
Attacking a BERT model
|
||||
------------------------------
|
||||
Let us attack a BERT model fine-tuned for a sentiment classification task. We are going to use a model that has already been fine-tuned on the IMDB dataset using the Transformers library.
|
||||
|
||||
.. code-block::
|
||||
|
||||
>>> import transformers
|
||||
>>> model = transformers.AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
|
||||
>>> tokenizer = transformers.AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
|
||||
|
||||
|
||||
TextAttack requires both the model and the tokenizer to be wrapped by a :class:`~textattack.models.wrappers.ModelWrapper` class that implements the forward pass operation given a list of input texts. For models provided by the Transformers library, we can also simply use the :class:`~textattack.models.wrappers.HuggingFaceModelWrapper` class, which implements both the forward pass and tokenization.
|
||||
|
||||
.. code-block::
|
||||
|
||||
>>> import textattack
|
||||
>>> model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)
|
||||
|
||||
Next, let's build the attack that we want to use. TextAttack provides prebuilt attacks in the form of :class:`~textattack.attack_recipes.AttackRecipe`. For this example, we will use the TextFooler attack.
|
||||
|
||||
Let us also load the IMDB dataset using 🤗 Datasets library. TextAttack also requires that the dataset
|
||||
|
||||
.. code-block::
|
||||
|
||||
>>> import datasets
|
||||
>>> dataset = datasets.load_dataset("imdb", split="test")
|
||||
|
||||
|
||||
123
docs/1start/FAQ.md
Normal file
@@ -0,0 +1,123 @@
|
||||
Frequently Asked Questions
|
||||
========================================
|
||||
|
||||
## Via Slack: Where to Ask Questions:
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-huomtd9z-KqdHBPPu2rOP~Z8q3~urgg)!
|
||||
|
||||
|
||||
## Via CLI: `--help`
|
||||
|
||||
+ Easiest self help: `textattack --help`
|
||||
+ More concrete self help:
|
||||
- `textattack attack --help`
|
||||
- `textattack augment --help`
|
||||
- `textattack train --help`
|
||||
- `textattack peek-dataset --help`
|
||||
- `textattack list`, e.g., `textattack list search-methods`
|
||||
|
||||
|
||||
## Via our papers: More details on results
|
||||
+ [references](https://textattack.readthedocs.io/en/latest/1start/references.html)
|
||||
|
||||
|
||||
## Via readthedocs: More details on APIs
|
||||
+ [complete API reference on TextAttack](https://textattack.readthedocs.io/en/latest/apidoc/textattack.html)
|
||||
|
||||
|
||||
## More Concrete Questions:
|
||||
|
||||
|
||||
### 1. How to Train
|
||||
|
||||
For example, you can *Train our default LSTM for 50 epochs on the Yelp Polarity dataset:*
|
||||
```bash
|
||||
textattack train --model lstm --dataset yelp_polarity --batch-size 64 --epochs 50 --learning-rate 1e-5
|
||||
```
|
||||
|
||||
The training process has data augmentation built-in:
|
||||
```bash
|
||||
textattack train --model lstm --dataset rotten_tomatoes --augment eda --pct-words-to-swap .1 --transformations-per-example 4
|
||||
```
|
||||
This uses the `EasyDataAugmenter` recipe to augment the `rotten_tomatoes` dataset before training.
|
||||
|
||||
**Fine-Tune `bert-base` on the `CoLA` dataset for 5 epochs**:
|
||||
```bash
|
||||
textattack train --model bert-base-uncased --dataset glue^cola --batch-size 32 --epochs 5
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
### 2. Use Custom Models
|
||||
|
||||
TextAttack is model-agnostic! You can use `TextAttack` to analyze any model that outputs IDs, tensors, or strings. To help users, TextAttack includes pre-trained models for different common NLP tasks. This makes it easier for
|
||||
users to get started with TextAttack. It also enables a more fair comparison of attacks from the literature. A list of available pretrained models and their validation accuracies is available [here](https://textattack.readthedocs.io/en/latest/3recipes/models.html).
|
||||
|
||||
|
||||
You can easily try out an attack on a local model you prefer. To attack a pre-trained model, create a short file that loads them as variables `model` and `tokenizer`. The `tokenizer` must
|
||||
be able to transform string inputs to lists or tensors of IDs using a method called `encode()`. The
|
||||
model must take inputs via the `__call__` method.
|
||||
|
||||
##### Model from a file
|
||||
To experiment with a model you've trained, you could create the following file
|
||||
and name it `my_model.py`:
|
||||
|
||||
```python
|
||||
model = load_your_model_with_custom_code() # replace this line with your model loading code
|
||||
tokenizer = load_your_tokenizer_with_custom_code() # replace this line with your tokenizer loading code
|
||||
```
|
||||
|
||||
Then, run an attack with the argument `--model-from-file my_model.py`. The model and tokenizer will be loaded automatically.
|
||||
|
||||
TextAttack is model-agnostic - meaning it can run attacks on models implemented in any deep learning framework. Model objects must be able to take a string (or list of strings) and return an output that can be processed by the goal function. For example, machine translation models take a list of strings as input and produce a list of strings as output. Classification and entailment models return an array of scores. As long as the user's model meets this specification, the model is fit to use with TextAttack.
|
||||
|
||||
|
||||
### 3. Use Custom Datasets
|
||||
|
||||
|
||||
#### From a file
|
||||
|
||||
Loading a dataset from a file is very similar to loading a model from a file. A 'dataset' is any iterable of `(input, output)` pairs.
|
||||
The following example would load a sentiment classification dataset from file `my_dataset.py`:
|
||||
|
||||
```python
|
||||
dataset = [('Today was....', 1), ('This movie is...', 0), ...]
|
||||
```
|
||||
|
||||
You can then run attacks on samples from this dataset by adding the argument `--dataset-from-file my_dataset.py`.
|
||||
|
||||
|
||||
|
||||
#### Custom Dataset via AttackedText class
|
||||
|
||||
To allow for word replacement after a sequence has been tokenized, we include an `AttackedText` object
|
||||
which maintains both a list of tokens and the original text, with punctuation. We use this object in favor of a list of words or just raw text.
|
||||
|
||||
|
||||
#### Custom Dataset via Data Frames or other python data objects (*coming soon*)
|
||||
|
||||
|
||||
### 4. Benchmarking Attacks
|
||||
|
||||
- See our analysis paper: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples at [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368).
|
||||
|
||||
- As we emphasized in the above paper, we don't recommend directly comparing Attack Recipes out of the box.
|
||||
|
||||
- This is because attack recipes in the recent literature use different ways or thresholds to set up their constraints. Without the constraint space held constant, an increase in attack success rate could come from an improved search or transformation method or a less restrictive search space.
|
||||
|
||||
|
||||
### 5. Create Custom or New Attacks
|
||||
|
||||
The `attack_one` method in an `Attack` takes as input an `AttackedText`, and outputs either a `SuccessfulAttackResult` if it succeeds or a `FailedAttackResult` if it fails.
|
||||
|
||||
- [Here is an example of using TextAttack to create a new attack method](https://github.com/jxmorris12/second-order-adversarial-examples)
|
||||
|
||||
|
||||
We formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations. The attack attempts to perturb an input text such that the model output fulfills the goal function (i.e., indicating whether the attack is successful) and the perturbation adheres to the set of constraints (e.g., grammar constraint, semantic similarity constraint). A search method is used to find a sequence of transformations that produce a successful adversarial example.
|
||||
|
||||
|
||||
This modular design unifies adversarial attack methods into one system, enabling us to easily assemble attacks from the literature while re-using components that are shared across attacks. We provide clean, readable implementations of 16 adversarial attack recipes from the literature (see [our tool paper](https://arxiv.org/abs/2005.05909) and [our benchmark search paper](https://arxiv.org/abs/2009.06368)). For the first time, these attacks can be benchmarked, compared, and analyzed in a standardized setting.
|
||||
|
||||
|
||||
|
||||
54
docs/1start/api-design-tips.md
Normal file
@@ -0,0 +1,54 @@
|
||||
Lessons learned in designing TextAttack
|
||||
=========================================
|
||||
|
||||
|
||||
*This documentation page was adapted from [Our Workshop Paper in EMNLP 2nd Workshop for Natural Language Processing Open Source Software (NLP-OSS)](https://arxiv.org/abs/2010.01724).*
|
||||
|
||||
|
||||
TextAttack is an open-source Python toolkit for adversarial attacks, adversarial training, and data augmentation in NLP. TextAttack unites 15+ papers from the NLP adversarial attack literature into a single shared framework, with many components reused across attacks. This framework allows both researchers and developers to test and study the weaknesses of their NLP models.
|
||||
|
||||
## Challenges in Design
|
||||
|
||||
|
||||
One of the challenges for building such tools is that the tool should be flexible enough to work with many different deep learning frameworks (e.g. PyTorch, Tensorflow, Scikit-learn). Also, the tool should be able to work with datasets from various sources and in various formats. Lastly, the tool needs to be compatible with different hardware setups.
|
||||
|
||||
|
||||
## Our design tips
|
||||
|
||||
We provide the following broad advice to help other future developers create user-friendly NLP libraries in Python:
|
||||
- To become model-agnostic, implement a model wrapper class: a model is anything that takes string input(s) and returns a prediction.
|
||||
- To become data-agnostic, take dataset inputs as (input, output) pairs, where each model input is represented as an OrderedDict.
|
||||
- Do not plan for inputs (tensors, lists, etc.) to be a certain size or shape unless explicitly necessary.
|
||||
- Centralize common text operations, like parsing and string-level operations, in one class.
|
||||
- Whenever possible, cache repeated computations, including model inferences.
|
||||
- If your program runs on a single GPU, but your system contains $N$ GPUs, you can obtain a performance boost proportional to $N$ through parallelism.
|
||||
- Dynamically choose between devices. (Do not require a GPU or TPU if one is not necessary.)
|
||||
|
||||
|
||||
Our modular and extendable design allows us to reuse many components to offer 15+ different adversarial attack methods proposed by literature. Our model-agnostic and dataset-agnostic design allows users to easily run adversarial attacks against their own models built using any deep learning framework. We hope that our lessons from developing TextAttack will help others create user-friendly open-source NLP libraries.
|
||||
|
||||
|
||||
## TextAttack flowchart
|
||||
|
||||

|
||||
|
||||
|
||||
+ Here is a summary diagram of TextAttack Ecosystem
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
## More Details in Reference
|
||||
|
||||
```
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: Lessons learned in designing Python frameworks for NLP},
|
||||
author={John X. Morris and Jin Yong Yoo and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2010.01724},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.SE}
|
||||
}
|
||||
```
|
||||
|
||||
237
docs/1start/attacks4Components.md
Normal file
@@ -0,0 +1,237 @@
|
||||
Four Components of TextAttack Attacks
|
||||
========================================
|
||||
|
||||
To unify adversarial attack methods into one system, we formulate an attack as consisting of four components: a **goal function** which determines if the attack has succeeded, **constraints** defining which perturbations are valid, a **transformation** that generates potential modifications given an input, and a **search method** which traverses through the search space of possible perturbations. The attack attempts to perturb an input text such that the model output fulfills the goal function (i.e., indicating whether the attack is successful) and the perturbation adheres to the set of constraints (e.g., grammar constraint, semantic similarity constraint). A search method is used to find a sequence of transformations that produce a successful adversarial example.
|
||||
|
||||
|
||||
|
||||
This modular design enables us to easily assemble attacks from the literature while re-using components that are shared across attacks. TextAttack provides clean, readable implementations of 16 adversarial attacks from the literature. For the first time, these attacks can be benchmarked, compared, and analyzed in a standardized setting.
|
||||
|
||||
|
||||
- Two examples showing four components of two SOTA attacks
|
||||

|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Goal Functions
|
||||
|
||||
A `GoalFunction` takes as input an `AttackedText` object, scores it, and determines whether the attack has succeeded, returning a `GoalFunctionResult`.
|
||||
|
||||
### Constraints
|
||||
|
||||
A `Constraint` takes as input a current `AttackedText`, and a list of transformed `AttackedText`s. For each transformed option, it returns a boolean representing whether the constraint is met.
|
||||
|
||||
### Transformations
|
||||
|
||||
A `Transformation` takes as input an `AttackedText` and returns a list of possible transformed `AttackedText`s. For example, a transformation might return all possible synonym replacements.
|
||||
|
||||
### Search Methods
|
||||
|
||||
A `SearchMethod` takes as input an initial `GoalFunctionResult` and returns a final `GoalFunctionResult`. The search is given access to the `get_transformations` function, which takes as input an `AttackedText` object and outputs a list of possible transformations filtered by meeting all of the attack’s constraints. A search consists of successive calls to `get_transformations` until the search succeeds (determined using `get_goal_results`) or is exhausted.
|
||||
|
||||
|
||||
|
||||
### On Benchmarking Attack Recipes
|
||||
|
||||
- Please read our analysis paper: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples at [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368).
|
||||
|
||||
- As we emphasized in the above paper, we don't recommend directly comparing Attack Recipes out of the box.
|
||||
|
||||
- This is because attack recipes in the recent literature used different methods or thresholds in setting up their constraints. Without the constraint space held constant, an increase in attack success rate could come from an improved search, a better transformation method, or a less restrictive search space.
|
||||
|
||||
|
||||
|
||||
### Four components in Attack Recipes we have implemented
|
||||
|
||||
|
||||
- TextAttack provides clean, readable implementations of 16 adversarial attacks from the literature.
|
||||
|
||||
- To run an attack recipe: `textattack attack --recipe [recipe_name]`
|
||||
|
||||
|
||||
|
||||
<table style="width:100%" border="1">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th style="text-align: left;"><strong>Attack Recipe Name</strong></th>
|
||||
<th style="text-align: left;"><strong>Goal Function</strong></th>
|
||||
<th style="text-align: left; width:130px" ><strong>Constraints-Enforced</strong></th>
|
||||
<th style="text-align: left;"><strong>Transformation</strong></th>
|
||||
<th style="text-align: left;"><strong>Search Method</strong></th>
|
||||
<th style="text-align: left;"><strong>Main Idea</strong></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on classification tasks, like sentiment classification and entailment:<br></strong></td></tr>
|
||||
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td style="text-align: left;"><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>bae</code> <span class="citation" data-cites="garg2020bae"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td style="text-align: left;"><sub>BERT Masked Token Prediction</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>BERT masked language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)). </td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>bert-attack</code> <span class="citation" data-cites="li2020bertattack"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</td>
|
||||
<td style="text-align: left;"><sub>USE sentence encoding cosine similarity, Maximum number of words perturbed</td>
|
||||
<td style="text-align: left;"><sub>BERT Masked Token Prediction (with subword expansion)</td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub> (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>checklist</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td style="text-align: left;"><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>checklist distance</sub></td>
|
||||
<td style="text-align: left;"><sub>contract, extend, and substitute named entities</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Invariance testing implemented in CheckList. (["Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>clare</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>RoBERTa Masked Prediction for token swap, insert and merge</sub></td>
|
||||
<td><sub>Greedy</sub></td>
|
||||
<td ><sub>["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>deepwordbug</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td style="text-align: left;"><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>Levenshtein edit distance</sub></td>
|
||||
<td style="text-align: left;"><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354))</sub></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"> <code>fast-alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td style="text-align: left;"><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Modified, faster version of the Alzantot et al. genetic algorithm, from (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986))</sub></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>hotflip</code> (word swap) <span class="citation" data-cites="Ebrahimi2017HotFlipWA"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>Word Embedding Cosine Similarity, Part-of-speech match, Number of words perturbed</sub></td>
|
||||
<td style="text-align: left;"><sub>Gradient-Based Word Swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Beam search</sub></td>
|
||||
<td ><sub> (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>iga</code> <span class="citation" data-cites="iga-wang2019natural"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td style="text-align: left;"><sub>Percentage of words perturbed, Word embedding distance</sub></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Improved genetic algorithm-based word substitution from (["Natural Language Adversarial Attacks and Defenses in Word Level (Wang et al., 2019)"](https://arxiv.org/abs/1909.06723))</sub></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>input-reduction</code> <span class="citation" data-cites="feng2018pathologies"></span></td>
|
||||
<td style="text-align: left;"><sub>Input Reduction</sub></td>
|
||||
<td style="text-align: left;"></td>
|
||||
<td style="text-align: left;"><sub>Word deletion</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking, reducing the input while maintaining the prediction (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>kuleshov</code> <span class="citation" data-cites="Kuleshov2018AdversarialEF"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>Thought vector encoding cosine similarity, Language model similarity probability</sub></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy word swap</sub></td>
|
||||
<td ><sub>(["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)) </sub></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>pruthi</code> <span class="citation" data-cites="pruthi2019combating"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>Minimum word length, Maximum number of words perturbed</sub></td>
|
||||
<td style="text-align: left;"><sub>{Neighboring Character Swap, Character Deletion, Character Insertion, Keyboard-Based Character Swap}</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy search</sub></td>
|
||||
<td ><sub>Simulates common typos (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>pso</code> <span class="citation" data-cites="pso-zang-etal-2020-word"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"></td>
|
||||
<td style="text-align: left;"><sub>HowNet Word Swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Particle Swarm Optimization</sub></td>
|
||||
<td ><sub>(["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)) </sub></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>pwws</code> <span class="citation" data-cites="pwws-ren-etal-2019-generating"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"></td>
|
||||
<td style="text-align: left;"><sub>WordNet-based synonym swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR (saliency)</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/))</sub> </td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td style="text-align: left;"><code>textbugger</code> : (black-box) <span class="citation" data-cites="Li2019TextBuggerGA"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted Classification</sub></td>
|
||||
<td style="text-align: left;"><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td style="text-align: left;"><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>(["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271))</sub></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>textfooler</code> <span class="citation" data-cites="Jin2019TextFooler"></span></td>
|
||||
<td style="text-align: left;"><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td style="text-align: left;"><sub>Word Embedding Distance, Part-of-speech match, USE sentence encoding cosine similarity</sub></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932))</sub> </td>
|
||||
</tr>
|
||||
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on sequence-to-sequence models: <br></strong></td></tr>
|
||||
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>morpheus</code> <span class="citation" data-cites="morpheus-tan-etal-2020-morphin"></span></td>
|
||||
<td style="text-align: left;"><sub>Minimum BLEU Score</sub> </td>
|
||||
<td style="text-align: left;"></td>
|
||||
<td style="text-align: left;"><sub>Inflection Word Swap</sub> </td>
|
||||
<td style="text-align: left;"><sub>Greedy search</sub> </td>
|
||||
<td ><sub>Greedily replaces words with their inflections with the goal of minimizing BLEU score (["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf))</sub> </td>
|
||||
</tr>
|
||||
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td style="text-align: left;"><code>seq2sick</code> :(black-box) <span class="citation" data-cites="cheng2018seq2sick"></span></td>
|
||||
<td style="text-align: left;"><sub>Non-overlapping output</sub> </td>
|
||||
<td style="text-align: left;"></td>
|
||||
<td style="text-align: left;"><sub>Counter-fitted word embedding swap</sub> </td>
|
||||
<td style="text-align: left;"><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)) </sub> </td>
|
||||
</tr>
|
||||
|
||||
|
||||
</tbody>
|
||||
</font>
|
||||
</table>
|
||||
|
||||
|
||||
|
||||
- Citations
|
||||
|
||||
```
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP},
|
||||
author={John X. Morris and Eli Lifland and Jin Yong Yoo and Jake Grigsby and Di Jin and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2005.05909},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
48
docs/1start/benchmark-search.md
Normal file
@@ -0,0 +1,48 @@
|
||||
Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
=========================================================================
|
||||
|
||||
|
||||
*This documentation page was adapted from our paper in [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368).*
|
||||
|
||||
|
||||
### Title: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
|
||||
|
||||
- Abstract: We study the behavior of several black-box search algorithms used for generating adversarial examples for natural language processing (NLP) tasks. We perform a fine-grained analysis of three elements relevant to search: search algorithm, search space, and search budget. When new search methods are proposed in past work, the attack search space is often modified alongside the search method. Without ablation studies benchmarking the search algorithm change with the search space held constant, an increase in attack success rate could come from an improved search method or a less restrictive search space. Additionally, many previous studies fail to properly consider the search algorithms' run-time cost, which is essential for downstream tasks like adversarial training. Our experiments provide a reproducible benchmark of search algorithms across a variety of search spaces and query budgets to guide future research in adversarial NLP. Based on our experiments, we recommend greedy attacks with word importance ranking when under a time constraint or attacking long inputs, and either beam search or particle swarm optimization otherwise.
|
||||
|
||||
|
||||
+ Citations:
|
||||
```
|
||||
@misc{yoo2020searching,
|
||||
title={Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples},
|
||||
author={Jin Yong Yoo and John X. Morris and Eli Lifland and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2009.06368},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
### Our search benchmarking result Github
|
||||
|
||||
TextAttack-Search-Benchmark Github [https://github.com/QData/TextAttack-Search-Benchmark](https://github.com/QData/TextAttack-Search-Benchmark)
|
||||
|
||||
### Our benchmarking results on comparing search methods used in the past attacks.
|
||||
|
||||
|
||||

|
||||
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
|
||||

|
||||
|
||||
|
||||
### Benchmarking Attack Recipes
|
||||
|
||||
- As we emphasized in the above paper, we don't recommend directly comparing Attack Recipes out of the box.
|
||||
|
||||
- This is because attack recipes in the recent literature used different methods or thresholds in setting up their constraints. Without the constraint space held constant, an increase in attack success rate could come from an improved search or transformation method, or a less restrictive search space.
|
||||
45
docs/1start/quality-SOTA-recipes.md
Normal file
@@ -0,0 +1,45 @@
|
||||
On Quality of Generated Adversarial Examples and How to Set Attack Constraints
|
||||
==============================================================================
|
||||
|
||||
|
||||
### Title: Reevaluating Adversarial Examples in Natural Language
|
||||
|
||||
- Paper [EMNLP Findings](https://arxiv.org/abs/2004.14174)
|
||||
|
||||
- Abstract: State-of-the-art attacks on NLP models lack a shared definition of what constitutes a successful attack. We distill ideas from past work into a unified framework: a successful natural language adversarial example is a perturbation that fools the model and follows some linguistic constraints. We then analyze the outputs of two state-of-the-art synonym substitution attacks. We find that their perturbations often do not preserve semantics, and 38% introduce grammatical errors. Human surveys reveal that to successfully preserve semantics, we need to significantly increase the minimum cosine similarities between the embeddings of swapped words and between the sentence encodings of original and perturbed sentences. With constraints adjusted to better preserve semantics and grammaticality, the attack success rate drops by over 70 percentage points.
|
||||
|
||||
|
||||
### Our Github on Reevaluation: [Reevaluating-NLP-Adversarial-Examples Github](https://github.com/QData/Reevaluating-NLP-Adversarial-Examples)
|
||||
|
||||
|
||||
- Citations
|
||||
```
|
||||
@misc{morris2020reevaluating,
|
||||
title={Reevaluating Adversarial Examples in Natural Language},
|
||||
author={John X. Morris and Eli Lifland and Jack Lanchantin and Yangfeng Ji and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2004.14174},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Some of our evaluation results on quality of two SOTA attack recipes
|
||||
|
||||
- As we have emphasized in this paper, we recommend researchers and users to be EXTREMELY mindful of the quality of generated adversarial examples in natural language.
|
||||
- We recommend that the field use human-evaluation-derived thresholds for setting up constraints.
|
||||
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
### Some of our evaluation results on how to set constraints to evaluate NLP model's adversarial robustness
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
|
||||

|
||||
65
docs/1start/references.md
Normal file
@@ -0,0 +1,65 @@
|
||||
How to Cite TextAttack
|
||||
===========================
|
||||
|
||||
## Main Paper: TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP
|
||||
|
||||
- Paper [EMNLP Demo](https://arxiv.org/abs/2005.05909)
|
||||
|
||||
- Abstract: An adversarial example is an input designed to fool a machine learning model. While there has been substantial research using adversarial attacks to analyze NLP models, each attack is implemented in its own code repository. It remains challenging to develop NLP attacks and utilize them to improve model performance. This paper introduces TextAttack, a Python framework for adversarial attacks, data augmentation, and adversarial training in NLP. TextAttack builds attacks from four components: a goal function, a set of constraints, a transformation, and a search method. TextAttack's modular design enables researchers to easily construct attacks from combinations of novel and existing components. TextAttack provides implementations of 16 adversarial attacks from the literature and supports a variety of models and datasets, including BERT and other transformers, and all GLUE tasks. TextAttack also includes data augmentation and adversarial training modules for using components of adversarial attacks to improve model accuracy and robustness. TextAttack is democratizing NLP: anyone can try data augmentation and adversarial training on any model or dataset, with just a few lines of code. Code and tutorials are available at this site.
|
||||
|
||||
### Our Github on TextAttack: [https://github.com/QData/TextAttack](https://github.com/QData/TextAttack)
|
||||
|
||||
- Citations
|
||||
|
||||
```
|
||||
@misc{morris2020textattack,
|
||||
title={TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP},
|
||||
author={John X. Morris and Eli Lifland and Jin Yong Yoo and Jake Grigsby and Di Jin and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2005.05909},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Our Analysis paper: Reevaluating Adversarial Examples in Natural Language
|
||||
|
||||
- Paper [EMNLP Findings](https://arxiv.org/abs/2004.14174)
|
||||
|
||||
- Abstract: State-of-the-art attacks on NLP models lack a shared definition of what constitutes a successful attack. We distill ideas from past work into a unified framework: a successful natural language adversarial example is a perturbation that fools the model and follows some linguistic constraints. We then analyze the outputs of two state-of-the-art synonym substitution attacks. We find that their perturbations often do not preserve semantics, and 38% introduce grammatical errors. Human surveys reveal that to successfully preserve semantics, we need to significantly increase the minimum cosine similarities between the embeddings of swapped words and between the sentence encodings of original and perturbed sentences. With constraints adjusted to better preserve semantics and grammaticality, the attack success rate drops by over 70 percentage points.
|
||||
|
||||
### Our Github on Reevaluation: [Reevaluating-NLP-Adversarial-Examples Github](https://github.com/QData/Reevaluating-NLP-Adversarial-Examples)
|
||||
|
||||
- Citations
|
||||
```
|
||||
@misc{morris2020reevaluating,
|
||||
title={Reevaluating Adversarial Examples in Natural Language},
|
||||
author={John X. Morris and Eli Lifland and Jack Lanchantin and Yangfeng Ji and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2004.14174},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
|
||||
## Our Analysis paper: Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples
|
||||
|
||||
- Paper [EMNLP BlackBoxNLP](https://arxiv.org/abs/2009.06368)
|
||||
|
||||
- Abstract: We study the behavior of several black-box search algorithms used for generating adversarial examples for natural language processing (NLP) tasks. We perform a fine-grained analysis of three elements relevant to search: search algorithm, search space, and search budget. When new search methods are proposed in past work, the attack search space is often modified alongside the search method. Without ablation studies benchmarking the search algorithm change with the search space held constant, an increase in attack success rate could come from an improved search method or a less restrictive search space. Additionally, many previous studies fail to properly consider the search algorithms' run-time cost, which is essential for downstream tasks like adversarial training. Our experiments provide a reproducible benchmark of search algorithms across a variety of search spaces and query budgets to guide future research in adversarial NLP. Based on our experiments, we recommend greedy attacks with word importance ranking when under a time constraint or attacking long inputs, and either beam search or particle swarm optimization otherwise.
|
||||
|
||||
### Our Github on benchmarking: [TextAttack-Search-Benchmark Github](https://github.com/QData/TextAttack-Search-Benchmark)
|
||||
|
||||
|
||||
- Citations:
|
||||
```
|
||||
@misc{yoo2020searching,
|
||||
title={Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples},
|
||||
author={Jin Yong Yoo and John X. Morris and Eli Lifland and Yanjun Qi},
|
||||
year={2020},
|
||||
eprint={2009.06368},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
233
docs/1start/support.md
Normal file
@@ -0,0 +1,233 @@
|
||||
How can I contribute to TextAttack?
|
||||
============================================
|
||||
|
||||
*This documentation page is adapted from [TextAttack Github CONTRIBUTING.md](https://github.com/QData/TextAttack/blob/master/CONTRIBUTING.md); see that file for detailed information on contributing.*
|
||||
|
||||
|
||||
We welcome contributions from all members of the community– and there are lots
|
||||
of ways to help without editing the code! Answering questions, helping others,
|
||||
reaching out and improving the documentation are immensely valuable to the
|
||||
community.
|
||||
|
||||
It also helps us if you spread the word: reference the library from blog posts
|
||||
on the awesome projects it made possible, shout out on Twitter every time it has
|
||||
helped you, or simply star the repo to say "thank you".
|
||||
|
||||
## Slack Channel
|
||||
|
||||
For help and realtime updates related to TextAttack, please [join the TextAttack Slack](https://join.slack.com/t/textattack/shared_invite/zt-ez3ts03b-Nr55tDiqgAvCkRbbz8zz9g)!
|
||||
|
||||
## Ways to contribute
|
||||
|
||||
There are lots of ways you can contribute to TextAttack:
|
||||
* Submitting issues on Github to report bugs or make feature requests
|
||||
* Fixing outstanding issues with the existing code
|
||||
* Implementing new features
|
||||
* Adding support for new models and datasets
|
||||
* Contributing to the examples or to the documentation
|
||||
|
||||
*All are equally valuable to the community.*
|
||||
|
||||
## Submitting a new issue or feature request
|
||||
|
||||
Do your best to follow these guidelines when submitting an issue or a feature
|
||||
request. It will make it easier for us to come back to you quickly and with good
|
||||
feedback.
|
||||
|
||||
### Found a bug?
|
||||
|
||||
TextAttack can remain robust and reliable thanks to users who notify us of
|
||||
the problems they encounter. So thank you for [reporting an issue](https://github.com/QData/TextAttack/issues).
|
||||
|
||||
We also have a suite of tests intended to detect bugs before they enter the
|
||||
codebase. That said, they still happen (Turing completeness and all) so it's up
|
||||
to you to report the bugs you find! We would really appreciate it if you could
|
||||
make sure the bug was not already reported (use the search bar on Github under
|
||||
Issues).
|
||||
|
||||
To help us fix your issue quickly, please follow these steps:
|
||||
|
||||
* Include your **OS type and version**, the versions of **Python**, **PyTorch** and
|
||||
**Tensorflow** when applicable;
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s;
|
||||
* Provide the *full* traceback if an exception is raised.
|
||||
|
||||
### Do you want to add your model?
|
||||
|
||||
Awesome! Please provide the following information:
|
||||
|
||||
* Short description of the model and link to the paper;
|
||||
* Link to the implementation if it is open-source;
|
||||
* Link to the model weights if they are available.
|
||||
|
||||
If you are willing to contribute the model yourself, let us know so we can best
|
||||
guide you. We can host your model on our S3 server, but if you trained your
|
||||
model using `transformers`, it's better if you host your model on their
|
||||
[model hub](https://huggingface.co/models).
|
||||
|
||||
### Do you want a new feature: a component, a recipe, or something else?
|
||||
|
||||
A world-class feature request addresses the following points:
|
||||
|
||||
1. Motivation first:
|
||||
* Is it related to a problem/frustration with the library? If so, please explain
|
||||
why. Providing a code snippet that demonstrates the problem is best.
|
||||
* Is it related to something you would need for a project? We'd love to hear
|
||||
about it!
|
||||
* Is it something you worked on and think could benefit the community?
|
||||
Awesome! Tell us what problem it solved for you.
|
||||
2. Write a *full paragraph* describing the feature;
|
||||
3. Provide a **code snippet** that demonstrates its future use;
|
||||
4. In case this is related to a paper, please attach a link;
|
||||
5. Attach any additional information (drawings, screenshots, etc.) you think may help.
|
||||
|
||||
|
||||
## Start contributing! (Pull Requests)
|
||||
|
||||
Before writing code, we strongly advise you to search through the existing PRs or
|
||||
issues to make sure that nobody is already working on the same thing. If you are
|
||||
unsure, it is always a good idea to open an issue to get some feedback.
|
||||
|
||||
You will need basic `git` proficiency to be able to contribute to
|
||||
`textattack`. `git` is not the easiest tool to use but it has the greatest
|
||||
manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
|
||||
Git](https://git-scm.com/book/en/v2) is a very good reference.
|
||||
|
||||
Follow these steps to start contributing:
|
||||
|
||||
1. Fork the [repository](https://github.com/QData/TextAttack) by
|
||||
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
|
||||
under your GitHub user account.
|
||||
|
||||
2. Clone your fork to your local disk, and add the base repository as a remote:
|
||||
|
||||
```bash
|
||||
$ git clone git@github.com:<your Github handle>/TextAttack.git
|
||||
$ cd TextAttack
|
||||
$ git remote add upstream https://github.com/QData/TextAttack
|
||||
```
|
||||
|
||||
3. Create a new branch to hold your development changes:
|
||||
|
||||
```bash
|
||||
$ git checkout -b a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
**do not** work on the `master` branch.
|
||||
|
||||
4. Set up a development environment by running the following commands in a virtual environment:
|
||||
|
||||
|
||||
```bash
|
||||
$ cd TextAttack
|
||||
$ pip install -e ".[dev]"
|
||||
$ pip install black isort pytest pytest-xdist
|
||||
```
|
||||
|
||||
This will install `textattack` in editable mode and install `black` and
|
||||
`isort`, packages we use for code formatting.
|
||||
|
||||
(If TextAttack was already installed in the virtual environment, remove
|
||||
it with `pip uninstall textattack` before reinstalling it in editable
|
||||
mode with the `-e` flag.)
|
||||
|
||||
5. Develop the features on your branch.
|
||||
|
||||
As you work on the features, you should make sure that the test suite
|
||||
passes:
|
||||
|
||||
```bash
|
||||
$ make test
|
||||
```
|
||||
|
||||
(or just simply `pytest`.)
|
||||
|
||||
> **Tip:** if you're fixing just one or two tests, you can run only the last tests that failed using `pytest --lf`.
|
||||
|
||||
`textattack` relies on `black` and `isort` to format its source code
|
||||
consistently. After you make changes, format them with:
|
||||
|
||||
```bash
|
||||
$ make format
|
||||
```
|
||||
|
||||
You can run quality checks to make sure your code is formatted properly
|
||||
using this command:
|
||||
|
||||
```bash
|
||||
$ make lint
|
||||
```
|
||||
|
||||
Once you're happy with your changes, add changed files using `git add` and
|
||||
make a commit with `git commit` to record your changes locally:
|
||||
|
||||
```bash
|
||||
$ git add modified_file.py
|
||||
$ git commit
|
||||
```
|
||||
|
||||
Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
It is a good idea to sync your copy of the code with the original
|
||||
repository regularly. This way you can quickly account for changes:
|
||||
|
||||
```bash
|
||||
$ git fetch upstream
|
||||
$ git rebase upstream/master
|
||||
```
|
||||
|
||||
Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
$ git push -u origin a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
6. Add documentation.
|
||||
|
||||
Our docs are in the `docs/` folder. Thanks to `sphinx-automodule`, adding
|
||||
documentation for a new code file should just be two lines. Our docs will
|
||||
automatically generate from the comments you added to your code. If you're
|
||||
adding an attack recipe, add a reference in `attack_recipes.rst`.
|
||||
If you're adding a transformation, add a reference in `transformation.rst`, etc.
|
||||
|
||||
You can build the docs and view the updates using `make docs`. If you're
|
||||
adding a tutorial or something where you want to update the docs multiple
|
||||
times, you can run `make docs-auto`. This will run a server using
|
||||
`sphinx-autobuild` that should automatically reload whenever you change
|
||||
a file.
|
||||
|
||||
7. Once you are satisfied (**and the checklist below is happy too**), go to the
|
||||
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
|
||||
to the project maintainers for review.
|
||||
|
||||
8. It's ok if maintainers ask you for changes. It happens to core contributors
|
||||
too! So everyone can see the changes in the Pull request, work in your local
|
||||
branch and push the changes to your fork. They will automatically appear in
|
||||
the pull request.
|
||||
|
||||
|
||||
### Checklist
|
||||
|
||||
1. The title of your pull request should be a summary of its contribution.
|
||||
2. If your pull request addresses an issue, please mention the issue number in
|
||||
the pull request description to make sure they are linked (and people
|
||||
consulting the issue know you are working on it);
|
||||
3. To indicate a work in progress please mark it as a draft on Github.
|
||||
4. Make sure existing tests pass.
|
||||
5. Add relevant tests. No quality testing = no merge.
|
||||
6. All public methods must have informative docstrings that work nicely with sphinx.
|
||||
|
||||
### Tests
|
||||
|
||||
You can run TextAttack tests with `pytest`. Just type `make test`.
|
||||
|
||||
|
||||
#### This guide was heavily inspired by the awesome [transformers guide to contributing](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md)
|
||||
|
||||
+ More details on how we design our APIs at [API-Design-Tips](https://textattack.readthedocs.io/en/latest/1start/api-design-tips.html)
|
||||
+ Here is a summary diagram of TextAttack Ecosystem
|
||||
|
||||

|
||||
|
||||
|
||||
22
docs/1start/talks-visualization.md
Normal file
@@ -0,0 +1,22 @@
|
||||
TextAttack Presentations
|
||||
===========================
|
||||
|
||||
|
||||
## 2020: Jack Morris' summary tutorial talk on TextAttack
|
||||
|
||||
- On Jul 31, 2020, Jack Morris gave an invited talk at Weights & Biases research salon on "TextAttack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP"
|
||||
|
||||
- [Youtube Talk link](https://www.youtube.com/watch?v=22Q3f7Fb110)
|
||||
|
||||
|
||||
## 2021: Dr. Qi's summary tutorial talk on TextAttack
|
||||
|
||||
- On April 14 2021, Prof. Qi gave an invited talk at the UVA Human and Machine Intelligence Seminar on "Generalizing Adversarial Examples to Natural Language Processing"
|
||||
|
||||
- [TalkSlide](https://qdata.github.io/qdata-page/pic/20210414-HMI-textAttack.pdf)
|
||||
|
||||
|
||||
|
||||
## We have built a new WebDemo for Visualizing TextAttack generated Examples
|
||||
|
||||
- [TextAttack-WebDemo Github](https://github.com/QData/TextAttack-WebDemo)
|
||||
104
docs/1start/what_is_an_adversarial_attack.md
Normal file
@@ -0,0 +1,104 @@
|
||||
What is an adversarial attack in NLP?
|
||||
=======================================
|
||||
|
||||
*This documentation page was adapted from [a blog post we wrote about adversarial examples in NLP](https://towardsdatascience.com/what-are-adversarial-examples-in-nlp-f928c574478e).*
|
||||
|
||||
This page is intended to clear up some terminology for those unclear on the meaning of the term ‘adversarial attack’ in natural language processing. We'll try and give an intro to NLP adversarial attacks, try to clear up lots of the scholarly jargon, and give a high-level overview of the uses of TextAttack.
|
||||
|
||||
This article talks about the concept of adversarial examples as applied to NLP (natural language processing). The terminology can be confusing at times, so we’ll begin with an overview of the language used to talk about adversarial examples and adversarial attacks. Then, we’ll talk about TextAttack, an open-source Python library for adversarial examples, data augmentation, and adversarial training in NLP that’s changing the way people research the robustness of NLP models. We’ll conclude with some thoughts on the future of this area of research.
|
||||
|
||||
An adversarial example is an input designed to fool a machine learning model [1]. In TextAttack, we are concerned with adversarial perturbations, changes to benign inputs that cause them to be misclassified by models. ‘Adversarial perturbation’ is more specific than just ‘adversarial example’, as the class of all adversarial examples also includes inputs designed from scratch to fool machine learning models. TextAttack attacks generate a specific kind of adversarial example: adversarial perturbations.
|
||||
|
||||
As alluded to above, an adversarial attack on a machine learning model is a process for generating adversarial perturbations. TextAttack attacks iterate through a dataset (list of inputs to a model), and for each correctly predicted sample, search for an adversarial perturbation (we’ll talk more about this later). If an example is incorrectly predicted to begin with, it is not attacked, since the input already fools the model. TextAttack breaks the attack process up into stages, and provides a [system of interchangeable components](/2notebook/1_Introduction_and_Transformations.ipynb) for managing each stage of the attack.
|
||||
|
||||
Adversarial robustness is a measurement of a model’s susceptibility to adversarial examples. TextAttack often measures robustness using attack success rate, the percentage of attack attempts that produce successful adversarial examples, or after-attack accuracy, the percentage of inputs that are both correctly classified and unsuccessfully attacked.
|
||||
|
||||
To improve our numeracy when talking about adversarial attacks, let’s take a look at a concrete example of some attack results:
|
||||
|
||||

|
||||
|
||||
*These results come from using TextAttack to run the DeepWordBug attack on an LSTM trained on the Rotten Tomatoes Movie Review sentiment classification dataset, using 200 total examples.*
|
||||
|
||||
This attack was run on 200 examples. Out of those 200, the model initially predicted 43 of them incorrectly; this leads to an accuracy of 157/200 or 78.5%. TextAttack ran the adversarial attack process on the remaining 157 examples to try to find a valid adversarial perturbation for each one. Out of those 157, 29 attacks failed, leading to a success rate of 128/157 or 81.5%. Another way to articulate this is that the model correctly predicted and resisted attacks for 29 out of 200 total samples, leading to an accuracy under attack (or “after-attack accuracy”) of 29/200 or 14.5%.
|
||||
|
||||
TextAttack also logged some other helpful statistics for this attack. Among the 157 successful attacks, on average, the attack changed 15.5% of words to alter the prediction, and made 32.7 queries to find a successful perturbation. Across all 200 inputs, the average number of words was 18.97.
|
||||
|
||||
Now that we have provided some terminology, let’s look at some concrete examples of proposed adversarial attacks. We will give some background on adversarial attacks in other domains and then examples of different attacks in NLP.
|
||||
|
||||
## Terminology
|
||||
|
||||
Research in 2013 [2] showed neural networks are vulnerable to adversarial examples. These original adversarial attacks apply a small, well-chosen perturbation to an image to fool an image classifier. In this example, the classifier correctly predicts the original image to be a pig. After a small perturbation, however, the classifier predicts the pig to be an airliner (with extremely high confidence!).
|
||||
|
||||

|
||||
|
||||
*An adversarial example for an ImageNet classifier. Superimposing a tiny (but deliberate) amount of noise causes the model to classify this pig as an airliner.*
|
||||
|
||||
|
||||
These adversarial examples exhibit a serious security flaw in deep neural networks. Therefore adversarial examples pose a security problem for downstream systems that include neural networks, including text-to-speech systems and self-driving cars. Adversarial examples are useful outside of security: researchers have used adversarial examples to improve and interpret deep learning models.
|
||||
|
||||
As you might imagine, adversarial examples in deep neural networks have caught the attention of many researchers around the world, and this 2013 paper spawned an explosion of research into the topic.
|
||||
|
||||
|
||||

|
||||
<br>
|
||||
*The number of papers related to ‘adversarial examples’ on arxiv.org between 2014 and 2020. [Graph from https://nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html]*
|
||||
|
||||
|
||||
Many new, more sophisticated adversarial attacks have been proposed, along with “defenses,” procedures for training neural networks that are resistant (“robust”) against adversarial attacks. Training deep neural networks that are highly accurate while remaining robust to adversarial attacks remains an open problem [3].
|
||||
|
||||
Naturally, many have wondered about what adversarial examples for NLP models might be. No natural analogy to the adversarial examples in computer vision (like the pig-to-airliner bamboozle above) exists for NLP. After all, two sequences of text cannot be truly indistinguishable without being the same. (In the above example, the pig-classified input and its airliner-classified perturbation are literally indistinguishable to the human eye.)
|
||||
|
||||
|
||||
## Adversarial Examples in NLP
|
||||
|
||||

|
||||
|
||||
*Two different ideas of adversarial examples in NLP. These results were generated using TextAttack on an LSTM trained on the Rotten Tomatoes Movie Review sentiment classification dataset. These are *real* adversarial examples, generated using the DeepWordBug and TextFooler attacks. To generate them yourself, after installing TextAttack, run ‘textattack attack — model lstm-mr — num-examples 1 — recipe RECIPE — num-examples-offset 19’ where RECIPE is ‘deepwordbug’ or ‘textfooler’.*
|
||||
|
||||
Because two text sequences are never indistinguishable, researchers have proposed various alternative definitions for adversarial examples in NLP. We find it useful to group adversarial attacks based on their chosen definitions of adversarial examples.
|
||||
|
||||
Although attacks in NLP cannot find an adversarial perturbation that is literally indistinguishable to the original input, they can find a perturbation that is very similar. Our mental model groups NLP adversarial attacks into two groups, based on their notions of ‘similarity’:
|
||||
|
||||
|
||||
**Visual similarity.** Some NLP attacks consider an adversarial example to be a text sequence that looks very similar to the original input -- perhaps just a few character changes away -- but receives a different prediction from the model. Some of these adversarial attacks try to change as few characters as possible to change the model’s prediction; others try to introduce realistic ‘typos’ similar to those that humans would make.
|
||||
|
||||
Some researchers have raised concern that these attacks can be defended against quite effectively, either by using a rule-based spellchecker or a sequence-to-sequence model trained to correct adversarial typos.
|
||||
TextAttack attack recipes that fall under this category: deepwordbug, hotflip, pruthi, textbugger\*, morpheus
|
||||
|
||||
|
||||
**Semantic similarity.** Other NLP attacks consider an adversarial example valid if it is semantically indistinguishable from the original input. In other words, if the perturbation is a paraphrase of the original input, but the input and perturbation receive different predictions, then the input is a valid adversarial example.
|
||||
|
||||
Some NLP models are trained to measure semantic similarity. Adversarial attacks based on the notion of semantic indistinguishability typically use another NLP model to enforce that perturbations are grammatically valid and semantically similar to the original input.
|
||||
|
||||
TextAttack attack recipes that fall under this category: alzantot, bae, bert-attack, fast-alzantot, iga, kuleshov, pso, pwws, textbugger\*, textfooler
|
||||
|
||||
\*The textbugger attack generates perturbations using both typo-like character edits and synonym substitutions. It could be considered to use both definitions of indistinguishability.
|
||||
|
||||
## Generating adversarial examples with TextAttack
|
||||
|
||||
TextAttack supports adversarial attacks based on both definitions of indistinguishability. Both types of attacks are useful for training more robust NLP models. Our goal is to enable research into adversarial examples in NLP by providing a set of intuitive, reusable components for building as many attacks from the literature as possible.
|
||||
|
||||
We define the adversarial attack processing using four components: a goal function, constraints, transformation, and search method. (We’ll go into this in detail in a future post!) These components allow us to reuse many things between attacks from different research papers. They also make it easy to develop methods for NLP data augmentation.
|
||||
|
||||
TextAttack also includes code for loading popular NLP datasets and training models on them. By integrating this training code with adversarial attacks and data augmentation techniques, TextAttack provides an environment for researchers to test adversarial training in many different scenarios.
|
||||
|
||||
The following figure shows an overview of the main functionality of TextAttack:
|
||||
<br>
|
||||

|
||||
|
||||
|
||||
|
||||
## The future of adversarial attacks in NLP
|
||||
|
||||
We are excited to see the impact that TextAttack has on the NLP research community! One thing we would like to see research in is the combination of components from various papers. TextAttack makes it easy to run ablation studies to compare the effects of swapping out, say, search method from paper A with the search method from paper B, without making any other changes. (And these tests can be run across dozens of pre-trained models and datasets with no downloads!)
|
||||
|
||||
We hope that use of TextAttack leads to more diversity in adversarial attacks. One thing that all current adversarial attacks have in common is that they make substitutions on the word or character level. We hope that future adversarial attacks in NLP can broaden scope to try different approaches to phrase-level replacements as well as full-sentence paraphrases. Additionally, there has been a focus on English in the adversarial attack literature; we look forward to seeing adversarial attacks applied to more languages.
|
||||
|
||||
To get started with TextAttack, you might want to start with one of our [introductory tutorials](/2notebook/0_End_to_End.ipynb).
|
||||
|
||||
|
||||
.. [1] “Attacking Machine Learning with Adversarial Examples”, Goodfellow, 2013. [https://openai.com/blog/adversarial-example-research/]
|
||||
|
||||
.. [2] “Intriguing properties of neural networks”, Szegedy, 2013. [https://arxiv.org/abs/1312.6199]
|
||||
|
||||
.. [3] “Robustness May Be at Odds with Accuracy”, Tsipras, 2018. [https://arxiv.org/abs/1805.12152]
|
||||
9842
docs/2notebook/0_End_to_End.ipynb
Normal file
923
docs/2notebook/1_Introduction_and_Transformations.ipynb
Normal file
@@ -0,0 +1,923 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "xK7B3NnYaPR6"
|
||||
},
|
||||
"source": [
|
||||
"# The TextAttack ecosystem: search, transformations, and constraints"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "9rY3w9b2aPSG"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/1_Introduction_and_Transformations.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/1_Introduction_and_Transformations.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "urhoEHXJf8YK"
|
||||
},
|
||||
"source": [
|
||||
"Installation of Attack-api branch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "HTe13zUKaPSH"
|
||||
},
|
||||
"source": [
|
||||
"An attack in TextAttack consists of four parts.\n",
|
||||
"\n",
|
||||
"### Goal function\n",
|
||||
"\n",
|
||||
"The **goal function** determines if the attack is successful or not. One common goal function is **untargeted classification**, where the attack tries to perturb an input to change its classification. \n",
|
||||
"\n",
|
||||
"### Search method\n",
|
||||
"The **search method** explores the space of potential transformations and tries to locate a successful perturbation. Greedy search, beam search, and brute-force search are all examples of search methods.\n",
|
||||
"\n",
|
||||
"### Transformation\n",
|
||||
"A **transformation** takes a text input and transforms it, for example replacing words or phrases with similar ones, while trying not to change the meaning. Paraphrase and synonym substitution are two broad classes of transformations.\n",
|
||||
"\n",
|
||||
"### Constraints\n",
|
||||
"Finally, **constraints** determine whether or not a given transformation is valid. Transformations don't perfectly preserve syntax or semantics, so additional constraints can increase the probability that these qualities are preserved from the source to adversarial example. There are many types of constraints: overlap constraints that measure edit distance, syntactical constraints check part-of-speech and grammar errors, and semantic constraints like language models and sentence encoders."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "tiXXNJO4aPSI"
|
||||
},
|
||||
"source": [
|
||||
"### A custom transformation\n",
|
||||
"\n",
|
||||
"This lesson explains how to create a custom transformation. In TextAttack, many transformations involve *word swaps*: they take a word and try and find suitable substitutes. Some attacks focus on replacing characters with neighboring characters to create \"typos\" (these don't intend to preserve the grammaticality of inputs). Other attacks rely on semantics: they take a word and try to replace it with semantic equivalents.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Banana word swap \n",
|
||||
"\n",
|
||||
"As an introduction to writing transformations for TextAttack, we're going to try a very simple transformation: one that replaces any given word with the word 'banana'. In TextAttack, there's an abstract `WordSwap` class that handles the heavy lifting of breaking sentences into words and avoiding replacement of stopwords. We can extend `WordSwap` and implement a single method, `_get_replacement_words`, to indicate to replace each word with 'banana'. 🍌"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"id": "8r7zviXkaPSJ"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.transformations import WordSwap\n",
|
||||
"\n",
|
||||
"class BananaWordSwap(WordSwap):\n",
|
||||
" \"\"\" Transforms an input by replacing any word with 'banana'.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" # We don't need a constructor, since our class doesn't require any parameters.\n",
|
||||
"\n",
|
||||
" def _get_replacement_words(self, word):\n",
|
||||
" \"\"\" Returns 'banana', no matter what 'word' was originally.\n",
|
||||
" \n",
|
||||
" Returns a list with one item, since `_get_replacement_words` is intended to\n",
|
||||
" return a list of candidate replacement words.\n",
|
||||
" \"\"\"\n",
|
||||
" return ['banana']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "RHGvZxenaPSJ"
|
||||
},
|
||||
"source": [
|
||||
"### Using our transformation\n",
|
||||
"\n",
|
||||
"Now we have the transformation chosen, but we're missing a few other things. To complete the attack, we need to choose the **search method** and **constraints**. And to use the attack, we need a **goal function**, a **model** and a **dataset**. (The goal function indicates the task our model performs – in this case, classification – and the type of attack – in this case, we'll perform an untargeted attack.)\n",
|
||||
"\n",
|
||||
"### Creating the goal function, model, and dataset\n",
|
||||
"We are performing an untargeted attack on a classification model, so we'll use the `UntargetedClassification` class. For the model, let's use BERT trained for news classification on the AG News dataset. We've pretrained several models and uploaded them to the [HuggingFace Model Hub](https://huggingface.co/textattack). TextAttack integrates with any model from HuggingFace's Model Hub and any dataset from HuggingFace's `datasets`!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "wREwoDkMaPSK",
|
||||
"outputId": "4a8f74c7-c51a-4216-8435-be52d2165d4c"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n",
|
||||
"Using custom data configuration default\n",
|
||||
"Reusing dataset ag_news (/p/qdata/jy2ma/.cache/textattack/datasets/ag_news/default/0.0.0/0eeeaaa5fb6dffd81458e293dfea1adba2881ffcbdc3fb56baeb5a892566c29a)\n",
|
||||
"textattack: Loading \u001b[94mdatasets\u001b[0m dataset \u001b[94mag_news\u001b[0m, split \u001b[94mtest\u001b[0m.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the model\n",
|
||||
"import transformers\n",
|
||||
"from textattack.models.wrappers import HuggingFaceModelWrapper\n",
|
||||
"\n",
|
||||
"model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n",
|
||||
"tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n",
|
||||
"\n",
|
||||
"model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n",
|
||||
"\n",
|
||||
"# Create the goal function using the model\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"goal_function = UntargetedClassification(model_wrapper)\n",
|
||||
"\n",
|
||||
"# Import the dataset\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"dataset = HuggingFaceDataset(\"ag_news\", None, \"test\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "sfGMvqcTaPSN"
|
||||
},
|
||||
"source": [
|
||||
"### Creating the attack\n",
|
||||
"Let's keep it simple: let's use a greedy search method, and let's not use any constraints for now. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"id": "nSAHSoI_aPSO"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"from textattack import Attack\n",
|
||||
"\n",
|
||||
"# We're going to use our Banana word swap class as the attack transformation.\n",
|
||||
"transformation = BananaWordSwap() \n",
|
||||
"# We'll constrain modification of already modified indices and stopwords\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification()]\n",
|
||||
"# We'll use the Greedy search method\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"# Now, let's make the attack from the 4 components:\n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "PqrHaZOaaPSO"
|
||||
},
|
||||
"source": [
|
||||
"Let's print our attack to see all the parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "d2qYOr0maPSP",
|
||||
"outputId": "7266dc40-fc6c-4c78-90a8-8150e8fb5d8e"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): BananaWordSwap\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(attack)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "m97uyJxDh1wq",
|
||||
"outputId": "87ca8836-9781-4c5d-85f2-7ffbf4a7ef80"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(OrderedDict([('text', \"Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.\")]), 2)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(dataset[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "GYKoVFuXaPSP"
|
||||
},
|
||||
"source": [
|
||||
"### Using the attack\n",
|
||||
"\n",
|
||||
"Let's use our attack to successfully attack 10 samples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "LyokhnFtaPSQ",
|
||||
"outputId": "d8a43c4f-1551-40c9-d031-a42b429ed33d"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 0/10 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): BananaWordSwap\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
") \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1: 10%|█ | 1/10 [00:01<00:14, 1.57s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 1 ---------------------------------------------\n",
|
||||
"\u001b[94mBusiness (100%)\u001b[0m --> \u001b[91mWorld (89%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Fears for T N \u001b[94mpension\u001b[0m after \u001b[94mtalks\u001b[0m \u001b[94mUnions\u001b[0m representing \u001b[94mworkers\u001b[0m at Turner Newall say they are '\u001b[94mdisappointed'\u001b[0m after talks with stricken parent firm Federal \u001b[94mMogul\u001b[0m.\n",
|
||||
"\n",
|
||||
"Fears for T N \u001b[91mbanana\u001b[0m after \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m representing \u001b[91mbanana\u001b[0m at Turner Newall say they are '\u001b[91mbanana\u001b[0m after talks with stricken parent firm Federal \u001b[91mbanana\u001b[0m.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2: 20%|██ | 2/10 [00:13<00:53, 6.68s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 2 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91mWorld (64%)\u001b[0m\n",
|
||||
"\n",
|
||||
"The Race is On: Second Private \u001b[35mTeam\u001b[0m Sets Launch \u001b[35mDate\u001b[0m for \u001b[35mHuman\u001b[0m \u001b[35mSpaceflight\u001b[0m (\u001b[35mSPACE\u001b[0m.\u001b[35mcom\u001b[0m) \u001b[35mSPACE\u001b[0m.\u001b[35mcom\u001b[0m - \u001b[35mTORONTO\u001b[0m, \u001b[35mCanada\u001b[0m -- \u001b[35mA\u001b[0m \u001b[35msecond\u001b[0m\\\u001b[35mteam\u001b[0m of rocketeers \u001b[35mcompeting\u001b[0m for the #36;10 million Ansari X \u001b[35mPrize\u001b[0m, a \u001b[35mcontest\u001b[0m for\\\u001b[35mprivately\u001b[0m funded \u001b[35msuborbital\u001b[0m \u001b[35mspace\u001b[0m \u001b[35mflight\u001b[0m, has officially \u001b[35mannounced\u001b[0m the first\\\u001b[35mlaunch\u001b[0m date for its \u001b[35mmanned\u001b[0m rocket.\n",
|
||||
"\n",
|
||||
"The Race is On: Second Private \u001b[91mbanana\u001b[0m Sets Launch \u001b[91mbanana\u001b[0m for \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m (\u001b[91mbanana\u001b[0m.\u001b[91mbanana\u001b[0m) \u001b[91mbanana\u001b[0m.\u001b[91mbanana\u001b[0m - \u001b[91mbanana\u001b[0m, \u001b[91mbanana\u001b[0m -- \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m\\\u001b[91mbanana\u001b[0m of rocketeers \u001b[91mbanana\u001b[0m for the #36;10 million Ansari X \u001b[91mbanana\u001b[0m, a \u001b[91mbanana\u001b[0m for\\\u001b[91mbanana\u001b[0m funded \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m, has officially \u001b[91mbanana\u001b[0m the first\\\u001b[91mbanana\u001b[0m date for its \u001b[91mbanana\u001b[0m rocket.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3: 30%|███ | 3/10 [00:18<00:42, 6.06s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 3 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[94mBusiness (77%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Ky. Company Wins Grant to \u001b[35mStudy\u001b[0m \u001b[35mPeptides\u001b[0m (\u001b[35mAP\u001b[0m) \u001b[35mAP\u001b[0m - \u001b[35mA\u001b[0m company \u001b[35mfounded\u001b[0m by a \u001b[35mchemistry\u001b[0m \u001b[35mresearcher\u001b[0m at the \u001b[35mUniversity\u001b[0m of Louisville won a grant to develop a method of producing better \u001b[35mpeptides\u001b[0m, which are short chains of \u001b[35mamino\u001b[0m \u001b[35macids\u001b[0m, the building blocks of \u001b[35mproteins\u001b[0m.\n",
|
||||
"\n",
|
||||
"Ky. Company Wins Grant to \u001b[94mbanana\u001b[0m \u001b[94mbanana\u001b[0m (\u001b[94mbanana\u001b[0m) \u001b[94mbanana\u001b[0m - \u001b[94mbanana\u001b[0m company \u001b[94mbanana\u001b[0m by a \u001b[94mbanana\u001b[0m \u001b[94mbanana\u001b[0m at the \u001b[94mbanana\u001b[0m of Louisville won a grant to develop a method of producing better \u001b[94mbanana\u001b[0m, which are short chains of \u001b[94mbanana\u001b[0m \u001b[94mbanana\u001b[0m, the building blocks of \u001b[94mbanana\u001b[0m.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4: 40%|████ | 4/10 [00:20<00:30, 5.11s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 4 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91mWorld (65%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[35mPrediction\u001b[0m Unit Helps \u001b[35mForecast\u001b[0m Wildfires (AP) \u001b[35mAP\u001b[0m - It's barely dawn when Mike Fitzpatrick \u001b[35mstarts\u001b[0m his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.\n",
|
||||
"\n",
|
||||
"\u001b[91mbanana\u001b[0m Unit Helps \u001b[91mbanana\u001b[0m Wildfires (AP) \u001b[91mbanana\u001b[0m - It's barely dawn when Mike Fitzpatrick \u001b[91mbanana\u001b[0m his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 5 / 0 / 0 / 5: 50%|█████ | 5/10 [00:22<00:22, 4.42s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 5 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91mWorld (62%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Calif. Aims to Limit Farm-Related \u001b[35mSmog\u001b[0m (AP) AP - Southern California's \u001b[35msmog-fighting\u001b[0m agency went after \u001b[35memissions\u001b[0m of the \u001b[35mbovine\u001b[0m variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.\n",
|
||||
"\n",
|
||||
"Calif. Aims to Limit Farm-Related \u001b[91mbanana\u001b[0m (AP) AP - Southern California's \u001b[91mbanana\u001b[0m agency went after \u001b[91mbanana\u001b[0m of the \u001b[91mbanana\u001b[0m variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 6 / 0 / 0 / 6: 60%|██████ | 6/10 [00:54<00:36, 9.07s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 6 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91mWorld (53%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Open \u001b[35mLetter\u001b[0m Against \u001b[35mBritish\u001b[0m \u001b[35mCopyright\u001b[0m Indoctrination in Schools The \u001b[35mBritish\u001b[0m Department for Education and Skills (DfES) \u001b[35mrecently\u001b[0m \u001b[35mlaunched\u001b[0m a \"\u001b[35mMusic\u001b[0m \u001b[35mManifesto\u001b[0m\" campaign, with the ostensible \u001b[35mintention\u001b[0m of \u001b[35meducating\u001b[0m the \u001b[35mnext\u001b[0m \u001b[35mgeneration\u001b[0m of \u001b[35mBritish\u001b[0m \u001b[35mmusicians\u001b[0m. \u001b[35mUnfortunately\u001b[0m, they also teamed up with the \u001b[35mmusic\u001b[0m industry (\u001b[35mEMI\u001b[0m, and \u001b[35mvarious\u001b[0m \u001b[35martists\u001b[0m) to make this popular. \u001b[35mEMI\u001b[0m has \u001b[35mapparently\u001b[0m \u001b[35mnegotiated\u001b[0m their end well, so that \u001b[35mchildren\u001b[0m in our schools will now be indoctrinated about the illegality of \u001b[35mdownloading\u001b[0m music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.\n",
|
||||
"\n",
|
||||
"Open \u001b[91mbanana\u001b[0m Against \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m Indoctrination in Schools The \u001b[91mbanana\u001b[0m Department for Education and Skills (DfES) \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m a \"\u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m\" campaign, with the ostensible \u001b[91mbanana\u001b[0m of \u001b[91mbanana\u001b[0m the \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m of \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m. \u001b[91mbanana\u001b[0m, they also teamed up with the \u001b[91mbanana\u001b[0m industry (\u001b[91mbanana\u001b[0m, and \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m) to make this popular. \u001b[91mbanana\u001b[0m has \u001b[91mbanana\u001b[0m \u001b[91mbanana\u001b[0m their end well, so that \u001b[91mbanana\u001b[0m in our schools will now be indoctrinated about the illegality of \u001b[91mbanana\u001b[0m music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 6 / 1 / 0 / 7: 70%|███████ | 7/10 [01:47<00:46, 15.36s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 7 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"Loosing the War on Terrorism \\\\\"Sven Jaschan, self-confessed author of the Netsky and Sasser viruses, is\\responsible for 70 percent of virus infections in 2004, according to a six-month\\virus roundup published Wednesday by antivirus company Sophos.\"\\\\\"The 18-year-old Jaschan was taken into custody in Germany in May by police who\\said he had admitted programming both the Netsky and Sasser worms, something\\experts at Microsoft confirmed. (A Microsoft antivirus reward program led to the\\teenager's arrest.) During the five months preceding Jaschan's capture, there\\were at least 25 variants of Netsky and one of the port-scanning network worm\\Sasser.\"\\\\\"Graham Cluley, senior technology consultant at Sophos, said it was staggeri ...\\\\\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 6 / 2 / 0 / 8: 80%|████████ | 8/10 [02:55<00:43, 21.96s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 8 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (100%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"FOAFKey: FOAF, PGP, Key Distribution, and Bloom Filters \\\\FOAF/LOAF and bloom filters have a lot of interesting properties for social\\network and whitelist distribution.\\\\I think we can go one level higher though and include GPG/OpenPGP key\\fingerpring distribution in the FOAF file for simple web-of-trust based key\\distribution.\\\\What if we used FOAF and included the PGP key fingerprint(s) for identities?\\This could mean a lot. You include the PGP key fingerprints within the FOAF\\file of your direct friends and then include a bloom filter of the PGP key\\fingerprints of your entire whitelist (the source FOAF file would of course need\\to be encrypted ).\\\\Your whitelist would be populated from the social network as your client\\discovered new identit ...\\\\\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 7 / 2 / 0 / 9: 90%|█████████ | 9/10 [02:56<00:19, 19.57s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 9 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (98%)\u001b[0m --> \u001b[91mWorld (100%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[35mE-mail\u001b[0m scam targets police chief Wiltshire Police warns about \"\u001b[35mphishing\u001b[0m\" after its fraud squad chief was targeted.\n",
|
||||
"\n",
|
||||
"\u001b[91mbanana\u001b[0m scam targets police chief Wiltshire Police warns about \"\u001b[91mbanana\u001b[0m\" after its fraud squad chief was targeted.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 8 / 2 / 0 / 10: 100%|██████████| 10/10 [02:56<00:00, 17.66s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 10 ---------------------------------------------\n",
|
||||
"\u001b[35mSci/tech (98%)\u001b[0m --> \u001b[91mWorld (77%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated \u001b[35mcard\u001b[0m fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.\n",
|
||||
"\n",
|
||||
"Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated \u001b[91mbanana\u001b[0m fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Attack Results | |\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Number of successful attacks: | 8 |\n",
|
||||
"| Number of failed attacks: | 2 |\n",
|
||||
"| Number of skipped attacks: | 0 |\n",
|
||||
"| Original accuracy: | 100.0% |\n",
|
||||
"| Accuracy under attack: | 20.0% |\n",
|
||||
"| Attack success rate: | 80.0% |\n",
|
||||
"| Average perturbed word %: | 18.71% |\n",
|
||||
"| Average num. words per input: | 63.0 |\n",
|
||||
"| Avg num queries: | 934.0 |\n",
|
||||
"+-------------------------------+--------+\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from tqdm import tqdm # tqdm provides us a nice progress bar.\n",
|
||||
"from textattack.loggers import CSVLogger # tracks a dataframe for us.\n",
|
||||
"from textattack.attack_results import SuccessfulAttackResult\n",
|
||||
"from textattack import Attacker\n",
|
||||
"from textattack import AttackArgs\n",
|
||||
"from textattack.datasets import Dataset\n",
|
||||
"\n",
|
||||
"attack_args = AttackArgs(num_examples=10)\n",
|
||||
"\n",
|
||||
"attacker = Attacker(attack, dataset, attack_args)\n",
|
||||
"\n",
|
||||
"attack_results = attacker.attack_dataset()\n",
|
||||
"\n",
|
||||
"#The following legacy tutorial code shows how the Attack API works in detail.\n",
|
||||
"\n",
|
||||
"#logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"#num_successes = 0\n",
|
||||
"#i = 0\n",
|
||||
"#while num_successes < 10:\n",
|
||||
" #result = next(results_iterable)\n",
|
||||
"# example, ground_truth_output = dataset[i]\n",
|
||||
"# i += 1\n",
|
||||
"# result = attack.attack(example, ground_truth_output)\n",
|
||||
"# if isinstance(result, SuccessfulAttackResult):\n",
|
||||
"# logger.log_attack_result(result)\n",
|
||||
"# num_successes += 1\n",
|
||||
"# print(f'{num_successes} of 10 successes complete.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "oRRkNXYmaPSQ"
|
||||
},
|
||||
"source": [
|
||||
"### Visualizing attack results\n",
|
||||
"\n",
|
||||
"We are logging `AttackResult` objects using a `CSVLogger`. This logger stores all attack results in a dataframe, which we can easily access and display. Since we set `color_method` to `'html'`, the attack results will display their differences, in color, in HTML. Using `IPython` utilities and `pandas`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"id": "JafXMELLaPSR",
|
||||
"outputId": "48178d1c-5ba9-45f9-b1be-dc6533462c95"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"textattack: Logging to CSV at path results.csv\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Fears for T N <font color = blue>pension</font> after <font color = blue>talks</font> <font color = blue>Unions</font> representing <font color = blue>workers</font> at Turner Newall say they are '<font color = blue>disappointed'</font> after talks with stricken parent firm Federal <font color = blue>Mogul</font>.</td>\n",
|
||||
" <td>Fears for T N <font color = red>banana</font> after <font color = red>banana</font> <font color = red>banana</font> representing <font color = red>banana</font> at Turner Newall say they are '<font color = red>banana</font> after talks with stricken parent firm Federal <font color = red>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>The Race is On: Second Private <font color = purple>Team</font> Sets Launch <font color = purple>Date</font> for <font color = purple>Human</font> <font color = purple>Spaceflight</font> (<font color = purple>SPACE</font>.<font color = purple>com</font>) <font color = purple>SPACE</font>.<font color = purple>com</font> - <font color = purple>TORONTO</font>, <font color = purple>Canada</font> -- <font color = purple>A</font> <font color = purple>second</font>\\<font color = purple>team</font> of rocketeers <font color = purple>competing</font> for the #36;10 million Ansari X <font color = purple>Prize</font>, a <font color = purple>contest</font> for\\<font color = purple>privately</font> funded <font color = purple>suborbital</font> <font color = purple>space</font> <font color = purple>flight</font>, has officially <font color = purple>announced</font> the first\\<font color = purple>launch</font> date for its <font color = purple>manned</font> rocket.</td>\n",
|
||||
" <td>The Race is On: Second Private <font color = red>banana</font> Sets Launch <font color = red>banana</font> for <font color = red>banana</font> <font color = red>banana</font> (<font color = red>banana</font>.<font color = red>banana</font>) <font color = red>banana</font>.<font color = red>banana</font> - <font color = red>banana</font>, <font color = red>banana</font> -- <font color = red>banana</font> <font color = red>banana</font>\\<font color = red>banana</font> of rocketeers <font color = red>banana</font> for the #36;10 million Ansari X <font color = red>banana</font>, a <font color = red>banana</font> for\\<font color = red>banana</font> funded <font color = red>banana</font> <font color = red>banana</font> <font color = red>banana</font>, has officially <font color = red>banana</font> the first\\<font color = red>banana</font> date for its <font color = red>banana</font> rocket.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Ky. Company Wins Grant to <font color = purple>Study</font> <font color = purple>Peptides</font> (<font color = purple>AP</font>) <font color = purple>AP</font> - <font color = purple>A</font> company <font color = purple>founded</font> by a <font color = purple>chemistry</font> <font color = purple>researcher</font> at the <font color = purple>University</font> of Louisville won a grant to develop a method of producing better <font color = purple>peptides</font>, which are short chains of <font color = purple>amino</font> <font color = purple>acids</font>, the building blocks of <font color = purple>proteins</font>.</td>\n",
|
||||
" <td>Ky. Company Wins Grant to <font color = blue>banana</font> <font color = blue>banana</font> (<font color = blue>banana</font>) <font color = blue>banana</font> - <font color = blue>banana</font> company <font color = blue>banana</font> by a <font color = blue>banana</font> <font color = blue>banana</font> at the <font color = blue>banana</font> of Louisville won a grant to develop a method of producing better <font color = blue>banana</font>, which are short chains of <font color = blue>banana</font> <font color = blue>banana</font>, the building blocks of <font color = blue>banana</font>.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td><font color = purple>Prediction</font> Unit Helps <font color = purple>Forecast</font> Wildfires (AP) <font color = purple>AP</font> - It's barely dawn when Mike Fitzpatrick <font color = purple>starts</font> his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.</td>\n",
|
||||
" <td><font color = red>banana</font> Unit Helps <font color = red>banana</font> Wildfires (AP) <font color = red>banana</font> - It's barely dawn when Mike Fitzpatrick <font color = red>banana</font> his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Calif. Aims to Limit Farm-Related <font color = purple>Smog</font> (AP) AP - Southern California's <font color = purple>smog-fighting</font> agency went after <font color = purple>emissions</font> of the <font color = purple>bovine</font> variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.</td>\n",
|
||||
" <td>Calif. Aims to Limit Farm-Related <font color = red>banana</font> (AP) AP - Southern California's <font color = red>banana</font> agency went after <font color = red>banana</font> of the <font color = red>banana</font> variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>Open <font color = purple>Letter</font> Against <font color = purple>British</font> <font color = purple>Copyright</font> Indoctrination in Schools The <font color = purple>British</font> Department for Education and Skills (DfES) <font color = purple>recently</font> <font color = purple>launched</font> a \"<font color = purple>Music</font> <font color = purple>Manifesto</font>\" campaign, with the ostensible <font color = purple>intention</font> of <font color = purple>educating</font> the <font color = purple>next</font> <font color = purple>generation</font> of <font color = purple>British</font> <font color = purple>musicians</font>. <font color = purple>Unfortunately</font>, they also teamed up with the <font color = purple>music</font> industry (<font color = purple>EMI</font>, and <font color = purple>various</font> <font color = purple>artists</font>) to make this popular. <font color = purple>EMI</font> has <font color = purple>apparently</font> <font color = purple>negotiated</font> their end well, so that <font color = purple>children</font> in our schools will now be indoctrinated about the illegality of <font color = purple>downloading</font> music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.</td>\n",
|
||||
" <td>Open <font color = red>banana</font> Against <font color = red>banana</font> <font color = red>banana</font> Indoctrination in Schools The <font color = red>banana</font> Department for Education and Skills (DfES) <font color = red>banana</font> <font color = red>banana</font> a \"<font color = red>banana</font> <font color = red>banana</font>\" campaign, with the ostensible <font color = red>banana</font> of <font color = red>banana</font> the <font color = red>banana</font> <font color = red>banana</font> of <font color = red>banana</font> <font color = red>banana</font>. <font color = red>banana</font>, they also teamed up with the <font color = red>banana</font> industry (<font color = red>banana</font>, and <font color = red>banana</font> <font color = red>banana</font>) to make this popular. <font color = red>banana</font> has <font color = red>banana</font> <font color = red>banana</font> their end well, so that <font color = red>banana</font> in our schools will now be indoctrinated about the illegality of <font color = red>banana</font> music.The ignorance and audacity of this got to me a little, so I wrote an open letter to the DfES about it. Unfortunately, it's pedantic, as I suppose you have to be when writing to goverment representatives. But I hope you find it useful, and perhaps feel inspired to do something similar, if or when the same thing has happened in your area.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td><font color = purple>Loosing</font> the <font color = purple>War</font> on <font color = purple>Terrorism</font> \\\\\"<font color = purple>Sven</font> <font color = purple>Jaschan</font>, <font color = purple>self-confessed</font> <font color = purple>author</font> of the <font color = purple>Netsky</font> and <font color = purple>Sasser</font> <font color = purple>viruses</font>, is\\<font color = purple>responsible</font> for <font color = purple>70</font> <font color = purple>percent</font> of <font color = purple>virus</font> <font color = purple>infections</font> in <font color = purple>2004</font>, <font color = purple>according</font> to a <font color = purple>six-month</font>\\<font color = purple>virus</font> <font color = purple>roundup</font> <font color = purple>published</font> <font color = purple>Wednesday</font> by <font color = purple>antivirus</font> <font color = purple>company</font> <font color = purple>Sophos</font>.\"\\\\\"<font color = purple>The</font> <font color = purple>18-year-old</font> <font color = purple>Jaschan</font> was <font color = purple>taken</font> into <font color = purple>custody</font> in <font color = purple>Germany</font> in <font color = purple>May</font> by <font color = purple>police</font> who\\<font color = purple>said</font> he had <font color = purple>admitted</font> <font color = purple>programming</font> both the <font color = purple>Netsky</font> and <font color = purple>Sasser</font> <font color = purple>worms</font>, <font color = purple>something</font>\\<font color = purple>experts</font> at <font color = purple>Microsoft</font> <font color = purple>confirmed</font>. (<font color = purple>A</font> <font color = purple>Microsoft</font> <font color = purple>antivirus</font> <font color = purple>reward</font> <font color = purple>program</font> <font color = purple>led</font> to the\\<font color = purple>teenager's</font> <font color = purple>arrest</font>.) 
<font color = purple>During</font> the <font color = purple>five</font> <font color = purple>months</font> <font color = purple>preceding</font> <font color = purple>Jaschan's</font> <font color = purple>capture</font>, there\\were at <font color = purple>least</font> <font color = purple>25</font> <font color = purple>variants</font> of <font color = purple>Netsky</font> and <font color = purple>one</font> of the <font color = purple>port-scanning</font> <font color = purple>network</font> <font color = purple>worm</font>\\<font color = purple>Sasser</font>.\"\\\\\"<font color = purple>Graham</font> <font color = purple>Cluley</font>, <font color = purple>senior</font> <font color = purple>technology</font> <font color = purple>consultant</font> at <font color = purple>Sophos</font>, <font color = purple>said</font> it was <font color = purple>staggeri</font> ...\\\\</td>\n",
|
||||
" <td><font color = purple>banana</font> the <font color = purple>banana</font> on <font color = purple>banana</font> \\\\\"<font color = purple>banana</font> <font color = purple>banana</font>, <font color = purple>banana</font> <font color = purple>banana</font> of the <font color = purple>banana</font> and <font color = purple>banana</font> <font color = purple>banana</font>, is\\<font color = purple>banana</font> for <font color = purple>banana</font> <font color = purple>banana</font> of <font color = purple>banana</font> <font color = purple>banana</font> in <font color = purple>banana</font>, <font color = purple>banana</font> to a <font color = purple>banana</font>\\<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> by <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font>.\"\\\\\"<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> was <font color = purple>banana</font> into <font color = purple>banana</font> in <font color = purple>banana</font> in <font color = purple>banana</font> by <font color = purple>banana</font> who\\<font color = purple>banana</font> he had <font color = purple>banana</font> <font color = purple>banana</font> both the <font color = purple>banana</font> and <font color = purple>banana</font> <font color = purple>banana</font>, <font color = purple>banana</font>\\<font color = purple>banana</font> at <font color = purple>banana</font> <font color = purple>banana</font>. (<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> to the\\<font color = purple>banana</font> <font color = purple>banana</font>.) 
<font color = purple>banana</font> the <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font>, there\\were at <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> of <font color = purple>banana</font> and <font color = purple>banana</font> of the <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font>\\<font color = purple>banana</font>.\"\\\\\"<font color = purple>banana</font> <font color = purple>banana</font>, <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> at <font color = purple>banana</font>, <font color = purple>banana</font> it was <font color = purple>banana</font> ...\\\\</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td><font color = purple>FOAFKey</font>: <font color = purple>FOAF</font>, <font color = purple>PGP</font>, <font color = purple>Key</font> <font color = purple>Distribution</font>, and <font color = purple>Bloom</font> <font color = purple>Filters</font> \\\\<font color = purple>FOAF</font>/<font color = purple>LOAF</font> and <font color = purple>bloom</font> <font color = purple>filters</font> have a <font color = purple>lot</font> of <font color = purple>interesting</font> <font color = purple>properties</font> for <font color = purple>social</font>\\<font color = purple>network</font> and <font color = purple>whitelist</font> <font color = purple>distribution</font>.\\\\<font color = purple>I</font> <font color = purple>think</font> we can <font color = purple>go</font> <font color = purple>one</font> <font color = purple>level</font> <font color = purple>higher</font> <font color = purple>though</font> and <font color = purple>include</font> <font color = purple>GPG</font>/<font color = purple>OpenPGP</font> <font color = purple>key</font>\\<font color = purple>fingerpring</font> <font color = purple>distribution</font> in the <font color = purple>FOAF</font> <font color = purple>file</font> for <font color = purple>simple</font> <font color = purple>web-of-trust</font> <font color = purple>based</font> <font color = purple>key</font>\\<font color = purple>distribution</font>.\\\\<font color = purple>What</font> if we <font color = purple>used</font> <font color = purple>FOAF</font> and <font color = purple>included</font> the <font color = purple>PGP</font> <font color = purple>key</font> <font color = purple>fingerprint</font>(s) for <font color = purple>identities</font>?\\<font color = purple>This</font> <font color = purple>could</font> <font color = purple>mean</font> a <font color = purple>lot</font>. 
<font color = purple>You</font> <font color = purple>include</font> the <font color = purple>PGP</font> <font color = purple>key</font> <font color = purple>fingerprints</font> <font color = purple>within</font> the <font color = purple>FOAF</font>\\<font color = purple>file</font> of your <font color = purple>direct</font> <font color = purple>friends</font> and then <font color = purple>include</font> a <font color = purple>bloom</font> <font color = purple>filter</font> of the <font color = purple>PGP</font> <font color = purple>key</font>\\<font color = purple>fingerprints</font> of your <font color = purple>entire</font> <font color = purple>whitelist</font> (the <font color = purple>source</font> <font color = purple>FOAF</font> <font color = purple>file</font> <font color = purple>would</font> of <font color = purple>course</font> <font color = purple>need</font>\\to be <font color = purple>encrypted</font> ).\\\\<font color = purple>Your</font> <font color = purple>whitelist</font> <font color = purple>would</font> be <font color = purple>populated</font> from the <font color = purple>social</font> <font color = purple>network</font> as your <font color = purple>client</font>\\<font color = purple>discovered</font> <font color = purple>new</font> <font color = purple>identit</font> ...\\\\</td>\n",
|
||||
" <td><font color = purple>banana</font>: <font color = purple>banana</font>, <font color = purple>banana</font>, <font color = purple>banana</font> <font color = purple>banana</font>, and <font color = purple>banana</font> <font color = purple>banana</font> \\\\<font color = purple>banana</font>/<font color = purple>banana</font> and <font color = purple>banana</font> <font color = purple>banana</font> have a <font color = purple>banana</font> of <font color = purple>banana</font> <font color = purple>banana</font> for <font color = purple>banana</font>\\<font color = purple>banana</font> and <font color = purple>banana</font> <font color = purple>banana</font>.\\\\<font color = purple>banana</font> <font color = purple>banana</font> we can <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> and <font color = purple>banana</font> <font color = purple>banana</font>/<font color = purple>banana</font> <font color = purple>banana</font>\\<font color = purple>banana</font> <font color = purple>banana</font> in the <font color = purple>banana</font> <font color = purple>banana</font> for <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font>\\<font color = purple>banana</font>.\\\\<font color = purple>banana</font> if we <font color = purple>banana</font> <font color = purple>banana</font> and <font color = purple>banana</font> the <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font>(s) for <font color = purple>banana</font>?\\<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> a <font color = purple>banana</font>. 
<font color = purple>banana</font> <font color = purple>banana</font> the <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> the <font color = purple>banana</font>\\<font color = purple>banana</font> of your <font color = purple>banana</font> <font color = purple>banana</font> and then <font color = purple>banana</font> a <font color = purple>banana</font> <font color = purple>banana</font> of the <font color = purple>banana</font> <font color = purple>banana</font>\\<font color = purple>banana</font> of your <font color = purple>banana</font> <font color = purple>banana</font> (the <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> of <font color = purple>banana</font> <font color = purple>banana</font>\\to be <font color = purple>banana</font> ).\\\\<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> be <font color = purple>banana</font> from the <font color = purple>banana</font> <font color = purple>banana</font> as your <font color = purple>banana</font>\\<font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> ...\\\\</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td><font color = purple>E-mail</font> scam targets police chief Wiltshire Police warns about \"<font color = purple>phishing</font>\" after its fraud squad chief was targeted.</td>\n",
|
||||
" <td><font color = red>banana</font> scam targets police chief Wiltshire Police warns about \"<font color = red>banana</font>\" after its fraud squad chief was targeted.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td>Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated <font color = purple>card</font> fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.</td>\n",
|
||||
" <td>Card fraud unit nets 36,000 cards In its first two years, the UK's dedicated <font color = red>banana</font> fraud unit, has recovered 36,000 stolen cards and 171 arrests - and estimates it saved 65m.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pd.options.display.max_colwidth = 480 # increase column width so we can actually read the examples\n",
|
||||
"\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"for result in attack_results:\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "yMMF1Vx1aPSR"
|
||||
},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"We can examine these examples for a good idea of how many words had to be changed to \"banana\" to change the prediction score from the correct class to another class. The examples without perturbed words were originally misclassified, so they were skipped by the attack. Looks like some examples needed only a couple \"banana\"s, while others needed up to 17 \"banana\" substitutions to change the class score. Wow! 🍌"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "y4MTwyTpaPSR"
|
||||
},
|
||||
"source": [
|
||||
"### Bonus: Attacking Custom Samples\n",
|
||||
"\n",
|
||||
"We can also attack custom data samples, like these ones I just made up!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"id": "L2Po7C8EaPSS",
|
||||
"outputId": "d634f038-79e2-4bef-a11e-686a880ce8a7"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 0/4 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedySearch\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): BananaWordSwap\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
") \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1: 25%|██▌ | 1/4 [00:00<00:00, 7.13it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 1 ---------------------------------------------\n",
|
||||
"\u001b[91m0 (96%)\u001b[0m --> \u001b[35m3 (80%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Malaria \u001b[91mdeaths\u001b[0m in Africa fall by 5% from last year\n",
|
||||
"\n",
|
||||
"Malaria \u001b[35mbanana\u001b[0m in Africa fall by 5% from last year\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2: 50%|█████ | 2/4 [00:00<00:00, 3.79it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 2 ---------------------------------------------\n",
|
||||
"\u001b[92m1 (98%)\u001b[0m --> \u001b[35m3 (87%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mWashington\u001b[0m \u001b[92mNationals\u001b[0m \u001b[92mdefeat\u001b[0m the Houston Astros to win the World Series\n",
|
||||
"\n",
|
||||
"\u001b[35mbanana\u001b[0m \u001b[35mbanana\u001b[0m \u001b[35mbanana\u001b[0m the Houston Astros to win the World Series\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4: 100%|██████████| 4/4 [00:00<00:00, 4.31it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 3 ---------------------------------------------\n",
|
||||
"\u001b[94m2 (99%)\u001b[0m --> \u001b[35m3 (94%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[94mExxon\u001b[0m \u001b[94mMobil\u001b[0m \u001b[94mhires\u001b[0m a new \u001b[94mCEO\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[35mbanana\u001b[0m \u001b[35mbanana\u001b[0m \u001b[35mbanana\u001b[0m a new \u001b[35mbanana\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------- Result 4 ---------------------------------------------\n",
|
||||
"\u001b[35m3 (93%)\u001b[0m --> \u001b[94m2 (100%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[35mMicrosoft\u001b[0m invests $1 billion in OpenAI\n",
|
||||
"\n",
|
||||
"\u001b[94mbanana\u001b[0m invests $1 billion in OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Attack Results | |\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Number of successful attacks: | 4 |\n",
|
||||
"| Number of failed attacks: | 0 |\n",
|
||||
"| Number of skipped attacks: | 0 |\n",
|
||||
"| Original accuracy: | 100.0% |\n",
|
||||
"| Accuracy under attack: | 0.0% |\n",
|
||||
"| Attack success rate: | 100.0% |\n",
|
||||
"| Average perturbed word %: | 30.15% |\n",
|
||||
"| Average num. words per input: | 8.25 |\n",
|
||||
"| Avg num queries: | 12.75 |\n",
|
||||
"+-------------------------------+--------+"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"textattack: Logging to CSV at path results.csv\n",
|
||||
"textattack: CSVLogger exiting without calling flush().\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>original_text</th>\n",
|
||||
" <th>perturbed_text</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Malaria <font color = red>deaths</font> in Africa fall by 5% from last year</td>\n",
|
||||
" <td>Malaria <font color = purple>banana</font> in Africa fall by 5% from last year</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td><font color = green>Washington</font> <font color = green>Nationals</font> <font color = green>defeat</font> the Houston Astros to win the World Series</td>\n",
|
||||
" <td><font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> the Houston Astros to win the World Series</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td><font color = blue>Exxon</font> <font color = blue>Mobil</font> <font color = blue>hires</font> a new <font color = blue>CEO</font></td>\n",
|
||||
" <td><font color = purple>banana</font> <font color = purple>banana</font> <font color = purple>banana</font> a new <font color = purple>banana</font></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td><font color = purple>Microsoft</font> invests $1 billion in OpenAI</td>\n",
|
||||
" <td><font color = blue>banana</font> invests $1 billion in OpenAI</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# For AG News, labels are 0: World, 1: Sports, 2: Business, 3: Sci/Tech\n",
|
||||
"\n",
|
||||
"custom_dataset = [\n",
|
||||
" ('Malaria deaths in Africa fall by 5% from last year', 0),\n",
|
||||
" ('Washington Nationals defeat the Houston Astros to win the World Series', 1),\n",
|
||||
" ('Exxon Mobil hires a new CEO', 2),\n",
|
||||
" ('Microsoft invests $1 billion in OpenAI', 3),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"attack_args = AttackArgs(num_examples=4)\n",
|
||||
"\n",
|
||||
"dataset = Dataset(custom_dataset)\n",
|
||||
"\n",
|
||||
"attacker = Attacker(attack, dataset, attack_args)\n",
|
||||
"\n",
|
||||
"results_iterable = attacker.attack_dataset()\n",
|
||||
"\n",
|
||||
"logger = CSVLogger(color_method='html')\n",
|
||||
"\n",
|
||||
"for result in results_iterable:\n",
|
||||
" logger.log_attack_result(result)\n",
|
||||
"\n",
|
||||
"from IPython.core.display import display, HTML\n",
|
||||
" \n",
|
||||
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "1_Introduction_and_Transformations.ipynb",
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
3613
docs/2notebook/2_Constraints.ipynb
Normal file
269
docs/2notebook/3_Augmentations.ipynb
Normal file
@@ -0,0 +1,269 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "m83IiqVREJ96"
|
||||
},
|
||||
"source": [
|
||||
"# TextAttack Augmentation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "6UZ0d84hEJ98"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "qZ5xnoevEJ99"
|
||||
},
|
||||
"source": [
|
||||
"Augmenting a dataset using TextAttack requires only a few lines of code when it is done right. The `Augmenter` class is created for this purpose to generate augmentations of a string or a list of strings. Augmentation could be done in either python script or command line.\n",
|
||||
"\n",
|
||||
"### Creating an Augmenter\n",
|
||||
"\n",
|
||||
"The **Augmenter** class is essential for performing data augmentation using TextAttack. It takes in four parameters in the following order:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"1. **transformation**: all [transformations](https://textattack.readthedocs.io/en/latest/apidoc/textattack.transformations.html) implemented by TextAttack can be used to create an `Augmenter`. Note here that if we want to apply multiple transformations at the same time, they first need to be incorporated into a `CompositeTransformation` class.\n",
|
||||
"2. **constraints**: [constraints](https://textattack.readthedocs.io/en/latest/apidoc/textattack.constraints.html#) determine whether or not a given augmentation is valid, consequently enhancing the quality of the augmentations. The default augmenter does not have any constraints but constraints can be supplied as a list to the Augmenter.\n",
|
||||
"3. **pct_words_to_swap**: percentage of words to swap per augmented example. The default is set to 0.1 (10%).\n",
|
||||
"4. **transformations_per_example**: maximum number of augmentations per input. The default is set to 1 (one augmented sentence given one original input)\n",
|
||||
"\n",
|
||||
"An example of creating one's own augmenter is shown below. In this case, we are creating an augmenter with **WordSwapRandomCharacterDeletion** and **WordSwapQWERTY** transformations, **RepeatModification** and **StopwordModification** constraints. A maximum of **50%** of the words could be perturbed, and 10 augmentations will be generated from each input sentence.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"id": "5AXyxiLD4X93"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import transformations, constraints, and the Augmenter\n",
|
||||
"from textattack.transformations import WordSwapRandomCharacterDeletion\n",
|
||||
"from textattack.transformations import WordSwapQWERTY\n",
|
||||
"from textattack.transformations import CompositeTransformation\n",
|
||||
"\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification\n",
|
||||
"from textattack.constraints.pre_transformation import StopwordModification\n",
|
||||
"\n",
|
||||
"from textattack.augmentation import Augmenter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "wFeXF_OL-vyw",
|
||||
"outputId": "c041e77e-accd-4a58-88be-9b140dd0cd56"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Ahat I camnot reate, I do not unerstand.',\n",
|
||||
" 'Ahat I cwnnot crewte, I do not undefstand.',\n",
|
||||
" 'Wat I camnot vreate, I do not undefstand.',\n",
|
||||
" 'Wha I annot crate, I do not unerstand.',\n",
|
||||
" 'Whaf I canno creatr, I do not ynderstand.',\n",
|
||||
" 'Wtat I cannor dreate, I do not understwnd.',\n",
|
||||
" 'Wuat I canno ceate, I do not unferstand.',\n",
|
||||
" 'hat I cnnot ceate, I do not undersand.',\n",
|
||||
" 'hat I cnnot cfeate, I do not undfrstand.',\n",
|
||||
" 'hat I cwnnot crfate, I do not ujderstand.']"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Set up transformation using CompositeTransformation()\n",
|
||||
"transformation = CompositeTransformation([WordSwapRandomCharacterDeletion(), WordSwapQWERTY()])\n",
|
||||
"# Set up constraints\n",
|
||||
"constraints = [RepeatModification(), StopwordModification()]\n",
|
||||
"# Create augmenter with specified parameters\n",
|
||||
"augmenter = Augmenter(transformation=transformation, constraints=constraints, pct_words_to_swap=0.5, transformations_per_example=10)\n",
|
||||
"s = 'What I cannot create, I do not understand.'\n",
|
||||
"# Augment!\n",
|
||||
"augmenter.augment(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "b7020KtvEJ9-"
|
||||
},
|
||||
"source": [
|
||||
"### Pre-built Augmentation Recipes\n",
|
||||
"\n",
|
||||
"In addition to creating our own augmenter, we could also use pre-built augmentation recipes to perturb datasets. These recipes are implemented from published papers and are very convenient to use. The list of available recipes can be found [here](https://textattack.readthedocs.io/en/latest/3recipes/augmenter_recipes.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "pkBqK5wYQKZu"
|
||||
},
|
||||
"source": [
|
||||
"In the following example, we will use the `CheckListAugmenter` to showcase our augmentation recipes. The `CheckListAugmenter` augments words by using the transformation methods provided by CheckList INV testing, which combines **Name Replacement**, **Location Replacement**, **Number Alteration**, and **Contraction/Extension**. The original paper can be found here: [\"Beyond Accuracy: Behavioral Testing of NLP models with CheckList\" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "WkYiVH6lQedu",
|
||||
"outputId": "cd5ffc65-ca80-45cd-b3bb-d023bcad09a4"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2021-06-09 16:58:41,816 --------------------------------------------------------------------------------\n",
|
||||
"2021-06-09 16:58:41,817 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub\n",
|
||||
"2021-06-09 16:58:41,817 - The most current version of the model is automatically downloaded from there.\n",
|
||||
"2021-06-09 16:58:41,818 - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)\n",
|
||||
"2021-06-09 16:58:41,818 --------------------------------------------------------------------------------\n",
|
||||
"2021-06-09 16:58:41,906 loading file /u/lab/jy2ma/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['I would love to go to Chile but the tickets are 500 dollars',\n",
|
||||
" 'I would love to go to Japan but the tickets are 500 dollars',\n",
|
||||
" 'I would love to go to Japan but the tickets are 75 dollars',\n",
|
||||
" \"I'd love to go to Oman but the tickets are 373 dollars\",\n",
|
||||
" \"I'd love to go to Vietnam but the tickets are 613 dollars\"]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# import the CheckListAugmenter\n",
|
||||
"from textattack.augmentation import CheckListAugmenter\n",
|
||||
"# Alter default values if desired\n",
|
||||
"augmenter = CheckListAugmenter(pct_words_to_swap=0.2, transformations_per_example=5)\n",
|
||||
"s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n",
|
||||
"# Augment\n",
|
||||
"augmenter.augment(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5vn22xrLST0H"
|
||||
},
|
||||
"source": [
|
||||
"Note that the previous snippet of code is equivalent to running\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"textattack augment --recipe checklist --pct-words-to-swap .2 --transformations-per-example 5 --exclude-original --interactive\n",
|
||||
"```\n",
|
||||
"in command line.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "VqfmCKz0XY-Y"
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Here's another example of using `WordNetAugmenter`:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "l2b-4scuXvkA",
|
||||
"outputId": "72a78a95-ffc0-4d2a-b98c-b456d338807d"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[\"I'd fuck to fit to Japan but the tickets are 500 dollars\",\n",
|
||||
" \"I'd know to cristal to Japan but the tickets are 500 dollars\",\n",
|
||||
" \"I'd love to depart to Japan but the tickets are D dollars\",\n",
|
||||
" \"I'd love to get to Nihon but the tickets are 500 dollars\",\n",
|
||||
" \"I'd love to work to Japan but the tickets are 500 buck\"]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.augmentation import WordNetAugmenter\n",
|
||||
"augmenter = WordNetAugmenter(pct_words_to_swap=0.2, transformations_per_example=5)\n",
|
||||
"s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n",
|
||||
"augmenter.augment(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "whvwbHLVEJ-S"
|
||||
},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"We have now gone through the basics of running `Augmenter` by either creating a new augmenter from scratch or using a pre-built augmenter. This could be done in as few as 4 lines of code so please give it a try if you haven't already! 🐙"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "Augmentation with TextAttack.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
448
docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb
Normal file
@@ -0,0 +1,448 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TextAttack with Custom Dataset and Word Embedding. This tutorial will show you how to use textattack with any dataset and word embedding you may want to use\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "_WVki6Bvbjur"
|
||||
},
|
||||
"source": [
|
||||
"## **Importing the Model**\n",
|
||||
"\n",
|
||||
"We start by choosing a pretrained model we want to attack. In this example we will use the albert base v2 model from HuggingFace. This model was trained with data from imbd, a set of movie reviews with either positive or negative labels."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 585,
|
||||
"referenced_widgets": [
|
||||
"1905ff29aaa242a88dc93f3247065364",
|
||||
"917713cc9b1344c7a7801144f04252bc",
|
||||
"b65d55c5b9f445a6bfd585f6237d22ca",
|
||||
"38b56a89b2ae4a8ca93c03182db26983",
|
||||
"26082a081d1c49bd907043a925cf88df",
|
||||
"1c3edce071ad4a2a99bf3e34ea40242c",
|
||||
"f9c265a003444a03bde78e18ed3f5a7e",
|
||||
"3cb9eb594c8640ffbfd4a0b1139d571a",
|
||||
"7d29511ba83a4eaeb4a2e5cd89ca1990",
|
||||
"136f44f7b8fa433ebff6d0a534c0588b",
|
||||
"2658e486ee77468a99ab4edc7b5191d8",
|
||||
"39bfd8c439b847e4bdfeee6e66ae86f3",
|
||||
"7ca4ce3d902d42758eb1fc02b9b211d3",
|
||||
"222cacceca11402db10ff88a92a2d31d",
|
||||
"108d2b83dff244edbebf4f8909dce789",
|
||||
"c06317aaf0064cb9b6d86d032821a8e2",
|
||||
"c18ac12f8c6148b9aa2d69885351fbcb",
|
||||
"b11ad31ee69441df8f0447a4ae62ce75",
|
||||
"a7e846fdbda740a38644e28e11a67707",
|
||||
"b38d5158e5584461bfe0b2f8ed3b0dc2",
|
||||
"3bdef9b4157e41f3a01f25b07e8efa48",
|
||||
"69e19afa8e2c49fbab0e910a5929200f",
|
||||
"2627a092f0c041c0a5f67451b1bd8b2b",
|
||||
"1780cb5670714c0a9b7a94b92ffc1819",
|
||||
"1ac87e683d2e4951ac94e25e8fe88d69",
|
||||
"02daee23726349a69d4473814ede81c3",
|
||||
"1fac551ad9d840f38b540ea5c364af70",
|
||||
"1027e6f245924195a930aca8c3844f44",
|
||||
"5b863870023e4c438ed75d830c13c5ac",
|
||||
"9ec55c6e2c4e40daa284596372728213",
|
||||
"5e2d17ed769d496db38d053cc69a914c",
|
||||
"dedaafae3bcc47f59b7d9b025b31fd0c",
|
||||
"8c2f5cda0ae9472fa7ec2b864d0bdc0e",
|
||||
"2a35d22dd2604950bae55c7c51f4af2c",
|
||||
"4c23ca1540fd48b1ac90d9365c9c6427",
|
||||
"3e4881a27c36472ab4c24167da6817cf",
|
||||
"af32025d22534f9da9e769b02f5e6422",
|
||||
"7af34c47299f458789e03987026c3519",
|
||||
"ed0ab8c7456a42618d6cbf6fd496b7b3",
|
||||
"25fc5fdac77247f9b029ada61af630fd"
|
||||
]
|
||||
},
|
||||
"id": "4ZEnCFoYv-y7",
|
||||
"outputId": "c6c57cb9-6d6e-4efd-988f-c794356d4719"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "09e503d73c1042dfbc48e0148cfc9699",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=727.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "79a819e8b3614fe280209cbc93614ce3",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=46747112.0, style=ProgressStyle(descrip…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8ac83c6df8b746c3af829996193292cf",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760289.0, style=ProgressStyle(descripti…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "edb863a582ac4ee6a0f0ac064c335843",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=156.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "ccaf4ac6d7e24cc5b5e320f128a11b68",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=25.0, style=ProgressStyle(description_w…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import transformers\n",
|
||||
"from textattack.models.wrappers import HuggingFaceModelWrapper\n",
|
||||
"\n",
|
||||
"# https://huggingface.co/textattack\n",
|
||||
"model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/albert-base-v2-imdb\")\n",
|
||||
"tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/albert-base-v2-imdb\")\n",
|
||||
"# We wrap the model so it can be used by textattack\n",
|
||||
"model_wrapper = HuggingFaceModelWrapper(model, tokenizer)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "D61VLa8FexyK"
|
||||
},
|
||||
"source": [
|
||||
"## **Creating A Custom Dataset**\n",
|
||||
"\n",
|
||||
"Textattack takes in dataset in the form of a list of tuples. The tuple can be in the form of (\"string\", label) or (\"string\", label, label). In this case we will use former one, since we want to create a custom movie review dataset with label 0 representing a positive review, and label 1 representing a negative review.\n",
|
||||
"\n",
|
||||
"For simplicity, I created a dataset consisting of 4 reviews, the 1st and 4th review have \"correct\" labels, while the 2nd and 3rd review have \"incorrect\" labels. We will see how this impacts perturbation later in this tutorial.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "nk_MUu5Duf1V"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# dataset: An iterable of (text, ground_truth_output) pairs.\n",
|
||||
"#0 means the review is negative\n",
|
||||
"#1 means the review is positive\n",
|
||||
"custom_dataset = [\n",
|
||||
" ('I hate this movie', 0), #A negative comment, with a negative label\n",
|
||||
" ('I hate this movie', 1), #A negative comment, with a positive label\n",
|
||||
" ('I love this movie', 0), #A positive comment, with a negative label\n",
|
||||
" ('I love this movie', 1), #A positive comment, with a positive label\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ijVmi6PbiUYZ"
|
||||
},
|
||||
"source": [
|
||||
"## **Creating An Attack**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "-iEH_hf6iMEw",
|
||||
"outputId": "0c836c5b-ddd5-414d-f73d-da04067054d8"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"textattack: Unknown if model of class <class 'transformers.models.albert.modeling_albert.AlbertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack import Attack\n",
|
||||
"from textattack.search_methods import GreedySearch\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n",
|
||||
"from textattack.goal_functions import UntargetedClassification\n",
|
||||
"from textattack.transformations import WordSwapEmbedding\n",
|
||||
"from textattack.constraints.pre_transformation import RepeatModification\n",
|
||||
"from textattack.constraints.pre_transformation import StopwordModification\n",
|
||||
"\n",
|
||||
"# We'll use untargeted classification as the goal function.\n",
|
||||
"goal_function = UntargetedClassification(model_wrapper)\n",
|
||||
    "# We'll use our WordSwapEmbedding as the attack transformation.\n",
|
||||
"transformation = WordSwapEmbedding() \n",
|
||||
"# We'll constrain modification of already modified indices and stopwords\n",
|
||||
"constraints = [RepeatModification(),\n",
|
||||
" StopwordModification()]\n",
|
||||
"# We'll use the Greedy search method\n",
|
||||
"search_method = GreedySearch()\n",
|
||||
"# Now, let's make the attack from the 4 components:\n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "4hUA8ntnfJzH"
|
||||
},
|
||||
"source": [
|
||||
"## **Attack Results With Custom Dataset**\n",
|
||||
"\n",
|
||||
"As you can see, the attack fools the model by changing a few words in the 1st and 4th review.\n",
|
||||
"\n",
|
||||
"The attack skipped the 2nd and and 3rd review because since it they were labeled incorrectly, they managed to fool the model without any modifications."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "-ivoHEOXfIfN",
|
||||
"outputId": "9ec660b6-44fc-4354-9dd1-1641b6f4c986"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[91m0 (99%)\u001b[0m --> \u001b[92m1 (81%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mI\u001b[0m \u001b[91mhate\u001b[0m this \u001b[91mmovie\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mdid\u001b[0m \u001b[92mhateful\u001b[0m this \u001b[92mfootage\u001b[0m\n",
|
||||
"\u001b[91m0 (99%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"I hate this movie\n",
|
||||
"\u001b[92m1 (96%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"I love this movie\n",
|
||||
"\u001b[92m1 (96%)\u001b[0m --> \u001b[91m0 (99%)\u001b[0m\n",
|
||||
"\n",
|
||||
"I \u001b[92mlove\u001b[0m this movie\n",
|
||||
"\n",
|
||||
"I \u001b[91miove\u001b[0m this movie\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for example, label in custom_dataset:\n",
|
||||
" result = attack.attack(example, label)\n",
|
||||
" print(result.__str__(color_method='ansi'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "foFZmk8vY5z0"
|
||||
},
|
||||
"source": [
|
||||
"## **Creating A Custom Word Embedding**\n",
|
||||
"\n",
|
||||
"In textattack, a pre-trained word embedding is necessary in transformation in order to find synonym replacements, and in constraints to check the semantic validity of the transformation. To use custom pre-trained word embeddings, you can either create a new class that inherits the AbstractWordEmbedding class, or use the WordEmbedding class which takes in 4 parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"id": "owj_jMHRxEF5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.shared import WordEmbedding\n",
|
||||
"\n",
|
||||
"embedding_matrix = [[1.0], [2.0], [3.0], [4.0]] #2-D array of shape N x D where N represents size of vocab and D is the dimension of embedding vectors.\n",
|
||||
"word2index = {\"hate\":0, \"despise\":1, \"like\":2, \"love\":3} #dictionary that maps word to its index with in the embedding matrix.\n",
|
||||
"index2word = {0:\"hate\", 1: \"despise\", 2:\"like\", 3:\"love\"} #dictionary that maps index to its word.\n",
|
||||
"nn_matrix = [[0, 1, 2, 3], [1, 0, 2, 3], [2, 1, 3, 0], [3, 2, 1, 0]] #2-D integer array of shape N x K where N represents size of vocab and K is the top-K nearest neighbours.\n",
|
||||
"\n",
|
||||
"embedding = WordEmbedding(embedding_matrix, word2index, index2word, nn_matrix)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "s9ZEV_ykhmBn"
|
||||
},
|
||||
"source": [
|
||||
"## **Attack Results With Custom Dataset and Word Embedding**\n",
|
||||
"\n",
|
||||
"Now if we run the attack again with the custom word embedding, you will notice the modifications are limited to the vocab provided by our custom word embedding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "gZ98UZ6I5sIn",
|
||||
"outputId": "59a653cb-85cb-46b5-d81b-c1a05ebe8a3e"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[91m0 (99%)\u001b[0m --> \u001b[92m1 (98%)\u001b[0m\n",
|
||||
"\n",
|
||||
"I \u001b[91mhate\u001b[0m this movie\n",
|
||||
"\n",
|
||||
"I \u001b[92mlike\u001b[0m this movie\n",
|
||||
"\u001b[91m0 (99%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"I hate this movie\n",
|
||||
"\u001b[92m1 (96%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"I love this movie\n",
|
||||
"\u001b[92m1 (96%)\u001b[0m --> \u001b[91m0 (99%)\u001b[0m\n",
|
||||
"\n",
|
||||
"I \u001b[92mlove\u001b[0m this movie\n",
|
||||
"\n",
|
||||
"I \u001b[91mdespise\u001b[0m this movie\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.attack_results import SuccessfulAttackResult\n",
|
||||
"\n",
|
||||
"transformation = WordSwapEmbedding(3, embedding) \n",
|
||||
"\n",
|
||||
"attack = Attack(goal_function, constraints, transformation, search_method)\n",
|
||||
"\n",
|
||||
"for example, label in custom_dataset:\n",
|
||||
" result = attack.attack(example, label)\n",
|
||||
" print(result.__str__(color_method='ansi'))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "Custom Data and Embedding with TextAttack.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
2561
docs/2notebook/Example_0_tensorflow.ipynb
Normal file
2126
docs/2notebook/Example_1_sklearn.ipynb
Normal file
364
docs/2notebook/Example_2_allennlp.ipynb
Normal file
@@ -0,0 +1,364 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "JPVBc5ndpFIX"
|
||||
},
|
||||
"source": [
|
||||
"# TextAttack & AllenNLP \n",
|
||||
"\n",
|
||||
"This is an example of testing adversarial attacks from TextAttack on pretrained models provided by AllenNLP. \n",
|
||||
"\n",
|
||||
"In a few lines of code, we load a sentiment analysis model trained on the Stanford Sentiment Treebank and configure it with a TextAttack model wrapper. Then, we initialize the TextBugger attack and run the attack on a few samples from the SST-2 train set.\n",
|
||||
"\n",
|
||||
"For more information on AllenNLP pre-trained models: https://docs.allennlp.org/models/main/\n",
|
||||
"\n",
|
||||
"For more information about the TextBugger attack: https://arxiv.org/abs/1812.05271"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "AyPMGcz0qLfK"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install allennlp allennlp_models > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "_br6Xvsif9SA",
|
||||
"outputId": "224cc851-0e9d-4454-931c-64bd3b7af400"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from allennlp.predictors import Predictor\n",
|
||||
"import allennlp_models.classification\n",
|
||||
"\n",
|
||||
"import textattack\n",
|
||||
"\n",
|
||||
"class AllenNLPModel(textattack.models.wrappers.ModelWrapper):\n",
|
||||
" def __init__(self):\n",
|
||||
" self.model = Predictor.from_path(\"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\")\n",
|
||||
"\n",
|
||||
" def __call__(self, text_input_list):\n",
|
||||
" outputs = []\n",
|
||||
" for text_input in text_input_list:\n",
|
||||
" outputs.append(self.model.predict(sentence=text_input))\n",
|
||||
" # For each output, outputs['logits'] contains the logits where\n",
|
||||
" # index 0 corresponds to the positive and index 1 corresponds \n",
|
||||
" # to the negative score. We reverse the outputs (by reverse slicing,\n",
|
||||
" # [::-1]) so that negative comes first and positive comes second.\n",
|
||||
" return [output['logits'][::-1] for output in outputs]\n",
|
||||
"\n",
|
||||
"model_wrapper = AllenNLPModel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "MDRWI5Psb85g",
|
||||
"outputId": "db7f8f94-0d78-45ea-a7ac-e12167c28365"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Reusing dataset glue (/p/qdata/jy2ma/.cache/textattack/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n",
|
||||
"textattack: Loading \u001b[94mdatasets\u001b[0m dataset \u001b[94mglue\u001b[0m, subset \u001b[94msst2\u001b[0m, split \u001b[94mtrain\u001b[0m.\n",
|
||||
"textattack: Unknown if model of class <class 'allennlp.predictors.text_classifier.TextClassifierPredictor'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n",
|
||||
" 0%| | 0/10 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attack(\n",
|
||||
" (search_method): GreedyWordSwapWIR(\n",
|
||||
" (wir_method): delete\n",
|
||||
" )\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): CompositeTransformation(\n",
|
||||
" (0): WordSwapRandomCharacterInsertion(\n",
|
||||
" (random_one): True\n",
|
||||
" )\n",
|
||||
" (1): WordSwapRandomCharacterDeletion(\n",
|
||||
" (random_one): True\n",
|
||||
" )\n",
|
||||
" (2): WordSwapNeighboringCharacterSwap(\n",
|
||||
" (random_one): True\n",
|
||||
" )\n",
|
||||
" (3): WordSwapHomoglyphSwap\n",
|
||||
" (4): WordSwapEmbedding(\n",
|
||||
" (max_candidates): 5\n",
|
||||
" (embedding): WordEmbedding\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" (constraints): \n",
|
||||
" (0): UniversalSentenceEncoder(\n",
|
||||
" (metric): angular\n",
|
||||
" (threshold): 0.8\n",
|
||||
" (window_size): inf\n",
|
||||
" (skip_text_shorter_than_window): False\n",
|
||||
" (compare_against_original): True\n",
|
||||
" )\n",
|
||||
" (1): RepeatModification\n",
|
||||
" (2): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
") \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using /p/qdata/jy2ma/.cache/textattack to cache modules.\n",
|
||||
"[Succeeded / Failed / Skipped / Total] 1 / 1 / 0 / 2: 20%|██ | 2/10 [00:06<00:27, 3.46s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 1 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (95%)\u001b[0m --> \u001b[92mPositive (93%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mhide\u001b[0m new secretions from the parental units \n",
|
||||
"\n",
|
||||
"\u001b[92mconcealing\u001b[0m new secretions from the parental units \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------- Result 2 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (96%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"contains no wit , only labored gags \n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 1 / 2 / 1 / 4: 40%|████ | 4/10 [00:07<00:10, 1.80s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 3 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"that loves its characters and communicates something rather beautiful about human nature \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------- Result 4 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (82%)\u001b[0m --> \u001b[37m[SKIPPED]\u001b[0m\n",
|
||||
"\n",
|
||||
"remains utterly satisfied to remain the same throughout \n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 2 / 2 / 1 / 5: 50%|█████ | 5/10 [00:07<00:07, 1.52s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 5 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (98%)\u001b[0m --> \u001b[92mPositive (52%)\u001b[0m\n",
|
||||
"\n",
|
||||
"on the \u001b[91mworst\u001b[0m \u001b[91mrevenge-of-the-nerds\u001b[0m clichés the filmmakers could \u001b[91mdredge\u001b[0m up \n",
|
||||
"\n",
|
||||
"on the \u001b[92mpire\u001b[0m \u001b[92mreveng-of-the-nerds\u001b[0m clichés the filmmakers could \u001b[92mdragging\u001b[0m up \n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 2 / 3 / 1 / 6: 60%|██████ | 6/10 [00:07<00:05, 1.32s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 6 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (99%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"that 's far too tragic to merit such superficial treatment \n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 3 / 4 / 1 / 8: 80%|████████ | 8/10 [00:09<00:02, 1.13s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 7 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (98%)\u001b[0m --> \u001b[91mNegative (62%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mdemonstrates\u001b[0m that the \u001b[92mdirector\u001b[0m of such \u001b[92mhollywood\u001b[0m blockbusters as patriot games can still \u001b[92mturn\u001b[0m out a \u001b[92msmall\u001b[0m , personal \u001b[92mfilm\u001b[0m with an emotional \u001b[92mwallop\u001b[0m . \n",
|
||||
"\n",
|
||||
"\u001b[91mshows\u001b[0m that the \u001b[91mdirectors\u001b[0m of such \u001b[91mtinseltown\u001b[0m blockbusters as patriot games can still \u001b[91mturning\u001b[0m out a \u001b[91mtiny\u001b[0m , personal \u001b[91mmovies\u001b[0m with an emotional \u001b[91mbatting\u001b[0m . \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------- Result 8 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (90%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"of saucy \n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 4 / 5 / 1 / 10: 100%|██████████| 10/10 [00:09<00:00, 1.06it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 9 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (99%)\u001b[0m --> \u001b[91m[FAILED]\u001b[0m\n",
|
||||
"\n",
|
||||
"a depressed fifteen-year-old 's suicidal poetry \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------- Result 10 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (79%)\u001b[0m --> \u001b[91mNegative (65%)\u001b[0m\n",
|
||||
"\n",
|
||||
"are more \u001b[92mdeeply\u001b[0m thought through than in most ` right-thinking ' films \n",
|
||||
"\n",
|
||||
"are more \u001b[91mseriously\u001b[0m thought through than in most ` right-thinking ' films \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Attack Results | |\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Number of successful attacks: | 4 |\n",
|
||||
"| Number of failed attacks: | 5 |\n",
|
||||
"| Number of skipped attacks: | 1 |\n",
|
||||
"| Original accuracy: | 90.0% |\n",
|
||||
"| Accuracy under attack: | 50.0% |\n",
|
||||
"| Attack success rate: | 44.44% |\n",
|
||||
"| Average perturbed word %: | 20.95% |\n",
|
||||
"| Average num. words per input: | 9.5 |\n",
|
||||
"| Avg num queries: | 34.67 |\n",
|
||||
"+-------------------------------+--------+\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fb68d0028b0>,\n",
|
||||
" <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7fb685f0dbb0>,\n",
|
||||
" <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7fb689188040>,\n",
|
||||
" <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x7fb695031250>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fb695031760>,\n",
|
||||
" <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7fb694b7abb0>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fb67cd36df0>,\n",
|
||||
" <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7fb694b7a880>,\n",
|
||||
" <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x7fb694b7a790>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fb689ab1be0>]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.attack_recipes import TextBuggerLi2018\n",
|
||||
"from textattack.attacker import Attacker\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset(\"glue\", \"sst2\", \"train\")\n",
|
||||
"attack = TextBuggerLi2018.build(model_wrapper)\n",
|
||||
"\n",
|
||||
"attacker = Attacker(attack, dataset)\n",
|
||||
"attacker.attack_dataset()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "[TextAttack] Model Example: AllenNLP",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
2091
docs/2notebook/Example_3_Keras.ipynb
Normal file
630
docs/2notebook/Example_4_CamemBERT.ipynb
Normal file
@@ -0,0 +1,630 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "koVcufVBD9uv"
|
||||
},
|
||||
"source": [
|
||||
"# Multi-language attacks\n",
|
||||
"\n",
|
||||
"TextAttack's four-component framework makes it trivial to run attacks in other languages. In this tutorial, we:\n",
|
||||
"\n",
|
||||
"- Create a model wrapper around Transformers [pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) \n",
|
||||
"- Initialize a pre-trained [CamemBERT](https://camembert-model.fr/) model for sentiment classification\n",
|
||||
"- Load the AlloCiné movie review sentiment classification dataset (from [`datasets`](https://github.com/huggingface/datasets/))\n",
|
||||
"- Load the `pwws` recipe, but use French synonyms from multilingual WordNet (instead of English synonyms)\n",
|
||||
"- Run an adversarial attack on a French language model\n",
|
||||
"\n",
|
||||
"Voilà!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Abd2C3zJD9u4"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_4_CamemBERT.ipynb)\n",
|
||||
"\n",
|
||||
"[](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_4_CamemBERT.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"id": "-fnSUl8ND9u5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from textattack.attack_recipes import PWWSRen2019\n",
|
||||
"from textattack.datasets import HuggingFaceDataset\n",
|
||||
"from textattack.models.wrappers import ModelWrapper\n",
|
||||
"from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline\n",
|
||||
"from textattack import Attacker\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Quiet TensorFlow.\n",
|
||||
"import os\n",
|
||||
"if \"TF_CPP_MIN_LOG_LEVEL\" not in os.environ:\n",
|
||||
" os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class HuggingFaceSentimentAnalysisPipelineWrapper(ModelWrapper):\n",
|
||||
" \"\"\" Transformers sentiment analysis pipeline returns a list of responses\n",
|
||||
" like \n",
|
||||
" \n",
|
||||
" [{'label': 'POSITIVE', 'score': 0.7817379832267761}]\n",
|
||||
" \n",
|
||||
" We need to convert that to a format TextAttack understands, like\n",
|
||||
" \n",
|
||||
" [[0.218262017, 0.7817379832267761]\n",
|
||||
" \"\"\"\n",
|
||||
" def __init__(self, model):\n",
|
||||
" self.model = model#pipeline = pipeline\n",
|
||||
" def __call__(self, text_inputs):\n",
|
||||
" raw_outputs = self.model(text_inputs)\n",
|
||||
" outputs = []\n",
|
||||
" for output in raw_outputs:\n",
|
||||
" score = output['score']\n",
|
||||
" if output['label'] == 'POSITIVE':\n",
|
||||
" outputs.append([1-score, score])\n",
|
||||
" else:\n",
|
||||
" outputs.append([score, 1-score])\n",
|
||||
" return np.array(outputs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "i2WPtwO9D9u6",
|
||||
"outputId": "2f5e8fab-1047-417d-c90c-b9238b2886a4",
|
||||
"scrolled": true,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "99f2f220b210403eaaf82004365bb30b",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=445132512.0, style=ProgressStyle(descri…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"All model checkpoint layers were used when initializing TFCamembertForSequenceClassification.\n",
|
||||
"\n",
|
||||
"All the layers of TFCamembertForSequenceClassification were initialized from the model checkpoint at tblard/tf-allocine.\n",
|
||||
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFCamembertForSequenceClassification for predictions without further training.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "584bb087e19b46c3a97a69f7bdd25c8d",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=810912.0, style=ProgressStyle(descripti…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8ea1879230924bf985f07737c7979d8a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=210.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "d27e420e82004ebd8628adbc5ed4e883",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2.0, style=ProgressStyle(description_wi…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"textattack: Unknown if model of class <class 'transformers.pipelines.text_classification.TextClassificationPipeline'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "c01c2a4b2ef949018c400cfbbd8ab96c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1167.0, style=ProgressStyle(description…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "1606f0a088f444b48e36a7c12156aa12",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=813.0, style=ProgressStyle(description_…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Downloading and preparing dataset allocine_dataset/allocine (download: 63.54 MiB, generated: 109.12 MiB, post-processed: Unknown size, total: 172.66 MiB) to /p/qdata/jy2ma/.cache/textattack/datasets/allocine_dataset/allocine/1.0.0/d7a2c05d4ab7254d411130aa8b47ae2a094af074e120fc8d46ec0beed909e896...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "b90894c25b9841fc9e3e458b6a82ddd9",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=66625305.0, style=ProgressStyle(descrip…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"textattack: Loading \u001b[94mdatasets\u001b[0m dataset \u001b[94mallocine\u001b[0m, split \u001b[94mtest\u001b[0m.\n",
|
||||
" 0%| | 0/10 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset allocine_dataset downloaded and prepared to /p/qdata/jy2ma/.cache/textattack/datasets/allocine_dataset/allocine/1.0.0/d7a2c05d4ab7254d411130aa8b47ae2a094af074e120fc8d46ec0beed909e896. Subsequent calls will reuse this data.\n",
|
||||
"Attack(\n",
|
||||
" (search_method): GreedyWordSwapWIR(\n",
|
||||
" (wir_method): weighted-saliency\n",
|
||||
" )\n",
|
||||
" (goal_function): UntargetedClassification\n",
|
||||
" (transformation): WordSwapWordNet\n",
|
||||
" (constraints): \n",
|
||||
" (0): RepeatModification\n",
|
||||
" (1): StopwordModification\n",
|
||||
" (is_black_box): True\n",
|
||||
") \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1: 10%|█ | 1/10 [00:18<02:42, 18.01s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 1 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (53%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mMagnifique\u001b[0m épopée, une \u001b[92mbelle\u001b[0m \u001b[92mhistoire\u001b[0m, touchante avec des acteurs \u001b[92mqui\u001b[0m interprètent \u001b[92mtrès\u001b[0m \u001b[92mbien\u001b[0m leur rôles (Mel Gibson, Heath Ledger, Jason Isaacs...), le genre \u001b[92mde\u001b[0m \u001b[92mfilm\u001b[0m \u001b[92mqui\u001b[0m \u001b[92mse\u001b[0m savoure \u001b[92men\u001b[0m \u001b[92mfamille\u001b[0m! :)\n",
|
||||
"\n",
|
||||
"\u001b[91mbonnard\u001b[0m épopée, une \u001b[91mbeau\u001b[0m \u001b[91mbobard\u001b[0m, touchante avec des acteurs \u001b[91mlequel\u001b[0m interprètent \u001b[91mmême\u001b[0m \u001b[91macceptablement\u001b[0m leur rôles (Mel Gibson, Heath Ledger, Jason Isaacs...), le genre \u001b[91mgale\u001b[0m \u001b[91mpellicule\u001b[0m \u001b[91mOMS\u001b[0m \u001b[91mConcepteur\u001b[0m savoure \u001b[91mun\u001b[0m \u001b[91msyndicat\u001b[0m! :)\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2: 20%|██ | 2/10 [00:57<03:50, 28.86s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 2 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (94%)\u001b[0m --> \u001b[92mPositive (91%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Je n'ai pas aimé mais pourtant je lui mets \u001b[91m2\u001b[0m étoiles car l'expérience est louable. Rien de conventionnel ici. Une visite E.T. mais jonchée d'idées /- originales. Le soucis, tout ceci avait-il vraiment sa place dans un film de S.F. tirant sur l'horreur ? Voici un film qui, à l'inverse de tant d'autres qui y ont droit, mériterait peut-être un remake.\n",
|
||||
"\n",
|
||||
"Je n'ai pas aimé mais pourtant je lui mets \u001b[92m4\u001b[0m étoiles car l'expérience est louable. Rien de conventionnel ici. Une visite E.T. mais jonchée d'idées /- originales. Le soucis, tout ceci avait-il vraiment sa place dans un film de S.F. tirant sur l'horreur ? Voici un film qui, à l'inverse de tant d'autres qui y ont droit, mériterait peut-être un remake.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3: 30%|███ | 3/10 [00:59<02:18, 19.74s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 3 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (85%)\u001b[0m --> \u001b[91mNegative (91%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Un \u001b[92mdessin\u001b[0m animé qui brille par sa féerie et ses chansons.\n",
|
||||
"\n",
|
||||
"Un \u001b[91mbrouillon\u001b[0m animé qui brille par sa féerie et ses chansons.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4: 40%|████ | 4/10 [01:09<01:44, 17.37s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 4 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (80%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[91mSi\u001b[0m c'est là le renouveau du cinéma français, c'est tout \u001b[91mde\u001b[0m même foutrement chiant. \u001b[91mSi\u001b[0m l'objet est \u001b[91mtrès\u001b[0m stylisé et la tension palpable, le film paraît \u001b[91mplutôt\u001b[0m \u001b[91mcreux\u001b[0m.\n",
|
||||
"\n",
|
||||
"\u001b[92maussi\u001b[0m c'est là le renouveau du cinéma français, c'est tout \u001b[92mabolir\u001b[0m même foutrement chiant. \u001b[92mtellement\u001b[0m l'objet est \u001b[92mprodigieusement\u001b[0m stylisé et la tension palpable, le film paraît \u001b[92mpeu\u001b[0m \u001b[92mtrou\u001b[0m.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 5 / 0 / 0 / 5: 50%|█████ | 5/10 [01:15<01:15, 15.03s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 5 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (51%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Et \u001b[91mpourtant\u001b[0m on s’\u001b[91men\u001b[0m Doutait !\u001b[91mSecond\u001b[0m \u001b[91mvolet\u001b[0m \u001b[91mtrès\u001b[0m \u001b[91mmauvais\u001b[0m, sans \u001b[91mfraîcheur\u001b[0m et particulièrement lourdingue. Quel \u001b[91mdommage\u001b[0m.\n",
|
||||
"\n",
|
||||
"Et \u001b[92mfin\u001b[0m on s’\u001b[92mpostérieurement\u001b[0m Doutait !\u001b[92mmoment\u001b[0m \u001b[92mchapitre\u001b[0m \u001b[92mincroyablement\u001b[0m \u001b[92mdifficile\u001b[0m, sans \u001b[92mimpudence\u001b[0m et particulièrement lourdingue. Quel \u001b[92mprix\u001b[0m.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 6 / 0 / 0 / 6: 60%|██████ | 6/10 [23:02<15:21, 230.43s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 6 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (50%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Vous reprendrez bien un peu d'été ? Ce film je le voyais comme un mélange de Rohmer et de Rozier, un film de vacances, j'adore ça, un truc beau et pur qui dit des choses sur la vie, l'amour, les filles, les vacances. Un film qui se regarde en sirotant une boisson fraîche en écoutant les grillons ! Sauf qu'en fait \u001b[92mnon\u001b[0m ! On a un film foutraque au \u001b[92mpossible\u001b[0m qui reprend les codes justement de Rohmer voir Godard, enfin la Nouvelle Vague en général dans sa première partie (jusqu'à même finir sur une partie qui ressemblerait à du Kusturica), mais en beaucoup plus léger et décalé. Le film n'en a rien à foutre de rien, il ose tout, n'a peur de rien et ça c'est \u001b[92mbon\u001b[0m. C'est sans doute le film le plus \u001b[92mdrôle\u001b[0m de 2013, mais tout \u001b[92msimplement\u001b[0m l'un des meilleurs tout \u001b[92mcourt\u001b[0m. Le film qui nous sort des dialogues qui pourraient sortir d'un mauvais Godard (oxymore) sur un ton what the fuckesque… raconte des anecdotes débiles au souhait face caméra… et pourtant, il y a quelque chose dans ce film survolté. Il y a du beau. Ces scènes dans la neige, c'est tendre, c'est beau, ça tranche avec le reste et ça donne du coeur à l'amourette, ça aide à le faire paraître comme une évidence. Et puis on a cette scène que je trouve sublime qui m'a profondément émue, cette scène où le docteur Placenta devient tout à coup sérieux et parle de cette date où chaque année il repense à cette fille et au fait qu'une année de plus le sépare d'elle. C'est horrible comme concept et pourtant tellement vrai et sincère. C'est vraiment \u001b[92mtroublant\u001b[0m. Et encore une fois la scène d'avant est très drôle et là, un petit moment de douceur avant de repartir sur le train effréné ! Et il y a ces fesses… Et le plus beau c'est qu'à la fin Vimala Pons a un petit air d'Anna Karina ! Film fout, étonnant, percutant, drôle, beau, triste ! C'est foutrement cool !\n",
|
||||
"\n",
|
||||
"Vous reprendrez bien un peu d'été ? Ce film je le voyais comme un mélange de Rohmer et de Rozier, un film de vacances, j'adore ça, un truc beau et pur qui dit des choses sur la vie, l'amour, les filles, les vacances. Un film qui se regarde en sirotant une boisson fraîche en écoutant les grillons ! Sauf qu'en fait \u001b[91mniet\u001b[0m ! On a un film foutraque au \u001b[91mexécutable\u001b[0m qui reprend les codes justement de Rohmer voir Godard, enfin la Nouvelle Vague en général dans sa première partie (jusqu'à même finir sur une partie qui ressemblerait à du Kusturica), mais en beaucoup plus léger et décalé. Le film n'en a rien à foutre de rien, il ose tout, n'a peur de rien et ça c'est \u001b[91mlisse\u001b[0m. C'est sans doute le film le plus \u001b[91mridicule\u001b[0m de 2013, mais tout \u001b[91msauf\u001b[0m l'un des meilleurs tout \u001b[91minsuffisant\u001b[0m. Le film qui nous sort des dialogues qui pourraient sortir d'un mauvais Godard (oxymore) sur un ton what the fuckesque… raconte des anecdotes débiles au souhait face caméra… et pourtant, il y a quelque chose dans ce film survolté. Il y a du beau. Ces scènes dans la neige, c'est tendre, c'est beau, ça tranche avec le reste et ça donne du coeur à l'amourette, ça aide à le faire paraître comme une évidence. Et puis on a cette scène que je trouve sublime qui m'a profondément émue, cette scène où le docteur Placenta devient tout à coup sérieux et parle de cette date où chaque année il repense à cette fille et au fait qu'une année de plus le sépare d'elle. C'est horrible comme concept et pourtant tellement vrai et sincère. C'est vraiment \u001b[91mennuyeux\u001b[0m. Et encore une fois la scène d'avant est très drôle et là, un petit moment de douceur avant de repartir sur le train effréné ! Et il y a ces fesses… Et le plus beau c'est qu'à la fin Vimala Pons a un petit air d'Anna Karina ! Film fout, étonnant, percutant, drôle, beau, triste ! C'est foutrement cool !\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 7 / 0 / 0 / 7: 70%|███████ | 7/10 [23:19<09:59, 199.87s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 7 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (55%)\u001b[0m --> \u001b[91mNegative (88%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Bon c'est \u001b[92mpas\u001b[0m un grand film mais on passe un bon moment avec ses ado à la recherche de l'orgasme. Y'a que les Allemands pour faire des films aussi barge ! :-)\n",
|
||||
"\n",
|
||||
"Bon c'est \u001b[91mniet\u001b[0m un grand film mais on passe un bon moment avec ses ado à la recherche de l'orgasme. Y'a que les Allemands pour faire des films aussi barge ! :-)\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 8 / 0 / 0 / 8: 80%|████████ | 8/10 [24:03<06:00, 180.39s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 8 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (97%)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[92mTerrible\u001b[0m histoire que ces êtres sans amour, ces êtres lisses et frustres qui passent à côté de leur vie. Quelle leçon Monsieur Brizé! Vous avez tout dit, tout filmé jusqu'au moindre détail. \u001b[92mtout\u001b[0m est beau et terrifiant jusqu'à la scène finale qui nous liquéfie, un Vincent Lindon regardant la vie fixement sans oser la toucher ni la prendre dans ses bras, une Hélène Vincent qui attend, qui attend... Mon Dieu Monsieur Brizé, continuez....\n",
|
||||
"\n",
|
||||
"\u001b[91mméprisable\u001b[0m histoire que ces êtres sans amour, ces êtres lisses et frustres qui passent à côté de leur vie. Quelle leçon Monsieur Brizé! Vous avez tout dit, tout filmé jusqu'au moindre détail. \u001b[91mrien\u001b[0m est beau et terrifiant jusqu'à la scène finale qui nous liquéfie, un Vincent Lindon regardant la vie fixement sans oser la toucher ni la prendre dans ses bras, une Hélène Vincent qui attend, qui attend... Mon Dieu Monsieur Brizé, continuez....\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 9 / 0 / 0 / 9: 90%|█████████ | 9/10 [24:13<02:41, 161.53s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 9 ---------------------------------------------\n",
|
||||
"\u001b[92mPositive (100%)\u001b[0m --> \u001b[91mNegative (54%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Un \u001b[92mtrès\u001b[0m joli \u001b[92mfilm\u001b[0m, qui ressemble à un téléfilm mais qui a le mérite d'être émouvant et proche de ses personnages. Magimel est \u001b[92mvraiment\u001b[0m très \u001b[92mbon\u001b[0m et l'histoire est touchante\n",
|
||||
"\n",
|
||||
"Un \u001b[91mplus\u001b[0m joli \u001b[91mfeuil\u001b[0m, qui ressemble à un téléfilm mais qui a le mérite d'être émouvant et proche de ses personnages. Magimel est \u001b[91mabsolument\u001b[0m très \u001b[91mlisse\u001b[0m et l'histoire est touchante\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Succeeded / Failed / Skipped / Total] 10 / 0 / 0 / 10: 100%|██████████| 10/10 [28:30<00:00, 171.04s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------- Result 10 ---------------------------------------------\n",
|
||||
"\u001b[91mNegative (100%)\u001b[0m --> \u001b[92mPositive (51%)\u001b[0m\n",
|
||||
"\n",
|
||||
"Mais comment certaines personnes ont pus lui \u001b[91mmettre\u001b[0m 5/5 et \u001b[91mdonc\u001b[0m dire indirectement \u001b[91mque\u001b[0m c'est un chef-d'œuvre ??? Et comment a-t-il fait pour sortir au cinéma et non en DTV ??? C'est pas un film que l'on regarde dans une salle obscur ça, pour moi ça ressemble plus à un téléfilm que l'on visionne un dimanche pluvieux \u001b[91mpour\u001b[0m que les enfants arrête de nous casser les pieds ! \u001b[91mEt\u001b[0m puis, le \u001b[91mscénario\u001b[0m avec le chien que devient le meilleur ami du gosse, c'est du vu et revu (un cliché) ! L'acteur principal est quant à lui aussi agaçant que son personnage ! Les suites ont l'air \u001b[91maussi\u001b[0m mauvaises que Buddy Star des Paniers étant donné que l'histoire est quasiment la même (pour moi ça c'est pas des suites, c'est \u001b[91mplutôt\u001b[0m une succession \u001b[91mde\u001b[0m petits reboots inutiles). \u001b[91mReste\u001b[0m regardable pour les moins de 10 ans (et encore, même moi à 6 ans, je n'aurais pas aimé).\n",
|
||||
"\n",
|
||||
"Mais comment certaines personnes ont pus lui \u001b[92mformuler\u001b[0m 5/5 et \u001b[92md'où\u001b[0m dire indirectement \u001b[92mcar\u001b[0m c'est un chef-d'œuvre ??? Et comment a-t-il fait pour sortir au cinéma et non en DTV ??? C'est pas un film que l'on regarde dans une salle obscur ça, pour moi ça ressemble plus à un téléfilm que l'on visionne un dimanche pluvieux \u001b[92mat\u001b[0m que les enfants arrête de nous casser les pieds ! \u001b[92mpoids\u001b[0m puis, le \u001b[92mfigure\u001b[0m avec le chien que devient le meilleur ami du gosse, c'est du vu et revu (un cliché) ! L'acteur principal est quant à lui aussi agaçant que son personnage ! Les suites ont l'air \u001b[92mmaintenant\u001b[0m mauvaises que Buddy Star des Paniers étant donné que l'histoire est quasiment la même (pour moi ça c'est pas des suites, c'est \u001b[92mpeu\u001b[0m une succession \u001b[92mdu\u001b[0m petits reboots inutiles). \u001b[92mrelique\u001b[0m regardable pour les moins de 10 ans (et encore, même moi à 6 ans, je n'aurais pas aimé).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Attack Results | |\n",
|
||||
"+-------------------------------+--------+\n",
|
||||
"| Number of successful attacks: | 10 |\n",
|
||||
"| Number of failed attacks: | 0 |\n",
|
||||
"| Number of skipped attacks: | 0 |\n",
|
||||
"| Original accuracy: | 100.0% |\n",
|
||||
"| Accuracy under attack: | 0.0% |\n",
|
||||
"| Attack success rate: | 100.0% |\n",
|
||||
"| Average perturbed word %: | 14.73% |\n",
|
||||
"| Average num. words per input: | 76.4 |\n",
|
||||
"| Avg num queries: | 904.4 |\n",
|
||||
"+-------------------------------+--------+\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d3cb55b80>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d43fc5d90>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d39840df0>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d3241a160>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d398405b0>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d47ce17f0>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d3db79040>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d3f8e3730>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9d33152f10>,\n",
|
||||
" <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7f9e5d43aeb0>]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create the model: a French sentiment analysis model.\n",
|
||||
"# see https://github.com/TheophileBlard/french-sentiment-analysis-with-bert\n",
|
||||
"model = TFAutoModelForSequenceClassification.from_pretrained(\"tblard/tf-allocine\")\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(\"tblard/tf-allocine\")\n",
|
||||
"pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)\n",
|
||||
"\n",
|
||||
"model_wrapper = HuggingFaceSentimentAnalysisPipelineWrapper(pipeline)\n",
|
||||
"\n",
|
||||
"# Create the recipe: PWWS uses a WordNet transformation.\n",
|
||||
"recipe = PWWSRen2019.build(model_wrapper)\n",
|
||||
"#\n",
|
||||
"# WordNet defaults to english. Set the default language to French ('fra')\n",
|
||||
"#\n",
|
||||
"# See \"Building a free French wordnet from multilingual resources\", \n",
|
||||
"# E. L. R. A. (ELRA) (ed.), \n",
|
||||
"# Proceedings of the Sixth International Language Resources and Evaluation (LREC’08).\n",
|
||||
"recipe.transformation.language = 'fra'\n",
|
||||
"\n",
|
||||
"dataset = HuggingFaceDataset('allocine', split='test')\n",
|
||||
"\n",
|
||||
"attacker = Attacker(recipe, dataset)\n",
|
||||
"attacker.attack_dataset()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "Example_4_CamemBERT.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
3383
docs/2notebook/Example_5_Explain_BERT.ipynb
Normal file
132
docs/3recipes/attack_recipes.rst
Normal file
@@ -0,0 +1,132 @@
|
||||
Attack Recipes API
|
||||
==================
|
||||
|
||||
We provide a number of pre-built attack recipes, which correspond to attacks from the literature. To run an attack recipe from the command line, run::
|
||||
|
||||
textattack attack --recipe [recipe_name]
|
||||
|
||||
To initialize an attack in Python script, use::
|
||||
|
||||
<recipe name>.build(model_wrapper)
|
||||
|
||||
For example, ``attack = InputReductionFeng2018.build(model)`` creates `attack`, an object of type ``Attack`` with the goal function, transformation, constraints, and search method specified in that paper. This object can then be used just like any other attack; for example, by calling ``attack.attack_dataset``.
|
||||
|
||||
TextAttack supports the following attack recipes (each recipe's documentation contains a link to the corresponding paper):
|
||||
|
||||
.. contents:: :local:
|
||||
|
||||
|
||||
Attacks on classification models
|
||||
#################################
|
||||
|
||||
|
||||
1. Alzantot Genetic Algorithm (Generating Natural Language Adversarial Examples)
|
||||
2. Faster Alzantot Genetic Algorithm (Certified Robustness to Adversarial Word Substitutions)
|
||||
3. BAE (BAE: BERT-Based Adversarial Examples)
|
||||
4. BERT-Attack: (BERT-Attack: Adversarial Attack Against BERT Using BERT)
|
||||
5. CheckList: (Beyond Accuracy: Behavioral Testing of NLP models with CheckList)
|
||||
6. DeepWordBug (Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers)
|
||||
7. HotFlip (HotFlip: White-Box Adversarial Examples for Text Classification)
|
||||
8. Improved Genetic Algorithm (Natural Language Adversarial Attacks and Defenses in Word Level)
|
||||
9. Input Reduction (Pathologies of Neural Models Make Interpretations Difficult)
|
||||
10. Kuleshov (Adversarial Examples for Natural Language Classification Problems)
|
||||
11. Particle Swarm Optimization (Word-level Textual Adversarial Attacking as Combinatorial Optimization)
|
||||
12. PWWS (Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency)
|
||||
13. TextFooler (Is BERT Really Robust? A Strong Baseline for Natural Language Attack on Text Classification and Entailment)
|
||||
14. TextBugger (TextBugger: Generating Adversarial Text Against Real-world Applications)
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.faster_genetic_algorithm_jia_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bae_garg_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bert_attack_li_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.checklist_ribeiro_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.iga_wang_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.input_reduction_feng_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.kuleshov_2017
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pso_zang_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pwws_ren_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textbugger_li_2018
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
Attacks on sequence-to-sequence models
|
||||
############################################
|
||||
|
||||
15. MORPHEUS (It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations)
|
||||
16. Seq2Sick (Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples)
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.morpheus_tan_2020
|
||||
:members:
|
||||
:noindex:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
|
||||
:members:
|
||||
:noindex:
|
||||
242
docs/3recipes/attack_recipes_cmd.md
Normal file
@@ -0,0 +1,242 @@
|
||||
# Attack Recipes CommandLine Use
|
||||
|
||||
We provide a number of pre-built attack recipes, which correspond to attacks from the literature.
|
||||
|
||||
|
||||
## Help: `textattack --help`
|
||||
|
||||
TextAttack's main features can all be accessed via the `textattack` command. Two very
|
||||
common commands are `textattack attack <args>`, and `textattack augment <args>`. You can see more
|
||||
information about all commands using
|
||||
```bash
|
||||
textattack --help
|
||||
```
|
||||
or a specific command using, for example,
|
||||
```bash
|
||||
textattack attack --help
|
||||
```
|
||||
|
||||
The [`examples/`](https://github.com/QData/TextAttack/tree/master/examples) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file.
|
||||
|
||||
|
||||
The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.
|
||||
|
||||
## Running Attacks: `textattack attack --help`
|
||||
|
||||
The easiest way to try out an attack is via the command-line interface, `textattack attack`.
|
||||
|
||||
> **Tip:** If your machine has multiple GPUs, you can distribute the attack across them using the `--parallel` option. For some attacks, this can really help performance.
|
||||
|
||||
Here are some concrete examples:
|
||||
|
||||
*TextFooler on BERT trained on the MR sentiment classification dataset*:
|
||||
```bash
|
||||
textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100
|
||||
```
|
||||
|
||||
*DeepWordBug on DistilBERT trained on the Quora Question Pairs paraphrase identification dataset*:
|
||||
```bash
|
||||
textattack attack --model distilbert-base-uncased-qqp --recipe deepwordbug --num-examples 100
|
||||
```
|
||||
|
||||
*Beam search with beam width 4 and word embedding transformation and untargeted goal function on an LSTM*:
|
||||
```bash
|
||||
textattack attack --model lstm-mr --num-examples 20 \
|
||||
--search-method beam-search^beam_width=4 --transformation word-swap-embedding \
|
||||
--constraints repeat stopword max-words-perturbed^max_num_words=2 embedding^min_cos_sim=0.8 part-of-speech \
|
||||
--goal-function untargeted-classification
|
||||
```
|
||||
|
||||
> **Tip:** Instead of specifying a dataset and number of examples, you can pass `--interactive` to attack samples inputted by the user.
|
||||
|
||||
## Attacks and Papers Implemented ("Attack Recipes"): `textattack attack --recipe [recipe_name]`
|
||||
|
||||
We include attack recipes which implement attacks from the literature. You can list attack recipes using `textattack list attack-recipes`.
|
||||
|
||||
To run an attack recipe: `textattack attack --recipe [recipe_name]`
|
||||
|
||||
|
||||
<table style="width:100%" border="1">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th><strong>Attack Recipe Name</strong></th>
|
||||
<th><strong>Goal Function</strong></th>
|
||||
<th><strong>Constraints Enforced</strong></th>
|
||||
<th><strong>Transformation</strong></th>
|
||||
<th><strong>Search Method</strong></th>
|
||||
<th><strong>Main Idea</strong></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on classification tasks, like sentiment classification and entailment:<br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bae</code> <span class="citation" data-cites="garg2020bae"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>BERT Masked Token Prediction</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>BERT masked language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)).</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>bert-attack</code> <span class="citation" data-cites="li2020bertattack"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity, Maximum number of words perturbed</sub></td>
|
||||
<td><sub>BERT Masked Token Prediction (with subword expansion)</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub> (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>checklist</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td><sub>checklist distance</sub></td>
|
||||
<td><sub>contract, extend, and substitute named entities</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Invariance testing implemented in CheckList. (["Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>clare</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>RoBERTa Masked Prediction for token swap, insert and merge</sub></td>
|
||||
<td><sub>Greedy</sub></td>
|
||||
<td ><sub>["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>deepwordbug</code> <span class="citation" data-cites="Gao2018BlackBoxGO"></span></td>
|
||||
<td><sub>{Untargeted, Targeted} Classification</sub></td>
|
||||
<td><sub>Levenshtein edit distance</sub></td>
|
||||
<td><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <code>fast-alzantot</code> <span class="citation" data-cites="Alzantot2018GeneratingNL Jia2019CertifiedRT"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Language Model perplexity, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Modified, faster version of the Alzantot et al. genetic algorithm, from (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hotflip</code> (word swap) <span class="citation" data-cites="Ebrahimi2017HotFlipWA"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Word Embedding Cosine Similarity, Part-of-speech match, Number of words perturbed</sub></td>
|
||||
<td><sub>Gradient-Based Word Swap</sub></td>
|
||||
<td><sub>Beam search</sub></td>
|
||||
<td ><sub> (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>iga</code> <span class="citation" data-cites="iga-wang2019natural"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Percentage of words perturbed, Word embedding distance</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Genetic Algorithm</sub></td>
|
||||
<td ><sub>Improved genetic algorithm-based word substitution from (["Natural Language Adversarial Attacks and Defenses in Word Level" (Wang et al., 2019)](https://arxiv.org/abs/1909.06723))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>input-reduction</code> <span class="citation" data-cites="feng2018pathologies"></span></td>
|
||||
<td><sub>Input Reduction</sub></td>
|
||||
<td></td>
|
||||
<td><sub>Word deletion</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack that reduces the input while maintaining the prediction through word importance ranking (["Pathologies of Neural Models Make Interpretations Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>kuleshov</code> <span class="citation" data-cites="Kuleshov2018AdversarialEF"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Thought vector encoding cosine similarity, Language model similarity probability</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Greedy word swap</sub></td>
|
||||
<td ><sub>(["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pruthi</code> <span class="citation" data-cites="pruthi2019combating"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>Minimum word length, Maximum number of words perturbed</sub></td>
|
||||
<td><sub>{Neighboring Character Swap, Character Deletion, Character Insertion, Keyboard-Based Character Swap}</sub></td>
|
||||
<td><sub>Greedy search</sub></td>
|
||||
<td ><sub>Simulates common typos (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pso</code> <span class="citation" data-cites="pso-zang-etal-2020-word"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td></td>
|
||||
<td><sub>HowNet Word Swap</sub></td>
|
||||
<td><sub>Particle Swarm Optimization</sub></td>
|
||||
<td ><sub>(["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)) </sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>pwws</code> <span class="citation" data-cites="pwws-ren-etal-2019-generating"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td></td>
|
||||
<td><sub>WordNet-based synonym swap</sub></td>
|
||||
<td><sub>Greedy-WIR (saliency)</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/))</sub> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textbugger</code> : (black-box) <span class="citation" data-cites="Li2019TextBuggerGA"></span></td>
|
||||
<td><sub>Untargeted Classification</sub></td>
|
||||
<td><sub>USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>{Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution}</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>(["TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271))</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>textfooler</code> <span class="citation" data-cites="Jin2019TextFooler"></span></td>
|
||||
<td><sub>Untargeted {Classification, Entailment}</sub></td>
|
||||
<td><sub>Word Embedding Distance, Part-of-speech match, USE sentence encoding cosine similarity</sub></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub></td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932))</sub> </td>
|
||||
</tr>
|
||||
|
||||
<tr><td style="text-align: center;" colspan="6"><strong><br>Attacks on sequence-to-sequence models: <br></strong></td></tr>
|
||||
|
||||
<tr>
|
||||
<td><code>morpheus</code> <span class="citation" data-cites="morpheus-tan-etal-2020-morphin"></span></td>
|
||||
<td><sub>Minimum BLEU Score</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>Inflection Word Swap</sub> </td>
|
||||
<td><sub>Greedy search</sub> </td>
|
||||
<td ><sub>Greedily replaces words with their inflections with the goal of minimizing BLEU score (["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf))</sub> </td>
|
||||
</tr>
|
||||
|
||||
|
||||
<tr>
|
||||
<td><code>seq2sick</code> :(black-box) <span class="citation" data-cites="cheng2018seq2sick"></span></td>
|
||||
<td><sub>Non-overlapping output</sub> </td>
|
||||
<td></td>
|
||||
<td><sub>Counter-fitted word embedding swap</sub> </td>
|
||||
<td><sub>Greedy-WIR</sub></td>
|
||||
<td ><sub>Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)) </sub> </td>
|
||||
</tr>
|
||||
|
||||
|
||||
</tbody>
|
||||
</font>
|
||||
</table>
|
||||
|
||||
|
||||
|
||||
## Recipe Usage Examples
|
||||
|
||||
Here are some examples of testing attacks from the literature from the command-line:
|
||||
|
||||
*TextFooler against BERT fine-tuned on SST-2:*
|
||||
```bash
|
||||
textattack attack --model bert-base-uncased-sst2 --recipe textfooler --num-examples 10
|
||||
```
|
||||
|
||||
*seq2sick (black-box) against T5 fine-tuned for English-German translation:*
|
||||
```bash
|
||||
textattack attack --model t5-en-de --recipe seq2sick --num-examples 100
|
||||
```
|
||||
8
docs/3recipes/augmenter_recipes.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
Augmenter Recipes API
|
||||
=====================
|
||||
|
||||
Transformations and constraints can be used for simple NLP data augmentations. Here is a list of recipes for NLP data augmentations.
|
||||
|
||||
.. automodule:: textattack.augmentation.recipes
|
||||
:members:
|
||||
:noindex:
|
||||
87
docs/3recipes/augmenter_recipes_cmd.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# Augmenter Recipes CommandLine Use
|
||||
|
||||
Transformations and constraints can be used for simple NLP data augmentations.
|
||||
|
||||
The [`examples/`](https://github.com/QData/TextAttack/tree/master/examples) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file.
|
||||
|
||||
The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.
|
||||
|
||||
|
||||
## Augmenting Text: `textattack augment`
|
||||
|
||||
Many of the components of TextAttack are useful for data augmentation. The `textattack.Augmenter` class
|
||||
uses a transformation and a list of constraints to augment data. We also offer six built-in recipes
|
||||
for data augmentation:
|
||||
- `textattack.WordNetAugmenter` augments text by replacing words with WordNet synonyms
|
||||
- `textattack.EmbeddingAugmenter` augments text by replacing words with neighbors in the counter-fitted embedding space, with a constraint to ensure their cosine similarity is at least 0.8
|
||||
- `textattack.CharSwapAugmenter` augments text by substituting, deleting, inserting, and swapping adjacent characters
|
||||
- `textattack.EasyDataAugmenter` augments text with a combination of word insertions, substitutions and deletions.
|
||||
- `textattack.CheckListAugmenter` augments text by contraction/extension and by substituting names, locations, numbers.
|
||||
- `textattack.CLAREAugmenter` augments text by replacing, inserting, and merging with a pre-trained masked language model.
|
||||
|
||||
### Augmentation Command-Line Interface
|
||||
The easiest way to use our data augmentation tools is with `textattack augment <args>`.
|
||||
|
||||
`textattack augment`
|
||||
takes an input CSV file, the "text" column to augment, along with the number of words to change per augmentation
|
||||
and the number of augmentations per input example. It outputs a CSV in the same format with all the augmented examples in the proper columns.
|
||||
|
||||
> For instance, when given the following as `examples.csv`:
|
||||
|
||||
```
|
||||
"text",label
|
||||
"the rock is destined to be the 21st century's new conan and that he's going to make a splash even greater than arnold schwarzenegger , jean- claud van damme or steven segal.", 1
|
||||
"the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .", 1
|
||||
"take care of my cat offers a refreshingly different slice of asian cinema .", 1
|
||||
"a technically well-made suspenser . . . but its abrupt drop in iq points as it races to the finish line proves simply too discouraging to let slide .", 0
|
||||
"it's a mystery how the movie could be released in this condition .", 0
|
||||
```
|
||||
|
||||
The command `textattack augment --csv examples.csv --input-column text --recipe embedding --pct-words-to-swap .1 --transformations-per-example 2 --exclude-original`
|
||||
will augment the `text` column by altering 10% of each example's words, generating twice as many augmentations as original inputs, and exclude the original inputs from the
|
||||
output CSV. (All of this will be saved to `augment.csv` by default.)
|
||||
|
||||
> **Tip:** Just as running attacks interactively, you can also pass `--interactive` to augment samples inputted by the user to quickly try out different augmentation recipes!
|
||||
|
||||
|
||||
After augmentation, here are the contents of `augment.csv`:
|
||||
```
|
||||
text,label
|
||||
"the rock is destined to be the 21st century's newest conan and that he's gonna to make a splashing even stronger than arnold schwarzenegger , jean- claud van damme or steven segal.",1
|
||||
"the rock is destined to be the 21tk century's novel conan and that he's going to make a splat even greater than arnold schwarzenegger , jean- claud van damme or stevens segal.",1
|
||||
the gorgeously elaborate continuation of 'the lord of the rings' trilogy is so huge that a column of expression significant adequately describe co-writer/director pedro jackson's expanded vision of j . rs . r . tolkien's middle-earth .,1
|
||||
the gorgeously elaborate continuation of 'the lordy of the piercings' trilogy is so huge that a column of mots cannot adequately describe co-novelist/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .,1
|
||||
take care of my cat offerings a pleasantly several slice of asia cinema .,1
|
||||
taking care of my cat offers a pleasantly different slice of asiatic kino .,1
|
||||
a technically good-made suspenser . . . but its abrupt drop in iq points as it races to the finish bloodline proves straightforward too disheartening to let slide .,0
|
||||
a technically well-made suspenser . . . but its abrupt drop in iq dot as it races to the finish line demonstrates simply too disheartening to leave slide .,0
|
||||
it's a enigma how the film wo be releases in this condition .,0
|
||||
it's a enigma how the filmmaking wo be publicized in this condition .,0
|
||||
```
|
||||
|
||||
The 'embedding' augmentation recipe uses counterfitted embedding nearest-neighbors to augment data.
|
||||
|
||||
### Augmentation Python API Interface
|
||||
In addition to the command-line interface, you can augment text dynamically by importing the
|
||||
`Augmenter` in your own code. All `Augmenter` objects implement `augment` and `augment_many` to generate augmentations
|
||||
of a string or a list of strings. Here's an example of how to use the `EmbeddingAugmenter` in a python script:
|
||||
|
||||
```python
|
||||
>>> from textattack.augmentation import EmbeddingAugmenter
|
||||
>>> augmenter = EmbeddingAugmenter()
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I notable create, I do not understand.', 'What I significant create, I do not understand.', 'What I cannot engender, I do not understand.', 'What I cannot creating, I do not understand.', 'What I cannot creations, I do not understand.', 'What I cannot create, I do not comprehend.', 'What I cannot create, I do not fathom.', 'What I cannot create, I do not understanding.', 'What I cannot create, I do not understands.', 'What I cannot create, I do not understood.', 'What I cannot create, I do not realise.']
|
||||
```
|
||||
You can also create your own augmenter from scratch by importing transformations/constraints from `textattack.transformations` and `textattack.constraints`. Here's an example that generates augmentations of a string using `WordSwapRandomCharacterDeletion`:
|
||||
|
||||
```python
|
||||
>>> from textattack.transformations import WordSwapRandomCharacterDeletion
|
||||
>>> from textattack.transformations import CompositeTransformation
|
||||
>>> from textattack.augmentation import Augmenter
|
||||
>>> transformation = CompositeTransformation([WordSwapRandomCharacterDeletion()])
|
||||
>>> augmenter = Augmenter(transformation=transformation, transformations_per_example=5)
|
||||
>>> s = 'What I cannot create, I do not understand.'
|
||||
>>> augmenter.augment(s)
|
||||
['What I cannot creae, I do not understand.', 'What I cannot creat, I do not understand.', 'What I cannot create, I do not nderstand.', 'What I cannot create, I do nt understand.', 'Wht I cannot create, I do not understand.']
|
||||
```
|
||||
496
docs/3recipes/models.md
Normal file
@@ -0,0 +1,496 @@
|
||||
# TextAttack Model Zoo
|
||||
|
||||
TextAttack is model-agnostic - meaning it can run attacks on models implemented in any deep learning framework. Model objects must be able to take a string (or list of strings) and return an output that can be processed by the goal function. For example, machine translation models take a list of strings as input and produce a list of strings as output. Classification and entailment models return an array of scores. As long as the user's model meets this specification, the model is fit to use with TextAttack.
|
||||
|
||||
|
||||
|
||||
To help users, TextAttack includes pre-trained models for different common NLP tasks. This makes it easier for
|
||||
users to get started with TextAttack. It also enables a fairer comparison of attacks from
|
||||
the literature.
|
||||
|
||||
|
||||
## Available Models
|
||||
|
||||
### TextAttack Models
|
||||
TextAttack has two built-in model types: a 1-layer bidirectional LSTM with a hidden
|
||||
state size of 150 (`lstm`), and a WordCNN with 3 window sizes
|
||||
(3, 4, 5) and 100 filters for the window size (`cnn`). Both models set dropout
|
||||
to 0.3 and use the 200-dimensional GloVe embeddings as a base.
|
||||
|
||||
### `transformers` Models
|
||||
Along with the `lstm` and `cnn`, you can theoretically fine-tune any model based
|
||||
in the huggingface [transformers](https://github.com/huggingface/transformers/)
|
||||
repo. Just type the model name (like `bert-base-cased`) and it will be automatically
|
||||
loaded.
|
||||
|
||||
Here are some models from transformers that have worked well for us:
|
||||
- `bert-base-uncased` and `bert-base-cased`
|
||||
- `distilbert-base-uncased` and `distilbert-base-cased`
|
||||
- `albert-base-v2`
|
||||
- `roberta-base`
|
||||
- `xlnet-base-cased`
|
||||
|
||||
|
||||
## Evaluation Results of Available Models
|
||||
|
||||
All evaluation results were obtained using `textattack eval` to evaluate models on their default
|
||||
test dataset (test set, if labels are available, otherwise, eval/validation set). You can use
|
||||
this command to verify the accuracies for yourself: for example, `textattack eval --model roberta-base-mr`.
|
||||
|
||||
|
||||
The LSTM and wordCNN models' code is available in `textattack.models.helpers`. All other models are transformers
|
||||
imported from the [`transformers`](https://github.com/huggingface/transformers/) package. To evaluate all
|
||||
TextAttack pretrained models, invoke `textattack eval` without specifying a model: `textattack eval --num-examples 1000`.
|
||||
All evaluations shown are on the full validation or test set up to 1000 examples.
|
||||
|
||||
|
||||
### `LSTM`
|
||||
|
||||
<section>
|
||||
|
||||
- AG News (`lstm-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 914/1000
|
||||
- Accuracy: 91.4%
|
||||
- IMDB (`lstm-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 883/1000
|
||||
- Accuracy: 88.30%
|
||||
- Movie Reviews [Rotten Tomatoes] (`lstm-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 807/1000
|
||||
- Accuracy: 80.70%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 781/1000
|
||||
- Accuracy: 78.10%
|
||||
- SST-2 (`lstm-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 737/872
|
||||
- Accuracy: 84.52%
|
||||
- Yelp Polarity (`lstm-yelp`)
|
||||
- `datasets` dataset `yelp_polarity`, split `test`
|
||||
- Correct/Whole: 922/1000
|
||||
- Accuracy: 92.20%
|
||||
|
||||
</section>
|
||||
|
||||
### `wordCNN`
|
||||
|
||||
<section>
|
||||
|
||||
|
||||
- AG News (`cnn-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 910/1000
|
||||
- Accuracy: 91.00%
|
||||
- IMDB (`cnn-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 863/1000
|
||||
- Accuracy: 86.30%
|
||||
- Movie Reviews [Rotten Tomatoes] (`cnn-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 794/1000
|
||||
- Accuracy: 79.40%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 768/1000
|
||||
- Accuracy: 76.80%
|
||||
- SST-2 (`cnn-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 721/872
|
||||
- Accuracy: 82.68%
|
||||
- Yelp Polarity (`cnn-yelp`)
|
||||
- `datasets` dataset `yelp_polarity`, split `test`
|
||||
- Correct/Whole: 913/1000
|
||||
- Accuracy: 91.30%
|
||||
|
||||
</section>
|
||||
|
||||
|
||||
### `albert-base-v2`
|
||||
|
||||
<section>
|
||||
|
||||
- AG News (`albert-base-v2-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 943/1000
|
||||
- Accuracy: 94.30%
|
||||
- CoLA (`albert-base-v2-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 829/1000
|
||||
- Accuracy: 82.90%
|
||||
- IMDB (`albert-base-v2-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 913/1000
|
||||
- Accuracy: 91.30%
|
||||
- Movie Reviews [Rotten Tomatoes] (`albert-base-v2-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 882/1000
|
||||
- Accuracy: 88.20%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 851/1000
|
||||
- Accuracy: 85.10%
|
||||
- Quora Question Pairs (`albert-base-v2-qqp`)
|
||||
- `datasets` dataset `glue`, subset `qqp`, split `validation`
|
||||
- Correct/Whole: 914/1000
|
||||
- Accuracy: 91.40%
|
||||
- Recognizing Textual Entailment (`albert-base-v2-rte`)
|
||||
- `datasets` dataset `glue`, subset `rte`, split `validation`
|
||||
- Correct/Whole: 211/277
|
||||
- Accuracy: 76.17%
|
||||
- SNLI (`albert-base-v2-snli`)
|
||||
- `datasets` dataset `snli`, split `test`
|
||||
- Correct/Whole: 883/1000
|
||||
- Accuracy: 88.30%
|
||||
- SST-2 (`albert-base-v2-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 807/872
|
||||
    - Accuracy: 92.55%
|
||||
- STS-b (`albert-base-v2-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.9041359738552746
|
||||
- Spearman correlation: 0.8995912861209745
|
||||
- WNLI (`albert-base-v2-wnli`)
|
||||
- `datasets` dataset `glue`, subset `wnli`, split `validation`
|
||||
- Correct/Whole: 42/71
|
||||
- Accuracy: 59.15%
|
||||
- Yelp Polarity (`albert-base-v2-yelp`)
|
||||
- `datasets` dataset `yelp_polarity`, split `test`
|
||||
- Correct/Whole: 963/1000
|
||||
- Accuracy: 96.30%
|
||||
|
||||
</section>
|
||||
|
||||
### `bert-base-uncased`
|
||||
|
||||
<section>
|
||||
|
||||
- AG News (`bert-base-uncased-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 942/1000
|
||||
- Accuracy: 94.20%
|
||||
- CoLA (`bert-base-uncased-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 812/1000
|
||||
- Accuracy: 81.20%
|
||||
- IMDB (`bert-base-uncased-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 919/1000
|
||||
- Accuracy: 91.90%
|
||||
- MNLI matched (`bert-base-uncased-mnli`)
|
||||
- `datasets` dataset `glue`, subset `mnli`, split `validation_matched`
|
||||
- Correct/Whole: 840/1000
|
||||
- Accuracy: 84.00%
|
||||
- Movie Reviews [Rotten Tomatoes] (`bert-base-uncased-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 876/1000
|
||||
- Accuracy: 87.60%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 838/1000
|
||||
- Accuracy: 83.80%
|
||||
- MRPC (`bert-base-uncased-mrpc`)
|
||||
- `datasets` dataset `glue`, subset `mrpc`, split `validation`
|
||||
- Correct/Whole: 358/408
|
||||
- Accuracy: 87.75%
|
||||
- QNLI (`bert-base-uncased-qnli`)
|
||||
- `datasets` dataset `glue`, subset `qnli`, split `validation`
|
||||
- Correct/Whole: 904/1000
|
||||
- Accuracy: 90.40%
|
||||
- Quora Question Pairs (`bert-base-uncased-qqp`)
|
||||
- `datasets` dataset `glue`, subset `qqp`, split `validation`
|
||||
- Correct/Whole: 924/1000
|
||||
- Accuracy: 92.40%
|
||||
- Recognizing Textual Entailment (`bert-base-uncased-rte`)
|
||||
- `datasets` dataset `glue`, subset `rte`, split `validation`
|
||||
- Correct/Whole: 201/277
|
||||
- Accuracy: 72.56%
|
||||
- SNLI (`bert-base-uncased-snli`)
|
||||
- `datasets` dataset `snli`, split `test`
|
||||
- Correct/Whole: 894/1000
|
||||
- Accuracy: 89.40%
|
||||
- SST-2 (`bert-base-uncased-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 806/872
|
||||
    - Accuracy: 92.43%
|
||||
- STS-b (`bert-base-uncased-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.8775458937815515
|
||||
- Spearman correlation: 0.8773251339980935
|
||||
- WNLI (`bert-base-uncased-wnli`)
|
||||
- `datasets` dataset `glue`, subset `wnli`, split `validation`
|
||||
- Correct/Whole: 40/71
|
||||
- Accuracy: 56.34%
|
||||
- Yelp Polarity (`bert-base-uncased-yelp`)
|
||||
- `datasets` dataset `yelp_polarity`, split `test`
|
||||
- Correct/Whole: 963/1000
|
||||
- Accuracy: 96.30%
|
||||
|
||||
</section>
|
||||
|
||||
### `distilbert-base-cased`
|
||||
|
||||
<section>
|
||||
|
||||
|
||||
- CoLA (`distilbert-base-cased-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 786/1000
|
||||
- Accuracy: 78.60%
|
||||
- MRPC (`distilbert-base-cased-mrpc`)
|
||||
- `datasets` dataset `glue`, subset `mrpc`, split `validation`
|
||||
- Correct/Whole: 320/408
|
||||
- Accuracy: 78.43%
|
||||
- Quora Question Pairs (`distilbert-base-cased-qqp`)
|
||||
- `datasets` dataset `glue`, subset `qqp`, split `validation`
|
||||
- Correct/Whole: 908/1000
|
||||
- Accuracy: 90.80%
|
||||
- SNLI (`distilbert-base-cased-snli`)
|
||||
- `datasets` dataset `snli`, split `test`
|
||||
- Correct/Whole: 861/1000
|
||||
- Accuracy: 86.10%
|
||||
- SST-2 (`distilbert-base-cased-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 785/872
|
||||
    - Accuracy: 90.02%
|
||||
- STS-b (`distilbert-base-cased-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.8421540899520146
|
||||
- Spearman correlation: 0.8407155030382939
|
||||
|
||||
</section>
|
||||
|
||||
### `distilbert-base-uncased`
|
||||
|
||||
<section>
|
||||
|
||||
- AG News (`distilbert-base-uncased-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 944/1000
|
||||
- Accuracy: 94.40%
|
||||
- CoLA (`distilbert-base-uncased-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 786/1000
|
||||
- Accuracy: 78.60%
|
||||
- IMDB (`distilbert-base-uncased-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 903/1000
|
||||
- Accuracy: 90.30%
|
||||
- MNLI matched (`distilbert-base-uncased-mnli`)
|
||||
- `datasets` dataset `glue`, subset `mnli`, split `validation_matched`
|
||||
- Correct/Whole: 817/1000
|
||||
- Accuracy: 81.70%
|
||||
- MRPC (`distilbert-base-uncased-mrpc`)
|
||||
- `datasets` dataset `glue`, subset `mrpc`, split `validation`
|
||||
- Correct/Whole: 350/408
|
||||
- Accuracy: 85.78%
|
||||
- QNLI (`distilbert-base-uncased-qnli`)
|
||||
- `datasets` dataset `glue`, subset `qnli`, split `validation`
|
||||
- Correct/Whole: 860/1000
|
||||
- Accuracy: 86.00%
|
||||
- Recognizing Textual Entailment (`distilbert-base-uncased-rte`)
|
||||
- `datasets` dataset `glue`, subset `rte`, split `validation`
|
||||
- Correct/Whole: 180/277
|
||||
- Accuracy: 64.98%
|
||||
- STS-b (`distilbert-base-uncased-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.8421540899520146
|
||||
- Spearman correlation: 0.8407155030382939
|
||||
- WNLI (`distilbert-base-uncased-wnli`)
|
||||
- `datasets` dataset `glue`, subset `wnli`, split `validation`
|
||||
- Correct/Whole: 40/71
|
||||
- Accuracy: 56.34%
|
||||
|
||||
</section>
|
||||
|
||||
### `roberta-base`
|
||||
|
||||
<section>
|
||||
|
||||
- AG News (`roberta-base-ag-news`)
|
||||
- `datasets` dataset `ag_news`, split `test`
|
||||
- Correct/Whole: 947/1000
|
||||
- Accuracy: 94.70%
|
||||
- CoLA (`roberta-base-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 857/1000
|
||||
- Accuracy: 85.70%
|
||||
- IMDB (`roberta-base-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 941/1000
|
||||
- Accuracy: 94.10%
|
||||
- Movie Reviews [Rotten Tomatoes] (`roberta-base-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 899/1000
|
||||
- Accuracy: 89.90%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 883/1000
|
||||
- Accuracy: 88.30%
|
||||
- MRPC (`roberta-base-mrpc`)
|
||||
- `datasets` dataset `glue`, subset `mrpc`, split `validation`
|
||||
- Correct/Whole: 371/408
|
||||
- Accuracy: 91.18%
|
||||
- QNLI (`roberta-base-qnli`)
|
||||
- `datasets` dataset `glue`, subset `qnli`, split `validation`
|
||||
- Correct/Whole: 917/1000
|
||||
- Accuracy: 91.70%
|
||||
- Recognizing Textual Entailment (`roberta-base-rte`)
|
||||
- `datasets` dataset `glue`, subset `rte`, split `validation`
|
||||
- Correct/Whole: 217/277
|
||||
- Accuracy: 78.34%
|
||||
- SST-2 (`roberta-base-sst2`)
|
||||
- `datasets` dataset `glue`, subset `sst2`, split `validation`
|
||||
- Correct/Whole: 820/872
|
||||
    - Accuracy: 94.04%
|
||||
- STS-b (`roberta-base-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.906067852162708
|
||||
- Spearman correlation: 0.9025045272903051
|
||||
- WNLI (`roberta-base-wnli`)
|
||||
- `datasets` dataset `glue`, subset `wnli`, split `validation`
|
||||
- Correct/Whole: 40/71
|
||||
- Accuracy: 56.34%
|
||||
|
||||
</section>
|
||||
|
||||
### `xlnet-base-cased`
|
||||
|
||||
<section>
|
||||
|
||||
- CoLA (`xlnet-base-cased-cola`)
|
||||
- `datasets` dataset `glue`, subset `cola`, split `validation`
|
||||
- Correct/Whole: 800/1000
|
||||
- Accuracy: 80.00%
|
||||
- IMDB (`xlnet-base-cased-imdb`)
|
||||
- `datasets` dataset `imdb`, split `test`
|
||||
- Correct/Whole: 957/1000
|
||||
- Accuracy: 95.70%
|
||||
- Movie Reviews [Rotten Tomatoes] (`xlnet-base-cased-mr`)
|
||||
- `datasets` dataset `rotten_tomatoes`, split `validation`
|
||||
- Correct/Whole: 908/1000
|
||||
- Accuracy: 90.80%
|
||||
- `datasets` dataset `rotten_tomatoes`, split `test`
|
||||
- Correct/Whole: 876/1000
|
||||
- Accuracy: 87.60%
|
||||
- MRPC (`xlnet-base-cased-mrpc`)
|
||||
- `datasets` dataset `glue`, subset `mrpc`, split `validation`
|
||||
- Correct/Whole: 363/408
|
||||
- Accuracy: 88.97%
|
||||
- Recognizing Textual Entailment (`xlnet-base-cased-rte`)
|
||||
- `datasets` dataset `glue`, subset `rte`, split `validation`
|
||||
- Correct/Whole: 196/277
|
||||
- Accuracy: 70.76%
|
||||
- STS-b (`xlnet-base-cased-stsb`)
|
||||
- `datasets` dataset `glue`, subset `stsb`, split `validation`
|
||||
- Pearson correlation: 0.883111673280641
|
||||
- Spearman correlation: 0.8773439961182335
|
||||
- WNLI (`xlnet-base-cased-wnli`)
|
||||
- `datasets` dataset `glue`, subset `wnli`, split `validation`
|
||||
- Correct/Whole: 41/71
|
||||
- Accuracy: 57.75%
|
||||
|
||||
</section>
|
||||
|
||||
|
||||
## How we have trained the TextAttack Models
|
||||
|
||||
|
||||
- By Oct 2020, TextAttack provides users with 82 pre-trained TextAttack models, including word-level LSTM, word-level CNN, BERT, and other transformer based models pre-trained on various datasets provided by [HuggingFace](https://github.com/huggingface/nlp/).
|
||||
|
||||
- Since TextAttack is integrated with the [https://github.com/huggingface/nlp/](https://github.com/huggingface/nlp) library, it can automatically load the test or validation data set for the corresponding pre-trained model. While the literature has mainly focused on classification and entailment, TextAttack's pretrained models enable research on the robustness of models across all GLUE tasks.
|
||||
|
||||
- We host all TextAttack Models at huggingface Model Hub: [https://huggingface.co/textattack](https://huggingface.co/textattack)
|
||||
|
||||
|
||||
## Training details for each TextAttack Model
|
||||
|
||||
|
||||
All of our models have model cards on the HuggingFace model hub. So for now, the easiest way to figure this out is as follows:
|
||||
|
||||
|
||||
- Go to our page on the model hub: [https://huggingface.co/textattack](https://huggingface.co/textattack)
|
||||
|
||||
- Find the model you're looking for and select its page, for instance: [https://huggingface.co/textattack/roberta-base-imdb](https://huggingface.co/textattack/roberta-base-imdb)
|
||||
|
||||
- Scroll down to the end of the page, looking for the **model card** section. There you will find the details of model training for that specific TextAttack model.
|
||||
|
||||
- BTW: For each of our transformers, we selected the best out of a grid search over a bunch of possible hyperparameters. So the model training hyperparameters actually vary from model to model.
|
||||
|
||||
|
||||
|
||||
|
||||
## More details on TextAttack fine-tuned NLP models (details on target NLP task, input type, output type, SOTA results on paperswithcode; model card on huggingface):
|
||||
|
||||
|
||||
|
||||
|
||||
Fine-tuned Model | NLP Task | Input type | Output Type | paperswithcode.com SOTA | huggingface.co Model Card
|
||||
--------------|-----------------|--------------------|--------------------|--------------------------|-------------------------------
|
||||
albert-base-v2-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | <sub><sup>https://paperswithcode.com/sota/linguistic-acceptability-on-cola </sub></sup> | <sub><sup>https://huggingface.co/textattack/albert-base-v2-CoLA </sub></sup>
|
||||
bert-base-uncased-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | none yet | <sub><sup>https://huggingface.co/textattack/bert-base-uncased-CoLA </sub></sup>
|
||||
distilbert-base-cased-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | <sub><sup> https://paperswithcode.com/sota/linguistic-acceptability-on-cola </sub></sup> | <sub><sup>https://huggingface.co/textattack/distilbert-base-cased-CoLA </sub></sup>
|
||||
distilbert-base-uncased-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | <sub><sup> https://paperswithcode.com/sota/linguistic-acceptability-on-cola </sub></sup> | <sub><sup>https://huggingface.co/textattack/distilbert-base-uncased-CoLA </sub></sup>
|
||||
roberta-base-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | <sub><sup> https://paperswithcode.com/sota/linguistic-acceptability-on-cola </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-CoLA </sub></sup>
|
||||
xlnet-base-cased-CoLA | linguistic acceptability | single sentences | binary (1=acceptable/ 0=unacceptable) | <sub><sup> https://paperswithcode.com/sota/linguistic-acceptability-on-cola </sub></sup> | <sub><sup>https://huggingface.co/textattack/xlnet-base-cased-CoLA </sub></sup>
|
||||
albert-base-v2-RTE | natural language inference | sentence pairs (1 premise and 1 hypothesis) | binary(0=entailed/1=not entailed) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-rte </sub></sup> | <sub><sup> https://huggingface.co/textattack/albert-base-v2-RTE </sub></sup>
|
||||
albert-base-v2-snli | natural language inference | sentence pairs | accuracy (0=entailment, 1=neutral,2=contradiction) | none yet | <sub><sup> https://huggingface.co/textattack/albert-base-v2-snli </sub></sup>
|
||||
albert-base-v2-WNLI | natural language inference | sentence pairs | binary | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-wnli </sub></sup> | <sub><sup> https://huggingface.co/textattack/albert-base-v2-WNLI</sub></sup>
|
||||
bert-base-uncased-MNLI | natural language inference | sentence pairs (1 premise and 1 hypothesis) | accuracy (0=entailment, 1=neutral,2=contradiction) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-MNLI </sub></sup>
|
||||
bert-base-uncased-QNLI | natural language inference | question/answer pairs | binary (1=unanswerable/ 0=answerable) | none yet |<sub><sup> https://huggingface.co/textattack/bert-base-uncased-QNLI </sub></sup>
|
||||
bert-base-uncased-RTE | natural language inference | sentence pairs (1 premise and 1 hypothesis) | binary(0=entailed/1=not entailed) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-RTE </sub></sup>
|
||||
bert-base-uncased-snli | natural language inference | sentence pairs | accuracy (0=entailment, 1=neutral,2=contradiction) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-snli </sub></sup>
|
||||
bert-base-uncased-WNLI | natural language inference | sentence pairs | binary | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-WNLI </sub></sup>
|
||||
distilbert-base-cased-snli | natural language inference | sentence pairs | accuracy (0=entailment, 1=neutral,2=contradiction) | none yet | <sub><sup> https://huggingface.co/textattack/distilbert-base-cased-snli </sub></sup>
|
||||
distilbert-base-uncased-MNLI | natural language inference | sentence pairs (1 premise and 1 hypothesis) | accuracy (0=entailment,1=neutral, 2=contradiction) | none yet | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-MNLI </sub></sup>
|
||||
distilbert-base-uncased-RTE | natural language inference | sentence pairs (1 premise and 1 hypothesis) | binary(0=entailed/1=not entailed) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-rte </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-RTE</sub></sup>
|
||||
distilbert-base-uncased-WNLI | natural language inference | sentence pairs | binary | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-wnli </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-WNLI </sub></sup>
|
||||
roberta-base-QNLI | natural language inference | question/answer pairs | binary (1=unanswerable/ 0=answerable) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-qnli </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-QNLI </sub></sup>
|
||||
roberta-base-RTE | natural language inference | sentence pairs (1 premise and 1 hypothesis) | binary(0=entailed/1=not entailed) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-rte </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-RTE</sub></sup>
|
||||
roberta-base-WNLI | natural language inference | sentence pairs | binary | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-wnli </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-WNLI </sub></sup>
|
||||
xlnet-base-cased-RTE | natural language inference | sentence pairs (1 premise and 1 hypothesis) | binary(0=entailed/1=not entailed) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-rte </sub></sup> | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-RTE </sub></sup>
|
||||
xlnet-base-cased-WNLI | natural language inference | sentence pairs | binary | none yet | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-WNLI </sub></sup>
|
||||
albert-base-v2-QQP | paraphrase similarity | question pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/question-answering-on-quora-question-pairs </sub></sup> | <sub><sup> https://huggingface.co/textattack/albert-base-v2-QQP</sub></sup>
|
||||
bert-base-uncased-QQP | paraphrase similarity | question pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/question-answering-on-quora-question-pairs </sub></sup> | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-QQP </sub></sup>
|
||||
distilbert-base-uncased-QNLI | question answering/natural language inference | question/answer pairs | binary (1=unanswerable/ 0=answerable) | <sub><sup> https://paperswithcode.com/sota/natural-language-inference-on-qnli </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-QNLI </sub></sup>
|
||||
distilbert-base-cased-QQP | question answering/paraphrase similarity | question pairs | binary (1=similar/ 0=not similar) | <sub><sup> https://paperswithcode.com/sota/question-answering-on-quora-question-pairs </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-cased-QQP </sub></sup>
|
||||
albert-base-v2-STS-B | semantic textual similarity | sentence pairs | similarity (0.0 to 5.0) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-sts-benchmark </sub></sup> | <sub><sup> https://huggingface.co/textattack/albert-base-v2-STS-B </sub></sup>
|
||||
bert-base-uncased-MRPC | semantic textual similarity | sentence pairs | binary (1=similar/0=not similar) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-MRPC </sub></sup>
|
||||
bert-base-uncased-STS-B | semantic textual similarity | sentence pairs | similarity (0.0 to 5.0) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-STS-B </sub></sup>
|
||||
distilbert-base-cased-MRPC | semantic textual similarity | sentence pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-mrpc </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-cased-MRPC </sub></sup>
|
||||
distilbert-base-cased-STS-B | semantic textual similarity | sentence pairs | similarity (0.0 to 5.0) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-sts-benchmark </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-cased-STS-B </sub></sup>
|
||||
distilbert-base-uncased-MRPC | semantic textual similarity | sentence pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-mrpc </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-MRPC</sub></sup>
|
||||
roberta-base-MRPC | semantic textual similarity | sentence pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-mrpc </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-MRPC </sub></sup>
|
||||
roberta-base-STS-B | semantic textual similarity | sentence pairs | similarity (0.0 to 5.0) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-sts-benchmark </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-STS-B </sub></sup>
|
||||
xlnet-base-cased-MRPC | semantic textual similarity | sentence pairs | binary (1=similar/0=not similar) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-mrpc </sub></sup> | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-MRPC </sub></sup>
|
||||
xlnet-base-cased-STS-B | semantic textual similarity | sentence pairs | similarity (0.0 to 5.0) | <sub><sup> https://paperswithcode.com/sota/semantic-textual-similarity-on-sts-benchmark </sub></sup> | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-STS-B </sub></sup>
|
||||
albert-base-v2-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/albert-base-v2-imdb </sub></sup>
|
||||
albert-base-v2-rotten-tomatoes | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/albert-base-v2-rotten-tomatoes </sub></sup>
|
||||
albert-base-v2-SST-2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary </sub></sup> | <sub><sup> https://huggingface.co/textattack/albert-base-v2-SST-2 </sub></sup>
|
||||
albert-base-v2-yelp-polarity | sentiment analysis | yelp reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/albert-base-v2-yelp-polarity </sub></sup>
|
||||
bert-base-uncased-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-imdb </sub></sup>
|
||||
bert-base-uncased-rotten-tomatoes | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-rotten-tomatoes </sub></sup>
|
||||
bert-base-uncased-SST-2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary </sub></sup> | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-SST-2 </sub></sup>
|
||||
bert-base-uncased-yelp-polarity | sentiment analysis | yelp reviews | binary (1=good/0=bad) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-yelp-binary </sub></sup> | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-yelp-polarity </sub></sup>
|
||||
cnn-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-imdb </sub></sup> | none
|
||||
cnn-mr | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | none
|
||||
cnn-sst2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary </sub></sup> | none
|
||||
cnn-yelp | sentiment analysis | yelp reviews | binary (1=good/0=bad) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-yelp-binary </sub></sup> | none
|
||||
distilbert-base-cased-SST-2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary </sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-cased-SST-2 </sub></sup>
|
||||
distilbert-base-uncased-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-imdb</sub></sup> | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-imdb </sub></sup>
|
||||
distilbert-base-uncased-rotten-tomatoes | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-rotten-tomatoes </sub></sup>
|
||||
lstm-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-imdb </sub></sup> | none
|
||||
lstm-mr | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | none
|
||||
lstm-sst2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | none yet | none
|
||||
lstm-yelp | sentiment analysis | yelp reviews | binary (1=good/0=bad) | none yet | none
|
||||
roberta-base-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/roberta-base-imdb </sub></sup>
|
||||
roberta-base-rotten-tomatoes | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/roberta-base-rotten-tomatoes </sub></sup>
|
||||
roberta-base-SST-2 | sentiment analysis | phrases | accuracy (0.0000 to 1.0000) | <sub><sup> https://paperswithcode.com/sota/sentiment-analysis-on-sst-2-binary </sub></sup> | <sub><sup> https://huggingface.co/textattack/roberta-base-SST-2 </sub></sup>
|
||||
xlnet-base-cased-imdb | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-imdb </sub></sup>
|
||||
xlnet-base-cased-rotten-tomatoes | sentiment analysis | movie reviews | binary (1=good/0=bad) | none yet | <sub><sup> https://huggingface.co/textattack/xlnet-base-cased-rotten-tomatoes </sub></sup>
|
||||
albert-base-v2-ag-news | text classification | news articles | news category | none yet | <sub><sup> https://huggingface.co/textattack/albert-base-v2-ag-news </sub></sup>
|
||||
bert-base-uncased-ag-news | text classification | news articles | news category | none yet | <sub><sup> https://huggingface.co/textattack/bert-base-uncased-ag-news </sub></sup>
|
||||
cnn-ag-news | text classification | news articles | news category | <sub><sup> https://paperswithcode.com/sota/text-classification-on-ag-news </sub></sup> | none
|
||||
distilbert-base-uncased-ag-news | text classification | news articles | news category | none yet | <sub><sup> https://huggingface.co/textattack/distilbert-base-uncased-ag-news </sub></sup>
|
||||
lstm-ag-news | text classification | news articles | news category | <sub><sup> https://paperswithcode.com/sota/text-classification-on-ag-news </sub></sup> | none
|
||||
roberta-base-ag-news | text classification | news articles | news category | none yet | <sub><sup> https://huggingface.co/textattack/roberta-base-ag-news </sub></sup>
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# First run "sphinx-apidoc -f -o apidoc -d 6 -E -T -M ../textattack"
|
||||
# Then run "make html"
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
|
||||
26
docs/_static/css/custom.css
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
div.wy-side-nav-search .version {
|
||||
color: #404040;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
nav.wy-nav-top {
|
||||
background: #AA2396;
|
||||
}
|
||||
|
||||
div.wy-nav-content {
|
||||
max-width: 1000px;
|
||||
}
|
||||
|
||||
span.caption-text {
|
||||
color: #cc4878;
|
||||
}
|
||||
|
||||
/* Change header fonts to Cambria */
|
||||
.rst-content .toctree-wrapper>p.caption, h1, h2, h3, h4, h5, h6, legend {
|
||||
font-family: 'Cambria', serif;
|
||||
}
|
||||
|
||||
/* Change non-header default fonts to Helvetica */
|
||||
/** {
|
||||
font-family: 'Helvetica', sans-serif;
|
||||
}*/
|
||||
BIN
docs/_static/imgs/benchmark/search-example.pdf
vendored
Normal file
BIN
docs/_static/imgs/benchmark/search-fig1.png
vendored
Normal file
|
After Width: | Height: | Size: 807 KiB |
BIN
docs/_static/imgs/benchmark/search-fig2.png
vendored
Normal file
|
After Width: | Height: | Size: 884 KiB |
BIN
docs/_static/imgs/benchmark/search-table1.png
vendored
Normal file
|
After Width: | Height: | Size: 220 KiB |
BIN
docs/_static/imgs/benchmark/search-table2.png
vendored
Normal file
|
After Width: | Height: | Size: 290 KiB |
BIN
docs/_static/imgs/benchmark/search-table31.png
vendored
Normal file
|
After Width: | Height: | Size: 427 KiB |
BIN
docs/_static/imgs/benchmark/search-table32.png
vendored
Normal file
|
After Width: | Height: | Size: 315 KiB |
BIN
docs/_static/imgs/benchmark/table3.png
vendored
Normal file
|
After Width: | Height: | Size: 92 KiB |
BIN
docs/_static/imgs/benchmark/table4.png
vendored
Normal file
|
After Width: | Height: | Size: 200 KiB |
BIN
docs/_static/imgs/benchmark/table5-main.png
vendored
Normal file
|
After Width: | Height: | Size: 158 KiB |
BIN
docs/_static/imgs/benchmark/table7.png
vendored
Normal file
|
After Width: | Height: | Size: 109 KiB |
BIN
docs/_static/imgs/benchmark/table9.png
vendored
Normal file
|
After Width: | Height: | Size: 502 KiB |
BIN
docs/_static/imgs/intro/01-categorized-attacks.png
vendored
Normal file
|
After Width: | Height: | Size: 95 KiB |
BIN
docs/_static/imgs/intro/ae_papers.png
vendored
Normal file
|
After Width: | Height: | Size: 16 KiB |
BIN
docs/_static/imgs/intro/mr_aes.png
vendored
Normal file
|
After Width: | Height: | Size: 65 KiB |
BIN
docs/_static/imgs/intro/mr_aes_table.png
vendored
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
docs/_static/imgs/intro/pig_airliner.png
vendored
Normal file
|
After Width: | Height: | Size: 211 KiB |
BIN
docs/_static/imgs/intro/textattack_components.png
vendored
Normal file
|
After Width: | Height: | Size: 123 KiB |
BIN
docs/_static/imgs/intro/textattack_ecosystem.png
vendored
Normal file
|
After Width: | Height: | Size: 355 KiB |
26
docs/api/attack.rst
Normal file
@@ -0,0 +1,26 @@
|
||||
Attack API Reference
|
||||
=======================
|
||||
|
||||
Attack
|
||||
------------
|
||||
Attack is composed of four components:
|
||||
|
||||
- `Goal Functions <../attacks/goal_function.html>`__ stipulate the goal of the attack, like to change the prediction score of a classification model, or to change all of the words in a translation output.
|
||||
- `Constraints <../attacks/constraint.html>`__ determine if a potential perturbation is valid with respect to the original input.
|
||||
- `Transformations <../attacks/transformation.html>`__ take a text input and transform it by inserting and deleting characters, words, and/or phrases.
|
||||
- `Search Methods <../attacks/search_method.html>`__ explore the space of possible **transformations** within the defined **constraints** and attempt to find a successful perturbation which satisfies the **goal function**.
|
||||
|
||||
The :class:`~textattack.Attack` class represents an adversarial attack composed of a goal function, search method, transformation, and constraints.
|
||||
|
||||
.. autoclass:: textattack.Attack
|
||||
:members:
|
||||
|
||||
AttackRecipe
|
||||
-------------
|
||||
Attack recipe is a subclass of :class:`~textattack.Attack` class that has a special method :meth:`build` which
|
||||
returns a pre-built :class:`~textattack.Attack` that correspond to attacks from the literature.
|
||||
|
||||
|
||||
|
||||
.. autoclass:: textattack.attack_recipes.AttackRecipe
|
||||
:members:
|
||||
27
docs/api/attack_results.rst
Normal file
@@ -0,0 +1,27 @@
|
||||
Attack Result API Reference
|
||||
============================
|
||||
|
||||
AttackResult
|
||||
-------------
|
||||
.. autoclass:: textattack.attack_results.AttackResult
|
||||
:members:
|
||||
|
||||
SuccessfulAttackResult
|
||||
-----------------------
|
||||
.. autoclass:: textattack.attack_results.SuccessfulAttackResult
|
||||
:members:
|
||||
|
||||
FailedAttackResult
|
||||
-----------------------
|
||||
.. autoclass:: textattack.attack_results.FailedAttackResult
|
||||
:members:
|
||||
|
||||
SkippedAttackResult
|
||||
-----------------------
|
||||
.. autoclass:: textattack.attack_results.SkippedAttackResult
|
||||
:members:
|
||||
|
||||
MaximizedAttackResult
|
||||
-----------------------
|
||||
.. autoclass:: textattack.attack_results.MaximizedAttackResult
|
||||
:members:
|
||||
19
docs/api/attacker.rst
Normal file
@@ -0,0 +1,19 @@
|
||||
Attacker API Reference
|
||||
=======================
|
||||
|
||||
Attacker
|
||||
-------------
|
||||
While :class:`~textattack.Attack` is the main class used to carry out the adversarial attack, it is only useful for attacking one example at a time.
|
||||
It lacks features that support attacking multiple samples in parallel (i.e. multi-GPU), saving checkpoints, or logging results to text file, CSV file, or wandb.
|
||||
:class:`~textattack.Attacker` provides these features in an easy-to-use API.
|
||||
|
||||
.. autoclass:: textattack.Attacker
|
||||
:members:
|
||||
|
||||
|
||||
AttackArgs
|
||||
-------------
|
||||
:class:`~textattack.AttackArgs` represents arguments to be passed to :class:`~textattack.Attacker`, such as number of examples to attack, interval at which to save checkpoints, logging details.
|
||||
|
||||
.. autoclass:: textattack.AttackArgs
|
||||
:members:
|
||||
13
docs/api/constraints/constraints.rst
Normal file
@@ -0,0 +1,13 @@
|
||||
Constraints API Reference
|
||||
============================
|
||||
|
||||
Constraint
|
||||
------------
|
||||
.. automodule:: textattack.constraints.Constraint
|
||||
:members:
|
||||
|
||||
PreTransformationConstraint
|
||||
-----------------------------
|
||||
.. automodule:: textattack.constraints.PreTransformationConstraint
|
||||
:members:
|
||||
|
||||
16
docs/api/datasets.rst
Normal file
@@ -0,0 +1,16 @@
|
||||
Datasets API Reference
|
||||
=============================
|
||||
Dataset class define the dataset object used to for carrying out attacks, augmentation, and training.
|
||||
:class:`~textattack.datasets.Dataset` class is the most basic class that could be used to wrap a list of input and output pairs.
|
||||
To load datasets from text, CSV, or JSON files, we recommend using 🤗 Datasets library to first
|
||||
load it as a :obj:`datasets.Dataset` object and then pass it to TextAttack's :class:`~textattack.datasets.HuggingFaceDataset` class.
|
||||
|
||||
Dataset
|
||||
----------
|
||||
.. autoclass:: textattack.datasets.Dataset
|
||||
:members: __getitem__, __len__
|
||||
|
||||
HuggingFaceDataset
|
||||
-------------------
|
||||
.. autoclass:: textattack.datasets.HuggingFaceDataset
|
||||
:members: __getitem__, __len__
|
||||
46
docs/api/goal_functions.rst
Normal file
@@ -0,0 +1,46 @@
|
||||
Goal Functions API Reference
|
||||
============================
|
||||
|
||||
:class:`~textattack.goal_functions.GoalFunction` determines both the conditions under which the attack is successful (in terms of the model outputs)
|
||||
and the heuristic score that we want to maximize when searching for the solution.
|
||||
|
||||
GoalFunction
|
||||
------------
|
||||
.. autoclass:: textattack.goal_functions.GoalFunction
|
||||
:members:
|
||||
|
||||
ClassificationGoalFunction
|
||||
--------------------------
|
||||
.. autoclass:: textattack.goal_functions.ClassificationGoalFunction
|
||||
:members:
|
||||
|
||||
TargetedClassification
|
||||
----------------------
|
||||
.. autoclass:: textattack.goal_functions.TargetedClassification
|
||||
:members:
|
||||
|
||||
UntargetedClassification
|
||||
------------------------
|
||||
.. autoclass:: textattack.goal_functions.UntargetedClassification
|
||||
:members:
|
||||
|
||||
InputReduction
|
||||
--------------
|
||||
.. autoclass:: textattack.goal_functions.InputReduction
|
||||
:members:
|
||||
|
||||
TextToTextGoalFunction
|
||||
-----------------------
|
||||
.. autoclass:: textattack.goal_functions.TextToTextGoalFunction
|
||||
:members:
|
||||
|
||||
MinimizeBleu
|
||||
-------------
|
||||
.. autoclass:: textattack.goal_functions.MinimizeBleu
|
||||
:members:
|
||||
|
||||
NonOverlappingOutput
|
||||
----------------------
|
||||
.. autoclass:: textattack.goal_functions.NonOverlappingOutput
|
||||
:members:
|
||||
|
||||
46
docs/api/search_methods.rst
Normal file
@@ -0,0 +1,46 @@
|
||||
Search Methods API Reference
|
||||
============================
|
||||
|
||||
:class:`~textattack.search_methods.SearchMethod` attempts to find the optimal set of perturbations that will produce an adversarial example.
|
||||
Finding such optimal perturbations becomes a combinatorial optimization problem, and search methods are typically heuristic search algorithms designed
|
||||
to solve the underlying combinatorial problem.
|
||||
|
||||
More in-depth study of search algorithms for NLP adversarial attacks can be found in the following work
|
||||
`Searching for a Search Method: Benchmarking Search Algorithms for Generating NLP Adversarial Examples <https://arxiv.org/abs/2009.06368>`_
|
||||
by Jin Yong Yoo, John X. Morris, Eli Lifland, and Yanjun Qi.
|
||||
|
||||
SearchMethod
|
||||
------------
|
||||
.. autoclass:: textattack.search_methods.SearchMethod
|
||||
:members:
|
||||
|
||||
BeamSearch
|
||||
------------
|
||||
.. autoclass:: textattack.search_methods.BeamSearch
|
||||
:members:
|
||||
|
||||
GreedySearch
|
||||
------------
|
||||
.. autoclass:: textattack.search_methods.GreedySearch
|
||||
:members:
|
||||
|
||||
GreedyWordSwapWIR
|
||||
------------------
|
||||
.. autoclass:: textattack.search_methods.GreedyWordSwapWIR
|
||||
:members:
|
||||
|
||||
AlzantotGeneticAlgorithm
|
||||
-------------------------
|
||||
.. autoclass:: textattack.search_methods.AlzantotGeneticAlgorithm
|
||||
:members:
|
||||
|
||||
ImprovedGeneticAlgorithm
|
||||
-------------------------
|
||||
.. autoclass:: textattack.search_methods.ImprovedGeneticAlgorithm
|
||||
:members:
|
||||
|
||||
ParticleSwarmOptimization
|
||||
--------------------------
|
||||
.. autoclass:: textattack.search_methods.ParticleSwarmOptimization
|
||||
:members:
|
||||
|
||||
60
docs/api/trainer.rst
Normal file
@@ -0,0 +1,60 @@
|
||||
Training API Reference
|
||||
==========================
|
||||
|
||||
Trainer
|
||||
------------
|
||||
The :class:`~textattack.Trainer` class provides an API for adversarial training with features builtin for standard use cases.
|
||||
It is designed to be similar to the :obj:`Trainer` class provided by 🤗 Transformers library.
|
||||
Custom behaviors can be added by subclassing the class and overriding these methods:
|
||||
|
||||
- :meth:`training_step`: Peform a single training step. Override this for custom forward pass or custom loss.
|
||||
- :meth:`evaluate_step`: Peform a single evaluation step. Override this for custom foward pass.
|
||||
- :meth:`get_train_dataloader`: Creates the PyTorch DataLoader for training. Override this for custom batch setup.
|
||||
- :meth:`get_eval_dataloader`: Creates the PyTorch DataLoader for evaluation. Override this for custom batch setup.
|
||||
- :meth:`get_optimizer_and_scheduler`: Creates the optimizer and scheduler for training. Override this for custom optimizer and scheduler.
|
||||
|
||||
The pseudocode for how training is done:
|
||||
|
||||
.. code-block::
|
||||
|
||||
train_preds = []
|
||||
train_targets = []
|
||||
for batch in train_dataloader:
|
||||
loss, preds, targets = training_step(model, tokenizer, batch)
|
||||
train_preds.append(preds)
|
||||
train_targets.append(targets)
|
||||
|
||||
# clear gradients
|
||||
optimizer.zero_grad()
|
||||
|
||||
# backward
|
||||
loss.backward()
|
||||
|
||||
# update parameters
|
||||
optimizer.step()
|
||||
if scheduler:
|
||||
scheduler.step()
|
||||
|
||||
# Calculate training accuracy using `train_preds` and `train_targets`
|
||||
|
||||
eval_preds = []
|
||||
eval_targets = []
|
||||
for batch in eval_dataloader:
|
||||
loss, preds, targets = training_step(model, tokenizer, batch)
|
||||
eval_preds.append(preds)
|
||||
eval_targets.append(targets)
|
||||
|
||||
# Calculate eval accuracy using `eval_preds` and `eval_targets`
|
||||
|
||||
|
||||
.. autoclass:: textattack.Trainer
|
||||
:members:
|
||||
|
||||
|
||||
TrainingArgs
|
||||
-------------
|
||||
Training arguments to be passed to :class:`~textattack.Trainer` class.
|
||||
|
||||
.. autoclass:: textattack.TrainingArgs
|
||||
:members:
|
||||
|
||||
124
docs/apidoc/textattack.attack_recipes.rst
Normal file
@@ -0,0 +1,124 @@
|
||||
textattack.attack\_recipes package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.attack_recipes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.attack_recipe
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bae_garg_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.bert_attack_li_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.checklist_ribeiro_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.clare_li_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.deepwordbug_gao_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.faster_genetic_algorithm_jia_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.genetic_algorithm_alzantot_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.hotflip_ebrahimi_2017
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.iga_wang_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.input_reduction_feng_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.kuleshov_2017
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.morpheus_tan_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pruthi_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pso_zang_2020
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.pwws_ren_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.seq2sick_cheng_2018_blackbox
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textbugger_li_2018
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_recipes.textfooler_jin_2019
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
40
docs/apidoc/textattack.attack_results.rst
Normal file
@@ -0,0 +1,40 @@
|
||||
textattack.attack\_results package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.attack_results
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.failed_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.maximized_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.skipped_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.attack_results.successful_attack_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
22
docs/apidoc/textattack.augmentation.rst
Normal file
@@ -0,0 +1,22 @@
|
||||
textattack.augmentation package
|
||||
===============================
|
||||
|
||||
.. automodule:: textattack.augmentation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.augmentation.augmenter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.augmentation.recipes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
70
docs/apidoc/textattack.commands.rst
Normal file
@@ -0,0 +1,70 @@
|
||||
textattack.commands package
|
||||
===========================
|
||||
|
||||
.. automodule:: textattack.commands
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.attack_resume_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.augment_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.benchmark_recipe_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.eval_model_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.list_things_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.peek_dataset_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.textattack_cli
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.textattack_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.commands.train_model_command
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,34 @@
|
||||
textattack.constraints.grammaticality.language\_models.google\_language\_model package
|
||||
======================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.alzantot_goog_lm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.google_language_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_data_utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.google_language_model.lm_utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,34 @@
|
||||
textattack.constraints.grammaticality.language\_models.learning\_to\_write package
|
||||
==================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.adaptive_softmax
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.language_model_helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.learning_to_write
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.learning_to_write.rnn_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,31 @@
|
||||
textattack.constraints.grammaticality.language\_models package
|
||||
==============================================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality.language_models.google_language_model
|
||||
textattack.constraints.grammaticality.language_models.learning_to_write
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.gpt2
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_models.language_model_constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
36
docs/apidoc/textattack.constraints.grammaticality.rst
Normal file
@@ -0,0 +1,36 @@
|
||||
textattack.constraints.grammaticality package
|
||||
=============================================
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality.language_models
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.cola
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.language_tool
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.grammaticality.part_of_speech
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
40
docs/apidoc/textattack.constraints.overlap.rst
Normal file
@@ -0,0 +1,40 @@
|
||||
textattack.constraints.overlap package
|
||||
======================================
|
||||
|
||||
.. automodule:: textattack.constraints.overlap
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.bleu_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.chrf_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.levenshtein_edit_distance
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.max_words_perturbed
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.overlap.meteor_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
46
docs/apidoc/textattack.constraints.pre_transformation.rst
Normal file
@@ -0,0 +1,46 @@
|
||||
textattack.constraints.pre\_transformation package
|
||||
==================================================
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.input_column_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.max_modification_rate
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.max_word_index_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.min_word_length
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.repeat_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation.stopword_modification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
33
docs/apidoc/textattack.constraints.rst
Normal file
@@ -0,0 +1,33 @@
|
||||
textattack.constraints package
|
||||
==============================
|
||||
|
||||
.. automodule:: textattack.constraints
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.grammaticality
|
||||
textattack.constraints.overlap
|
||||
textattack.constraints.pre_transformation
|
||||
textattack.constraints.semantics
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.pre_transformation_constraint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
30
docs/apidoc/textattack.constraints.semantics.rst
Normal file
@@ -0,0 +1,30 @@
|
||||
textattack.constraints.semantics package
|
||||
========================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.semantics.sentence_encoders
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.bert_score
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.word_embedding_distance
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,16 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.bert package
|
||||
================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.bert
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.bert.bert
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,22 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.infer\_sent package
|
||||
=======================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.infer_sent.infer_sent_model
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,32 @@
|
||||
textattack.constraints.semantics.sentence\_encoders package
|
||||
===========================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.constraints.semantics.sentence_encoders.bert
|
||||
textattack.constraints.semantics.sentence_encoders.infer_sent
|
||||
textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.thought_vector
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
@@ -0,0 +1,22 @@
|
||||
textattack.constraints.semantics.sentence\_encoders.universal\_sentence\_encoder package
|
||||
========================================================================================
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.multilingual_universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.constraints.semantics.sentence_encoders.universal_sentence_encoder.universal_sentence_encoder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
16
docs/apidoc/textattack.datasets.helpers.rst
Normal file
@@ -0,0 +1,16 @@
|
||||
textattack.datasets.helpers package
|
||||
===================================
|
||||
|
||||
.. automodule:: textattack.datasets.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.helpers.ted_multi
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
30
docs/apidoc/textattack.datasets.rst
Normal file
@@ -0,0 +1,30 @@
|
||||
textattack.datasets package
|
||||
===========================
|
||||
|
||||
.. automodule:: textattack.datasets
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.datasets.helpers
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.datasets.huggingface_dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
28
docs/apidoc/textattack.goal_function_results.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
textattack.goal\_function\_results package
|
||||
==========================================
|
||||
|
||||
.. automodule:: textattack.goal_function_results
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.classification_goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_function_results.text_to_text_goal_function_result
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
34
docs/apidoc/textattack.goal_functions.classification.rst
Normal file
@@ -0,0 +1,34 @@
|
||||
textattack.goal\_functions.classification package
|
||||
=================================================
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.classification_goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.input_reduction
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.targeted_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.classification.untargeted_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
25
docs/apidoc/textattack.goal_functions.rst
Normal file
@@ -0,0 +1,25 @@
|
||||
textattack.goal\_functions package
|
||||
==================================
|
||||
|
||||
.. automodule:: textattack.goal_functions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 6
|
||||
|
||||
textattack.goal_functions.classification
|
||||
textattack.goal_functions.text
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
28
docs/apidoc/textattack.goal_functions.text.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
textattack.goal\_functions.text package
|
||||
=======================================
|
||||
|
||||
.. automodule:: textattack.goal_functions.text
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.minimize_bleu
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.non_overlapping_output
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.goal_functions.text.text_to_text_goal_function
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
46
docs/apidoc/textattack.loggers.rst
Normal file
@@ -0,0 +1,46 @@
|
||||
textattack.loggers package
|
||||
==========================
|
||||
|
||||
.. automodule:: textattack.loggers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.attack_log_manager
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.csv_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.file_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.visdom_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.loggers.weights_and_biases_logger
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
40
docs/apidoc/textattack.models.helpers.rst
Normal file
@@ -0,0 +1,40 @@
|
||||
textattack.models.helpers package
|
||||
=================================
|
||||
|
||||
.. automodule:: textattack.models.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.glove_embedding_layer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.lstm_for_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.t5_for_text_to_text
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: textattack.models.helpers.word_cnn_for_classification
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||