mirror of
https://github.com/fhamborg/news-please.git
synced 2021-09-19 22:26:00 +03:00
Update commoncrawl.py
This commit is contained in:
@@ -25,7 +25,7 @@ cd news-please
|
|||||||
python3 -m newsplease.examples.commoncrawl
|
python3 -m newsplease.examples.commoncrawl
|
||||||
|
|
||||||
Note that by default the script does not extract main images since they are not contained
|
Note that by default the script does not extract main images since they are not contained
|
||||||
WARC files. You can enable extraction of main images by setting
|
WARC files. You can enable extraction of main images by setting `my_fetch_images=True`
|
||||||
"""
|
"""
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
|||||||
Reference in New Issue
Block a user