1
0
mirror of https://github.com/fhamborg/news-please.git synced 2021-09-19 22:26:00 +03:00

Update commoncrawl.py

This commit is contained in:
Felix Hamborg
2021-06-27 21:29:05 +02:00
committed by GitHub
parent 1e94327a3b
commit cc0be8e5e2

View File

@@ -25,7 +25,7 @@ cd news-please
python3 -m newsplease.examples.commoncrawl
Note that by default the script does not extract main images since they are not contained in
WARC files. You can enable extraction of main images by setting
WARC files. You can enable extraction of main images by setting `my_fetch_images=True`
"""
import hashlib
import json