1
0
mirror of https://github.com/michaelharms/comcrawl.git synced 2021-09-27 00:43:48 +03:00

formatting

This commit is contained in:
Michael Harms
2020-01-18 10:45:36 +01:00
parent 209c74c987
commit 925d286dc7

View File

@@ -8,10 +8,7 @@ from snapshottest import Snapshot
snapshots = Snapshot()
snapshots['test_comcrawl 1'] = {
'charset': 'UTF-8',
'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz',
'html': '''<!DOCTYPE html>
'charset': 'UTF-8', 'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J', 'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz', 'html': '''<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="/static/__shared/shared.css"/>
@@ -723,14 +720,4 @@ Powered by <a href="https://github.com/webrecorder/pywb">pywb</a>
</p>
</body>
</html>''',
'languages': 'eng',
'length': '3404',
'mime': 'text/html',
'mime-detected': 'text/html',
'offset': '68774745',
'status': '200',
'timestamp': '20191207172145',
'url': 'http://index.commoncrawl.org/',
'urlkey': 'org,commoncrawl,index)/'
}
</html>''', 'languages': 'eng', 'length': '3404', 'mime': 'text/html', 'mime-detected': 'text/html', 'offset': '68774745', 'status': '200', 'timestamp': '20191207172145', 'url': 'http://index.commoncrawl.org/', 'urlkey': 'org,commoncrawl,index)/'}