mirror of
https://github.com/michaelharms/comcrawl.git
synced 2021-09-27 00:43:48 +03:00
formatting
This commit is contained in:
@@ -8,10 +8,7 @@ from snapshottest import Snapshot
|
||||
snapshots = Snapshot()
|
||||
|
||||
snapshots['test_comcrawl 1'] = {
|
||||
'charset': 'UTF-8',
|
||||
'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J',
|
||||
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz',
|
||||
'html': '''<!DOCTYPE html>
|
||||
'charset': 'UTF-8', 'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J', 'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz', 'html': '''<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<link rel="stylesheet" href="/static/__shared/shared.css"/>
|
||||
@@ -723,14 +720,4 @@ Powered by <a href="https://github.com/webrecorder/pywb">pywb</a>
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>''',
|
||||
'languages': 'eng',
|
||||
'length': '3404',
|
||||
'mime': 'text/html',
|
||||
'mime-detected': 'text/html',
|
||||
'offset': '68774745',
|
||||
'status': '200',
|
||||
'timestamp': '20191207172145',
|
||||
'url': 'http://index.commoncrawl.org/',
|
||||
'urlkey': 'org,commoncrawl,index)/'
|
||||
}
|
||||
</html>''', 'languages': 'eng', 'length': '3404', 'mime': 'text/html', 'mime-detected': 'text/html', 'offset': '68774745', 'status': '200', 'timestamp': '20191207172145', 'url': 'http://index.commoncrawl.org/', 'urlkey': 'org,commoncrawl,index)/'}
|
||||
|
||||
Reference in New Issue
Block a user