1
0
mirror of https://github.com/michaelharms/comcrawl.git synced 2021-09-27 00:43:48 +03:00

Rename the `download_pages` method to `download`

This commit is contained in:
Michael Harms
2020-01-19 09:56:57 +01:00
parent a7c4d964c4
commit 2e99a2391c
3 changed files with 7 additions and 7 deletions

View File

@@ -44,7 +44,7 @@ from comcrawl import IndexClient
client = IndexClient()
client.search("reddit.com/r/MachineLearning/*")
-client.download_pages()
+client.download()
results = client.results
```
@@ -59,7 +59,7 @@ from comcrawl import IndexClient
client = IndexClient()
client.search("reddit.com/r/MachineLearning/*", threads=4)
-client.download_pages()
+client.download()
results = client.results
```
@@ -80,7 +80,7 @@ sorted_df = df.sort_values(by="timestamp")
filtered_df = sorted_df.drop_duplicates("urlkey", keep="last")
client.results = filtered_df.to_dict("records")
-client.download_pages()
+client.download()
pd.DataFrame(client.results).to_csv("results.csv")
```
@@ -94,7 +94,7 @@ from comcrawl import IndexClient
client = IndexClient(["2019-51", "2019-47"])
client.search("reddit.com/r/MachineLearning/*")
-client.download_pages()
+client.download()
results = client.results
```
@@ -108,7 +108,7 @@ from comcrawl import IndexClient
client = IndexClient(verbose=True)
client.search("reddit.com/r/MachineLearning/*")
-client.download_pages()
+client.download()
results = client.results
```

View File

@@ -64,7 +64,7 @@ class IndexClient:
"""
self.results = search_multiple_indexes(url, self.indexes, threads)
-def download_pages(self, threads: int = None) -> None:
+def download(self, threads: int = None) -> None:
"""Download
Downloads the HTML for every result in the

View File

@@ -17,6 +17,6 @@ def test_comcrawl(snapshot):
assert len(client.results) == 2
-client.download_pages()
+client.download()
snapshot.assert_match(client.results[1])