1
0
mirror of https://github.com/michaelharms/comcrawl.git synced 2021-09-27 00:43:48 +03:00

doing some code style refactorings

This commit is contained in:
Michael Harms
2020-01-15 07:57:16 +01:00
parent b1003249be
commit f5dfbe4372
6 changed files with 28 additions and 24 deletions

View File

@@ -43,15 +43,15 @@ method-rgx=[a-z_][a-z0-9_]{2,70}$
[FORMAT]
# Maximum number of characters on a single line.
max-line-length = 100
max-line-length = 79
[DESIGN]
# Minimum number of public methods for a class (see R0903).
min-public-methods = 0
# Maximum number of attributes for a class (see R0902).
max-attributes = 15
max-attributes = 10
max-locals = 25
max-locals = 20
max-args = 10

View File

@@ -1,5 +1,5 @@
from pandas import DataFrame, Series
from ..utils import download_single_result
from ..utils import _download_single_result
def download(results: DataFrame) -> Series:
@@ -17,6 +17,6 @@ def download(results: DataFrame) -> Series:
new_results = results.copy()
new_results["html"] = ""
for _, row in new_results.iterrows():
row["html"] = download_single_result(row.to_dict())
row["html"] = _download_single_result(row.to_dict())
return new_results["html"]

View File

@@ -1,26 +1,28 @@
from typing import List, Dict
import concurrent.futures
from concurrent import futures
import pandas as pd
from ..utils import search_single_index
from ..utils import _search_single_index
DEFAULT_INDEXES = open("comcrawl/config/default_indexes.txt", "r").read().split("\n")
DEFAULT_INDEXES = (open("comcrawl/config/default_indexes.txt", "r")
.read()
.split("\n"))
def search(
url: str,
indexes: List[str] = DEFAULT_INDEXES,
threads: int = None
) -> List[Dict[str, Dict]]:
def search(url: str,
indexes: List[str] = DEFAULT_INDEXES,
threads: int = None) -> List[Dict[str, Dict]]:
"""Searches multiple Common Crawl indices for URL pattern.
Args:
url: The URL pattern to search for.
indexes: List of Common Crawl indexes to search in.
threads: Number of threads to use for faster search on multiple threads.
threads: Number of threads to use for faster search on
multiple threads.
Returns:
List of all results found throughout the specified Common Crawl indices.
List of all results found throughout the specified
Common Crawl indices.
"""
@@ -28,22 +30,22 @@ def search(
# multi-threaded search
if threads:
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
with futures.ThreadPoolExecutor(max_workers=threads) as executor:
future_to_index = {
executor.submit(
search_single_index,
_search_single_index,
index,
url
): index for index in indexes
}
for future in concurrent.futures.as_completed(future_to_index):
for future in futures.as_completed(future_to_index):
results.extend(future.result())
# single-threaded search
else:
for index in indexes:
index_results = search_single_index(index, url)
index_results = _search_single_index(index, url)
results.extend(index_results)
return pd.DataFrame(results)

View File

@@ -1,2 +1,2 @@
from .search_single_index import search_single_index
from .download_single_result import download_single_result
from ._search_single_index import _search_single_index
from ._download_single_result import _download_single_result

View File

@@ -4,7 +4,7 @@ import gzip
import requests
def download_single_result(result: Dict) -> str:
def _download_single_result(result: Dict) -> str:
"""Downloads HTML for single search result.
Args:

View File

@@ -2,10 +2,12 @@ from typing import List, Dict
import json
import requests
SEARCH_URL_TEMPLATE = "https://index.commoncrawl.org/CC-MAIN-{index}-index?url={url}&output=json"
SEARCH_URL_TEMPLATE = ("https://index.commoncrawl.org/CC-MAIN-"
"{index}-index?url={url}&output=json")
def search_single_index(index: str, url: str) -> List[Dict]:
def _search_single_index(index: str,
url: str) -> List[Dict]:
"""Searches single Common Crawl index for given URL pattern.
Args: