1
0
mirror of https://github.com/fhamborg/news-please.git synced 2021-09-19 22:26:00 +03:00

help parameter

This commit is contained in:
felix
2016-11-15 17:00:04 +01:00
parent e40cb51fd5
commit aa52efade8
3 changed files with 34 additions and 3 deletions

View File

@@ -9,6 +9,7 @@ import logging
import pymysql
from elasticsearch import Elasticsearch
from scrapy.utils.log import configure_logging
import plac
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
from newsplease.helper_classes.savepath_parser import SavepathParser
@@ -48,8 +49,17 @@ class NewsPlease(object):
__single_crawler = False
def __init__(self):
print("newsplease is starting on Python " + sys.version)
def __init__(self, cfg_file_path, is_resume, is_reset_elasticsearch, is_reset_json, is_reset_mysql, is_no_confirm):
"""
The constructor of the main class, thus the real entry point to the tool.
:param cfg_file_path:
:param is_resume:
:param is_reset_elasticsearch:
:param is_reset_json:
:param is_reset_mysql:
:param is_no_confirm:
"""
# print("newsplease is starting on Python " + sys.version)
configure_logging({"LOG_LEVEL": "ERROR"})
self.log = logging.getLogger(__name__)
@@ -614,5 +624,24 @@ Cleanup files:
# Argument-less entry point: constructs NewsPlease with no arguments.
# NOTE(review): the NewsPlease constructor visible in this change takes six
# parameters (cfg_file_path, is_resume, is_reset_elasticsearch, is_reset_json,
# is_reset_mysql, is_no_confirm), so this call looks like the older entry
# point that cli() is meant to replace -- confirm before relying on it.
def main():
NewsPlease()
# Command-line entry point, meant to be run through plac.call(cli).
#
# The per-parameter (help, kind[, abbrev]) tuples are attached with the
# plac.annotations decorator instead of inline function annotations: inline
# annotations are Python 3-only syntax, and this project still declares
# Python 2.7 support in its requirements.
#
# Parameters:
#   cfg_file_path        value of the -c option, passed through unchanged
#   resume               flag, passed through unchanged
#   reset_elasticsearch  flag, forced to True when reset_all is set
#   reset_json           flag, forced to True when reset_all is set
#   reset_mysql          flag, forced to True when reset_all is set
#   reset_all            flag that switches on all three reset_* flags
#   no_confirm           flag, passed through unchanged
@plac.annotations(
    cfg_file_path=('path to the config file', 'option', 'c'),
    resume=('resume crawling from last process', 'flag'),
    reset_elasticsearch=('reset Elasticsearch indexes', 'flag'),
    reset_json=('reset JSON files', 'flag'),
    reset_mysql=('reset MySQL database', 'flag'),
    reset_all=('combines all reset options', 'flag'),
    no_confirm=('skip confirm dialogs', 'flag'),
)
def cli(cfg_file_path, resume, reset_elasticsearch, reset_json, reset_mysql,
        reset_all, no_confirm):
    "A generic news crawler and extractor. If started without the -c ... todo"
    # The docstring above is kept verbatim: plac may surface it as the
    # program description in the generated help output.

    # reset_all overrides the three individual reset switches.
    if reset_all:
        reset_elasticsearch = True
        reset_json = True
        reset_mysql = True

    # Constructing NewsPlease is what starts the tool; nothing is returned.
    NewsPlease(cfg_file_path, resume, reset_elasticsearch, reset_json,
               reset_mysql, no_confirm)
# Script entry point: runs only when this module is executed directly.
# NOTE(review): main() and plac.call(cli) both appear below. This looks like
# the old and the new entry-point line of the change shown together --
# confirm that only plac.call(cli) is meant to remain, since main() builds
# NewsPlease without any of the command-line arguments.
if __name__ == "__main__":
main()
plac.call(cli)

View File

@@ -10,3 +10,4 @@ newspaper3k>=0.1.7 ; python_version >= '3.0'
newspaper>=0.0.9.8 ; python_version == '2.7'
langdetect>=1.0.7
python-dateutil>=2.4.0
plac>=0.9.6

View File

@@ -46,6 +46,7 @@ news-please is an open source, easy-to-use news crawler that extracts structured
'readability-lxml>=0.6.2',
'langdetect>=1.0.7',
'python-dateutil>=2.4.0',
'plac>=0.9.6'
],
extras_require={
':python_version == "2.7"':[