mirror of
https://github.com/fhamborg/news-please.git
synced 2021-09-19 22:26:00 +03:00
reorga finished
This commit is contained in:
@@ -1,8 +1,5 @@
|
||||
include newsplease/config/config.cfg
|
||||
include newsplease/config/sitelist.hjson
|
||||
include newsplease/misc/logo/logo-128.png
|
||||
include newsplease/misc/logo/logo-256.png
|
||||
include newsplease/misc/logo/logo.svg
|
||||
include newsplease/LICENSE.txt
|
||||
include newsplease/README.md
|
||||
include newsplease/requirements.txt
|
||||
include LICENSE.txt
|
||||
include README.md
|
||||
include requirements.txt
|
||||
|
||||
@@ -44,6 +44,7 @@ class NewsPlease(object):
|
||||
mysql = None
|
||||
elasticsearch = None
|
||||
number_of_active_crawlers = 0
|
||||
config_default_path = "./config/config.cfg"
|
||||
|
||||
__single_crawler = False
|
||||
|
||||
@@ -273,10 +274,10 @@ class NewsPlease(object):
|
||||
else:
|
||||
self.log.error("First argument passed to newsplease "
|
||||
"is not the config file. Falling back to "
|
||||
"./config.cfg.")
|
||||
+ self.config_default_path)
|
||||
|
||||
# Default
|
||||
return self.get_abs_file_path("../config/config.cfg", quit_on_error=True)
|
||||
return self.get_abs_file_path(self.config_default_path, quit_on_error=True)
|
||||
|
||||
def print_help(self):
|
||||
"""
|
||||
|
||||
@@ -132,8 +132,8 @@ relative_to_start_processes_file = True
|
||||
# Here you can specify the input JSON-File
|
||||
# The input file containing the base URLs to crawl
|
||||
# absolute and relative file paths are allowed
|
||||
# default: ../config/sitelist.hjson
|
||||
url_input = ../config/sitelist.hjson
|
||||
# default: ./config/sitelist.hjson
|
||||
url_input = ./config/sitelist.hjson
|
||||
|
||||
|
||||
|
||||
2
setup.py
2
setup.py
@@ -4,7 +4,7 @@ import sys, os
|
||||
|
||||
|
||||
setup(name='news-please',
|
||||
version='1.0.11',
|
||||
version='1.0.18',
|
||||
description="news-please is an open source easy-to-use news extractor that just works.",
|
||||
long_description="""\
|
||||
news-please is an open source, easy-to-use news crawler that extracts structured information from almost any news website. It can follow recursively internal hyperlinks and read RSS feeds to fetch both most recent and also old, archived articles. You only need to provide the root URL of the news website.""",
|
||||
|
||||
Reference in New Issue
Block a user