1
0
mirror of https://github.com/fhamborg/news-please.git synced 2021-09-19 22:26:00 +03:00

Reorganization finished

This commit is contained in:
felix
2016-11-09 19:54:30 +01:00
parent 4d8199ff42
commit 3867131f2f
5 changed files with 9 additions and 11 deletions

View File

@@ -1,8 +1,5 @@
include newsplease/config/config.cfg
include newsplease/config/sitelist.hjson
include newsplease/misc/logo/logo-128.png
include newsplease/misc/logo/logo-256.png
include newsplease/misc/logo/logo.svg
include newsplease/LICENSE.txt
include newsplease/README.md
include newsplease/requirements.txt
include LICENSE.txt
include README.md
include requirements.txt

View File

@@ -44,6 +44,7 @@ class NewsPlease(object):
mysql = None
elasticsearch = None
number_of_active_crawlers = 0
config_default_path = "./config/config.cfg"
__single_crawler = False
@@ -273,10 +274,10 @@ class NewsPlease(object):
else:
self.log.error("First argument passed to newsplease "
"is not the config file. Falling back to "
"./config.cfg.")
+ self.config_default_path)
# Default
return self.get_abs_file_path("../config/config.cfg", quit_on_error=True)
return self.get_abs_file_path(self.config_default_path, quit_on_error=True)
def print_help(self):
"""

View File

@@ -132,8 +132,8 @@ relative_to_start_processes_file = True
# Here you can specify the input JSON-File
# The input file containing the base URLs to crawl
# absolute and relative file paths are allowed
# default: ../config/sitelist.hjson
url_input = ../config/sitelist.hjson
# default: ./config/sitelist.hjson
url_input = ./config/sitelist.hjson

View File

@@ -4,7 +4,7 @@ import sys, os
setup(name='news-please',
version='1.0.11',
version='1.0.18',
description="news-please is an open source easy-to-use news extractor that just works.",
long_description="""\
news-please is an open source, easy-to-use news crawler that extracts structured information from almost any news website. It can follow recursively internal hyperlinks and read RSS feeds to fetch both most recent and also old, archived articles. You only need to provide the root URL of the news website.""",