1
0
mirror of https://github.com/fhamborg/news-please.git synced 2021-09-27 00:44:24 +03:00
Files
news-please-crawler/newsplease/newspleaselib.py
2017-02-23 18:52:54 +01:00

45 lines
1.3 KiB
Python

import sys
import os
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
from newsplease.pipeline.pipelines import InMemoryStorage
from newsplease.single_crawler import SingleCrawler
class NewsPleaseLib:
"""
Access news-please functionality via this interface
"""
@staticmethod
def download_article(url):
"""
Crawls the article from the url and extracts relevant information.
:param url:
:return:
"""
SingleCrawler.create_as_library(url)
results = InMemoryStorage.get_results()
article = results[url]
del results[url]
return article
@staticmethod
def download_articles(urls):
"""
Crawls articles from the urls and extracts relevant information.
:param urls:
:return:
"""
SingleCrawler.create_as_library(urls)
results = InMemoryStorage.get_results()
articles = []
for url in urls:
article = results[url]
del results[url]
articles.append(article)
print(article['title'])
return articles
if __name__ == '__main__':
NewsPleaseLib.download_article('http://www.zeit.de/politik/deutschland/2017-02/fluechtlinge-asylverfahren-bamf-taeuschung-afghanistan')