1
0
mirror of https://github.com/fhamborg/news-please.git synced 2021-09-27 00:44:24 +03:00
Files
news-please-crawler/newsplease/pipeline/extractor/comparer/comparer.py
2016-11-09 18:33:45 +01:00

43 lines
1.9 KiB
Python

from ..article_candidate import ArticleCandidate
from .comparer_title import ComparerTitle
from .comparer_description import ComparerDescription
from .comparer_text import ComparerText
from .comparer_topimage import ComparerTopimage
from .comparer_author import ComparerAuthor
from .comparer_date import ComparerDate
from .comparer_Language import ComparerLanguage
class Comparer:
    """Send the list of ArticleCandidates to the sub-comparers and collect the results.

    One sub-comparer is held per article field (title, description, text,
    top image, author, publish date, language). ``compare`` asks each of them
    for its result and stores those results on a new ArticleCandidate.
    """

    def __init__(self):
        # Create one sub-comparer per article field.
        self.comparer_title = ComparerTitle()
        self.comparer_description = ComparerDescription()
        self.comparer_text = ComparerText()
        self.comparer_topimage = ComparerTopimage()
        self.comparer_author = ComparerAuthor()
        self.comparer_date = ComparerDate()
        self.comparer_language = ComparerLanguage()

    @property
    def comparer_desciption(self):
        """Misspelled legacy name of ``comparer_description``.

        Kept so that code written against the old attribute name keeps
        working; new code should use ``comparer_description``.
        """
        return self.comparer_description

    @comparer_desciption.setter
    def comparer_desciption(self, value):
        # Route writes to the correctly spelled attribute so both names
        # always refer to the same sub-comparer.
        self.comparer_description = value

    def compare(self, item, article_candidates):
        """Compare the article candidates using the different sub-comparers and
        save their results in a new ArticleCandidate object.

        :param item: The NewscrawlerItem related to the ArticleCandidates
        :param article_candidates: The list of ArticleCandidate objects which have been extracted
        :return: An ArticleCandidate object holding, for each field, the value
            returned by the matching sub-comparer's ``extract(item, article_candidates)``
        """
        result = ArticleCandidate()

        result.title = self.comparer_title.extract(item, article_candidates)
        result.description = self.comparer_description.extract(item, article_candidates)
        result.text = self.comparer_text.extract(item, article_candidates)
        result.topimage = self.comparer_topimage.extract(item, article_candidates)
        result.author = self.comparer_author.extract(item, article_candidates)
        result.publish_date = self.comparer_date.extract(item, article_candidates)
        result.language = self.comparer_language.extract(item, article_candidates)

        return result