mirror of
https://github.com/fhamborg/Giveme5W1H.git
synced 2021-08-01 22:47:51 +03:00
Replace pycorenlp with Stanza to fix Unicode encoding issues.
This commit is contained in:
4
.idea/encodings.xml
generated
Normal file
4
.idea/encodings.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
|
||||
</project>
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
@@ -1,7 +1,7 @@
|
||||
import logging
|
||||
|
||||
import nltk
|
||||
from pycorenlp import StanfordCoreNLP
|
||||
from stanza.server import CoreNLPClient, StartServer
|
||||
|
||||
|
||||
class Preprocessor:
|
||||
@@ -18,10 +18,9 @@ class Preprocessor:
|
||||
self.log = logging.getLogger('GiveMe5W')
|
||||
|
||||
# connect to CoreNLP server
|
||||
if host is None:
|
||||
self.cnlp = StanfordCoreNLP("http://localhost:9000")
|
||||
else:
|
||||
self.cnlp = StanfordCoreNLP(host)
|
||||
host = "http://localhost:9000" if host is None else host
|
||||
self.cnlp = CoreNLPClient(endpoint=host,
|
||||
start_server = StartServer.DONT_START)
|
||||
|
||||
# define basic base_config and desired processing pipeline
|
||||
self.base_config = {
|
||||
@@ -109,7 +108,8 @@ class Preprocessor:
|
||||
:return Document: The processed Document object.
|
||||
"""
|
||||
actual_config = self._build_actual_config(document)
|
||||
annotation = self.cnlp.annotate(document.get_full_text(), actual_config)
|
||||
annotation = self.cnlp.annotate(text=document.get_full_text(),
|
||||
properties = actual_config)
|
||||
|
||||
if type(annotation) is str:
|
||||
print(annotation)
|
||||
|
||||
@@ -9,7 +9,7 @@ numpy==1.14.3
|
||||
pandas==0.22.0
|
||||
parsedatetime==2.4
|
||||
plotly==2.5.1
|
||||
pycorenlp==0.3.0
|
||||
stanza>=1.1.1
|
||||
spacy==2.0.11
|
||||
Twisted==19.7.0
|
||||
typing==3.6.4
|
||||
|
||||
Reference in New Issue
Block a user