mirror of
https://github.com/fhamborg/Giveme5W1H.git
synced 2021-08-01 22:47:51 +03:00
Cleanup with pycharm. Bugfixes
This commit is contained in:
14
.idea/Giveme5W.iml
generated
Normal file
14
.idea/Giveme5W.iml
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/examples" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/extractor" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.6.1 (/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||
</component>
|
||||
</module>
|
||||
4
.idea/misc.xml
generated
Normal file
4
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.1 (/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/Giveme5W.iml" filepath="$PROJECT_DIR$/.idea/Giveme5W.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
441
.idea/workspace.xml
generated
Normal file
441
.idea/workspace.xml
generated
Normal file
@@ -0,0 +1,441 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="f3ddf391-85fc-43d5-92ec-816971c35368" name="Default" comment="">
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/Giveme5W.iml" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.settings/org.eclipse.ltk.core.refactoring.prefs" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/08ebc52cada9093774e3ba71312def80ca3621c959d67877a6e3b228.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/09d729c0013e096cf6ed1b13e52c9d3afd2538333cd74e1d42ab66fb.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/0e5fa7c0e6252bfeeea5e3840c6cb503f299c19d24331c4ba60c5974.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/0e7ab2ce71c1bce03040ec2388dd45ab069d5432b364495b9cfcfdf5.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/101ee22f1f9e984c1bd2bd64ad44db8452b0669178f1683eafa338c7.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/13a20be02e984533782dc2c6cd2f468b879ce4be88127566da043b0f.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1667b720a919a4cd7904557320d4f7572f2c55bdfb26cd0bf7946a5a.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1778efb0dfd0f1e933513e0f79af3c09986a40791ecd6faf2239f09c.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/189af426dcbdb42921a716cf136f75554e4befad28c088f3d930b6c4.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e0f6556611739fb0f8a802cf5f4ffa966df21de1ae3aa11ea647516.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e471fa6844bce9e88878ce164699ab88ec3dc38f166a251f071b408.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e582ad25294607005ccb6d7abe2806c90b39eec91baeb1082072c81.pickle" />
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/parse_documents_simple.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.project" afterPath="$PROJECT_DIR$/.project" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.pydevproject" afterPath="$PROJECT_DIR$/.pydevproject" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/examples/learn_weights.py" afterPath="$PROJECT_DIR$/examples/learn_weights.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/examples/parse_documents.py" afterPath="$PROJECT_DIR$/examples/parse_documents.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" afterPath="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/document.py" afterPath="$PROJECT_DIR$/extractor/document.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractor.py" afterPath="$PROJECT_DIR$/extractor/extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/abs_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/abs_extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/action_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/action_extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/candidate.py" afterPath="$PROJECT_DIR$/extractor/extractors/candidate.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/environment_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/environment_extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/method_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/method_extractor.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py" afterPath="$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/handler.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/handler.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/reader.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/reader.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/writer.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/writer.py" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/util.py" afterPath="$PROJECT_DIR$/extractor/tools/util.py" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="TRACKING_ENABLED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
|
||||
<component name="FileEditorManager">
|
||||
<leaf>
|
||||
<file leaf-file-name="candidate.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/candidate.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="151">
|
||||
<caret line="31" column="0" lean-forward="false" selection-start-line="31" selection-start-column="0" selection-end-line="31" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="cause_extractor.py" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="134" column="9" lean-forward="false" selection-start-line="134" selection-start-column="9" selection-end-line="134" selection-end-column="9" />
|
||||
<folding>
|
||||
<element signature="e#0#25#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/examples/parse_documents.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/tools/news_please/writer.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/extractors/method_extractor.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/tools/news_please/handler.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/extractors/candidate.py" />
|
||||
<option value="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<option name="width" value="1280" />
|
||||
<option name="height" value="800" />
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||
<flattenPackages />
|
||||
<showMembers />
|
||||
<showModules />
|
||||
<showLibraryContents />
|
||||
<hideEmptyPackages />
|
||||
<abbreviatePackageNames />
|
||||
<autoscrollToSource />
|
||||
<autoscrollFromSource />
|
||||
<sortByType />
|
||||
<manualOrder />
|
||||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="Giveme5W" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="Giveme5W" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="Giveme5W" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="Giveme5W" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="extractor" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
<pane id="Scratches" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
<property name="FullScreen" value="true" />
|
||||
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PythonContentEntriesConfigurable" />
|
||||
<property name="run.code.analysis.last.selected.profile" value="pProject Default" />
|
||||
</component>
|
||||
<component name="RunDashboard">
|
||||
<option name="ruleStates">
|
||||
<list>
|
||||
<RuleState>
|
||||
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
<RuleState>
|
||||
<option name="name" value="StatusDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.parse_documents">
|
||||
<configuration default="false" name="learn_weights" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/examples/learn_weights.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="false" name="parse_documents" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/examples/parse_documents.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="Tox" factoryName="Tox">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Doctests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Unittests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="Giveme5W" />
|
||||
<option name="_new_additionalArguments" value="&quot;&quot;" />
|
||||
<option name="_new_target" value="&quot;.&quot;" />
|
||||
<option name="_new_targetType" value="&quot;PATH&quot;" />
|
||||
<method />
|
||||
</configuration>
|
||||
<list size="2">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.learn_weights" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.parse_documents" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list size="2">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.parse_documents" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.learn_weights" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
<component name="ShelveChangesManager" show_recycled="false">
|
||||
<option name="remove_strategy" value="false" />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="f3ddf391-85fc-43d5-92ec-816971c35368" name="Default" comment="" />
|
||||
<created>1498619819806</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1498619819806</updated>
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="0" y="0" width="1280" height="800" extended-state="6" />
|
||||
<layout>
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.3001328" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24609375" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Inspection Results" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32934928" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3997344" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="XDebuggerManager">
|
||||
<breakpoint-manager>
|
||||
<breakpoints>
|
||||
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
||||
<url>file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py</url>
|
||||
<line>157</line>
|
||||
</line-breakpoint>
|
||||
</breakpoints>
|
||||
<option name="time" value="1" />
|
||||
</breakpoint-manager>
|
||||
<watches-manager />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/examples/parse_documents_simple.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#0#14#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/reader.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="166">
|
||||
<caret line="28" column="0" lean-forward="false" selection-start-line="28" selection-start-column="0" selection-end-line="28" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-15740">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/examples/learn_weights.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="93">
|
||||
<caret line="56" column="58" lean-forward="false" selection-start-line="56" selection-start-column="58" selection-end-line="56" selection-end-column="58" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/abs_extractor.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="151">
|
||||
<caret line="103" column="8" lean-forward="false" selection-start-line="103" selection-start-column="8" selection-end-line="103" selection-end-column="8" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="236">
|
||||
<caret line="56" column="49" lean-forward="false" selection-start-line="56" selection-start-column="37" selection-end-line="56" selection-end-column="49" />
|
||||
<folding>
|
||||
<element signature="e#0#14#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/examples/parse_documents.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="85">
|
||||
<caret line="32" column="59" lean-forward="true" selection-start-line="32" selection-start-column="59" selection-end-line="32" selection-end-column="59" />
|
||||
<folding>
|
||||
<element signature="e#0#14#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/method_extractor.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="58">
|
||||
<caret line="92" column="26" lean-forward="false" selection-start-line="92" selection-start-column="26" selection-end-line="92" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/writer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="151">
|
||||
<caret line="37" column="45" lean-forward="false" selection-start-line="37" selection-start-column="45" selection-end-line="37" selection-end-column="45" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="135">
|
||||
<caret line="9" column="0" lean-forward="false" selection-start-line="9" selection-start-column="0" selection-end-line="9" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/handler.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="189">
|
||||
<caret line="45" column="6" lean-forward="false" selection-start-line="45" selection-start-column="6" selection-end-line="45" selection-end-column="6" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/candidate.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="151">
|
||||
<caret line="31" column="0" lean-forward="false" selection-start-line="31" selection-start-column="0" selection-end-line="31" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="134" column="9" lean-forward="false" selection-start-line="134" selection-start-column="9" selection-end-line="134" selection-end-column="9" />
|
||||
<folding>
|
||||
<element signature="e#0#25#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
||||
10
.project
10
.project
@@ -14,4 +14,14 @@
|
||||
<natures>
|
||||
<nature>org.python.pydev.pythonNature</nature>
|
||||
</natures>
|
||||
<variableList>
|
||||
<variable>
|
||||
<name>j</name>
|
||||
<value>file:/Users/bkr/sourceTree/Giveme5W/examples</value>
|
||||
</variable>
|
||||
<variable>
|
||||
<name>n</name>
|
||||
<value>file:/Users/bkr/sourceTree/Giveme5W/extractor</value>
|
||||
</variable>
|
||||
</variableList>
|
||||
</projectDescription>
|
||||
|
||||
@@ -2,9 +2,12 @@
|
||||
<?eclipse-pydev version="1.0"?><pydev_project>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.6</pydev_property>
|
||||
|
||||
|
||||
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||
<path>/${PROJECT_DIR_NAME}/extractor</path>
|
||||
<path>/${PROJECT_DIR_NAME}/examples</path>
|
||||
<path>/${PROJECT_DIR_NAME}/extractor</path>
|
||||
</pydev_pathproperty>
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_EXTERNAL_SOURCE_PATH">
|
||||
<path>/Users/bkr/sourceTree/Giveme5W/extractor</path>
|
||||
|
||||
2
.settings/org.eclipse.ltk.core.refactoring.prefs
Normal file
2
.settings/org.eclipse.ltk.core.refactoring.prefs
Normal file
@@ -0,0 +1,2 @@
|
||||
eclipse.preferences.version=1
|
||||
org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
|
||||
BIN
examples/cache/08ebc52cada9093774e3ba71312def80ca3621c959d67877a6e3b228.pickle
vendored
Normal file
BIN
examples/cache/08ebc52cada9093774e3ba71312def80ca3621c959d67877a6e3b228.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/09d729c0013e096cf6ed1b13e52c9d3afd2538333cd74e1d42ab66fb.pickle
vendored
Normal file
BIN
examples/cache/09d729c0013e096cf6ed1b13e52c9d3afd2538333cd74e1d42ab66fb.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/0e5fa7c0e6252bfeeea5e3840c6cb503f299c19d24331c4ba60c5974.pickle
vendored
Normal file
BIN
examples/cache/0e5fa7c0e6252bfeeea5e3840c6cb503f299c19d24331c4ba60c5974.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/0e7ab2ce71c1bce03040ec2388dd45ab069d5432b364495b9cfcfdf5.pickle
vendored
Normal file
BIN
examples/cache/0e7ab2ce71c1bce03040ec2388dd45ab069d5432b364495b9cfcfdf5.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/101ee22f1f9e984c1bd2bd64ad44db8452b0669178f1683eafa338c7.pickle
vendored
Normal file
BIN
examples/cache/101ee22f1f9e984c1bd2bd64ad44db8452b0669178f1683eafa338c7.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/13a20be02e984533782dc2c6cd2f468b879ce4be88127566da043b0f.pickle
vendored
Normal file
BIN
examples/cache/13a20be02e984533782dc2c6cd2f468b879ce4be88127566da043b0f.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1667b720a919a4cd7904557320d4f7572f2c55bdfb26cd0bf7946a5a.pickle
vendored
Normal file
BIN
examples/cache/1667b720a919a4cd7904557320d4f7572f2c55bdfb26cd0bf7946a5a.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1778efb0dfd0f1e933513e0f79af3c09986a40791ecd6faf2239f09c.pickle
vendored
Normal file
BIN
examples/cache/1778efb0dfd0f1e933513e0f79af3c09986a40791ecd6faf2239f09c.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/189af426dcbdb42921a716cf136f75554e4befad28c088f3d930b6c4.pickle
vendored
Normal file
BIN
examples/cache/189af426dcbdb42921a716cf136f75554e4befad28c088f3d930b6c4.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle
vendored
Normal file
BIN
examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1e0f6556611739fb0f8a802cf5f4ffa966df21de1ae3aa11ea647516.pickle
vendored
Normal file
BIN
examples/cache/1e0f6556611739fb0f8a802cf5f4ffa966df21de1ae3aa11ea647516.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1e471fa6844bce9e88878ce164699ab88ec3dc38f166a251f071b408.pickle
vendored
Normal file
BIN
examples/cache/1e471fa6844bce9e88878ce164699ab88ec3dc38f166a251f071b408.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/1e582ad25294607005ccb6d7abe2806c90b39eec91baeb1082072c81.pickle
vendored
Normal file
BIN
examples/cache/1e582ad25294607005ccb6d7abe2806c90b39eec91baeb1082072c81.pickle
vendored
Normal file
Binary file not shown.
BIN
examples/cache/2f066d10237b0b78e684f593c83797e313ba4364ec984395373c9a9c.pickle
vendored
Normal file
BIN
examples/cache/2f066d10237b0b78e684f593c83797e313ba4364ec984395373c9a9c.pickle
vendored
Normal file
Binary file not shown.
@@ -1,13 +1,16 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from extractor.extractor import FiveWExtractor
|
||||
from extractor.preprocessors.preprocessor_core_nlp import Preprocessor
|
||||
from extractor.tools.news_please.handler import Handler
|
||||
|
||||
|
||||
# Add path to allow execution through the console
|
||||
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-2]))
|
||||
#from timeit import default_timer as timer
|
||||
from extractor.preprocessors.preprocessor_core_nlp import Preprocessor
|
||||
from extractor.extractor import FiveWExtractor
|
||||
|
||||
from extractor.tools.news_please.handler import Handler
|
||||
|
||||
|
||||
core_nlp_host = 'http://localhost:9000'
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pip.req.req_file import preprocess
|
||||
# Add path to allow execution through the console
|
||||
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-2]))
|
||||
|
||||
from extractor.extractor import FiveWExtractor
|
||||
from extractor.tools.news_please.handler import Handler
|
||||
from extractor.extractors import action_extractor, environment_extractor, cause_extractor, method_extractor
|
||||
from extractor.tools.news_please.handler import Handler
|
||||
|
||||
|
||||
# Add path to allow execution through the console
|
||||
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-3]))
|
||||
|
||||
"""
|
||||
This is a simple example on how to use the extractor in combination with NewsPlease files
|
||||
Advanced example to use the extractor in combination with NewsPlease files
|
||||
The output of the core_nlp_host is saved in the cache directory to speed up multiple runs.
|
||||
Documents are preloaded into the memory and stay persistent for further calculations after processing
|
||||
"""
|
||||
|
||||
# don't forget to start up core_nlp_host
|
||||
@@ -35,16 +39,14 @@ if __name__ == '__main__':
|
||||
documents = (
|
||||
# initiate the newsplease file handler with the input directory
|
||||
Handler(inputPath)
|
||||
# add an optional output directory
|
||||
.setOutputPath(outputPath)
|
||||
# set a path to save and load preprocessed documents
|
||||
.setPreprocessedPath(preprocessedPath)
|
||||
# limit the number of documents to process (nice for development)
|
||||
.setLimit(1)
|
||||
#.setLimit(1)
|
||||
# add an optional extractor (it would do basically just copying without...)
|
||||
.setExtractor(extractor)
|
||||
# saves all document objects for further programming
|
||||
.preLoadAndCacheDocuments()
|
||||
#.preLoadAndCacheDocuments()
|
||||
# executing it
|
||||
.process().getDocuments()
|
||||
)
|
||||
|
||||
46
examples/parse_documents_simple.py
Normal file
46
examples/parse_documents_simple.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from extractor.extractor import FiveWExtractor
|
||||
from extractor.extractors import action_extractor, environment_extractor, cause_extractor, method_extractor
|
||||
from extractor.tools.news_please.handler import Handler
|
||||
|
||||
|
||||
# Add path to allow execution through the console
|
||||
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-3]))
|
||||
|
||||
|
||||
"""
|
||||
This is a simple example on how to use the extractor in combination with NewsPlease files.
|
||||
Files will be processed one by one; nothing is cached
|
||||
"""
|
||||
|
||||
# don't forget to start up core_nlp_host
|
||||
# java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
|
||||
|
||||
if __name__ == '__main__':
|
||||
log = logging.getLogger('GiveMe5W')
|
||||
log.setLevel(logging.DEBUG)
|
||||
sh = logging.StreamHandler()
|
||||
sh.setLevel(logging.DEBUG)
|
||||
log.addHandler(sh)
|
||||
|
||||
|
||||
extractor = FiveWExtractor()
|
||||
inputPath = os.path.dirname(__file__) + '/input'
|
||||
outputPath = os.path.dirname(__file__) + '/output'
|
||||
|
||||
|
||||
# initiate the newsplease file handler with the input directory
|
||||
( Handler(inputPath)
|
||||
# add an optional output directory
|
||||
.setOutputPath(outputPath)
|
||||
# limit the number of documents to process (nice for development)
|
||||
.setLimit(1)
|
||||
# add an optional extractor (it would do basically just copying without...)
|
||||
.setExtractor(extractor)
|
||||
# executing it
|
||||
.process().getDocuments() )
|
||||
|
||||
|
||||
@@ -1,9 +1,30 @@
|
||||
class DistanceOfCandidate:
|
||||
|
||||
# documentObject, ['Who','What'], ['How'], weight=1
|
||||
# re-score the How candidates based on their proximity to the Who and What candidates
|
||||
# documentObject, ['who','what'], 'How', weight=1
|
||||
def __init__(self,majorQuestions, dependantQuestions, weight=1):
|
||||
return None
|
||||
|
||||
self._majorQuestions = majorQuestions
|
||||
self._dependantQuestions = dependantQuestions
|
||||
self._weight = weight
|
||||
|
||||
|
||||
# documentObject, ['Who','What'], ['How'], weight=1
|
||||
def score(self, document):
|
||||
|
||||
# dependantAnswers = document.get_answers()[self._dependantQuestions]
|
||||
#distances = []
|
||||
|
||||
# for majorQuestion in self._majorQuestions:
|
||||
# get the best answer for a major
|
||||
# answer = document.get_answers()[majorQuestion][0]
|
||||
|
||||
# calculate the distance to each dependantAnswers
|
||||
# distances[majorQuestion] = []
|
||||
# for dependantAnswer, index in dependantAnswers:
|
||||
# distances[majorQuestion][index] = answer[3] - dependantAnswer[3]
|
||||
|
||||
#adjust scoring
|
||||
#TODO normalise index
|
||||
#candidate['positionNorm'] = (self._maxIndex - candidate['position']) / self._maxIndex
|
||||
|
||||
return None
|
||||
@@ -1,7 +1,5 @@
|
||||
#from multiprocessing.managers import BaseManager
|
||||
#from fileinput import filename
|
||||
|
||||
|
||||
#class DocumentManager(BaseManager):
|
||||
# pass
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
|
||||
from combined_scoring.distance_of_candidate import DistanceOfCandidate
|
||||
from extractors import action_extractor, environment_extractor, cause_extractor, method_extractor
|
||||
from preprocessors.preprocessor_core_nlp import Preprocessor
|
||||
|
||||
from combined_scoring.distance_of_candidate import DistanceOfCandidate
|
||||
|
||||
class FiveWExtractor:
|
||||
"""
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import logging
|
||||
import nltk
|
||||
from nltk.corpus import wordnet
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from itertools import product
|
||||
import logging
|
||||
|
||||
import nltk
|
||||
from nltk.corpus import wordnet
|
||||
|
||||
|
||||
try:
|
||||
basestring = basestring
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import re
|
||||
|
||||
from nltk.tree import ParentedTree
|
||||
|
||||
from .abs_extractor import AbsExtractor
|
||||
|
||||
|
||||
|
||||
@@ -3,11 +3,12 @@
|
||||
class CandidatePart:
|
||||
def __init__(self):
|
||||
self._posTag = None
|
||||
self._posTag = None
|
||||
|
||||
self._text = None
|
||||
|
||||
def setPosTag(self,posTag):
|
||||
self._postTag = posTag
|
||||
pass
|
||||
def getPosTag(self):
|
||||
return self._posTag
|
||||
|
||||
@@ -21,7 +22,8 @@ class CandidatePart:
|
||||
|
||||
class Candidate:
|
||||
def __init__(self, raw=None):
|
||||
self._raw = raw
|
||||
self._type = type
|
||||
self._raw = raw
|
||||
self._score = None
|
||||
self._index = None
|
||||
|
||||
@@ -29,7 +31,7 @@ class Candidate:
|
||||
return self._raw
|
||||
|
||||
def setType(self, type):
|
||||
self._type = type
|
||||
pass
|
||||
def getType(self):
|
||||
return self._type
|
||||
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
from copy import deepcopy
|
||||
import logging
|
||||
|
||||
import nltk
|
||||
from nltk.corpus import wordnet
|
||||
from nltk.stem.wordnet import WordNetLemmatizer
|
||||
from copy import deepcopy
|
||||
|
||||
from .abs_extractor import AbsExtractor
|
||||
from .candidate import Candidate
|
||||
|
||||
|
||||
class CauseExtractor(AbsExtractor):
|
||||
"""
|
||||
The CauseExtractor tries to detect a causative that could explain an event.
|
||||
@@ -129,8 +132,8 @@ class CauseExtractor(AbsExtractor):
|
||||
index = document.get_fullText().find(text)
|
||||
if index is not -1:
|
||||
candidateObject.setIndex( index )
|
||||
print(text)
|
||||
print(candidateObject.getIndex())
|
||||
#print(text)
|
||||
#print(candidateObject.getIndex())
|
||||
|
||||
def _evaluate_tree(self, tree):
|
||||
"""
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import time
|
||||
from .abs_extractor import AbsExtractor
|
||||
from geopy.geocoders import Nominatim
|
||||
|
||||
from geopy.distance import vincenty
|
||||
from geopy.geocoders import Nominatim
|
||||
from parsedatetime import parsedatetime as pdt
|
||||
|
||||
from .abs_extractor import AbsExtractor
|
||||
|
||||
|
||||
class EnvironmentExtractor(AbsExtractor):
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import re
|
||||
|
||||
from nltk.tree import ParentedTree
|
||||
|
||||
from .abs_extractor import AbsExtractor
|
||||
|
||||
|
||||
@@ -81,28 +83,28 @@ class MethodExtractor(AbsExtractor):
|
||||
#ranked_candidates = []
|
||||
|
||||
|
||||
groupePerLemma = {}
|
||||
groupe_per_lemma = {}
|
||||
maxCount = 0
|
||||
|
||||
candidates = document.get_candidates('MethodExtractor')
|
||||
# frequency per lemma
|
||||
for candidate in candidates:
|
||||
if candidate is not None and len(candidate['originalText']) > 0:
|
||||
lemaCount = groupePerLemma.get(candidate["lemma"], 0 )
|
||||
lemaCount += 1
|
||||
lema_count = groupe_per_lemma.get(candidate["lemma"], 0 )
|
||||
lema_count += 1
|
||||
|
||||
if lemaCount > maxCount:
|
||||
maxCount = lemaCount
|
||||
groupePerLemma[candidate["lemma"]] = lemaCount
|
||||
if lema_count > maxCount:
|
||||
maxCount = lema_count
|
||||
groupe_per_lemma[candidate["lemma"]] = lema_count
|
||||
|
||||
# transfer count per lemmaGroup to candidates
|
||||
for candidate in candidates:
|
||||
if candidate is not None and len(candidate['originalText']) > 0:
|
||||
|
||||
# save normalized frequency
|
||||
candidate['frequency'] = groupePerLemma[candidate['lemma']]
|
||||
candidate['frequency'] = groupe_per_lemma[candidate['lemma']]
|
||||
candidate['frequencyNorm'] = ( candidate['frequency'] - 1 ) / (maxCount-1)
|
||||
lemaCount = groupePerLemma.get(candidate["lemma"], 0 )
|
||||
lema_count = groupe_per_lemma.get(candidate["lemma"], 0 )
|
||||
|
||||
# normalized position
|
||||
candidate['positionNorm'] = (self._maxIndex - candidate['position']) / self._maxIndex
|
||||
@@ -118,6 +120,7 @@ class MethodExtractor(AbsExtractor):
|
||||
# normalizing scores
|
||||
for candidate in candidates:
|
||||
candidate['score'] = candidate['score']/scoreMax
|
||||
|
||||
# Sort candidates
|
||||
candidates.sort(key = lambda x: x['score'], reverse=True)
|
||||
|
||||
@@ -137,10 +140,10 @@ class MethodExtractor(AbsExtractor):
|
||||
for candidate in new_list:
|
||||
keyVal = ([( candidate['originalText'], candidate['pos'])], candidate['score'] )
|
||||
result.append( keyVal )
|
||||
|
||||
|
||||
document.set_answer('how', result )
|
||||
|
||||
|
||||
#return new_list
|
||||
|
||||
def _isRelevantPos(self, pos):
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
|
||||
import nltk
|
||||
from pycorenlp import StanfordCoreNLP
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import glob
|
||||
import logging
|
||||
|
||||
|
||||
from .writer import Writer
|
||||
from .reader import Reader
|
||||
from .writer import Writer
|
||||
|
||||
|
||||
class Handler(object):
|
||||
def __init__(self, inputPath):
|
||||
@@ -14,6 +14,7 @@ class Handler(object):
|
||||
self._extractor = None
|
||||
self._outputPath = None
|
||||
self._adocuments = None
|
||||
self._documents = None
|
||||
|
||||
self._reader = Reader()
|
||||
self._writer = Writer()
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from document import DocumentFactory
|
||||
import hashlib
|
||||
import os.path
|
||||
import pickle
|
||||
|
||||
from document import DocumentFactory
|
||||
|
||||
|
||||
class Reader(object):
|
||||
|
||||
def __init__(self):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
import pickle
|
||||
|
||||
|
||||
class Writer:
|
||||
def __init__(self):
|
||||
"""
|
||||
@@ -46,11 +47,10 @@ class Writer:
|
||||
extractedLiteral = questionLiteral.setdefault('extracted',[])
|
||||
for index, key in enumerate(answers[question]):
|
||||
candidate = answers[question][index]
|
||||
candidateJson = {'score': candidate[1], 'words': []}
|
||||
candidate_json = {'score': candidate[1], 'words': []}
|
||||
#if candidateJson.score <= n:
|
||||
# break
|
||||
candidateJson['words'] = []
|
||||
for candidateWord in candidate[0]:
|
||||
candidateJson['words'].append({ 'text':candidateWord[0], 'tag':candidateWord[1]})
|
||||
extractedLiteral.append(candidateJson)
|
||||
candidate_json['words'].append({ 'text':candidateWord[0], 'tag':candidateWord[1]})
|
||||
extractedLiteral.append(candidate_json)
|
||||
self._writeJson(outputPath, output)
|
||||
@@ -1,15 +1,15 @@
|
||||
import time
|
||||
|
||||
from geopy.distance import vincenty
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.corpus import wordnet
|
||||
from itertools import product
|
||||
|
||||
|
||||
"""
|
||||
Collection of utility methods
|
||||
"""
|
||||
|
||||
from itertools import product
|
||||
import time
|
||||
|
||||
from geopy.distance import vincenty
|
||||
from nltk.corpus import wordnet
|
||||
from nltk.tokenize import word_tokenize
|
||||
|
||||
|
||||
def cmp_text(annotation, candidate):
|
||||
"""
|
||||
Compare the retrieved answer with the annotation using WordNet path distance.
|
||||
|
||||
Reference in New Issue
Block a user