Cleanup with pycharm. Bugfixes

This commit is contained in:
Benedikt Kromer
2017-06-28 06:05:50 +02:00
parent 111de4f89c
commit ca791244a3
39 changed files with 632 additions and 56 deletions

14
.idea/Giveme5W.iml generated Normal file
View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/examples" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/extractor" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.6.1 (/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>

4
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.1 (/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Giveme5W.iml" filepath="$PROJECT_DIR$/.idea/Giveme5W.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

441
.idea/workspace.xml generated Normal file
View File

@@ -0,0 +1,441 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="f3ddf391-85fc-43d5-92ec-816971c35368" name="Default" comment="">
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/Giveme5W.iml" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/modules.xml" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/.settings/org.eclipse.ltk.core.refactoring.prefs" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/08ebc52cada9093774e3ba71312def80ca3621c959d67877a6e3b228.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/09d729c0013e096cf6ed1b13e52c9d3afd2538333cd74e1d42ab66fb.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/0e5fa7c0e6252bfeeea5e3840c6cb503f299c19d24331c4ba60c5974.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/0e7ab2ce71c1bce03040ec2388dd45ab069d5432b364495b9cfcfdf5.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/101ee22f1f9e984c1bd2bd64ad44db8452b0669178f1683eafa338c7.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/13a20be02e984533782dc2c6cd2f468b879ce4be88127566da043b0f.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1667b720a919a4cd7904557320d4f7572f2c55bdfb26cd0bf7946a5a.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1778efb0dfd0f1e933513e0f79af3c09986a40791ecd6faf2239f09c.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/189af426dcbdb42921a716cf136f75554e4befad28c088f3d930b6c4.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e0f6556611739fb0f8a802cf5f4ffa966df21de1ae3aa11ea647516.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e471fa6844bce9e88878ce164699ab88ec3dc38f166a251f071b408.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/cache/1e582ad25294607005ccb6d7abe2806c90b39eec91baeb1082072c81.pickle" />
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/examples/parse_documents_simple.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.project" afterPath="$PROJECT_DIR$/.project" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.pydevproject" afterPath="$PROJECT_DIR$/.pydevproject" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/examples/learn_weights.py" afterPath="$PROJECT_DIR$/examples/learn_weights.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/examples/parse_documents.py" afterPath="$PROJECT_DIR$/examples/parse_documents.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" afterPath="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/document.py" afterPath="$PROJECT_DIR$/extractor/document.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractor.py" afterPath="$PROJECT_DIR$/extractor/extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/abs_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/abs_extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/action_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/action_extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/candidate.py" afterPath="$PROJECT_DIR$/extractor/extractors/candidate.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/environment_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/environment_extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/extractors/method_extractor.py" afterPath="$PROJECT_DIR$/extractor/extractors/method_extractor.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py" afterPath="$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/handler.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/handler.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/reader.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/reader.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/news_please/writer.py" afterPath="$PROJECT_DIR$/extractor/tools/news_please/writer.py" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/extractor/tools/util.py" afterPath="$PROJECT_DIR$/extractor/tools/util.py" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="candidate.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/extractor/extractors/candidate.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="151">
<caret line="31" column="0" lean-forward="false" selection-start-line="31" selection-start-column="0" selection-end-line="31" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="cause_extractor.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="324">
<caret line="134" column="9" lean-forward="false" selection-start-line="134" selection-start-column="9" selection-end-line="134" selection-end-column="9" />
<folding>
<element signature="e#0#25#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/examples/parse_documents.py" />
<option value="$PROJECT_DIR$/extractor/tools/news_please/writer.py" />
<option value="$PROJECT_DIR$/extractor/extractors/method_extractor.py" />
<option value="$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py" />
<option value="$PROJECT_DIR$/extractor/tools/news_please/handler.py" />
<option value="$PROJECT_DIR$/extractor/extractors/candidate.py" />
<option value="$PROJECT_DIR$/extractor/extractors/cause_extractor.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="width" value="1280" />
<option name="height" value="800" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="Giveme5W" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="Giveme5W" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="Giveme5W" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="Giveme5W" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="extractor" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
<pane id="Scratches" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="FullScreen" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PythonContentEntriesConfigurable" />
<property name="run.code.analysis.last.selected.profile" value="pProject Default" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.parse_documents">
<configuration default="false" name="learn_weights" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/examples/learn_weights.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="false" name="parse_documents" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/examples" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/examples/parse_documents.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<option name="SCRIPT_NAME" value="" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="true" type="Tox" factoryName="Tox">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Doctests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Unittests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="Giveme5W" />
<option name="_new_additionalArguments" value="&quot;&quot;" />
<option name="_new_target" value="&quot;.&quot;" />
<option name="_new_targetType" value="&quot;PATH&quot;" />
<method />
</configuration>
<list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.learn_weights" />
<item index="1" class="java.lang.String" itemvalue="Python.parse_documents" />
</list>
<recent_temporary>
<list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.parse_documents" />
<item index="1" class="java.lang.String" itemvalue="Python.learn_weights" />
</list>
</recent_temporary>
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="f3ddf391-85fc-43d5-92ec-816971c35368" name="Default" comment="" />
<created>1498619819806</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1498619819806</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="0" y="0" width="1280" height="800" extended-state="6" />
<layout>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.3001328" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24609375" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Inspection Results" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32934928" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3997344" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py</url>
<line>157</line>
</line-breakpoint>
</breakpoints>
<option name="time" value="1" />
</breakpoint-manager>
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/examples/parse_documents_simple.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#0#14#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/reader.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="166">
<caret line="28" column="0" lean-forward="false" selection-start-line="28" selection-start-column="0" selection-end-line="28" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/examples/cache/1b1109edc9ba100f298d988c0558243e34a5921a855408bb1f4dfedd.pickle">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-15740">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/examples/learn_weights.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="93">
<caret line="56" column="58" lean-forward="false" selection-start-line="56" selection-start-column="58" selection-end-line="56" selection-end-column="58" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/extractors/abs_extractor.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="151">
<caret line="103" column="8" lean-forward="false" selection-start-line="103" selection-start-column="8" selection-end-line="103" selection-end-column="8" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/preprocessors/preprocessor_core_nlp.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="236">
<caret line="56" column="49" lean-forward="false" selection-start-line="56" selection-start-column="37" selection-end-line="56" selection-end-column="49" />
<folding>
<element signature="e#0#14#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/examples/parse_documents.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="85">
<caret line="32" column="59" lean-forward="true" selection-start-line="32" selection-start-column="59" selection-end-line="32" selection-end-column="59" />
<folding>
<element signature="e#0#14#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/extractors/method_extractor.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="58">
<caret line="92" column="26" lean-forward="false" selection-start-line="92" selection-start-column="26" selection-end-line="92" selection-end-column="26" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/writer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="151">
<caret line="37" column="45" lean-forward="false" selection-start-line="37" selection-start-column="45" selection-end-line="37" selection-end-column="45" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/combined_scoring/distance_of_candidate.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="135">
<caret line="9" column="0" lean-forward="false" selection-start-line="9" selection-start-column="0" selection-end-line="9" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/tools/news_please/handler.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="189">
<caret line="45" column="6" lean-forward="false" selection-start-line="45" selection-start-column="6" selection-end-line="45" selection-end-column="6" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/extractors/candidate.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="151">
<caret line="31" column="0" lean-forward="false" selection-start-line="31" selection-start-column="0" selection-end-line="31" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/extractor/extractors/cause_extractor.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="324">
<caret line="134" column="9" lean-forward="false" selection-start-line="134" selection-start-column="9" selection-end-line="134" selection-end-column="9" />
<folding>
<element signature="e#0#25#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>

View File

@@ -14,4 +14,14 @@
<natures>
<nature>org.python.pydev.pythonNature</nature>
</natures>
<variableList>
<variable>
<name>j</name>
<value>file:/Users/bkr/sourceTree/Giveme5W/examples</value>
</variable>
<variable>
<name>n</name>
<value>file:/Users/bkr/sourceTree/Giveme5W/extractor</value>
</variable>
</variableList>
</projectDescription>

View File

@@ -2,9 +2,12 @@
<?eclipse-pydev version="1.0"?><pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.6</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/${PROJECT_DIR_NAME}/extractor</path>
<path>/${PROJECT_DIR_NAME}/examples</path>
<path>/${PROJECT_DIR_NAME}/extractor</path>
</pydev_pathproperty>
<pydev_pathproperty name="org.python.pydev.PROJECT_EXTERNAL_SOURCE_PATH">
<path>/Users/bkr/sourceTree/Giveme5W/extractor</path>

View File

@@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false

View File

@@ -1,13 +1,16 @@
import logging
import os
import sys
from extractor.extractor import FiveWExtractor
from extractor.preprocessors.preprocessor_core_nlp import Preprocessor
from extractor.tools.news_please.handler import Handler
# Add path to allow execution through the console
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-2]))
#from timeit import default_timer as timer
from extractor.preprocessors.preprocessor_core_nlp import Preprocessor
from extractor.extractor import FiveWExtractor
from extractor.tools.news_please.handler import Handler
core_nlp_host = 'http://localhost:9000'

View File

@@ -1,15 +1,19 @@
import logging
import os
import sys
from pip.req.req_file import preprocess
# Add path to allow execution through the console
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-2]))
from extractor.extractor import FiveWExtractor
from extractor.tools.news_please.handler import Handler
from extractor.extractors import action_extractor, environment_extractor, cause_extractor, method_extractor
from extractor.tools.news_please.handler import Handler
# Add path to allow execution through the console
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-3]))
"""
This is a simple example on how to use the extractor in combination with NewsPlease files
Advanced example to use the extractor in combination with NewsPlease files
The output of the core_nlp_host is saved in the cache directory to speed up multiple runs.
Documents are preloaded into the memory and stay persistent for further calculations after processing
"""
# don't forget to start up core_nlp_host
@@ -35,16 +39,14 @@ if __name__ == '__main__':
documents = (
# initiate the newsplease file handler with the input directory
Handler(inputPath)
# add an optional output directory
.setOutputPath(outputPath)
# set a path to save and load preprocessed documents
.setPreprocessedPath(preprocessedPath)
# limit the number of documents to process (nice for development)
.setLimit(1)
#.setLimit(1)
# add an optional extractor (it would do basically just copying without...)
.setExtractor(extractor)
# saves all document objects for further programming
.preLoadAndCacheDocuments()
#.preLoadAndCacheDocuments()
# executing it
.process().getDocuments()
)

View File

@@ -0,0 +1,46 @@
"""
This is a simple example of how to use the extractor in combination with NewsPlease files.
Files are processed one by one; nothing is cached.
"""
import logging
import os
import sys

# Add path to allow execution through the console.
# This has to run BEFORE the `extractor` imports below; placed after them the
# imports are resolved against the unmodified sys.path and the insert is useless.
# NOTE(review): [:-3] drops the file name plus two directory levels - confirm
# that the resulting directory is the one that makes `extractor` importable.
sys.path.insert(0, '/'.join(os.path.realpath(__file__).split('/')[:-3]))

from extractor.extractor import FiveWExtractor  # noqa: E402
from extractor.extractors import action_extractor, environment_extractor, cause_extractor, method_extractor  # noqa: E402,F401
from extractor.tools.news_please.handler import Handler  # noqa: E402

# don't forget to start up core_nlp_host
# java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

if __name__ == '__main__':
    # Send all debug output of the 'GiveMe5W' logger to the console.
    log = logging.getLogger('GiveMe5W')
    log.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    sh.setLevel(logging.DEBUG)
    log.addHandler(sh)

    extractor = FiveWExtractor()

    # Resolve the directories relative to this file's real location.
    # os.path.dirname(__file__) alone is '' when the script is started as
    # `python parse_documents_simple.py` on Python < 3.9, which would turn the
    # paths into '/input' and '/output' at the filesystem root.
    baseDir = os.path.dirname(os.path.realpath(__file__))
    inputPath = baseDir + '/input'
    outputPath = baseDir + '/output'

    (
        # initiate the newsplease file handler with the input directory
        Handler(inputPath)
        # add an optional output directory
        .setOutputPath(outputPath)
        # limit the number of documents to process (nice for development)
        .setLimit(1)
        # add an optional extractor (it would do basically just copying without...)
        .setExtractor(extractor)
        # executing it
        .process().getDocuments()
    )

View File

@@ -1,9 +1,30 @@
class DistanceOfCandidate:
# documentObject, ['Who','What'], ['How'], weight=1
# re-score the how candidates based on their proximity to the Who and What candidates
# documentObject, ['who','what'], 'How', weight=1
def __init__(self,majorQuestions, dependantQuestions, weight=1):
return None
self._majorQuestions = majorQuestions
self._dependantQuestions = dependantQuestions
self._weight = weight
# documentObject, ['Who','What'], ['How'], weight=1
def score(self, document):
# dependantAnswers = document.get_answers()[self._dependantQuestions]
#distances = []
# for majorQuestion in self._majorQuestions:
# get the best answer for a major
# answer = document.get_answers()[majorQuestion][0]
# calculate the distance to each dependantAnswers
# distances[majorQuestion] = []
# for dependantAnswer, index in dependantAnswers:
# distances[majorQuestion][index] = answer[3] - dependantAnswer[3]
#adjust scoring
#TODO normalise index
#candidate['positionNorm'] = (self._maxIndex - candidate['position']) / self._maxIndex
return None

View File

@@ -1,7 +1,5 @@
#from multiprocessing.managers import BaseManager
#from fileinput import filename
#class DocumentManager(BaseManager):
# pass

View File

@@ -1,10 +1,10 @@
import logging
import multiprocessing
from combined_scoring.distance_of_candidate import DistanceOfCandidate
from extractors import action_extractor, environment_extractor, cause_extractor, method_extractor
from preprocessors.preprocessor_core_nlp import Preprocessor
from combined_scoring.distance_of_candidate import DistanceOfCandidate
class FiveWExtractor:
"""

View File

@@ -1,8 +1,10 @@
import logging
import nltk
from nltk.corpus import wordnet
from abc import ABCMeta, abstractmethod
from itertools import product
import logging
import nltk
from nltk.corpus import wordnet
try:
basestring = basestring

View File

@@ -1,5 +1,7 @@
import re
from nltk.tree import ParentedTree
from .abs_extractor import AbsExtractor

View File

@@ -3,11 +3,12 @@
class CandidatePart:
def __init__(self):
self._posTag = None
self._posTag = None
self._text = None
def setPosTag(self,posTag):
self._postTag = posTag
pass
def getPosTag(self):
return self._posTag
@@ -21,7 +22,8 @@ class CandidatePart:
class Candidate:
def __init__(self, raw=None):
self._raw = raw
self._type = type
self._raw = raw
self._score = None
self._index = None
@@ -29,7 +31,7 @@ class Candidate:
return self._raw
def setType(self, type):
self._type = type
pass
def getType(self):
return self._type

View File

@@ -1,11 +1,14 @@
from copy import deepcopy
import logging
import nltk
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
from copy import deepcopy
from .abs_extractor import AbsExtractor
from .candidate import Candidate
class CauseExtractor(AbsExtractor):
"""
The CauseExtractor tries to detect a causative that could explain an event.
@@ -129,8 +132,8 @@ class CauseExtractor(AbsExtractor):
index = document.get_fullText().find(text)
if index is not -1:
candidateObject.setIndex( index )
print(text)
print(candidateObject.getIndex())
#print(text)
#print(candidateObject.getIndex())
def _evaluate_tree(self, tree):
"""

View File

@@ -1,9 +1,11 @@
import time
from .abs_extractor import AbsExtractor
from geopy.geocoders import Nominatim
from geopy.distance import vincenty
from geopy.geocoders import Nominatim
from parsedatetime import parsedatetime as pdt
from .abs_extractor import AbsExtractor
class EnvironmentExtractor(AbsExtractor):
"""

View File

@@ -1,5 +1,7 @@
import re
from nltk.tree import ParentedTree
from .abs_extractor import AbsExtractor
@@ -81,28 +83,28 @@ class MethodExtractor(AbsExtractor):
#ranked_candidates = []
groupePerLemma = {}
groupe_per_lemma = {}
maxCount = 0
candidates = document.get_candidates('MethodExtractor')
# frequency per lemma
for candidate in candidates:
if candidate is not None and len(candidate['originalText']) > 0:
lemaCount = groupePerLemma.get(candidate["lemma"], 0 )
lemaCount += 1
lema_count = groupe_per_lemma.get(candidate["lemma"], 0 )
lema_count += 1
if lemaCount > maxCount:
maxCount = lemaCount
groupePerLemma[candidate["lemma"]] = lemaCount
if lema_count > maxCount:
maxCount = lema_count
groupe_per_lemma[candidate["lemma"]] = lema_count
# transfer count per lemmaGroup to candidates
for candidate in candidates:
if candidate is not None and len(candidate['originalText']) > 0:
# save normalized frequency
candidate['frequency'] = groupePerLemma[candidate['lemma']]
candidate['frequency'] = groupe_per_lemma[candidate['lemma']]
candidate['frequencyNorm'] = ( candidate['frequency'] - 1 ) / (maxCount-1)
lemaCount = groupePerLemma.get(candidate["lemma"], 0 )
lema_count = groupe_per_lemma.get(candidate["lemma"], 0 )
# normalized position
candidate['positionNorm'] = (self._maxIndex - candidate['position']) / self._maxIndex
@@ -118,6 +120,7 @@ class MethodExtractor(AbsExtractor):
# normalizing scores
for candidate in candidates:
candidate['score'] = candidate['score']/scoreMax
# Sort candidates
candidates.sort(key = lambda x: x['score'], reverse=True)
@@ -137,10 +140,10 @@ class MethodExtractor(AbsExtractor):
for candidate in new_list:
keyVal = ([( candidate['originalText'], candidate['pos'])], candidate['score'] )
result.append( keyVal )
document.set_answer('how', result )
#return new_list
def _isRelevantPos(self, pos):

View File

@@ -1,4 +1,5 @@
import logging
import nltk
from pycorenlp import StanfordCoreNLP

View File

@@ -1,9 +1,9 @@
import glob
import logging
from .writer import Writer
from .reader import Reader
from .writer import Writer
class Handler(object):
def __init__(self, inputPath):
@@ -14,6 +14,7 @@ class Handler(object):
self._extractor = None
self._outputPath = None
self._adocuments = None
self._documents = None
self._reader = Reader()
self._writer = Writer()

View File

@@ -1,10 +1,12 @@
import hashlib
import json
import logging
from document import DocumentFactory
import hashlib
import os.path
import pickle
from document import DocumentFactory
class Reader(object):
def __init__(self):

View File

@@ -1,6 +1,7 @@
import json
import pickle
class Writer:
def __init__(self):
"""
@@ -46,11 +47,10 @@ class Writer:
extractedLiteral = questionLiteral.setdefault('extracted',[])
for index, key in enumerate(answers[question]):
candidate = answers[question][index]
candidateJson = {'score': candidate[1], 'words': []}
candidate_json = {'score': candidate[1], 'words': []}
#if candidateJson.score <= n:
# break
candidateJson['words'] = []
for candidateWord in candidate[0]:
candidateJson['words'].append({ 'text':candidateWord[0], 'tag':candidateWord[1]})
extractedLiteral.append(candidateJson)
candidate_json['words'].append({ 'text':candidateWord[0], 'tag':candidateWord[1]})
extractedLiteral.append(candidate_json)
self._writeJson(outputPath, output)

View File

@@ -1,15 +1,15 @@
import time
from geopy.distance import vincenty
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from itertools import product
"""
Collection of utility methods
"""
from itertools import product
import time
from geopy.distance import vincenty
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
def cmp_text(annotation, candidate):
"""
Compare the retrieved answer with the annotation using WordNet path distance.