updated.

2023-06-02 01:35:42 +03:00 · 2022-06-06 23:43:50 -07:00
parent 94c6088c68
commit 1f2654572a
3 changed files with 9 additions and 52 deletions
--- a/models/entity_tuple_searcher.py
+++ b/models/entity_tuple_searcher.py
@@ -135,8 +135,8 @@ class EntityTupleSearcher:
            if any([word in stopwords for word in pred_ent.split()]):
                return

-            # filter entity with less than 4 characters
-            if len(pred_ent.replace(' ', '')) <= 3:
+            # filter entities with fewer than 3 non-space characters
+            if len(pred_ent.replace(' ', '')) <= 2:
                return

            # filter entity with single-character words
--- a/models/knowledge_harvester.py
+++ b/models/knowledge_harvester.py
@@ -72,11 +72,8 @@ class KnowledgeHarvester:
            self._weighted_prompts,
            key=lambda t: t[1], reverse=True)[:self._max_n_prompts]

-        for prompt, weight in self._weighted_prompts:
-            print(f'{weight:.4f}', prompt)
-
        norm_weights = softmax([weight for _, weight in self._weighted_prompts])
-        norm_weights[norm_weights < 0.02] = 0.
+        norm_weights[norm_weights < 0.05] = 0.
        norm_weights /= norm_weights.sum()

        for i, norm_weight in enumerate(norm_weights):
--- a/relation_info/human.json
+++ b/relation_info/human.json
@@ -339,45 +339,6 @@
            "<ENT0> is a dish made from <ENT1> that has been fried in a wok or a pan ."
        ]
    },
-    "release_time": {
-        "init_prompts": [
-            "<ENT0> is released in <ENT1> ."
-        ],
-        "seed_ent_tuples": [
-            [
-                "iPhone",
-                "2007"
-            ],
-            [
-                "Windows 7",
-                "2009"
-            ],
-            [
-                "Volkswagen Golf",
-                "1974"
-            ],
-            [
-                "word2vec",
-                "2013"
-            ],
-            [
-                "Avatar",
-                "2009"
-            ]
-        ],
-        "prompts": [
-            "<ENT0> was released in <ENT1> .",
-            "In <ENT1>, <ENT0> was released .",
-            "Google released its <ENT0> algorithm in <ENT1> .",
-            "Google's <ENT0> algorithm is released in <ENT1> .",
-            "<ENT0> is a vector representation of words that was released in <ENT1> .",
-            "<ENT0> is a tool that was released in <ENT1> that creates word embeddings .",
-            "<ENT0> is a tool that creates word embeddings, that was released in <ENT1> .",
-            "In <ENT1>, <ENT0> was released as a toolkit for vector representation of words .",
-            "The <ENT0> algorithm is a neural network algorithm that was released in <ENT1> .",
-            "<ENT0> is a toolkit that was released in <ENT1> that creates word embeddings, which are vector representations of words that can be used ."
-        ]
-    },
    "processed_from": {
        "init_prompts": [
            "<ENT0> is the source of <ENT1> ."
@@ -540,7 +501,6 @@
            "<ENT0> is an international <ENT1> and gas company .",
            "<ENT0> is a <ENT1> company that manufactures and sells vehicles .",
            "<ENT0> sells <ENT1>s through its online store and retail locations .",
-            "<ENT0> is a company that sells graphics processing units (<ENT1>s) .",
            "<ENT0> is a popular fast food chain that is known for selling <ENT1>s .",
            "<ENT0> is ajapanese<ENT1> manufacturing company that is headquartered in yokohama, japan .",
            "<ENT0> is a fast food company that specializes in <ENT1>s, fried chicken, and soft drinks .",
@@ -549,7 +509,7 @@
            "<ENT0> describes themselves as \"the world\u2019s largest information technology company by revenue,\" and they sell many products, including <ENT1>s ."
        ]
    },
-    "featured thing": {
+    "featured_thing": {
        "init_prompts": [
            "<ENT0> is a very <ENT1> <ENT2> ."
        ],
@@ -593,7 +553,7 @@
            "Although <ENT0> is not the world's <ENT1>est <ENT2>, it is still very wealthy ."
        ]
    },
-    "need sth to do sth": {
+    "need_sth_to_do_sth": {
        "init_prompts": [
            "<ENT0> needs <ENT1> to <ENT2> ."
        ],
@@ -637,7 +597,7 @@
            "It is important for <ENT0> to be able to <ENT2> with each other through <ENT1> ."
        ]
    },
-    "more than": {
+    "more_than": {
        "init_prompts": [
            "<ENT0> is more <ENT1> than <ENT2> ."
        ],
@@ -681,7 +641,7 @@
            "Some people believe that <ENT0> is more <ENT1> than <ENT2> because it is more active and engaging ."
        ]
    },
-    "can but not good": {
+    "can_but_not_good": {
        "init_prompts": [
            "<ENT0> can <ENT1> but not good at ."
        ],
@@ -722,7 +682,7 @@
            "While <ENT0>s are able to <ENT1>, they are not instinctively good at it and may need some help or encouragement to do so ."
        ]
    },
-    "worth celebrating": {
+    "worth_celebrating": {
        "init_prompts": [
            "It\u2019s worth celebrating for a <ENT0> to <ENT1> ."
        ],
@@ -761,7 +721,7 @@
            "It is seen as significant accomplishment for <ENT0> when they <ENT1> ."
        ]
    },
-    "potential risk": {
+    "potential_risk": {
        "init_prompts": [
            "A potential risk of <ENT0> is <ENT1> ."
        ],