mirror of
https://github.com/tanyuqian/knowledge-harvest-from-lms.git
synced 2023-06-02 01:35:42 +03:00
updated.
This commit is contained in:
@@ -135,8 +135,8 @@ class EntityTupleSearcher:
|
||||
if any([word in stopwords for word in pred_ent.split()]):
|
||||
return
|
||||
|
||||
# filter out entities with fewer than 4 characters (spaces not counted)
|
||||
if len(pred_ent.replace(' ', '')) <= 3:
|
||||
# filter out entities with fewer than 3 characters (spaces not counted)
|
||||
if len(pred_ent.replace(' ', '')) <= 2:
|
||||
return
|
||||
|
||||
# filter entity with single-character words
|
||||
|
||||
@@ -72,11 +72,8 @@ class KnowledgeHarvester:
|
||||
self._weighted_prompts,
|
||||
key=lambda t: t[1], reverse=True)[:self._max_n_prompts]
|
||||
|
||||
for prompt, weight in self._weighted_prompts:
|
||||
print(f'{weight:.4f}', prompt)
|
||||
|
||||
norm_weights = softmax([weight for _, weight in self._weighted_prompts])
|
||||
norm_weights[norm_weights < 0.02] = 0.
|
||||
norm_weights[norm_weights < 0.05] = 0.
|
||||
norm_weights /= norm_weights.sum()
|
||||
|
||||
for i, norm_weight in enumerate(norm_weights):
|
||||
|
||||
@@ -339,45 +339,6 @@
|
||||
"<ENT0> is a dish made from <ENT1> that has been fried in a wok or a pan ."
|
||||
]
|
||||
},
|
||||
"release_time": {
|
||||
"init_prompts": [
|
||||
"<ENT0> is released in <ENT1> ."
|
||||
],
|
||||
"seed_ent_tuples": [
|
||||
[
|
||||
"iPhone",
|
||||
"2007"
|
||||
],
|
||||
[
|
||||
"Windows 7",
|
||||
"2009"
|
||||
],
|
||||
[
|
||||
"Volkswagen Golf",
|
||||
"1974"
|
||||
],
|
||||
[
|
||||
"word2vec",
|
||||
"2013"
|
||||
],
|
||||
[
|
||||
"Avatar",
|
||||
"2009"
|
||||
]
|
||||
],
|
||||
"prompts": [
|
||||
"<ENT0> was released in <ENT1> .",
|
||||
"In <ENT1>, <ENT0> was released .",
|
||||
"Google released its <ENT0> algorithm in <ENT1> .",
|
||||
"Google's <ENT0> algorithm is released in <ENT1> .",
|
||||
"<ENT0> is a vector representation of words that was released in <ENT1> .",
|
||||
"<ENT0> is a tool that was released in <ENT1> that creates word embeddings .",
|
||||
"<ENT0> is a tool that creates word embeddings, that was released in <ENT1> .",
|
||||
"In <ENT1>, <ENT0> was released as a toolkit for vector representation of words .",
|
||||
"The <ENT0> algorithm is a neural network algorithm that was released in <ENT1> .",
|
||||
"<ENT0> is a toolkit that was released in <ENT1> that creates word embeddings, which are vector representations of words that can be used ."
|
||||
]
|
||||
},
|
||||
"processed_from": {
|
||||
"init_prompts": [
|
||||
"<ENT0> is the source of <ENT1> ."
|
||||
@@ -540,7 +501,6 @@
|
||||
"<ENT0> is an international <ENT1> and gas company .",
|
||||
"<ENT0> is a <ENT1> company that manufactures and sells vehicles .",
|
||||
"<ENT0> sells <ENT1>s through its online store and retail locations .",
|
||||
"<ENT0> is a company that sells graphics processing units (<ENT1>s) .",
|
||||
"<ENT0> is a popular fast food chain that is known for selling <ENT1>s .",
|
||||
"<ENT0> is ajapanese<ENT1> manufacturing company that is headquartered in yokohama, japan .",
|
||||
"<ENT0> is a fast food company that specializes in <ENT1>s, fried chicken, and soft drinks .",
|
||||
@@ -549,7 +509,7 @@
|
||||
"<ENT0> describes themselves as \"the world\u2019s largest information technology company by revenue,\" and they sell many products, including <ENT1>s ."
|
||||
]
|
||||
},
|
||||
"featured thing": {
|
||||
"featured_thing": {
|
||||
"init_prompts": [
|
||||
"<ENT0> is a very <ENT1> <ENT2> ."
|
||||
],
|
||||
@@ -593,7 +553,7 @@
|
||||
"Although <ENT0> is not the world's <ENT1>est <ENT2>, it is still very wealthy ."
|
||||
]
|
||||
},
|
||||
"need sth to do sth": {
|
||||
"need_sth_to_do_sth": {
|
||||
"init_prompts": [
|
||||
"<ENT0> needs <ENT1> to <ENT2> ."
|
||||
],
|
||||
@@ -637,7 +597,7 @@
|
||||
"It is important for <ENT0> to be able to <ENT2> with each other through <ENT1> ."
|
||||
]
|
||||
},
|
||||
"more than": {
|
||||
"more_than": {
|
||||
"init_prompts": [
|
||||
"<ENT0> is more <ENT1> than <ENT2> ."
|
||||
],
|
||||
@@ -681,7 +641,7 @@
|
||||
"Some people believe that <ENT0> is more <ENT1> than <ENT2> because it is more active and engaging ."
|
||||
]
|
||||
},
|
||||
"can but not good": {
|
||||
"can_but_not_good": {
|
||||
"init_prompts": [
|
||||
"<ENT0> can <ENT1> but not good at ."
|
||||
],
|
||||
@@ -722,7 +682,7 @@
|
||||
"While <ENT0>s are able to <ENT1>, they are not instinctively good at it and may need some help or encouragement to do so ."
|
||||
]
|
||||
},
|
||||
"worth celebrating": {
|
||||
"worth_celebrating": {
|
||||
"init_prompts": [
|
||||
"It\u2019s worth celebrating for a <ENT0> to <ENT1> ."
|
||||
],
|
||||
@@ -761,7 +721,7 @@
|
||||
"It is seen as significant accomplishment for <ENT0> when they <ENT1> ."
|
||||
]
|
||||
},
|
||||
"potential risk": {
|
||||
"potential_risk": {
|
||||
"init_prompts": [
|
||||
"A potential risk of <ENT0> is <ENT1> ."
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user