This commit is contained in:
bwt09
2022-06-04 16:34:09 -07:00
parent 340c87177f
commit 089d2d41cd
6 changed files with 477 additions and 249 deletions

21
README.md Normal file
View File

@@ -0,0 +1,21 @@
## BertNet: Harvesting Knowledge Graphs from Pretrained Language Models
### Preparation
```
pip install -r requirements.txt
```
### Search prompts
```
python search_prompts.py --rel_set conceptnet  # or: lama / human
```
### Search entity tuples
```
python main.py
```
### Present results
```
python present_result.py --result_dir your_result_dir
```

View File

@@ -70,4 +70,4 @@ def find_sublist(a, b):
if a[l:l+len(b)] == b:
return l
return None
return None

64
present_result.py Normal file
View File

@@ -0,0 +1,64 @@
import os
import sys
import fire
import json
import random
from prettytable import PrettyTable
def main(result_dir, n_present=20):
    """Print and save a per-relation summary of harvested entity tuples.

    For every relation listed in ``data/relation_info_{rel_set}_5seeds.json``
    this loads ``prompts.json`` and ``ent_tuples.json`` from
    ``{result_dir}/{rel}/``, keeps only the first half of the tuple list and
    builds a table that puts the seed samples next to the first, randomly
    sampled and last ``n_present`` tuples.  Each relation's report is written
    both to ``{result_dir}/summary.txt`` and to stdout.

    Relations whose ``ent_tuples.json`` is missing or empty are skipped with
    a notice on stdout.

    Args:
        result_dir: '/'-separated path of the result directory.  Its second
            path component is taken as the relation-set name, so the path
            must contain at least one '/'.
        n_present: Number of rows shown per column.  Shorter columns are
            padded with a backslash placeholder.
    """
    rel_set = result_dir.split('/')[1]
    with open(f'data/relation_info_{rel_set}_5seeds.json') as info_file:
        relation_info = json.load(info_file)

    with open(f'{result_dir}/summary.txt', 'w') as summary_file:
        for rel, info in relation_info.items():
            columns = {'Seed samples': info['seed_ent_tuples']}

            if not os.path.exists(f'{result_dir}/{rel}/ent_tuples.json'):
                print(f'outputs of relation \"{rel}\" not found. skipped.')
                continue

            with open(f'{result_dir}/{rel}/prompts.json') as prompts_file:
                weighted_prompts = json.load(prompts_file)
            with open(f'{result_dir}/{rel}/ent_tuples.json') as tuples_file:
                weighted_ent_tuples = json.load(tuples_file)

            # Skip relations with no harvested tuples.  The previous check
            # was inverted and skipped every relation that *had* results.
            if not weighted_ent_tuples:
                print(f'outputs of relation \"{rel}\" not found. skipped.')
                continue

            # Only the first half of the list is presented.
            # NOTE(review): presumably the list is sorted by weight so this
            # drops the low-confidence half -- confirm against the producer.
            weighted_ent_tuples = \
                weighted_ent_tuples[:len(weighted_ent_tuples) // 2]

            columns[f'Ours (Top {n_present})'] = [
                str(ent_tuple)
                for ent_tuple, _ in weighted_ent_tuples[:n_present]]

            # random.sample raises ValueError when asked for more items than
            # the population holds; clamp so short lists are padded below
            # like every other column.
            sample_size = min(n_present, len(weighted_ent_tuples))
            columns['Ours (Random Sample)'] = [
                str(ent_tuple)
                for ent_tuple, _ in random.sample(
                    weighted_ent_tuples, sample_size)]

            columns[f'Ours (Tail {n_present})'] = [
                str(ent_tuple)
                for ent_tuple, _ in weighted_ent_tuples[-n_present:]]

            table = PrettyTable()
            for key, col in columns.items():
                # Pad short columns so all columns have n_present rows.
                if len(col) < n_present:
                    col.extend(['\\'] * (n_present - len(col)))
                table.add_column(key, col)

            def _print_results(output_file):
                """Write the current relation's report to ``output_file``."""
                print(f'Relation: {rel}', file=output_file)
                print('Prompts:', file=output_file)
                for prompt, weight in weighted_prompts:
                    print(f'- {weight:.4f} {prompt}', file=output_file)
                print('Harvested Tuples:', file=output_file)
                print(table, file=output_file)
                print('=' * 50, file=output_file, flush=True)

            _print_results(output_file=summary_file)
            _print_results(output_file=sys.stdout)

    print(f'This summary has been saved into {summary_file.name}.')


if __name__ == '__main__':
    fire.Fire(main)

View File

@@ -260,28 +260,43 @@
],
"seed_ent_tuples": [
[
"person",
"good_batteries"
"anarchists",
"free"
],
[
"person",
"juicy_berries"
"angry_person",
"vent"
],
[
"person",
"free_will"
"animal",
"reproduce"
],
[
"person",
"feel_joy"
"bee",
"flower"
],
[
"person",
"good_vodka"
"kids",
"play_games"
]
],
"prompts": [
"Every <ENT0> desires <ENT1> ."
"<ENT0> desire to <ENT1> .",
"<ENT1> is what every <ENT0> desires .",
"<ENT0> desire <ENT1>dom for everyone .",
"The <ENT1>s are what every <ENT0> wants .",
"Every <ENT0> wants to mate in order to <ENT1> .",
"<ENT0>s have a strong desire to fly toward <ENT1>s .",
"Every <ENT0> desires a way to <ENT1> their frustration .",
"Every <ENT0> desires an outlet, or a <ENT1>, for their anger .",
"As <ENT0>, we believe in the power of <ENT1>dom for all people .",
"The <ENT0> collects pollen from the <ENT1> in order to make honey .",
"<ENT0>s <ENT1> in order to create new generations of their species .",
"<ENT0> believe in <ENT1>dom from oppressive governments and systems .",
"All <ENT0>s have the instinct to <ENT1> in order to continue their species .",
"<ENT0>s are attracted to <ENT1>s because they need the nectar to make honey .",
"<ENT0> want to be <ENT1> from oppressive structures and systems, both physical and psychological .",
"<ENT0> believe in <ENT1>dom from all oppressive systems, whether they be economic, political, or social ."
]
},
"HasA": {
@@ -311,7 +326,7 @@
]
],
"prompts": [
"<ENT0> to have <ENT1> .",
"<ENT0> to have <ENT1> .",
"A <ENT0> of cards contains <ENT1> .",
"There are usually <ENT1> in a <ENT0> .",
"We usually expect <ENT0> have a <ENT1> .",

View File

@@ -31,17 +31,17 @@
]
],
"prompts": [
"at the <ENT2>, <ENT0> can <ENT1>",
"<ENT0>s can <ENT1> in the <ENT2>",
"the <ENT2> is a place where <ENT0> can <ENT1>",
"the <ENT0> can choose to <ENT1> in the <ENT2> or online",
"a <ENT0> can learn effectively by <ENT1>ing in a <ENT2> setting",
"looking up at the <ENT2>, it's easy to see that <ENT0>s can <ENT1>",
"at the <ENT2>, <ENT0>s can <ENT1> together and learn from each other",
"a <ENT0> can <ENT1> in a <ENT2> by themselves or with a group of friends",
"at the <ENT2>, <ENT0> can use the machines to <ENT1>, or they can take classes",
"at the <ENT2>, <ENT0> can <ENT1> by using the various machines and equipment available",
"there are many facilities available for <ENT0> who want to stay in shape and <ENT1>, one of which is the <ENT2>"
"At the <ENT2>, <ENT0> can <ENT1> .",
"<ENT0>s can <ENT1> in the <ENT2> .",
"The <ENT2> is a place where <ENT0> can <ENT1> .",
"The <ENT0> can choose to <ENT1> in the <ENT2> or online .",
"A <ENT0> can learn effectively by <ENT1>ing in a <ENT2> setting .",
"Looking up at the <ENT2>, it's easy to see that <ENT0>s can <ENT1> .",
"At the <ENT2>, <ENT0>s can <ENT1> together and learn from each other .",
"A <ENT0> can <ENT1> in a <ENT2> by themselves or with a group of friends .",
"At the <ENT2>, <ENT0> can use the machines to <ENT1>, or they can take classes .",
"At the <ENT2>, <ENT0> can <ENT1> by using the various machines and equipment available .",
"There are many facilities available for <ENT0> who want to stay in shape and <ENT1>, one of which is the <ENT2> ."
]
},
"prevent": {
@@ -71,21 +71,21 @@
]
],
"prompts": [
"<ENT0> prevents <ENT1>",
"if you <ENT0>, you will not <ENT1>",
"<ENT0> provides a safety net against <ENT1>",
"if you <ENT0>, you are less likely to <ENT1>",
"a <ENT0> will protect you from getting the <ENT1>",
"if you want to avoid being <ENT1>, start <ENT0> more",
"if you don't want to be <ENT1>, you should <ENT0> often",
"<ENT0>s are effective at preventing the spread of <ENT1>es",
"this is a paraphrase of the saying \"<ENT0> prevents <ENT1>ity.\"",
"<ENT0> is an important part of staying healthy and preventing <ENT1>",
"regular <ENT0> has been shown to be one of the most effective ways to prevent <ENT1>",
"<ENT0> protects people from having to declare <ENT1> in the event of an accident or emergency",
"<ENT0>s prevent the spread of <ENT1>es by trapping droplets that are released when the user talks, coughs, or sneezes",
"it could be said that <ENT0> prevents <ENT1>, as it provides a safety net for people in the event of an unexpected setback",
"regular <ENT0> has various health benefits and is often prescribed by doctors as a preventative measure against developing various <ENT1>s"
"<ENT0> prevents <ENT1> .",
"If you <ENT0>, you will not <ENT1> .",
"<ENT0> provides a safety net against <ENT1> .",
"If you <ENT0>, you are less likely to <ENT1> .",
"A <ENT0> will protect you from getting the <ENT1> .",
"If you want to avoid being <ENT1>, start <ENT0> more .",
"If you don't want to be <ENT1>, you should <ENT0> often .",
"<ENT0>s are effective at preventing the spread of <ENT1>es .",
"This is a paraphrase of the saying \"<ENT0> prevents <ENT1>ity.\" .",
"<ENT0> is an important part of staying healthy and preventing <ENT1> .",
"Regular <ENT0> has been shown to be one of the most effective ways to prevent <ENT1> .",
"<ENT0> protects people from having to declare <ENT1> in the event of an accident or emergency .",
"<ENT0>s prevent the spread of <ENT1>es by trapping droplets that are released when the user talks, coughs, or sneezes .",
"It could be said that <ENT0> prevents <ENT1>, as it provides a safety net for people in the event of an unexpected setback .",
"Regular <ENT0> has various health benefits and is often prescribed by doctors as a preventative measure against developing various <ENT1>s ."
]
},
"help": {
@@ -115,21 +115,21 @@
]
],
"prompts": [
"a <ENT0> can help a <ENT1>",
"the <ENT0> can help with the <ENT1>'s chores",
"a <ENT0> can assist a <ENT1> with her duties",
"if you are a <ENT1> of a crime, the <ENT0> can help you",
"the <ENT0> can help the <ENT1> with their medical needs",
"a <ENT0> can help a <ENT1> by providing guidance and support",
"a <ENT0> can help take care of the household duties for a <ENT1>",
"a <ENT0> officer can help a <ENT1> by providing them with protection and assistance",
"a <ENT0> can help with the chores around the house, giving the <ENT1> more free time",
"a <ENT0> can help out a <ENT1> by doing things like cleaning, cooking, and running errands",
"a <ENT0> can help a <ENT1> in many ways, from grading papers to leading discussion sections",
"a <ENT0> can help a <ENT1> by providing guidance, answering questions, and offering feedback",
"a <ENT0> can help a <ENT1> with their workload by taking on some of the teaching responsibilities",
"a <ENT0> can help a <ENT1> by grading papers, leading discussion sections, and providing office hours",
"the <ENT0> can help <ENT1>s of crime by investigating the incident and taking statements from witnesses"
"A <ENT0> can help a <ENT1> .",
"The <ENT0> can help with the <ENT1>'s chores .",
"A <ENT0> can assist a <ENT1> with her duties .",
"If you are a <ENT1> of a crime, the <ENT0> can help you .",
"The <ENT0> can help the <ENT1> with their medical needs .",
"A <ENT0> can help a <ENT1> by providing guidance and support .",
"A <ENT0> can help take care of the household duties for a <ENT1> .",
"A <ENT0> officer can help a <ENT1> by providing them with protection and assistance .",
"A <ENT0> can help with the chores around the house, giving the <ENT1> more free time .",
"A <ENT0> can help out a <ENT1> by doing things like cleaning, cooking, and running errands .",
"A <ENT0> can help a <ENT1> in many ways, from grading papers to leading discussion sections .",
"A <ENT0> can help a <ENT1> by providing guidance, answering questions, and offering feedback .",
"A <ENT0> can help a <ENT1> with their workload by taking on some of the teaching responsibilities .",
"A <ENT0> can help a <ENT1> by grading papers, leading discussion sections, and providing office hours .",
"The <ENT0> can help <ENT1>s of crime by investigating the incident and taking statements from witnesses ."
]
},
"place_for": {
@@ -159,18 +159,18 @@
]
],
"prompts": [
"the <ENT0> is the place for <ENT1>",
"there's no place like <ENT0> for <ENT1>",
"<ENT0> is a great place to do some <ENT1>",
"the <ENT0> is a place where people go to <ENT1>",
"there's no better place to <ENT1> than in an <ENT0>",
"the <ENT0> is the best place to get some <ENT1> done",
"the <ENT0> provides an environment for people to <ENT1>",
"if you want to do some <ENT1>ing, you can go to the <ENT0>",
"if you're looking to do some <ENT1>, <ENT0> is the place for you",
"if you're looking to do some <ENT1>, then you should head on over to <ENT0>",
"if you're looking to get fit and <ENT1>, the <ENT0> is the perfect place to do so",
"there's no place like the <ENT0> for <ENT1> because you have access to all the equipment you need to get a good workout"
"The <ENT0> is the place for <ENT1> .",
"There's no place like <ENT0> for <ENT1> .",
"<ENT0> is a great place to do some <ENT1> .",
"The <ENT0> is a place where people go to <ENT1> .",
"There's no better place to <ENT1> than in an <ENT0> .",
"The <ENT0> is the best place to get some <ENT1> done .",
"The <ENT0> provides an environment for people to <ENT1> .",
"If you want to do some <ENT1>ing, you can go to the <ENT0> .",
"If you're looking to do some <ENT1>, <ENT0> is the place for you .",
"If you're looking to do some <ENT1>, then you should head on over to <ENT0> .",
"If you're looking to get fit and <ENT1>, the <ENT0> is the perfect place to do so .",
"There's no place like the <ENT0> for <ENT1> because you have access to all the equipment you need to get a good workout ."
]
},
"synonym": {
@@ -200,20 +200,20 @@
]
],
"prompts": [
"a <ENT0> is not a <ENT1>",
"the antonym of <ENT0> is <ENT1>",
"<ENT1> is the opposite of <ENT0>",
"when you're <ENT1>, you're not <ENT0>",
"<ENT1> is to <ENT0> as night is to day",
"<ENT0> and <ENT1> are opposite concepts",
"if you <ENT1>, it means you didn't <ENT0>",
"when a person is <ENT0>, they are not <ENT1>",
"the two words \"<ENT0>\" and \"<ENT1>\" are antonyms",
"<ENT0> is the direction that is opposite of <ENT1>",
"to be <ENT1> is to have a small amount of body <ENT0>",
"when something <ENT0>es, it is successful, and when something <ENT1>s, it is not successful",
"if you <ENT0> something, you have succeeded, and if you <ENT1> something, you have not succeeded",
"this means that simply because someone does not <ENT0> something does not mean they have <ENT1>ed"
"A <ENT0> is not a <ENT1> .",
"The antonym of <ENT0> is <ENT1> .",
"<ENT1> is the opposite of <ENT0> .",
"When you're <ENT1>, you're not <ENT0> .",
"<ENT1> is to <ENT0> as night is to day .",
"<ENT0> and <ENT1> are opposite concepts .",
"If you <ENT1>, it means you didn't <ENT0> .",
"When a person is <ENT0>, they are not <ENT1> .",
"The two words \"<ENT0>\" and \"<ENT1>\" are antonyms .",
"<ENT0> is the direction that is opposite of <ENT1> .",
"To be <ENT1> is to have a small amount of body <ENT0> .",
"When something <ENT0>es, it is successful, and when something <ENT1>s, it is not successful .",
"If you <ENT0> something, you have succeeded, and if you <ENT1> something, you have not succeeded .",
"This means that simply because someone does not <ENT0> something does not mean they have <ENT1>ed ."
]
},
"separated_by_the_ocean": {
@@ -243,16 +243,16 @@
]
],
"prompts": [
"the ocean separates <ENT1> from the <ENT0>",
"there is an ocean separating <ENT1> and <ENT0>",
"the pacific ocean lies between <ENT0> and <ENT1>",
"<ENT1> and <ENT0> are separated by the tasman sea",
"there is a distance of about 1,500 miles between <ENT1> and the <ENT0>s",
"the distance between <ENT1> and <ENT0> is vast, with an ocean between them",
"<ENT1> and <ENT0> are two different countries located on different continents",
"<ENT0> and <ENT1> are two countries that are close to each other but are separated by the ocean",
"there is a large body of water, known as the atlantic ocean, which separates the two land masses of <ENT1> and the <ENT0>",
"while <ENT1> and <ENT0> are both located in the oceania region of the world, the two countries are separated by the tasman sea"
"The ocean separates <ENT1> from the <ENT0> .",
"There is an ocean separating <ENT1> and <ENT0> .",
"The pacific ocean lies between <ENT0> and <ENT1> .",
"<ENT1> and <ENT0> are separated by the tasman sea .",
"There is a distance of about 1,500 miles between <ENT1> and the <ENT0>s .",
"The distance between <ENT1> and <ENT0> is vast, with an ocean between them .",
"<ENT1> and <ENT0> are two different countries located on different continents .",
"<ENT0> and <ENT1> are two countries that are close to each other but are separated by the ocean .",
"There is a large body of water, known as the atlantic ocean, which separates the two land masses of <ENT1> and the <ENT0> .",
"While <ENT1> and <ENT0> are both located in the oceania region of the world, the two countries are separated by the tasman sea ."
]
},
"representative_figure": {
@@ -282,22 +282,22 @@
]
],
"prompts": [
"<ENT0> was a <ENT1> icon",
"<ENT0> is the face of <ENT1>",
"<ENT0> was the representative figure of <ENT1>",
"<ENT0> was the co-founder and ceo of <ENT1> inc",
"<ENT0> was the most iconic <ENT1>ian of his time",
"<ENT0> was a symbol of hope for many people in <ENT1>",
"<ENT0> is one of the most popular <ENT1> artists today",
"<ENT0> is considered the father of the modern <ENT1>n state",
"<ENT0> is one of the most influential <ENT1>ians of our time",
"<ENT0> is a <ENT1> icon and one of the best soccer players in the world",
"as one of the most successful and popular <ENT1> artists, <ENT0> represents the genre well",
"if you're looking for a representative figure of <ENT1>, you can't do much better than <ENT0>",
"<ENT0> is the co-founder of <ENT1> and served as the ceo of the company until his death in 2011",
"if <ENT0> is the representative figure of <ENT1>, then that means he is the one who speaks for the country",
"if <ENT0> were the representative figure of <ENT1>, he would be an extremely influential and popular artist",
"if we're talking about current, popular <ENT1>, then yeah, you could say that <ENT0> is the representative figure"
"<ENT0> was a <ENT1> icon .",
"<ENT0> is the face of <ENT1> .",
"<ENT0> was the representative figure of <ENT1> .",
"<ENT0> was the co-founder and ceo of <ENT1> inc .",
"<ENT0> was the most iconic <ENT1>ian of his time .",
"<ENT0> was a symbol of hope for many people in <ENT1> .",
"<ENT0> is one of the most popular <ENT1> artists today .",
"<ENT0> is considered the father of the modern <ENT1>n state .",
"<ENT0> is one of the most influential <ENT1>ians of our time .",
"<ENT0> is a <ENT1> icon and one of the best soccer players in the world .",
"As one of the most successful and popular <ENT1> artists, <ENT0> represents the genre well .",
"If you're looking for a representative figure of <ENT1>, you can't do much better than <ENT0> .",
"<ENT0> is the co-founder of <ENT1> and served as the ceo of the company until his death in 2011 .",
"If <ENT0> is the representative figure of <ENT1>, then that means he is the one who speaks for the country .",
"If <ENT0> were the representative figure of <ENT1>, he would be an extremely influential and popular artist .",
"If we're talking about current, popular <ENT1>, then yeah, you could say that <ENT0> is the representative figure ."
]
},
"ingredient_for": {
@@ -327,16 +327,16 @@
]
],
"prompts": [
"<ENT1> needs <ENT0> as an ingredient",
"one way to use <ENT0>es is to make <ENT1>s",
"<ENT0> is a key ingredient in making a <ENT1>",
"one of the ingredients for making a <ENT1> is <ENT0>",
"if you want to make a <ENT1>, you'll need some <ENT0>",
"a <ENT0> is an ingredient that is used to make <ENT1>s",
"<ENT0> is a dish that contains <ENT1> as one of its ingredients",
"<ENT0> is a popular dish made with <ENT1> as the main ingredient",
"<ENT0> is a type of fish pie that is typically made with <ENT1>s",
"<ENT0> is a dish made from <ENT1> that has been fried in a wok or a pan"
"<ENT1> needs <ENT0> as an ingredient .",
"One way to use <ENT0>es is to make <ENT1>s .",
"<ENT0> is a key ingredient in making a <ENT1> .",
"One of the ingredients for making a <ENT1> is <ENT0> .",
"If you want to make a <ENT1>, you'll need some <ENT0> .",
"A <ENT0> is an ingredient that is used to make <ENT1>s .",
"<ENT0> is a dish that contains <ENT1> as one of its ingredients .",
"<ENT0> is a popular dish made with <ENT1> as the main ingredient .",
"<ENT0> is a type of fish pie that is typically made with <ENT1>s .",
"<ENT0> is a dish made from <ENT1> that has been fried in a wok or a pan ."
]
},
"release_time": {
@@ -366,16 +366,16 @@
]
],
"prompts": [
"<ENT0> was released in <ENT1>",
"in <ENT1>, <ENT0> was released",
"google released its <ENT0> algorithm in <ENT1>",
"google's <ENT0> algorithm is released in <ENT1>",
"<ENT0> is a vector representation of words that was released in <ENT1>",
"<ENT0> is a tool that was released in <ENT1> that creates word embeddings",
"<ENT0> is a tool that creates word embeddings, that was released in <ENT1>",
"in <ENT1>, <ENT0> was released as a toolkit for vector representation of words",
"the <ENT0> algorithm is a neural network algorithm that was released in <ENT1>",
"<ENT0> is a toolkit that was released in <ENT1> that creates word embeddings, which are vector representations of words that can be used"
"<ENT0> was released in <ENT1> .",
"In <ENT1>, <ENT0> was released .",
"Google released its <ENT0> algorithm in <ENT1> .",
"Google's <ENT0> algorithm is released in <ENT1> .",
"<ENT0> is a vector representation of words that was released in <ENT1> .",
"<ENT0> is a tool that was released in <ENT1> that creates word embeddings .",
"<ENT0> is a tool that creates word embeddings, that was released in <ENT1> .",
"In <ENT1>, <ENT0> was released as a toolkit for vector representation of words .",
"The <ENT0> algorithm is a neural network algorithm that was released in <ENT1> .",
"<ENT0> is a toolkit that was released in <ENT1> that creates word embeddings, which are vector representations of words that can be used ."
]
},
"processed_from": {
@@ -405,21 +405,21 @@
]
],
"prompts": [
"<ENT1> comes from <ENT0>",
"<ENT0> is used to make <ENT1>",
"most <ENT1> is made from <ENT0>",
"<ENT0> is a major source of <ENT1>",
"the main source of <ENT1> is <ENT0>",
"the <ENT0> is what turns into <ENT1>",
"<ENT1> is most commonly found in <ENT0>s",
"the <ENT0> is the place where <ENT1> is mined",
"many <ENT1>s contain <ENT0> as a major ingredient",
"<ENT0>s are where <ENT1> is pulled from the ground",
"<ENT1> is a dairy product that is made from <ENT0>",
"<ENT0> is the primary ingredient in the production of <ENT1>",
"the main source of <ENT1> is quartz, which is a type of <ENT0>",
"<ENT0> is the main source of <ENT1> used in vehicles and other machinery",
"<ENT0> is used to produce gasoline, diesel, and other petroleum-based products, which are widely used as <ENT1> for cars, trucks,"
"<ENT1> comes from <ENT0> .",
"<ENT0> is used to make <ENT1> .",
"Most <ENT1> is made from <ENT0> .",
"<ENT0> is a major source of <ENT1> .",
"The main source of <ENT1> is <ENT0> .",
"The <ENT0> is what turns into <ENT1> .",
"<ENT1> is most commonly found in <ENT0>s .",
"The <ENT0> is the place where <ENT1> is mined .",
"Many <ENT1>s contain <ENT0> as a major ingredient .",
"<ENT0>s are where <ENT1> is pulled from the ground .",
"<ENT1> is a dairy product that is made from <ENT0> .",
"<ENT0> is the primary ingredient in the production of <ENT1> .",
"The main source of <ENT1> is quartz, which is a type of <ENT0> .",
"<ENT0> is the main source of <ENT1> used in vehicles and other machinery .",
"<ENT0> is used to produce gasoline, diesel, and other petroleum-based products, which are widely used as <ENT1> for cars, trucks, ."
]
},
"graduate_from": {
@@ -449,20 +449,20 @@
]
],
"prompts": [
"<ENT0> graduated from <ENT1>",
"<ENT0> finished his degree at the <ENT1>",
"<ENT0> completes his studies at the <ENT1>",
"<ENT0> graduated from <ENT1> in the united states",
"former president <ENT0> graduated from <ENT1> in 1991",
"<ENT0> graduated from <ENT1> with a degree in mathematics",
"<ENT0>, the former president of <ENT1>, graduated from the school in 1991",
"<ENT0> was a world-renowned physicist and cosmologist who graduated from the <ENT1>",
"after <ENT0> completed his studies at <ENT1>, he became the 44th president of the united states",
"upon graduating from the <ENT1>, <ENT0> began his work as a cosmologist and theoretical physicist",
"after studying at the eth institute in zurich, <ENT0> graduates with a degree in physics from the <ENT1>",
"after studying at the polytechnic institute in zurich and the <ENT1>, <ENT0> graduated from the latter in",
"after <ENT0> graduated from the <ENT1>, he went on to become one of the most famous theoretical physicists in the world",
"having attended oxford university for his undergraduate degree, <ENT0> moved to the <ENT1> to complete his ph.d. in cosmology"
"<ENT0> graduated from <ENT1> .",
"<ENT0> finished his degree at the <ENT1> .",
"<ENT0> completes his studies at the <ENT1> .",
"<ENT0> graduated from <ENT1> in the united states .",
"Former president <ENT0> graduated from <ENT1> in 1991 .",
"<ENT0> graduated from <ENT1> with a degree in mathematics .",
"<ENT0>, the former president of <ENT1>, graduated from the school in 1991 .",
"<ENT0> was a world-renowned physicist and cosmologist who graduated from the <ENT1> .",
"After <ENT0> completed his studies at <ENT1>, he became the 44th president of the united states .",
"Upon graduating from the <ENT1>, <ENT0> began his work as a cosmologist and theoretical physicist .",
"After studying at the eth institute in zurich, <ENT0> graduates with a degree in physics from the <ENT1> .",
"After studying at the polytechnic institute in zurich and the <ENT1>, <ENT0> graduated from the latter in .",
"After <ENT0> graduated from the <ENT1>, he went on to become one of the most famous theoretical physicists in the world .",
"Having attended oxford university for his undergraduate degree, <ENT0> moved to the <ENT1> to complete his ph.d. in cosmology ."
]
},
"popular_sport_in": {
@@ -492,18 +492,18 @@
]
],
"prompts": [
"<ENT0> is a popular sport in <ENT1>",
"in <ENT1>, <ENT0> is a popular sport",
"many people in <ENT1> enjoy playing <ENT0>",
"<ENT0> is a sport that is popular in <ENT1>",
"<ENT1>ese people enjoy <ENT0> as a fun pastime",
"there is no sport more popular in <ENT1> than <ENT0>",
"a large number of people in <ENT1> like to play <ENT0>",
"many <ENT1>ians enjoy playing and watching <ENT0> (soccer)",
"<ENT0> is a popular sport in many countries, including <ENT1>",
"<ENT0> is more than just a popular sport in <ENT1>--it's a way of life",
"there is no denying that <ENT0> is by far the most popular sport in <ENT1>",
"<ENT1>ians are passionate about <ENT0> and it is one of the most popular sports in the country"
"<ENT0> is a popular sport in <ENT1> .",
"In <ENT1>, <ENT0> is a popular sport .",
"Many people in <ENT1> enjoy playing <ENT0> .",
"<ENT0> is a sport that is popular in <ENT1> .",
"<ENT1>ese people enjoy <ENT0> as a fun pastime .",
"There is no sport more popular in <ENT1> than <ENT0> .",
"A large number of people in <ENT1> like to play <ENT0> .",
"Many <ENT1>ians enjoy playing and watching <ENT0> (soccer) .",
"<ENT0> is a popular sport in many countries, including <ENT1> .",
"<ENT0> is more than just a popular sport in <ENT1>--it's a way of life .",
"There is no denying that <ENT0> is by far the most popular sport in <ENT1> .",
"<ENT1>ians are passionate about <ENT0> and it is one of the most popular sports in the country ."
]
},
"business": {
@@ -533,20 +533,20 @@
]
],
"prompts": [
"<ENT0> sells <ENT1>s",
"<ENT0> is an <ENT1> company",
"<ENT0> is known for selling <ENT1>s",
"<ENT0> is a company that sells <ENT1>s",
"<ENT0> is an international <ENT1> and gas company",
"<ENT0> is a <ENT1> company that manufactures and sells vehicles",
"<ENT0> sells <ENT1>s through its online store and retail locations",
"<ENT0> is a company that sells graphics processing units (<ENT1>s)",
"<ENT0> is a popular fast food chain that is known for selling <ENT1>s",
"<ENT0> is ajapanese<ENT1> manufacturing company that is headquartered in yokohama, japan",
"<ENT0> is a fast food company that specializes in <ENT1>s, fried chicken, and soft drinks",
"<ENT0> is a company that specializes in the production of <ENT1>s, or graphics processing units",
"<ENT0> is a japanese <ENT1> company that sells a wide variety of vehicles, from small to large, and from economy to luxury",
"<ENT0> describes themselves as \"the world\u2019s largest information technology company by revenue,\" and they sell many products, including <ENT1>s"
"<ENT0> sells <ENT1>s .",
"<ENT0> is an <ENT1> company .",
"<ENT0> is known for selling <ENT1>s .",
"<ENT0> is a company that sells <ENT1>s .",
"<ENT0> is an international <ENT1> and gas company .",
"<ENT0> is a <ENT1> company that manufactures and sells vehicles .",
"<ENT0> sells <ENT1>s through its online store and retail locations .",
"<ENT0> is a company that sells graphics processing units (<ENT1>s) .",
"<ENT0> is a popular fast food chain that is known for selling <ENT1>s .",
"<ENT0> is ajapanese<ENT1> manufacturing company that is headquartered in yokohama, japan .",
"<ENT0> is a fast food company that specializes in <ENT1>s, fried chicken, and soft drinks .",
"<ENT0> is a company that specializes in the production of <ENT1>s, or graphics processing units .",
"<ENT0> is a japanese <ENT1> company that sells a wide variety of vehicles, from small to large, and from economy to luxury .",
"<ENT0> describes themselves as \"the world\u2019s largest information technology company by revenue,\" and they sell many products, including <ENT1>s ."
]
},
"featured thing": {
@@ -581,16 +581,16 @@
]
],
"prompts": [
"the <ENT0> <ENT2> is very <ENT1>",
"<ENT0>s are a very <ENT1> <ENT2>",
"<ENT0> is definitely a <ENT1> <ENT2>",
"<ENT0> is a very <ENT1> and successful <ENT2>",
"you can hardly find a <ENT1>er <ENT2> than <ENT0>s",
"there is no doubt that <ENT0> is a very <ENT1> <ENT2>",
"it covers a lot of ground, <ENT0> is a very <ENT1> <ENT2>",
"a single <ENT0> is a very <ENT1> and affordable <ENT2> item",
"<ENT0>s are a type of <ENT2> that can grow to be very <ENT1>",
"although <ENT0> is not the world's <ENT1>est <ENT2>, it is still very wealthy"
"The <ENT0> <ENT2> is very <ENT1> .",
"<ENT0>s are a very <ENT1> <ENT2> .",
"<ENT0> is definitely a <ENT1> <ENT2> .",
"<ENT0> is a very <ENT1> and successful <ENT2> .",
"You can hardly find a <ENT1>er <ENT2> than <ENT0>s .",
"There is no doubt that <ENT0> is a very <ENT1> <ENT2> .",
"It covers a lot of ground, <ENT0> is a very <ENT1> <ENT2> .",
"A single <ENT0> is a very <ENT1> and affordable <ENT2> item .",
"<ENT0>s are a type of <ENT2> that can grow to be very <ENT1> .",
"Although <ENT0> is not the world's <ENT1>est <ENT2>, it is still very wealthy ."
]
},
"need sth to do sth": {
@@ -625,16 +625,16 @@
]
],
"prompts": [
"<ENT0> need <ENT1> to <ENT2>",
"in order to <ENT2>, <ENT0> need <ENT1>",
"there is a need for <ENT1> to <ENT2> <ENT0>",
"a <ENT0> requires <ENT1> in order to <ENT2>",
"<ENT0> feel the need to <ENT2> through <ENT1>",
"a <ENT0> requires <ENT1> to <ENT2> with each other",
"<ENT0> feel that they need <ENT1> in order to <ENT2>",
"a <ENT0> will only <ENT2> if it has enough <ENT1> to eat",
"without <ENT1>, <ENT0> would have a harder time <ENT2>ing with others",
"it is important for <ENT0> to be able to <ENT2> with each other through <ENT1>"
"<ENT0> need <ENT1> to <ENT2> .",
"In order to <ENT2>, <ENT0> need <ENT1> .",
"There is a need for <ENT1> to <ENT2> <ENT0> .",
"A <ENT0> requires <ENT1> in order to <ENT2> .",
"<ENT0> feel the need to <ENT2> through <ENT1> .",
"A <ENT0> requires <ENT1> to <ENT2> with each other .",
"<ENT0> feel that they need <ENT1> in order to <ENT2> .",
"A <ENT0> will only <ENT2> if it has enough <ENT1> to eat .",
"Without <ENT1>, <ENT0> would have a harder time <ENT2>ing with others .",
"It is important for <ENT0> to be able to <ENT2> with each other through <ENT1> ."
]
},
"more than": {
@@ -669,16 +669,16 @@
]
],
"prompts": [
"<ENT0>ing is more <ENT1> than <ENT2>ing",
"the <ENT0> is <ENT1>r than the <ENT2> bear",
"<ENT0>s are often <ENT1>r in size than <ENT2>s",
"it's more <ENT1> to <ENT0> than it is to <ENT2>",
"<ENT0>s are generally much <ENT1>er than <ENT2>s",
"there is often more <ENT1> in <ENT0>s than in <ENT2>s",
"compared to <ENT2>s, <ENT0>s are typically much <ENT1>er",
"many people believe that <ENT0> is more <ENT1> than <ENT2>",
"<ENT0> is generally more <ENT1> than <ENT2>ing because it is more enjoyable and engaging",
"some people believe that <ENT0> is more <ENT1> than <ENT2> because it is more active and engaging"
"<ENT0>ing is more <ENT1> than <ENT2>ing .",
"The <ENT0> is <ENT1>r than the <ENT2> bear .",
"<ENT0>s are often <ENT1>r in size than <ENT2>s .",
"It's more <ENT1> to <ENT0> than it is to <ENT2> .",
"<ENT0>s are generally much <ENT1>er than <ENT2>s .",
"There is often more <ENT1> in <ENT0>s than in <ENT2>s .",
"Compared to <ENT2>s, <ENT0>s are typically much <ENT1>er .",
"Many people believe that <ENT0> is more <ENT1> than <ENT2> .",
"<ENT0> is generally more <ENT1> than <ENT2>ing because it is more enjoyable and engaging .",
"Some people believe that <ENT0> is more <ENT1> than <ENT2> because it is more active and engaging ."
]
},
"can but not good": {
@@ -708,18 +708,18 @@
]
],
"prompts": [
"<ENT0>s are not good at <ENT1>ing",
"<ENT0>s can <ENT1>, but they aren't very good at it",
"the average <ENT0> is not especially good at <ENT1>ing",
"the <ENT0> is skilled at <ENT1>ing, but not particularly good at it",
"<ENT0>s are not good at <ENT1>ing because they are not built for it",
"<ENT1>ing requires explosive speed and power, which <ENT0>s typically lack",
"<ENT0>s aren't particularly good <ENT1>mers, but they can do it if they need to",
"while <ENT0>s are capable of <ENT1>ming, they are not particularly proficient at it",
"<ENT0>s are not good at <ENT1>ing because they lack the proper equipment and training",
"while <ENT0>s are typically excellent at their jobs, <ENT1>ing is not usually a strong suit",
"although <ENT0>s are theoretically able to <ENT1>, they are not very good at it and usually stay on the ground",
"while <ENT0>s are able to <ENT1>, they are not instinctively good at it and may need some help or encouragement to do so"
"<ENT0>s are not good at <ENT1>ing .",
"<ENT0>s can <ENT1>, but they aren't very good at it .",
"The average <ENT0> is not especially good at <ENT1>ing .",
"The <ENT0> is skilled at <ENT1>ing, but not particularly good at it .",
"<ENT0>s are not good at <ENT1>ing because they are not built for it .",
"<ENT1>ing requires explosive speed and power, which <ENT0>s typically lack .",
"<ENT0>s aren't particularly good <ENT1>mers, but they can do it if they need to .",
"While <ENT0>s are capable of <ENT1>ming, they are not particularly proficient at it .",
"<ENT0>s are not good at <ENT1>ing because they lack the proper equipment and training .",
"While <ENT0>s are typically excellent at their jobs, <ENT1>ing is not usually a strong suit .",
"Although <ENT0>s are theoretically able to <ENT1>, they are not very good at it and usually stay on the ground .",
"While <ENT0>s are able to <ENT1>, they are not instinctively good at it and may need some help or encouragement to do so ."
]
},
"worth celebrating": {
@@ -749,16 +749,16 @@
]
],
"prompts": [
"it's good when <ENT0> <ENT1>",
"it's great for <ENT0> to <ENT1>",
"it's great when a group of <ENT0> <ENT1>",
"it's great when <ENT0>s <ENT1>s as a group",
"it's worth celebrating when a <ENT0> <ENT1>",
"it's always good when <ENT0> <ENT1> is doing well",
"when a group of <ENT0> <ENT1>, it is worth celebrating",
"there is value in celebrating when group of <ENT0>s <ENT1>",
"it's always good when <ENT0>s <ENT1>s, regardless of the topic",
"it is seen as significant accomplishment for <ENT0> when they <ENT1>"
"It's good when <ENT0> <ENT1> .",
"It's great for <ENT0> to <ENT1> .",
"It's great when a group of <ENT0> <ENT1> .",
"It's great when <ENT0>s <ENT1>s as a group .",
"It's worth celebrating when a <ENT0> <ENT1> .",
"It's always good when <ENT0> <ENT1> is doing well .",
"When a group of <ENT0> <ENT1>, it is worth celebrating .",
"There is value in celebrating when group of <ENT0>s <ENT1> .",
"It's always good when <ENT0>s <ENT1>s, regardless of the topic .",
"It is seen as significant accomplishment for <ENT0> when they <ENT1> ."
]
},
"potential risk": {
@@ -788,22 +788,22 @@
]
],
"prompts": [
"<ENT0> may lead to <ENT1>",
"<ENT0> can potentially cause <ENT1>",
"one potential risk of <ENT0> is <ENT1>",
"eating too much <ENT0> can cause <ENT1>",
"if a <ENT1>es, it is a potential risk of <ENT0>",
"if you have <ENT1>, you may be at risk for <ENT0>",
"<ENT0> has been linked to an increased risk of <ENT1>",
"an <ENT0> always entails some risk\u2014you could <ENT1> on it",
"<ENT0> can lead to <ENT1> if it is not eaten in moderation",
"an <ENT0> always comes with the potential to make or <ENT1>",
"if a <ENT0> is not properly executed, it can cause a <ENT1>",
"if a <ENT0> is not well-planned or executed, it may lead to a <ENT1>",
"a new study has found that <ENT0> is linked to an increased risk of <ENT1>",
"if you have a <ENT1>, you are at a greater risk for cavities if you eat <ENT0>",
"there is a strong correlation between <ENT0> and an increased risk of air<ENT1>es",
"a recent study has found that there is a correlation between eating <ENT0> and an increased risk of <ENT1>"
"<ENT0> may lead to <ENT1> .",
"<ENT0> can potentially cause <ENT1> .",
"One potential risk of <ENT0> is <ENT1> .",
"Eating too much <ENT0> can cause <ENT1> .",
"If a <ENT1>es, it is a potential risk of <ENT0> .",
"If you have <ENT1>, you may be at risk for <ENT0> .",
"<ENT0> has been linked to an increased risk of <ENT1> .",
"An <ENT0> always entails some risk\u2014you could <ENT1> on it .",
"<ENT0> can lead to <ENT1> if it is not eaten in moderation .",
"An <ENT0> always comes with the potential to make or <ENT1> .",
"If a <ENT0> is not properly executed, it can cause a <ENT1> .",
"If a <ENT0> is not well-planned or executed, it may lead to a <ENT1> .",
"A new study has found that <ENT0> is linked to an increased risk of <ENT1> .",
"If you have a <ENT1>, you are at a greater risk for cavities if you eat <ENT0> .",
"There is a strong correlation between <ENT0> and an increased risk of air<ENT1>es .",
"A recent study has found that there is a correlation between eating <ENT0> and an increased risk of <ENT1> ."
]
}
}

128
search_prompts.py Normal file
View File

@@ -0,0 +1,128 @@
import fire
import json
from nltk import sent_tokenize
from thefuzz import fuzz
from models.gpt3 import GPT3
from data_utils.data_utils import get_n_ents, get_sent, fix_prompt_style
# (old, new) argument pairs for ``str.replace`` that are applied to the
# paraphrased sentence before searching it for an entity.  The first pair is
# the identity transformation; the others drop the substrings 'a ' / 'the '.
TRANSFORMATIONS_SENT = [
    ['', ''],
    ['a ', ''],
    ['the ', ''],
]

# (old, new) argument pairs for ``str.replace`` that are applied to an entity
# string before searching for it in the paraphrased sentence.  The first pair
# is the identity transformation.
TRANSFORMATIONS_ENT = [
    ['', ''],
    ['being', 'is'],
    ['being', 'are'],
    ['ing', ''],
    ['ing', 'e'],
]
def get_paraphrase_prompt(gpt3, prompt, ent_tuple):
    """Paraphrase a filled-in prompt with GPT-3 and turn it back into a template.

    The prompt is instantiated with ``ent_tuple`` through ``get_sent``, sent to
    ``gpt3.call`` behind a ``paraphrase:`` instruction, and the first sentence
    of the completion is re-templatized by replacing each entity with its
    ``<ENT{idx}>`` placeholder.  Up to five completions are requested.

    Args:
        gpt3: Object whose ``call(prompt=...)`` returns a mapping holding the
            completion text at ``['choices'][0]['text']``.
        prompt: Prompt template to paraphrase.
        ent_tuple: Entities used to fill the template; lower-cased before use.

    Returns:
        The lower-cased paraphrase in which every ``<ENT{idx}>`` placeholder
        occurs exactly once, or ``None`` when none of the attempts yields one.

    Raises:
        AssertionError: If ``get_n_ents(prompt)`` differs from
            ``len(ent_tuple)``.
    """
    assert get_n_ents(prompt) == len(ent_tuple)

    ent_tuple = [ent.lower() for ent in ent_tuple]
    sent = get_sent(prompt=prompt, ent_tuple=ent_tuple)

    for _ in range(5):
        raw_response = gpt3.call(prompt=f'paraphrase:\n{sent}\n')

        sentences = sent_tokenize(raw_response['choices'][0]['text'])
        if not sentences:
            # Empty or whitespace-only completion: there is no first sentence
            # to index, so use up this attempt instead of raising IndexError.
            continue

        para_sent = sentences[0].strip().strip('.').lower()
        print('para_sent:', para_sent)

        prompt = para_sent
        valid = True
        for idx, ent in enumerate(ent_tuple):
            placeholder = f'<ENT{idx}>'

            # Try every sentence/entity transformation pair until one of them
            # makes the entity appear exactly once; the count check turns the
            # remaining iterations into no-ops once the placeholder is set.
            for trans_sent in TRANSFORMATIONS_SENT:
                for trans_ent in TRANSFORMATIONS_ENT:
                    if prompt.count(placeholder) == 0:
                        transed_prompt = prompt.replace(*trans_sent)
                        transed_ent = ent.replace(*trans_ent)
                        if transed_prompt.count(transed_ent) == 1:
                            prompt = transed_prompt.replace(
                                transed_ent, placeholder)

            if prompt.count(placeholder) != 1:
                # This entity could not be located unambiguously; discard the
                # paraphrase and request another one.
                valid = False
                break

        if valid:
            return prompt

    return None
def search_prompts(init_prompts, seed_ent_tuples, *, similarity_threshold=75,
                   max_prompts=10, max_candidates=20):
    """Grow a set of mutually dissimilar prompts by repeated paraphrasing.

    Each round paraphrases the initial prompts (listed twice) and the prompts
    kept so far against every seed entity tuple, then keeps the candidates
    whose highest ``fuzz.ratio`` against the already kept prompts is below
    ``similarity_threshold``.

    Args:
        init_prompts: List of starting prompt templates.
        seed_ent_tuples: Entity tuples used to instantiate the prompts;
            underscores inside entities are replaced by spaces.
        similarity_threshold: A candidate is kept only if its maximum
            ``fuzz.ratio`` against the kept prompts is strictly below this.
        max_prompts: The search stops once at least this many prompts are kept.
        max_candidates: Candidate collection within a round stops once kept
            plus new distinct prompts reach this number.

    Returns:
        The kept prompts, de-duplicated and sorted by length (ties broken
        alphabetically so the order is reproducible).
    """
    gpt3 = GPT3()

    cache = {}
    prompts = []
    while True:
        new_prompts = []
        # `prompts` is not modified while candidates are collected, so the
        # concatenation can be built once per round.
        known_prompts = init_prompts + prompts

        # The initial prompts appear twice on purpose (as in the original
        # iteration order) and bypass the cache below, so every occurrence
        # triggers a fresh paraphrase request.
        for prompt in init_prompts + known_prompts:
            for ent_tuple in seed_ent_tuples:
                ent_tuple = [ent.replace('_', ' ') for ent in ent_tuple]

                request_str = f'{prompt} ||| {ent_tuple}'
                if request_str not in cache or prompt in init_prompts:
                    cache[request_str] = get_paraphrase_prompt(
                        gpt3=gpt3, prompt=prompt, ent_tuple=ent_tuple)

                para_prompt = cache[request_str]
                print(f'prompt: {prompt}\tent_tuple: {ent_tuple}'
                      f'\t-> para_prompt: {para_prompt}')

                if para_prompt is not None and \
                        para_prompt not in known_prompts:
                    new_prompts.append(para_prompt)

            if len(set(prompts + new_prompts)) >= max_candidates:
                break

        if not new_prompts:
            break

        added = False
        # Shorter candidates are considered first.
        for new_prompt in sorted(new_prompts, key=len):
            if prompts:
                max_sim = max(
                    fuzz.ratio(new_prompt, kept) for kept in prompts)
                print(f'-- {new_prompt}: {max_sim}')
                if max_sim >= similarity_threshold:
                    continue
            prompts.append(new_prompt)
            added = True

        # Sorting on (length, text) keeps the order independent of set
        # iteration order.
        prompts = sorted(set(prompts), key=lambda s: (len(s), s))

        if len(prompts) >= max_prompts or not added:
            break

    return prompts
def main(rel_set='conceptnet'):
    """Search prompts for every relation of a relation set and save them.

    Reads ``relation_info/{rel_set}.json``, runs ``search_prompts`` for each
    relation that has no prompts yet, passes every prompt through
    ``fix_prompt_style`` and writes the result back to the same file.

    Args:
        rel_set: Name of the relation set, i.e. the stem of the JSON file
            inside ``relation_info/``.
    """
    info_path = f'relation_info/{rel_set}.json'
    with open(info_path, encoding='utf-8') as info_file:
        relation_info = json.load(info_file)

    for info in relation_info.values():
        if not info.get('prompts'):
            info['prompts'] = search_prompts(
                init_prompts=info['init_prompts'],
                seed_ent_tuples=info['seed_ent_tuples'])

            for key, value in info.items():
                print(f'{key}: {value}')
            for prompt in info['prompts']:
                print(f'- {prompt}')
            print('=' * 50)

        info['prompts'] = [
            fix_prompt_style(prompt) for prompt in info['prompts']]

        # Written after every relation rather than once at the end, so the
        # relations finished so far are on disk if a later one fails.  The
        # `with` block closes (and thereby flushes) the file each time.
        with open(info_path, 'w', encoding='utf-8') as info_file:
            json.dump(relation_info, info_file, indent=4)
# Script entry point: python-fire exposes main() as a command-line interface,
# e.g. `python search_prompts.py --rel_set conceptnet`.
if __name__ == '__main__':
    fire.Fire(main)