reorganize classification by calling external LLMs

raphaelzhou1
2023-10-08 05:32:28 -04:00
parent 6acd4a495c
commit 9bf8a5ffe4
4 changed files with 42 additions and 41 deletions


@@ -1,4 +1,4 @@
sentence,snippets,target,sentiment_score,aspects,format,label
sentence,snippets,target,sentiment_score,aspects,format,label, contextualized_sentence
$HCP Come to the party and buy this -gonna give solid gains and a dividend $$$$$$,['gonna give solid gains and a dividend'],HCP,0.52,['Corporate/Dividend Policy'],post,0
@gakrum nice chart shows distinctive down channel not a dip.. where do you see the bottom? $SPY ..$150? ..$130?,['chart shows distinctive down channel'],SPY,-0.443,['Stock/Technical Analysis'],post,2
Japan's Asahi to submit bid next week for SABMiller's Grolsch and Peroni - Yomiuri,['to submit bid next week'],SABMiller,0.236,['Stock/Buyside/Stock Buyside'],headline,0
Note: this file no longer parses cleanly as CSV, because the new header row declares eight fields while the existing data rows still contain seven.
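Until the data rows are regenerated with the new column, the file can still be loaded defensively; a minimal sketch (the file name is hypothetical), where skipinitialspace also strips the stray space after "label," in the new header:

import pandas as pd

# Short rows are padded with NaN for the new trailing column;
# skipinitialspace=True strips the space in the header field name.
df = pd.read_csv("news_dataset.csv", skipinitialspace=True)  # hypothetical file name
df["contextualized_sentence"] = df["contextualized_sentence"].fillna("")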


@@ -0,0 +1,36 @@
import os

import requests

# Classification methods:
def extract_classification(text, classification_prompt):
    print("Extracting classification for", text)
    api_key = os.getenv('OPENAI_API_KEY')
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    payload = {
        'model': 'gpt-3.5-turbo',
        "messages": [
            {
                "role": "system",
                "content": "You are a financial analyst."
            },
            {
                "role": "user",
                "content": text + classification_prompt,
            }
        ],
    }
    print("Sending request to", api_url, "with payload", payload)
    try:
        response = requests.post(api_url, headers=headers, json=payload)
        json_data = response.json()
        print("json data", json_data)
        # Chat completions nest the text under choices -> message -> content,
        # not in a top-level list of completions.
        classification_response = json_data['choices'][0]['message']['content'].strip()
        print("Classification response:", classification_response)
        return classification_response
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return None


@@ -21,6 +21,7 @@ from searchtweets import load_credentials
# From src/
import requests_url
from requests_url import requests_get
from external_LLMs import external_LLMs
from scrapers.yahoo import scrape_yahoo
from sentence_processing.split_sentence import split_sentence
from scrapers.cnbc import scrape_cnbc
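The `from external_LLMs import external_LLMs` form implies the helper lives in a module inside a package of the same name; a plausible layout, which is an assumption since the file tree is not shown in this diff:

# Assumed layout under src/ (hypothetical; only the import line is in the commit):
#   external_LLMs/
#       __init__.py
#       external_LLMs.py   # the new file above, defining extract_classification
from external_LLMs import external_LLMs

label = external_LLMs.extract_classification("sample sentence", "sample prompt")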
@@ -43,45 +44,6 @@ twitter_bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
# auth = tweepy.OAuth1UserHandler(twitter_api_key, twitter_api_key_secret, twitter_access_token, twitter_access_token_secret)
# api = tweepy.API(auth)
# Classification methods:
def extract_classification(text, classification_prompt):
    print("Extracting classification for", text)
    api_key = os.getenv('OPENAI_API_KEY')
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    payload = {
        'model': 'gpt-3.5-turbo',
        "messages": [
            {
                "role": "system",
                "content": "You are a financial analyst."
            },
            {
                "role": "user",
                "content": text + classification_prompt,
            }
        ],
    }
    print("Sending request to", api_url, "with payload", payload)
    try:
        response = requests.post(api_url, headers=headers, json=payload)
        json_data = response.json()
        print("json data", json_data)
        classification_response = json_data[0]['text'].strip()
        print("Classification response:", classification_response)
        return classification_response
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
# Scraping methods:
def url_encode_string(input_string):
    encoded_string = urllib.parse.quote(input_string)
@@ -702,7 +664,7 @@ def select_column_and_classify():
    for row_index, row in df.iloc[1:].iterrows():
        target_sentence = row[sentence_column]
        classification_response = extract_classification(target_sentence, classification_prompt)
        classification_response = external_LLMs.extract_classification(target_sentence, classification_prompt)
        df.at[row_index, "classification"] = classification_response  # Assign classification response to the new column
    output_file_path = os.path.splitext(file_path)[0] + "_classified.csv"
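One caveat with the new call site: extract_classification returns None when the request fails, so the DataFrame cell may want a string fallback; a minimal sketch (the empty-string default is an assumption):

classification_response = external_LLMs.extract_classification(target_sentence, classification_prompt)
# The helper returns None on a request error; default to "" so the
# "_classified.csv" output keeps a consistent string column.
df.at[row_index, "classification"] = classification_response or ""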
@@ -719,13 +681,16 @@ def select_column_and_classify():
    context_choice = gui.ynbox("Do you want to research the context for this news?", "Context Research")
    process_existing_file = gui.ynbox("Do you want to process an existing file?", "Context Research")
    if context_choice:
        print("cp 1")  # debug checkpoint
        file_path = gui.fileopenbox("Select the CSV file containing news for context research", filetypes=["*.csv"])
        df = pd.read_csv(file_path)
        column_names = df.columns.tolist()
        print("cp 2")  # debug checkpoint
        if not process_existing_file:
            df["link"] = ""  # Create a new column named "link"
            df["contextualized_sentence"] = ""  # Create a new column named "contextualized_sentence"
        if file_path:
            sentence_column = gui.buttonbox("Column Selection", "Select the column for target sentence in the CSV:",
                                            choices=column_names)