Extract news-source classification logic out of external_LLMs and move it into utils

2024-02-15 23:10:01 +03:00 · 2023-10-12 07:43:59 -04:00
parent aada7601d7
commit 0a69d23afd
3 changed files with 26 additions and 15 deletions
--- a/fingpt/FinGPT-RAG/multisource_retrieval/external_LLMs/external_LLMs.py
+++ b/fingpt/FinGPT-RAG/multisource_retrieval/external_LLMs/external_LLMs.py
@@ -37,17 +37,6 @@ def extract_classification(text, classification_prompt):
        json_data = response.json()
        print("json data", json_data)
        classification_response = json_data["choices"][0]['message']['content'].strip()
-        if "Twitter" in classification_response:
-            classification_response = "Twitter"
-        elif "Seeking Alpha" in classification_response:
-            classification_response = "Seeking Alpha"
-        elif "Reuters" in classification_response:
-            classification_response = "Reuters"
-        elif "WSJ" in classification_response:
-            classification_response = "WSJ"
-        else:
-            classification_response = "Unknown"
-
        print("Classification response:", classification_response)
        return classification_response
    except requests.exceptions.RequestException as e:
--- a/fingpt/FinGPT-RAG/multisource_retrieval/utils/classify_news_sources.py
+++ b/fingpt/FinGPT-RAG/multisource_retrieval/utils/classify_news_sources.py
@@ -34,6 +34,16 @@ try:
    for row_index, row in df.iloc[1:].iterrows():
        target_sentence = row[sentence_column]
        classification_response = external_LLMs.extract_classification(target_sentence, classification_prompt)
+        if "Twitter" in classification_response:  # collapse the free-text reply to one canonical label; first case-sensitive substring match wins
+            classification_response = "Twitter"
+        elif "Seeking Alpha" in classification_response:
+            classification_response = "Seeking Alpha"
+        elif "Reuters" in classification_response:
+            classification_response = "Reuters"
+        elif "WSJ" in classification_response:
+            classification_response = "WSJ"
+        else:
+            classification_response = "Unknown"  # reply names none of the four known sources
        df.at[row_index, "classification"] = classification_response  # Assign classification response to the new column

    output_file_path = os.path.splitext(file_path)[0] + "_classified.csv"
--- a/fingpt/FinGPT-RAG/multisource_retrieval/utils/sentiment_classification_by_external_LLMs.py
+++ b/fingpt/FinGPT-RAG/multisource_retrieval/utils/sentiment_classification_by_external_LLMs.py
@@ -23,7 +23,7 @@ try:
    if not sentence_column:
        raise ValueError("Invalid column selection")

-    df["classification"] = ""  # Create a new column named "classification"
+    df["openai_inferred_sentiment"] = ""  # Create a new column named "openai_inferred_sentiment"
    default_classification_prompt = ". For financial statement above, determine its sentiment (based on your existing knowledge). Your answer should be either \"negative\" or \"neutral\" or \"positive\""
    classification_prompt = gui.enterbox("Modify the classification prompt:", "Custom Classification Prompt",
                                         default_classification_prompt)
@@ -31,13 +31,27 @@ try:
    if not classification_prompt:
        classification_prompt = default_classification_prompt

+    counter = 0
+    output_file_path = os.path.splitext(file_path)[0] + "_classified.csv"
    for row_index, row in df.iloc[1:].iterrows():
        target_sentence = row[sentence_column]
        classification_response = external_LLMs.extract_classification(target_sentence, classification_prompt)
-        df.at[row_index, "classification"] = classification_response  # Assign classification response to the new column
+        if "negative" in classification_response:  # map the reply to an integer code: negative=0, positive=1, neutral=2
+            classification_response = 0
+        elif "positive" in classification_response:
+            classification_response = 1
+        elif "neutral" in classification_response:
+            classification_response = 2
+        df.at[row_index, "openai_inferred_sentiment"] = classification_response  # a reply matching none of the labels is stored unchanged
+
+        counter += 1
+
+        # Save the DataFrame to a CSV file every 10 rows
+        if counter % 10 == 0:
+            df.to_csv(output_file_path, index=False)

-    output_file_path = os.path.splitext(file_path)[0] + "_classified.csv"
+    # Final save: rows processed after the last 10-row checkpoint (or all rows, if fewer than 10) are not on disk yet
    df.to_csv(output_file_path, index=False)
    gui.msgbox("Classification Complete")
 except Exception as e:
    gui.exceptionbox(str(e))