Merge pull request #117 from raphaelzhou1/master

Sync, resolve issue that FinGPT-RAG lacks .env.example file
This commit is contained in:
Bruce Yang FinTech
2023-11-10 09:50:51 +08:00
committed by GitHub
6 changed files with 143 additions and 20 deletions

View File

@@ -23,23 +23,23 @@ def find_different_rows():
else:
gui.msgbox("No rows found without 'http' in 'link' column.")
# if file_path:
# # Read CSV file using pandas
# df = pd.read_csv(file_path)
#
# # Ensure "text" and "contextualized sentences" columns exist
# if "text" not in df.columns or "contextualized_sentence" not in df.columns:
# gui.msgbox("Either or both 'text' and 'contextualized_sentences' columns are missing.")
# return
#
# # Find rows where "text" and "contextualized sentences" values are different
# different_rows = df[df['text'] != df['contextualized_sentence']]
#
# # Report the different row indexes
# if not different_rows.empty:
# gui.msgbox("total number is {}".format(len(different_rows.index.tolist())))
# else:
# gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")
if file_path:
# Read CSV file using pandas
df = pd.read_csv(file_path)
# Ensure "text" and "contextualized sentences" columns exist
if "text" not in df.columns or "contextualized_sentence" not in df.columns:
gui.msgbox("Either or both 'text' and 'contextualized_sentences' columns are missing.")
return
# Find rows where "text" and "contextualized sentences" values are different
different_rows = df[df['text'] != df['contextualized_sentence']]
# Report the different row indexes
if not different_rows.empty:
gui.msgbox("total number is {}".format(len(different_rows.index.tolist())))
else:
gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")
else:
gui.msgbox("No file selected.")

View File

@@ -0,0 +1,17 @@
from selenium import webdriver
from selenium.webdriver.chromium.service import ChromiumService
from webdriver_manager.chrome import ChromeDriverManager
# Smoke test: launch Chrome through Selenium, load one page, print its title.

# Default Chrome options. To drive a custom Chrome build, set
# options.binary_location to that browser binary before creating the driver.
options = webdriver.ChromeOptions()

# Use the driver path reported by webdriver_manager rather than a hard-coded,
# machine-specific location, so the script is not tied to one workstation.
executable_path = ChromeDriverManager().install()
service = ChromiumService(executable_path=executable_path)
driver = webdriver.Chrome(service=service, options=options)
try:
    driver.get('https://www.google.com')
    print(driver.title)
finally:
    # Always shut the session down, even if navigation fails, so no browser
    # or driver process is left running.
    driver.quit()

View File

@@ -8,7 +8,7 @@ from external_LLMs import external_LLMs
import pandas as pd
import openai
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score,confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from tqdm import tqdm
try:
@@ -33,12 +33,15 @@ try:
df = df.dropna(subset=[actual_classifications_column, predicted_classifications_column])
df[actual_classifications_column] = df[actual_classifications_column].astype(int)
df[predicted_classifications_column] = df[predicted_classifications_column].astype(int)
computed_f1 = f1_score(df[actual_classifications_column], df[predicted_classifications_column], average='micro')
computed_f1 = f1_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
computed_accuracy_score = accuracy_score(df[actual_classifications_column], df[predicted_classifications_column])
computed_precision_score = precision_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
computed_recall_score = recall_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
print("f1 score: ", computed_f1)
print("accuracy score: ", computed_accuracy_score)
print("precision score: ", computed_precision_score)
print("recall score: ", computed_recall_score)
except Exception as e:
gui.exceptionbox(str(e))

View File

@@ -0,0 +1,48 @@
import pandas as pd
import easygui as gui
def find_different_rows():
    """Check a user-selected CSV for non-http links and for changed sentences.

    Opens a file picker, loads the chosen CSV once, and reports two counts in
    message boxes:

    1. rows whose 'link' value does not contain "http" (missing values count
       as not containing it);
    2. rows whose 'text' and 'contextualized_sentence' values differ.

    If a required column is missing, a message box says so and the function
    stops; a missing 'link' column therefore also skips the second check.
    If no file is chosen, "No file selected." is shown.
    """
    file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
    if not file_path:
        gui.msgbox("No file selected.")
        return

    # Parse the file once; both checks run against the same frame.
    df = pd.read_csv(file_path)

    # --- Check 1: links that do not contain "http" ---
    if "link" not in df.columns:
        gui.msgbox("'link' column is missing.")
        return
    # astype(str) keeps the .str accessor usable when pandas infers a
    # non-string dtype for the column (e.g. an all-empty column parses as
    # float); na=False still treats any remaining missing value as "no http".
    has_http = df["link"].astype(str).str.contains("http", na=False, regex=False)
    non_http_count = int((~has_http).sum())
    if non_http_count:
        gui.msgbox("Total number of rows without 'http' in 'link' column: {}".format(non_http_count))
    else:
        gui.msgbox("No rows found without 'http' in 'link' column.")

    # --- Check 2: rows where the two text columns differ ---
    if "text" not in df.columns or "contextualized_sentence" not in df.columns:
        gui.msgbox("Either or both 'text' and 'contextualized_sentences' columns are missing.")
        return
    different_count = int((df["text"] != df["contextualized_sentence"]).sum())
    if different_count:
        gui.msgbox("total number is {}".format(different_count))
    else:
        gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")


if __name__ == '__main__':
    find_different_rows()

View File

@@ -0,0 +1,31 @@
import pandas as pd
import easygui as gui
def find_abnormal_rows():
    """Report rows of a user-selected CSV whose field count differs from row 0.

    Opens a file picker, counts the fields of every non-blank row, and shows
    a message box listing the 0-based indexes (among non-blank rows) of rows
    whose field count differs from the first row. If no file is chosen,
    "No file selected." is shown.

    The rows are read with the standard-library ``csv`` reader instead of
    ``pandas.read_csv``: a DataFrame is always rectangular, so comparing the
    length of each DataFrame row with the first one can never find a
    mismatch.
    """
    # Local import so this function does not depend on the file's import list.
    import csv

    file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
    if not file_path:
        gui.msgbox("No file selected.")
        return

    # newline="" is what the csv module requires so that quoted fields
    # containing line breaks are parsed as a single row.
    with open(file_path, newline="", encoding="utf-8") as handle:
        # csv.reader yields [] for a blank line; skip those so they are not
        # reported as rows with zero fields.
        field_counts = [len(fields) for fields in csv.reader(handle) if fields]

    abnormal_rows = []
    if field_counts:
        expected_num_columns = field_counts[0]
        abnormal_rows = [
            index
            for index, count in enumerate(field_counts)
            if count != expected_num_columns
        ]

    if abnormal_rows:
        gui.msgbox("Abnormal rows found with inconsistent number of columns:\n{}".format(abnormal_rows))
    else:
        gui.msgbox("No abnormal rows found with inconsistent number of columns.")


if __name__ == '__main__':
    find_abnormal_rows()

View File

@@ -0,0 +1,24 @@
import os
import pandas as pd
import easygui as gui
def classify_csv_file():
    """Set the 'classification' column of a chosen CSV and save a copy.

    Opens a file picker, loads the chosen CSV, writes "Seeking Alpha" into the
    'classification' column for index labels 1 and above, and saves the result
    next to the input as ``<name>_classified.csv``. If no file is chosen,
    "No file selected." is shown. Any exception raised while reading,
    updating or writing is shown in an exception box instead of propagating.
    """
    try:
        file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
        if not file_path:
            # Without this guard the empty selection reaches pd.read_csv and
            # surfaces only as a confusing exception box.
            gui.msgbox("No file selected.")
            return

        df = pd.read_csv(file_path)

        # NOTE(review): .loc slices by label, so the row labelled 0 (the first
        # data row, since read_csv already consumed the header line) is left
        # unset -- confirm that skipping it is intended.
        df.loc[1:, "classification"] = "Seeking Alpha"

        # splitext only splits the extension of the last path component, so
        # the output lands in the same directory as the input.
        root, _ = os.path.splitext(file_path)
        output_file_path = f"{root}_classified.csv"
        df.to_csv(output_file_path, index=False)

        gui.msgbox("Classification Complete")
    except Exception as e:
        gui.exceptionbox(str(e))


if __name__ == '__main__':
    classify_csv_file()