mirror of
https://github.com/AI4Finance-Foundation/FinGPT.git
synced 2024-02-15 23:10:01 +03:00
Merge pull request #117 from raphaelzhou1/master
Sync, resolve issue that FinGPT-RAG lacks .env.example file
This commit is contained in:
@@ -23,23 +23,23 @@ def find_different_rows():
|
||||
else:
|
||||
gui.msgbox("No rows found without 'http' in 'link' column.")
|
||||
|
||||
# if file_path:
|
||||
# # Read CSV file using pandas
|
||||
# df = pd.read_csv(file_path)
|
||||
#
|
||||
# # Ensure "text" and "contextualized sentences" columns exist
|
||||
# if "text" not in df.columns or "contextualized_sentence" not in df.columns:
|
||||
# gui.msgbox("Either or both 'text' and 'contextualized_sentences' columns are missing.")
|
||||
# return
|
||||
#
|
||||
# # Find rows where "text" and "contextualized sentences" values are different
|
||||
# different_rows = df[df['text'] != df['contextualized_sentence']]
|
||||
#
|
||||
# # Report the different row indexes
|
||||
# if not different_rows.empty:
|
||||
# gui.msgbox("total number is {}".format(len(different_rows.index.tolist())))
|
||||
# else:
|
||||
# gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")
|
||||
if file_path:
|
||||
# Read CSV file using pandas
|
||||
df = pd.read_csv(file_path)
|
||||
|
||||
# Ensure "text" and "contextualized sentences" columns exist
|
||||
if "text" not in df.columns or "contextualized_sentence" not in df.columns:
|
||||
gui.msgbox("Either or both 'text' and 'contextualized_sentences' columns are missing.")
|
||||
return
|
||||
|
||||
# Find rows where "text" and "contextualized sentences" values are different
|
||||
different_rows = df[df['text'] != df['contextualized_sentence']]
|
||||
|
||||
# Report the different row indexes
|
||||
if not different_rows.empty:
|
||||
gui.msgbox("total number is {}".format(len(different_rows.index.tolist())))
|
||||
else:
|
||||
gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")
|
||||
|
||||
else:
|
||||
gui.msgbox("No file selected.")
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chromium.service import ChromiumService
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
|
||||
# Smoke test: start Chrome through Selenium, load a page, print its title.

# Set up ChromeOptions
options = webdriver.ChromeOptions()
# To test against a specific Chrome build, set options.binary_location to
# that browser binary here, before the driver is created.

# Resolve the ChromeDriver binary through webdriver_manager instead of a
# hard-coded, machine-specific path (the previous hard-coded value was
# overwritten immediately and never used).
executable_path = ChromeDriverManager().install()
service = ChromiumService(executable_path=executable_path)
driver = webdriver.Chrome(service=service, options=options)

try:
    driver.get('https://www.google.com')
    print(driver.title)
finally:
    # Always shut the browser down, even if navigation fails, so no
    # Chrome/ChromeDriver process is left running.
    driver.quit()
|
||||
@@ -8,7 +8,7 @@ from external_LLMs import external_LLMs
|
||||
import pandas as pd
|
||||
import openai
|
||||
from datasets import load_dataset
|
||||
from sklearn.metrics import accuracy_score, f1_score,confusion_matrix
|
||||
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
|
||||
from tqdm import tqdm
|
||||
|
||||
try:
|
||||
@@ -33,12 +33,15 @@ try:
|
||||
df = df.dropna(subset=[actual_classifications_column, predicted_classifications_column])
|
||||
df[actual_classifications_column] = df[actual_classifications_column].astype(int)
|
||||
df[predicted_classifications_column] = df[predicted_classifications_column].astype(int)
|
||||
computed_f1 = f1_score(df[actual_classifications_column], df[predicted_classifications_column], average='micro')
|
||||
computed_f1 = f1_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
|
||||
computed_accuracy_score = accuracy_score(df[actual_classifications_column], df[predicted_classifications_column])
|
||||
|
||||
computed_precision_score = precision_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
|
||||
computed_recall_score = recall_score(df[actual_classifications_column], df[predicted_classifications_column], average=None)
|
||||
|
||||
print("f1 score: ", computed_f1)
|
||||
print("accuracy score: ", computed_accuracy_score)
|
||||
print("precision score: ", computed_precision_score)
|
||||
print("recall score: ", computed_recall_score)
|
||||
|
||||
except Exception as e:
|
||||
gui.exceptionbox(str(e))
|
||||
@@ -0,0 +1,48 @@
|
||||
import pandas as pd
|
||||
import easygui as gui
|
||||
|
||||
def find_different_rows():
    """Report link and text-consistency problems in a user-selected CSV file.

    Opens an easygui file dialog, loads the chosen CSV with pandas and shows
    the results in message boxes:

    1. the number of rows whose ``link`` value does not contain ``"http"``;
    2. the number of rows whose ``text`` and ``contextualized_sentence``
       values differ.

    The function stops early (after a message box) when no file is selected
    or when a required column is missing.

    Returns:
        None. Every outcome is reported through a message box.
    """
    # Prompt user to select CSV file
    file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
    if not file_path:
        gui.msgbox("No file selected.")
        return

    # Read the CSV once; both checks below run on the same DataFrame.
    df = pd.read_csv(file_path)

    # --- Check 1: rows whose "link" does not contain "http" ---
    if "link" not in df.columns:
        gui.msgbox("'link' column is missing.")
        return

    # astype(str) keeps the .str accessor usable when the column holds no
    # strings at all (e.g. every cell is empty, so pandas parsed it as float
    # NaN). Missing values still count as "no http". regex=False because
    # "http" is a literal substring, not a pattern.
    has_http = df["link"].astype(str).str.contains("http", regex=False, na=False)
    non_http_rows = df[~has_http]

    # Report the count of non-http links
    if not non_http_rows.empty:
        gui.msgbox(
            "Total number of rows without 'http' in 'link' column: {}".format(
                len(non_http_rows)
            )
        )
    else:
        gui.msgbox("No rows found without 'http' in 'link' column.")

    # --- Check 2: rows where "text" and "contextualized_sentence" differ ---
    if "text" not in df.columns or "contextualized_sentence" not in df.columns:
        # Name the column exactly as it is looked up so the user can fix the CSV.
        gui.msgbox("Either or both 'text' and 'contextualized_sentence' columns are missing.")
        return

    text = df["text"]
    contextualized = df["contextualized_sentence"]
    # NaN != NaN evaluates to True in pandas, so rows where both cells are
    # empty would otherwise be reported as "different".
    both_missing = text.isna() & contextualized.isna()
    different_rows = df[(text != contextualized) & ~both_missing]

    # Report how many rows differ
    if not different_rows.empty:
        gui.msgbox("total number is {}".format(len(different_rows)))
    else:
        gui.msgbox("No rows found with different values for 'text' and 'contextualized_sentences'.")
|
||||
|
||||
# Script entry point: run the interactive check only on direct execution,
# not when this module is imported.
if __name__ == '__main__':
    find_different_rows()
|
||||
@@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
import easygui as gui
|
||||
|
||||
def find_abnormal_rows():
    """Report rows of a user-selected CSV whose field count differs from row 0.

    Opens an easygui file dialog, counts the fields of every non-blank row
    and shows, in a message box, the 0-based indexes of the rows whose field
    count differs from that of the first row.

    The raw file is parsed with the standard ``csv`` module rather than
    ``pandas.read_csv``: pandas always yields a rectangular frame (short rows
    are padded with NaN, long rows raise a parser error), so comparing
    ``len(row)`` on a DataFrame can never detect a ragged row.

    Returns:
        None. Every outcome is reported through a message box.
    """
    # Local import so this block does not depend on a new file-level import.
    import csv

    # Prompt user to select CSV file
    file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
    if not file_path:
        gui.msgbox("No file selected.")
        return

    # newline="" lets csv.reader handle quoted fields that contain line
    # breaks. Blank lines are skipped, which matches pandas' default
    # behaviour and keeps the reported indexes aligned with data rows.
    with open(file_path, newline="", encoding="utf-8") as csv_file:
        field_counts = [len(row) for row in csv.reader(csv_file) if row]

    # The first row defines the expected width; an empty file has no rows
    # and therefore nothing abnormal to report.
    expected_num_columns = field_counts[0] if field_counts else 0

    # Find rows with abnormal number of columns
    abnormal_rows = [
        index
        for index, count in enumerate(field_counts)
        if count != expected_num_columns
    ]

    # Report the abnormal row indexes
    if abnormal_rows:
        gui.msgbox("Abnormal rows found with inconsistent number of columns:\n{}".format(abnormal_rows))
    else:
        gui.msgbox("No abnormal rows found with inconsistent number of columns.")
|
||||
|
||||
# Script entry point: run the interactive check only on direct execution,
# not when this module is imported.
if __name__ == '__main__':
    find_abnormal_rows()
|
||||
@@ -0,0 +1,24 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import easygui as gui
|
||||
|
||||
def classify_csv_file():
    """Label a user-selected CSV with the "Seeking Alpha" classification.

    Opens an easygui file dialog, sets the ``classification`` column to
    ``"Seeking Alpha"`` for every row from index label 1 onwards, and writes
    the result next to the input file as ``<original name>_classified.csv``.

    Cancelling the dialog shows "No file selected." and does nothing else.
    Any exception raised while reading, editing or writing the data is shown
    in an easygui exception box instead of propagating.

    Returns:
        None. Every outcome is reported through a dialog box.
    """
    try:
        # Prompt user to select CSV file
        file_path = gui.fileopenbox("Select CSV file", filetypes=["*.csv"])
        if not file_path:
            # fileopenbox returns None on cancel; report it plainly instead of
            # letting pd.read_csv(None) fail with an opaque error.
            gui.msgbox("No file selected.")
            return

        df = pd.read_csv(file_path)

        # .loc slices by index label, so with the default index the first
        # data row (label 0) is left untouched.
        # NOTE(review): skipping row 0 follows the original comment ("from
        # index 1 onwards") -- confirm this is intended.
        df.loc[1:, "classification"] = "Seeking Alpha"

        # Save the classified CSV file alongside the input
        base_name = os.path.basename(file_path)
        stem = os.path.splitext(base_name)[0]
        output_file_path = os.path.join(os.path.dirname(file_path), f"{stem}_classified.csv")
        df.to_csv(output_file_path, index=False)

        gui.msgbox("Classification Complete")
    except Exception as e:
        # Top-level boundary of an interactive tool: show the failure to the user.
        gui.exceptionbox(str(e))
|
||||
|
||||
# Script entry point: run the interactive tool only on direct execution,
# not when this module is imported.
if __name__ == '__main__':
    classify_csv_file()
|
||||
Reference in New Issue
Block a user