mirror of
https://github.com/AI4Finance-Foundation/FinGPT.git
synced 2024-02-15 23:10:01 +03:00
add requirements.txt to replace conda-.yml documentation; remove 2 lines from scraper to ensure compatibility
This commit is contained in:
@@ -9,8 +9,8 @@
|
||||
|
||||
## Setup
|
||||
|
||||
* Visit environment_news_scraping.yml for the environment setup
|
||||
* Set up your .env file; you can refer to /FinGPT_sentiment/.env.example for an example
|
||||
* Visit FinGPT_RAG/requirements.txt for the environment setup
|
||||
|
||||
``` python
|
||||
|
||||
|
||||
0
fingpt/FinGPT_RAG/multisource_retrieval/__init__.py
Normal file
0
fingpt/FinGPT_RAG/multisource_retrieval/__init__.py
Normal file
2487
fingpt/FinGPT_RAG/multisource_retrieval/data/sent_valid_scraped.csv
Normal file
2487
fingpt/FinGPT_RAG/multisource_retrieval/data/sent_valid_scraped.csv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -26,6 +26,7 @@ from sentence_processing.split_sentence import split_sentence
|
||||
from scrapers.cnbc import scrape_cnbc
|
||||
from scrapers.market_screener import scrape_market_screener
|
||||
from scrapers import url_encode
|
||||
from scrapers.google.scrape_google import scrape_google
|
||||
|
||||
# TODO: Twitter API requests # https://twitter.com/bryan4665/
|
||||
|
||||
@@ -607,24 +608,20 @@ def select_column_and_classify():
|
||||
choices=column_names)
|
||||
if not sentence_column:
|
||||
raise ValueError("Invalid context selected selection")
|
||||
classification_column = gui.buttonbox("Column Selection",
|
||||
"Select the column for classification in the CSV:",
|
||||
choices=column_names)
|
||||
if not classification_column:
|
||||
raise ValueError("Invalid context classification column selection")
|
||||
|
||||
counter = 0 # Counter variable to track the number of rows processed
|
||||
row_index_input = gui.enterbox("Enter the row index to classify", "Row Index Input")
|
||||
row_index_input = gui.enterbox("Enter the row index to classify", "Row Index Input", 1)
|
||||
if row_index_input is None or not row_index_input.isdigit() or int(row_index_input) >= len(df):
|
||||
row_index = 1 # Set a default starting index
|
||||
else:
|
||||
row_index = int(row_index_input)
|
||||
|
||||
print("loaded file as df: ", df)
|
||||
|
||||
for row_index, row in itertools.islice(df.iterrows(), row_index, None):
|
||||
# If "link" is set (neither empty nor "N/A") and the sentence differs from "contextualized_sentence", context has already been added, so skip
|
||||
if process_existing_file and row["link"] != "N/A" and not pd.isnull(row["link"]) and row[sentence_column] != row["contextualized_sentence"]:
|
||||
continue
|
||||
|
||||
# if process_existing_file and row["link"] != "N/A" and not pd.isnull(row["link"]) and row[sentence_column] != row["contextualized_sentence"]:
|
||||
# continue
|
||||
target_sentence = row[sentence_column]
|
||||
ticker, remaining_sentence, link = split_sentence(target_sentence)
|
||||
|
||||
|
||||
@@ -1,9 +1,154 @@
|
||||
tokenizers>=0.13.3
|
||||
bitsandbytes
|
||||
datasets>=2.8.0
|
||||
sentencepiece>=0.1.97
|
||||
protobuf==3.20.3
|
||||
accelerate>=0.15.0
|
||||
torch>=1.12.0
|
||||
deepspeed>=0.9.0
|
||||
git+https://github.com/huggingface/transformers
|
||||
accelerate==0.23.0
|
||||
aiohttp==3.8.5
|
||||
aiosignal==1.3.1
|
||||
anyio==4.0.0
|
||||
appnope==0.1.3
|
||||
argon2-cffi==23.1.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
arrow==1.3.0
|
||||
asttokens==2.4.0
|
||||
async-lru==2.0.4
|
||||
async-timeout==4.0.3
|
||||
attrs==23.1.0
|
||||
Babel==2.12.1
|
||||
backcall==0.2.0
|
||||
beautifulsoup4==4.12.2
|
||||
bleach==6.0.0
|
||||
bs4==0.0.1
|
||||
certifi==2023.7.22
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.0
|
||||
comm==0.1.4
|
||||
datasets==2.14.5
|
||||
debugpy==1.8.0
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
dill==0.3.7
|
||||
easygui==0.98.2
|
||||
executing==2.0.0
|
||||
fastjsonschema==2.18.1
|
||||
filelock==3.12.4
|
||||
fqdn==1.5.1
|
||||
frozenlist==1.4.0
|
||||
fsspec==2023.6.0
|
||||
h11==0.14.0
|
||||
huggingface-hub==0.16.4
|
||||
idna==3.4
|
||||
ipykernel==6.25.2
|
||||
ipython==8.16.0
|
||||
ipython-genutils==0.2.0
|
||||
ipywidgets==8.1.1
|
||||
isoduration==20.11.0
|
||||
jedi==0.19.0
|
||||
Jinja2==3.1.2
|
||||
joblib==1.3.2
|
||||
json5==0.9.14
|
||||
jsonpointer==2.4
|
||||
jsonschema==4.19.1
|
||||
jsonschema-specifications==2023.7.1
|
||||
jupyter==1.0.0
|
||||
jupyter-console==6.6.3
|
||||
jupyter-events==0.7.0
|
||||
jupyter-lsp==2.2.0
|
||||
jupyter_client==8.3.1
|
||||
jupyter_core==5.3.2
|
||||
jupyter_server==2.7.3
|
||||
jupyter_server_terminals==0.4.4
|
||||
jupyterlab==4.0.6
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab-widgets==3.0.9
|
||||
jupyterlab_server==2.25.0
|
||||
loguru==0.7.2
|
||||
lxml==4.9.3
|
||||
MarkupSafe==2.1.3
|
||||
matplotlib-inline==0.1.6
|
||||
mistune==3.0.2
|
||||
mpmath==1.3.0
|
||||
multidict==6.0.4
|
||||
multiprocess==0.70.15
|
||||
nbclient==0.8.0
|
||||
nbconvert==7.8.0
|
||||
nbformat==5.9.2
|
||||
nest-asyncio==1.5.8
|
||||
networkx==3.1
|
||||
notebook==7.0.4
|
||||
notebook_shim==0.2.3
|
||||
numpy==1.26.0
|
||||
oauthlib==3.2.2
|
||||
openai==0.28.1
|
||||
outcome==1.2.0
|
||||
overrides==7.4.0
|
||||
packaging==23.2
|
||||
pandas==2.1.1
|
||||
pandocfilters==1.5.0
|
||||
parso==0.8.3
|
||||
peft==0.5.0
|
||||
pexpect==4.8.0
|
||||
pickleshare==0.7.5
|
||||
platformdirs==3.10.0
|
||||
prometheus-client==0.17.1
|
||||
prompt-toolkit==3.0.39
|
||||
psutil==5.9.5
|
||||
ptyprocess==0.7.0
|
||||
pure-eval==0.2.2
|
||||
pyarrow==13.0.0
|
||||
pycparser==2.21
|
||||
Pygments==2.16.1
|
||||
PySocks==1.7.1
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-json-logger==2.0.7
|
||||
pytz==2023.3.post1
|
||||
PyYAML==6.0.1
|
||||
pyzmq==25.1.1
|
||||
qtconsole==5.4.4
|
||||
QtPy==2.4.0
|
||||
referencing==0.30.2
|
||||
regex==2023.10.3
|
||||
requests==2.31.0
|
||||
requests-oauthlib==1.3.1
|
||||
rfc3339-validator==0.1.4
|
||||
rfc3986-validator==0.1.1
|
||||
rpds-py==0.10.3
|
||||
safetensors==0.3.3
|
||||
scikit-learn==1.3.1
|
||||
scipy==1.11.3
|
||||
searchtweets==1.7.6
|
||||
selenium==4.13.0
|
||||
Send2Trash==1.8.2
|
||||
simplejson==3.19.1
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.5
|
||||
stack-data==0.6.3
|
||||
sympy==1.12
|
||||
terminado==0.17.1
|
||||
threadpoolctl==3.2.0
|
||||
tinycss2==1.2.1
|
||||
tokenizers==0.14.0
|
||||
torch==2.0.1
|
||||
tornado==6.3.3
|
||||
tqdm==4.66.1
|
||||
traitlets==5.10.1
|
||||
transformers==4.34.0
|
||||
trio==0.22.2
|
||||
trio-websocket==0.11.1
|
||||
tushare==1.2.89
|
||||
tweepy==4.14.0
|
||||
tweet-parser==1.13.2
|
||||
types-python-dateutil==2.8.19.14
|
||||
typing_extensions==4.8.0
|
||||
tzdata==2023.3
|
||||
uri-template==1.3.0
|
||||
urllib3==2.0.5
|
||||
wcwidth==0.2.8
|
||||
webcolors==1.13
|
||||
webdriver-manager==4.0.1
|
||||
webencodings==0.5.1
|
||||
websocket-client==0.57.0
|
||||
widgetsnbextension==4.0.9
|
||||
wsproto==1.2.0
|
||||
xxhash==3.3.0
|
||||
yarl==1.9.2
|
||||
zenrows==1.3.1
|
||||
|
||||
Reference in New Issue
Block a user