mirror of https://github.com/HKUDS/VideoRAG.git — synced 2025-05-11 03:54:36 +03:00

Commit: Update README and settings for the LongerVideos experiment.

Changed: README.md (17 lines), plus hunks in the experiment script, the example notebook, and videorag/_llm.py.
README.md:

````diff
@@ -114,7 +114,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
 # Please enter your openai key
 os.environ["OPENAI_API_KEY"] = ""
 
-from videorag._llm import *
+from videorag._llm import openai_4o_mini_config
 from videorag import VideoRAG, QueryParam
 
 
````
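Note: `openai_4o_mini_config` is not a pre-existing export; it is added to `videorag/_llm.py` by this same commit (see the final hunks below).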
````diff
@@ -127,7 +127,7 @@ if __name__ == '__main__':
         'movies/Iron-Man.mp4',
         'movies/Spider-Man.mkv',
     ]
-    videorag = VideoRAG(cheap_model_func=gpt_4o_mini_complete, best_model_func=gpt_4o_mini_complete, working_dir=f"./videorag-workdir")
+    videorag = VideoRAG(llm=openai_4o_mini_config, working_dir=f"./videorag-workdir")
     videorag.insert_video(video_path_list=video_paths)
 ```
 
````
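Put together with the import change above, the updated README example reduces to a single `llm=` argument. A minimal sketch of the new indexing flow, assuming the `movies/` files from the diff and a real API key:

```python
import os
import logging
import multiprocessing

logging.getLogger("httpx").setLevel(logging.WARNING)

# Please enter your openai key
os.environ["OPENAI_API_KEY"] = ""

from videorag._llm import openai_4o_mini_config
from videorag import VideoRAG

if __name__ == '__main__':
    # 'spawn' mirrors the experiment script further down in this diff
    multiprocessing.set_start_method('spawn')

    video_paths = [
        'movies/Iron-Man.mp4',
        'movies/Spider-Man.mkv',
    ]
    # One LLMConfig object now replaces the old cheap_model_func/best_model_func kwargs
    videorag = VideoRAG(llm=openai_4o_mini_config, working_dir="./videorag-workdir")
    videorag.insert_video(video_path_list=video_paths)
```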
````diff
@@ -156,7 +156,7 @@ if __name__ == '__main__':
     # if param.wo_reference = False, VideoRAG will add reference to video clips in the response
     param.wo_reference = True
 
-    videorag = VideoRAG(cheap_model_func=gpt_4o_mini_complete, best_model_func=gpt_4o_mini_complete, working_dir=f"./videorag-workdir")
+    videorag = VideoRAG(llm=openai_4o_mini_config, working_dir=f"./videorag-workdir")
     videorag.load_caption_model(debug=False)
     response = videorag.query(query=query, param=param)
     print(response)
````
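The answering path changes identically. A sketch assuming the index built above already exists; the `QueryParam(mode=...)` value is an assumption, since this hunk does not show how `param` is constructed:

```python
from videorag import VideoRAG, QueryParam
from videorag._llm import openai_4o_mini_config

if __name__ == '__main__':
    query = "How does Iron Man build his first suit?"  # hypothetical query
    param = QueryParam(mode="videorag")  # mode value assumed; not shown in this hunk
    # if param.wo_reference = False, VideoRAG will add references to video clips
    param.wo_reference = True

    videorag = VideoRAG(llm=openai_4o_mini_config, working_dir="./videorag-workdir")
    videorag.load_caption_model(debug=False)
    response = videorag.query(query=query, param=param)
    print(response)
```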
````diff
@@ -187,7 +187,7 @@ sh download.sh # downloading videos
 Then, you can run the following example command to process and answer queries for LongerVideos with VideoRAG:
 
 ```shell
-# Please enter your openai_key in line 18 at first
+# Please enter your openai_key in line 22 at first
 python videorag_experiment.py --collection 4-rag-lecture --cuda 0
 ```
 
````
````diff
@@ -246,7 +246,7 @@ python batch_winrate_quant_calculate.py
 
 ### Ollama Support
 
-This project also supports ollama. To use, edit the ollama_config in _llm.py.
+This project also supports ollama. To use, edit the ollama_config in [_llm.py](VideoRAG/videorag/_llm.py).
 Adjust the parameters of the models being used.
 
 ```
````
````diff
@@ -265,16 +265,17 @@ ollama_config = LLMConfig(
     cheap_model_func_raw = ollama_mini_complete,
     cheap_model_name = "olmo2",
     cheap_model_max_token_size = 32768,
-    cheap_model_max_async = 1)
+    cheap_model_max_async = 1
+)
 ```
 And specify the config when creating your VideoRAG instance.
 
 ### Jupyter Notebook
 
-To test the solution on a single video, just load the notebook in the notebook folder and
+To test the solution on a single video, just load the notebook in the [notebook folder](VideoRAG/nodebooks) and
 update the parameters to fit your situation.
 
-YouTube video for example can be downloaded as
+A YouTube video, for example, can be downloaded as follows:
 
 ```
 yt-dlp -o "%(id)s.%(ext)s" -S "res:720" https://www.youtube.com/live/DPa2iRgzadM?si=8cf8WbYtqiglrwtN -P .
````
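The "specify the config" step above carries no example in the diff. A minimal sketch, assuming `ollama_config` is importable from `videorag._llm` as the `_llm.py` hunks below indicate:

```python
from videorag._llm import ollama_config
from videorag import VideoRAG

# Same constructor as the OpenAI examples; only the LLMConfig differs
videorag = VideoRAG(llm=ollama_config, working_dir="./videorag-workdir")
```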
videorag_experiment.py:

````diff
@@ -15,42 +15,54 @@ import argparse
 parser = argparse.ArgumentParser(description="Set sub-category and CUDA device.")
 parser.add_argument('--collection', type=str, default='4-rag-lecture')
 parser.add_argument('--cuda', type=str, default='0')
-parser.add_argument('--config', type=str, choices=['openai', 'azure', 'ollama'], default='openai')
 args = parser.parse_args()
 sub_category = args.collection
 
 os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
 os.environ["OPENAI_API_KEY"] = ""
 
-from videorag._llm import openai_config, azure_openai_config, ollama_config
+from videorag._llm import *
 from videorag.videorag import VideoRAG, QueryParam
 
-config_map = {
-    'openai': openai_config,
-    'azure': azure_openai_config,
-    'ollama': ollama_config
-}
-llm_config = config_map[args.config]
+longervideos_llm_config = LLMConfig(
+    embedding_func_raw = openai_embedding,
+    embedding_model_name = "text-embedding-3-small",
+    embedding_dim = 1536,
+    embedding_max_token_size = 8192,
+    embedding_batch_num = 32,
+    embedding_func_max_async = 16,
+    query_better_than_threshold = 0.2,
+
+    # LLM (we utilize gpt-4o-mini for all experiments)
+    best_model_func_raw = gpt_4o_mini_complete,
+    best_model_name = "gpt-4o-mini",
+    best_model_max_token_size = 32768,
+    best_model_max_async = 16,
+
+    cheap_model_func_raw = gpt_4o_mini_complete,
+    cheap_model_name = "gpt-4o-mini",
+    cheap_model_max_token_size = 32768,
+    cheap_model_max_async = 16
+)
 
 if __name__ == '__main__':
     multiprocessing.set_start_method('spawn')
 
     ## learn
-    video_base_path = f'longervideos/{sub_category}/videos/'
+    video_base_path = f'./longervideos/{sub_category}/videos/'
     video_files = sorted(os.listdir(video_base_path))
     video_paths = [os.path.join(video_base_path, f) for f in video_files]
-    videorag = VideoRAG(llm=llm_config, working_dir=f"./videorag-workdir/{sub_category}")
+    videorag = VideoRAG(llm=longervideos_llm_config, working_dir=f"./longervideos/videorag-workdir/{sub_category}")
     videorag.insert_video(video_path_list=video_paths)
 
     ## inference
-    with open(f'longervideos/dataset.json', 'r') as f:
+    with open(f'./longervideos/dataset.json', 'r') as f:
         longervideos = json.load(f)
 
-    videorag = VideoRAG(llm=llm_config, working_dir=f"./videorag-workdir/{sub_category}")
+    videorag = VideoRAG(llm=longervideos_llm_config, working_dir=f"./longervideos/videorag-workdir/{sub_category}")
     videorag.load_caption_model(debug=False)
 
-    answer_folder = f'./videorag-answers/{sub_category}'
+    answer_folder = f'./longervideos/videorag-answers/{sub_category}'
     os.makedirs(answer_folder, exist_ok=True)
 
     collection_id = sub_category.split('-')[0]
````
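Net effect of this hunk: the experiment script drops the `--config` switch and the `config_map` lookup in favor of a hard-coded `longervideos_llm_config` that uses gpt-4o-mini for both the best and the cheap model, and it relocates the working directory and answer folder under `./longervideos/`. The README command shown earlier (`python videorag_experiment.py --collection 4-rag-lecture --cuda 0`) is unchanged.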
Example notebook (VideoRAG/nodebooks):

````diff
@@ -19,7 +19,7 @@
     "logging.getLogger(\"httpx\").setLevel(logging.WARNING)\n",
     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '0'\n",
     "\n",
-    "from videorag._llm import openai_config, azure_openai_config, ollama_config\n",
+    "from videorag._llm import openai_config, openai_4o_mini_config, azure_openai_config, ollama_config\n",
     "from videorag import VideoRAG, QueryParam\n"
    ]
   },
````
videorag/_llm.py:

````diff
@@ -89,8 +89,6 @@ class LLMConfig:
 )
 
 ##### OpenAI Configuration
-
-
 async def openai_complete_if_cache(
     model, prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
@@ -130,7 +128,6 @@ async def gpt_4o_complete(
         **kwargs,
     )
 
-
 async def gpt_4o_mini_complete(
     model_name, prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
@@ -142,7 +139,6 @@ async def gpt_4o_mini_complete(
         **kwargs,
     )
 
-
 @retry(
     stop=stop_after_attempt(5),
     wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -155,7 +151,6 @@ async def openai_embedding(model_name: str, texts: list[str]) -> np.ndarray:
     )
     return np.array([dp.embedding for dp in response.data])
 
-
 openai_config = LLMConfig(
     embedding_func_raw = openai_embedding,
     embedding_model_name = "text-embedding-3-small",
@@ -166,7 +161,7 @@ openai_config = LLMConfig(
     query_better_than_threshold = 0.2,
 
     # LLM
-    best_model_func_raw = gpt_4o_mini_complete,
+    best_model_func_raw = gpt_4o_complete,
     best_model_name = "gpt-4o",
     best_model_max_token_size = 32768,
     best_model_max_async = 16,
````
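With this one-line change, `openai_config` actually calls the gpt-4o completion function, matching its declared `best_model_name = "gpt-4o"`; previously it pointed at `gpt_4o_mini_complete` despite the name.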
````diff
@@ -174,7 +169,29 @@ openai_config = LLMConfig(
     cheap_model_func_raw = gpt_4o_mini_complete,
     cheap_model_name = "gpt-4o-mini",
     cheap_model_max_token_size = 32768,
-    cheap_model_max_async = 16)
+    cheap_model_max_async = 16
+)
+
+openai_4o_mini_config = LLMConfig(
+    embedding_func_raw = openai_embedding,
+    embedding_model_name = "text-embedding-3-small",
+    embedding_dim = 1536,
+    embedding_max_token_size = 8192,
+    embedding_batch_num = 32,
+    embedding_func_max_async = 16,
+    query_better_than_threshold = 0.2,
+
+    # LLM
+    best_model_func_raw = gpt_4o_mini_complete,
+    best_model_name = "gpt-4o-mini",
+    best_model_max_token_size = 32768,
+    best_model_max_async = 16,
+
+    cheap_model_func_raw = gpt_4o_mini_complete,
+    cheap_model_name = "gpt-4o-mini",
+    cheap_model_max_token_size = 32768,
+    cheap_model_max_async = 16
+)
 
 ###### Azure OpenAI Configuration
 @retry(
@@ -270,7 +287,8 @@ azure_openai_config = LLMConfig(
     cheap_model_func_raw = azure_gpt_4o_mini_complete,
     cheap_model_name = "gpt-4o-mini",
     cheap_model_max_token_size = 32768,
-    cheap_model_max_async = 16)
+    cheap_model_max_async = 16
+)
 
 
 ###### Ollama configuration
@@ -365,4 +383,5 @@ ollama_config = LLMConfig(
     cheap_model_func_raw = ollama_mini_complete,
     cheap_model_name = "olmo2",
     cheap_model_max_token_size = 32768,
-    cheap_model_max_async = 1)
+    cheap_model_max_async = 1
+)
````