Mirror of https://github.com/HKUDS/RAG-Anything.git
fix bug
README.md (20 lines changed)
@@ -296,8 +296,6 @@ async def main():
             model="text-embedding-3-large",
             api_key="your-api-key",
         ),
-        embedding_dim=3072,
-        max_token_size=8192
     )

     # Process a document
@@ -422,6 +420,7 @@ import asyncio
 from raganything import RAGAnything
 from lightrag import LightRAG
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 import os

 async def load_existing_lightrag():
@@ -445,13 +444,16 @@ async def load_existing_lightrag():
             api_key="your-api-key",
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key="your-api-key",
-        ),
-        embedding_dim=3072,
-        max_token_size=8192
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
+        )
     )

     # Initialize storage (this will load existing data if available)
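The change above replaces the bare lambda embedding function and the separate embedding_dim / max_token_size keyword arguments with a single EmbeddingFunc object that carries those settings, presumably so that LightRAG can read the embedding dimension and token limit directly from the function object. A minimal sketch of the resulting setup, assembled from the added lines, follows; the environment-variable credential handling, the working directory path, and the "gpt-4o-mini" model name are illustrative assumptions and not part of this commit.

import os

from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc

# Assumed credential sources; the README itself uses literal "your-api-key" placeholders.
api_key = os.getenv("OPENAI_API_KEY", "your-api-key")
base_url = os.getenv("OPENAI_BASE_URL")  # None falls back to the default endpoint

lightrag_instance = LightRAG(
    working_dir="./existing_lightrag_storage",  # illustrative path
    llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
        "gpt-4o-mini",  # assumed model name
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    ),
    # embedding_dim and max_token_size now live inside EmbeddingFunc rather than
    # being passed to LightRAG as separate keyword arguments.
    embedding_func=EmbeddingFunc(
        embedding_dim=3072,  # output size of text-embedding-3-large
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
            base_url=base_url,
        ),
    ),
)

The same EmbeddingFunc pattern appears in README_zh.md and in the example script further down in this commit.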
README_zh.md (20 lines changed)
@@ -292,8 +292,6 @@ async def main():
             model="text-embedding-3-large",
             api_key="your-api-key",
         ),
-        embedding_dim=3072,
-        max_token_size=8192
     )

     # 处理文档
@@ -418,6 +416,7 @@ import asyncio
 from raganything import RAGAnything
 from lightrag import LightRAG
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 import os

 async def load_existing_lightrag():
@@ -441,13 +440,16 @@ async def load_existing_lightrag():
             api_key="your-api-key",
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key="your-api-key",
-        ),
-        embedding_dim=3072,
-        max_token_size=8192
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
+        )
     )

     # 初始化存储(如果有现有数据,这将加载它们)
@@ -12,6 +12,7 @@ import os
 import argparse
 import asyncio
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 from raganything.raganything import RAGAnything


@@ -89,14 +90,16 @@ async def process_with_rag(
             base_url=base_url,
             **kwargs,
         ),
-        embedding_func=lambda texts: openai_embed(
-            texts,
-            model="text-embedding-3-large",
-            api_key=api_key,
-            base_url=base_url,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
-        embedding_dim=3072,
-        max_token_size=8192,
     )

     # Process document
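With the example script now declaring embedding_dim and max_token_size inside the EmbeddingFunc wrapper, a quick way to confirm the declared dimension matches the model output is to call the wrapped function once and compare vector widths. This is a standalone sketch, not part of the commit; it assumes a valid OPENAI_API_KEY in the environment and that openai_embed returns a NumPy array, its usual behaviour in lightrag.

import asyncio
import os

from lightrag.llm.openai import openai_embed
from lightrag.utils import EmbeddingFunc

embedding_func = EmbeddingFunc(
    embedding_dim=3072,  # text-embedding-3-large produces 3072-dimensional vectors
    max_token_size=8192,
    func=lambda texts: openai_embed(
        texts,
        model="text-embedding-3-large",
        api_key=os.getenv("OPENAI_API_KEY"),  # assumed credential source
    ),
)

async def check_dimension() -> None:
    # Embed a single probe string and verify the declared dimension
    # against what the model actually returns.
    vectors = await embedding_func.func(["dimension probe"])
    assert vectors.shape[1] == embedding_func.embedding_dim

if __name__ == "__main__":
    asyncio.run(check_dimension())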
@@ -1,5 +1,5 @@
 from .raganything import RAGAnything as RAGAnything

-__version__ = "1.0.1"
+__version__ = "1.0.2"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/RAG-Anything"
@@ -18,7 +18,7 @@ import sys
 sys.path.insert(0, str(Path(__file__).parent.parent))

 from lightrag import LightRAG, QueryParam
-from lightrag.utils import EmbeddingFunc, setup_logger
+from lightrag.utils import setup_logger

 # Import parser and multimodal processors
 from raganything.mineru_parser import MineruParser
@@ -42,8 +42,6 @@ class RAGAnything:
         vision_model_func: Optional[Callable] = None,
         embedding_func: Optional[Callable] = None,
         working_dir: str = "./rag_storage",
-        embedding_dim: int = 3072,
-        max_token_size: int = 8192,
     ):
         """
         Initialize Multimodal Document Processing Pipeline
@@ -54,15 +52,11 @@ class RAGAnything:
             vision_model_func: Vision model function for image analysis
             embedding_func: Embedding function for text vectorization
             working_dir: Working directory for storage (used when creating new RAG)
-            embedding_dim: Embedding dimension (used when creating new RAG)
-            max_token_size: Maximum token size for embeddings (used when creating new RAG)
         """
         self.working_dir = working_dir
         self.llm_model_func = llm_model_func
         self.vision_model_func = vision_model_func
         self.embedding_func = embedding_func
-        self.embedding_dim = embedding_dim
-        self.max_token_size = max_token_size

         # Set up logging
         setup_logger("RAGAnything")
@@ -136,11 +130,7 @@ class RAGAnything:
         self.lightrag = LightRAG(
             working_dir=self.working_dir,
             llm_model_func=self.llm_model_func,
-            embedding_func=EmbeddingFunc(
-                embedding_dim=self.embedding_dim,
-                max_token_size=self.max_token_size,
-                func=self.embedding_func,
-            ),
+            embedding_func=self.embedding_func,
         )

         await self.lightrag.initialize_storages()
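Net effect of the class changes above: RAGAnything no longer accepts embedding_dim or max_token_size and no longer wraps the embedding callable itself; whatever is passed as embedding_func is forwarded to LightRAG unchanged. Callers therefore construct the EmbeddingFunc themselves, as the README and example diffs earlier in this commit do. A minimal caller-side sketch under that assumption; the stub embedding function and llm_model_func=None are placeholders for illustration only.

from lightrag.utils import EmbeddingFunc
from raganything.raganything import RAGAnything

async def stub_embed(texts):
    # Placeholder embedding callable; a real setup would call openai_embed
    # or another embedding backend here.
    return [[0.0] * 3072 for _ in texts]

rag = RAGAnything(
    llm_model_func=None,  # omitted in this sketch; supply a real LLM function in practice
    working_dir="./rag_storage",
    embedding_func=EmbeddingFunc(  # forwarded to LightRAG as-is after this commit
        embedding_dim=3072,
        max_token_size=8192,
        func=stub_embed,
    ),
    # embedding_dim=3072,      # removed from __init__ in this commit
    # max_token_size=8192,     # removed from __init__ in this commit
)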