This commit is contained in:
zrguo
2025-06-26 15:50:15 +08:00
parent 56c2d575f0
commit d63ac6a8a5
5 changed files with 35 additions and 38 deletions

View File

@@ -296,8 +296,6 @@ async def main():
model="text-embedding-3-large",
api_key="your-api-key",
),
embedding_dim=3072,
max_token_size=8192
)
# Process a document
@@ -422,6 +420,7 @@ import asyncio
from raganything import RAGAnything
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os
async def load_existing_lightrag():
@@ -445,13 +444,16 @@ async def load_existing_lightrag():
api_key="your-api-key",
**kwargs,
),
embedding_func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key="your-api-key",
),
embedding_dim=3072,
max_token_size=8192
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key=api_key,
base_url=base_url,
),
)
)
# Initialize storage (this will load existing data if available)

View File

@@ -292,8 +292,6 @@ async def main():
model="text-embedding-3-large",
api_key="your-api-key",
),
embedding_dim=3072,
max_token_size=8192
)
# 处理文档
@@ -418,6 +416,7 @@ import asyncio
from raganything import RAGAnything
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os
async def load_existing_lightrag():
@@ -441,13 +440,16 @@ async def load_existing_lightrag():
api_key="your-api-key",
**kwargs,
),
embedding_func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key="your-api-key",
),
embedding_dim=3072,
max_token_size=8192
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key=api_key,
base_url=base_url,
),
)
)
# 初始化存储(如果有现有数据,这将加载它们)

View File

@@ -12,6 +12,7 @@ import os
import argparse
import asyncio
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from raganything.raganything import RAGAnything
@@ -89,14 +90,16 @@ async def process_with_rag(
base_url=base_url,
**kwargs,
),
embedding_func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key=api_key,
base_url=base_url,
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key=api_key,
base_url=base_url,
),
),
embedding_dim=3072,
max_token_size=8192,
)
# Process document

View File

@@ -1,5 +1,5 @@
from .raganything import RAGAnything as RAGAnything
__version__ = "1.0.1"
__version__ = "1.0.2"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/RAG-Anything"

View File

@@ -18,7 +18,7 @@ import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc, setup_logger
from lightrag.utils import setup_logger
# Import parser and multimodal processors
from raganything.mineru_parser import MineruParser
@@ -42,8 +42,6 @@ class RAGAnything:
vision_model_func: Optional[Callable] = None,
embedding_func: Optional[Callable] = None,
working_dir: str = "./rag_storage",
embedding_dim: int = 3072,
max_token_size: int = 8192,
):
"""
Initialize Multimodal Document Processing Pipeline
@@ -54,15 +52,11 @@ class RAGAnything:
vision_model_func: Vision model function for image analysis
embedding_func: Embedding function for text vectorization
working_dir: Working directory for storage (used when creating new RAG)
embedding_dim: Embedding dimension (used when creating new RAG)
max_token_size: Maximum token size for embeddings (used when creating new RAG)
"""
self.working_dir = working_dir
self.llm_model_func = llm_model_func
self.vision_model_func = vision_model_func
self.embedding_func = embedding_func
self.embedding_dim = embedding_dim
self.max_token_size = max_token_size
# Set up logging
setup_logger("RAGAnything")
@@ -136,11 +130,7 @@ class RAGAnything:
self.lightrag = LightRAG(
working_dir=self.working_dir,
llm_model_func=self.llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=self.embedding_dim,
max_token_size=self.max_token_size,
func=self.embedding_func,
),
embedding_func=self.embedding_func,
)
await self.lightrag.initialize_storages()