Fix content length limiting when recording deep-research search results

This commit is contained in:
vincent
2025-02-08 13:08:33 +08:00
parent d690237503
commit b7ee26ade6
3 changed files with 53 additions and 15 deletions

View File

@@ -66,6 +66,6 @@ class CustomController(Controller):
)
# go back to original url
await page.go_back()
msg = f'📄 Extracted page content as {output_format}\n: {content}\n'
msg = f'Extracted page content:\n {content}\n'
logger.info(msg)
return ActionResult(extracted_content=msg)

View File

@@ -29,7 +29,7 @@ from browser_use.browser.context import (
logger = logging.getLogger(__name__)
async def deep_research(task, llm, agent_state, **kwargs):
async def deep_research(task, llm, agent_state=None, **kwargs):
task_id = str(uuid4())
save_dir = kwargs.get("save_dir", os.path.join(f"./tmp/deep_research/{task_id}"))
logger.info(f"Save Deep Research at: {save_dir}")
@@ -237,19 +237,27 @@ Provide your output as a JSON formatted list. Each item in the list must adhere
with open(querr_save_path, "w", encoding="utf-8") as fw:
fw.write(f"Query: {query_tasks[i]}\n")
fw.write(query_result)
history_infos_ = json.dumps(history_infos, indent=4)
record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {json.dumps(history_infos_)}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result}\n "
record_messages.append(HumanMessage(content=record_prompt))
ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
record_messages.append(ai_record_msg)
if hasattr(ai_record_msg, "reasoning_content"):
logger.info("🤯 Start Record Deep Thinking: ")
logger.info(ai_record_msg.reasoning_content)
logger.info("🤯 End Record Deep Thinking")
record_content = ai_record_msg.content
record_content = repair_json(record_content)
new_record_infos = json.loads(record_content)
history_infos.extend(new_record_infos)
# split query result in case the content is too long
query_results_split = query_result.split("Extracted page content:")
for qi, query_result_ in enumerate(query_results_split):
if not query_result_:
continue
else:
# TODO: limit content length: 128k tokens, ~3 chars per token
query_result_ = query_result_[:128000*3]
history_infos_ = json.dumps(history_infos, indent=4)
record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {history_infos_}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result_}\n "
record_messages.append(HumanMessage(content=record_prompt))
ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
record_messages.append(ai_record_msg)
if hasattr(ai_record_msg, "reasoning_content"):
logger.info("🤯 Start Record Deep Thinking: ")
logger.info(ai_record_msg.reasoning_content)
logger.info("🤯 End Record Deep Thinking")
record_content = ai_record_msg.content
record_content = repair_json(record_content)
new_record_infos = json.loads(record_content)
history_infos.extend(new_record_infos)
logger.info("\nFinish Searching, Start Generating Report...")

View File

@@ -0,0 +1,30 @@
import asyncio
import os
import sys

from dotenv import load_dotenv

# Load API keys and other settings from a local .env file before anything
# else reads the environment.
load_dotenv()
# Make the repository root importable so `src.*` modules resolve.
sys.path.append(".")


async def test_deep_research():
    """Smoke-test the deep_research pipeline with a Gemini model.

    Runs a single-iteration research task (up to 3 queries) without a
    dedicated browser instance and unpacks the generated report.
    """
    from src.utils import utils
    from src.utils.deep_research import deep_research

    task = "write a report about DeepSeek-R1, get its pdf"
    llm = utils.get_llm_model(
        provider="gemini",
        model_name="gemini-2.0-flash-thinking-exp-01-21",
        temperature=1.0,
        api_key=os.getenv("GOOGLE_API_KEY", ""),
    )
    report_content, report_file_path = await deep_research(
        task=task,
        llm=llm,
        agent_state=None,
        max_search_iterations=1,
        max_query_num=3,
        use_own_browser=False,
    )


if __name__ == "__main__":
    asyncio.run(test_deep_research())