Merge branch 'browser-use:main' into feat/qwen-support

This commit is contained in:
Cheer
2025-02-13 20:10:04 +08:00
committed by GitHub
9 changed files with 160 additions and 33 deletions

View File

@@ -8,6 +8,7 @@ GOOGLE_API_KEY=
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_API_KEY=
AZURE_OPENAI_API_VERSION=2025-01-01-preview
DEEPSEEK_ENDPOINT=https://api.deepseek.com
DEEPSEEK_API_KEY=

View File

@@ -47,9 +47,8 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
# Install Chrome
RUN curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list
# Set platform for ARM64 compatibility
ARG TARGETPLATFORM=linux/amd64
# Set up working directory
WORKDIR /app
@@ -62,7 +61,6 @@ RUN pip install --no-cache-dir -r requirements.txt
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN playwright install --with-deps chromium
RUN playwright install-deps
RUN apt-get install -y google-chrome-stable
# Copy the application code
COPY . .
@@ -70,7 +68,7 @@ COPY . .
# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV BROWSER_USE_LOGGING_LEVEL=info
ENV CHROME_PATH=/usr/bin/google-chrome
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
ENV ANONYMIZED_TELEMETRY=false
ENV DISPLAY=:99
ENV RESOLUTION=1920x1080x24
@@ -83,6 +81,6 @@ ENV RESOLUTION_HEIGHT=1080
RUN mkdir -p /var/log/supervisor
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
EXPOSE 7788 6080 5900
EXPOSE 7788 6080 5901
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

85
Dockerfile.arm64 Normal file
View File

@@ -0,0 +1,85 @@
# Base image. Pinned to an explicit Debian release (bookworm) because the
# package names installed below are release-specific; a floating "slim" tag
# can silently move to a newer Debian release where packages are renamed or
# removed, breaking the build.
FROM python:3.11-slim-bookworm

# Install system dependencies: browser runtime libraries, the virtual display
# and VNC stack (xvfb, x11vnc, tigervnc-tools), supervisor, fonts and basic
# tooling. The list is de-duplicated and sorted alphabetically so diffs stay
# small. netcat-openbsd provides the `nc` binary that the docker-compose
# healthcheck (`nc -z localhost 5901`) invokes inside the container.
# update + install + list cleanup stay in one layer so no stale apt index is
# baked into the image.
RUN apt-get update && apt-get install -y \
        curl \
        dbus \
        fontconfig \
        fonts-dejavu \
        fonts-dejavu-core \
        fonts-dejavu-extra \
        fonts-liberation \
        git \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgconf-2-4 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxrandr2 \
        libxss1 \
        net-tools \
        netcat-openbsd \
        procps \
        python3-numpy \
        supervisor \
        tigervnc-tools \
        unzip \
        wget \
        x11vnc \
        xauth \
        xdg-utils \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
# Install noVNC and its websockify dependency under /opt/novnc.
# Shallow clones (--depth 1) fetch only the latest commit, keeping the full
# git history out of the image layer. index.html is linked to vnc.html so the
# web root serves the VNC client directly.
# NOTE(review): both clones track the default branch; consider pinning a
# release tag for reproducible builds.
RUN git clone --depth 1 https://github.com/novnc/noVNC.git /opt/novnc \
    && git clone --depth 1 https://github.com/novnc/websockify /opt/novnc/utils/websockify \
    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
# Default target platform for this ARM64 image; docker compose may override
# it through the TARGETPLATFORM build arg.
ARG TARGETPLATFORM=linux/arm64

# Set up working directory
WORKDIR /app

# Copy only the requirements manifest first so the dependency layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install Playwright and its Chromium build, plus the OS libraries Chromium
# needs. Browsers are stored under PLAYWRIGHT_BROWSERS_PATH.
# --no-cache-dir keeps pip's download cache out of the layer, and the apt
# package lists refreshed by `--with-deps` are removed in the same layer so
# they do not inflate the image.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 pip install --no-cache-dir playwright \
    && playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*
# Bring the application source into the working directory.
COPY . .

# Runtime environment, declared in a single instruction.
# NOTE(review): ENV performs no glob expansion, so CHROME_PATH keeps the
# literal '*' in its value; whatever reads it must expand the pattern itself —
# confirm the application does.
# NOTE(review): VNC_PASSWORD is only a default baked into the image; override
# it at runtime for anything beyond local use.
ENV PYTHONUNBUFFERED=1 \
    BROWSER_USE_LOGGING_LEVEL=info \
    CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome \
    ANONYMIZED_TELEMETRY=false \
    DISPLAY=:99 \
    RESOLUTION=1920x1080x24 \
    VNC_PASSWORD=vncpassword \
    CHROME_PERSISTENT_SESSION=true \
    RESOLUTION_WIDTH=1920 \
    RESOLUTION_HEIGHT=1080

# Supervisor: make sure its log directory exists, then install the config.
RUN mkdir -p /var/log/supervisor
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Documented ports: web UI (7788), noVNC web interface (6080), VNC (5901).
EXPOSE 7788
EXPOSE 6080
EXPOSE 5901

# Exec form, so supervisord itself is the container's main process.
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@@ -117,6 +117,7 @@ docker compose up --build
CHROME_PERSISTENT_SESSION=true docker compose up --build
```
4. Access the Application:
- Web Interface: Open `http://localhost:7788` in your browser
- VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
@@ -183,7 +184,11 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
```
2. **Browser Persistence Modes:**
2. **Platform Support:**
- Supports both AMD64 and ARM64 architectures
- For ARM64 systems (e.g., Apple Silicon Macs), the container will automatically use the appropriate image
3. **Browser Persistence Modes:**
- **Default Mode (CHROME_PERSISTENT_SESSION=false):**
- Browser opens and closes with each AI task
- Clean state for each interaction
@@ -195,12 +200,13 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
- Allows viewing previous AI interactions
- Set in `.env` file or via environment variable when starting container
3. **Viewing Browser Interactions:**
4. **Viewing Browser Interactions:**
- Access the noVNC viewer at `http://localhost:6080/vnc.html`
- Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
- Direct VNC access available on port 5901 (the container's VNC port, published as `5901:5901` in `docker-compose.yml`)
- You can now see all browser interactions in real-time
4. **Container Management:**
5. **Container Management:**
```bash
# Start with persistent browser
CHROME_PERSISTENT_SESSION=true docker compose up -d

View File

@@ -3,11 +3,13 @@ services:
platform: linux/amd64
build:
context: .
dockerfile: Dockerfile
dockerfile: ${DOCKERFILE:-Dockerfile}
args:
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
ports:
- "7788:7788" # Gradio default port
- "6080:6080" # noVNC web interface
- "5900:5900" # VNC port
- "5901:5901" # VNC port
- "9222:9222" # Chrome remote debugging port
environment:
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
@@ -42,7 +44,7 @@ services:
tmpfs:
- /tmp
healthcheck:
test: ["CMD", "nc", "-z", "localhost", "5900"]
test: ["CMD", "nc", "-z", "localhost", "5901"]
interval: 10s
timeout: 5s
retries: 3

4
entrypoint.sh Normal file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Container entrypoint: replace this shell with supervisord running in the
# foreground (-n) so it directly owns and manages its child processes.
SUPERVISORD_BIN=/usr/bin/supervisord
SUPERVISORD_CONF=/etc/supervisor/conf.d/supervisord.conf

exec "$SUPERVISORD_BIN" -n -c "$SUPERVISORD_CONF"

View File

@@ -269,6 +269,23 @@ Provide your output as a JSON formatted list. Each item in the list must adhere
logger.info("\nFinish Searching, Start Generating Report...")
# 5. Report Generation in Markdown (or JSON if you prefer)
return await generate_final_report(task, history_infos, save_dir, llm)
except Exception as e:
logger.error(f"Deep research Error: {e}")
return await generate_final_report(task, history_infos, save_dir, llm, str(e))
finally:
if browser:
await browser.close()
if browser_context:
await browser_context.close()
logger.info("Browser closed.")
async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
"""Generate report from collected information with error handling"""
try:
logger.info("\nAttempting to generate final report from collected data...")
writer_system_prompt = """
You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing.
@@ -314,21 +331,21 @@ Provide your output as a JSON formatted list. Each item in the list must adhere
logger.info(ai_report_msg.reasoning_content)
logger.info("🤯 End Report Deep Thinking")
report_content = ai_report_msg.content
# Remove ```markdown or ``` at the *very beginning* and ``` at the *very end*, with optional whitespace
report_content = re.sub(r"^```\s*markdown\s*|^\s*```|```\s*$", "", report_content, flags=re.MULTILINE)
report_content = report_content.strip()
# Add error notification to the report
if error_msg:
report_content = f"## ⚠️ Research Incomplete - Partial Results\n" \
f"**The research process was interrupted by an error:** {error_msg}\n\n" \
f"{report_content}"
report_file_path = os.path.join(save_dir, "final_report.md")
with open(report_file_path, "w", encoding="utf-8") as f:
f.write(report_content)
logger.info(f"Save Report at: {report_file_path}")
return report_content, report_file_path
except Exception as e:
logger.error(f"Deep research Error: {e}")
return "", None
finally:
if browser:
await browser.close()
if browser_context:
await browser_context.close()
logger.info("Browser closed.")
except Exception as report_error:
logger.error(f"Failed to generate partial report: {report_error}")
return f"Error generating report: {str(report_error)}", None

View File

@@ -129,10 +129,11 @@ def get_llm_model(provider: str, **kwargs):
base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
else:
base_url = kwargs.get("base_url")
api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
return AzureChatOpenAI(
model=kwargs.get("model_name", "gpt-4o"),
temperature=kwargs.get("temperature", 0.0),
api_version="2024-05-01-preview",
api_version=api_version,
azure_endpoint=base_url,
api_key=api_key,
)

View File

@@ -1,4 +1,5 @@
[supervisord]
user=root
nodaemon=true
logfile=/dev/stdout
logfile_maxbytes=0
@@ -13,6 +14,8 @@ stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=100
startsecs=3
stopsignal=TERM
stopwaitsecs=10
[program:vnc_setup]
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
@@ -25,28 +28,33 @@ stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:x11vnc]
command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=200
startretries=5
startsecs=5
depends_on=vnc_setup
startretries=10
startsecs=10
stopsignal=TERM
stopwaitsecs=10
depends_on=vnc_setup,xvfb
[program:x11vnc_log]
command=tail -f /var/log/x11vnc.log
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=250
stopsignal=TERM
stopwaitsecs=5
depends_on=x11vnc
[program:novnc]
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
@@ -58,15 +66,18 @@ startsecs=3
depends_on=x11vnc
[program:persistent_browser]
command=bash -c 'mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: \#f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"'
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority=350
startretries=3
startsecs=3
startretries=5
startsecs=10
stopsignal=TERM
stopwaitsecs=15
depends_on=novnc
[program:webui]
@@ -80,4 +91,6 @@ stderr_logfile_maxbytes=0
priority=400
startretries=3
startsecs=3
stopsignal=TERM
stopwaitsecs=10
depends_on=persistent_browser