Merge branch 'browser-use:main' into feat/qwen-support

2025-02-13 20:10:04 +08:00
parent 1eb4b30752 84b8965174
commit 3e7551c0bc
9 changed files with 160 additions and 33 deletions
--- a/.env.example
+++ b/.env.example
@@ -8,6 +8,7 @@ GOOGLE_API_KEY=

 AZURE_OPENAI_ENDPOINT=
 AZURE_OPENAI_API_KEY=
+AZURE_OPENAI_API_VERSION=2025-01-01-preview

 DEEPSEEK_ENDPOINT=https://api.deepseek.com
 DEEPSEEK_API_KEY=
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,9 +47,8 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html

-# Install Chrome
-RUN curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
-    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list
+# Set platform for ARM64 compatibility
+ARG TARGETPLATFORM=linux/amd64

 # Set up working directory
 WORKDIR /app
@@ -62,7 +61,6 @@ RUN pip install --no-cache-dir -r requirements.txt
 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
 RUN playwright install --with-deps chromium
 RUN playwright install-deps
-RUN apt-get install -y google-chrome-stable

 # Copy the application code
 COPY . .
@@ -70,7 +68,7 @@ COPY . .
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV BROWSER_USE_LOGGING_LEVEL=info
-ENV CHROME_PATH=/usr/bin/google-chrome
+ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
 ENV RESOLUTION=1920x1080x24
@@ -83,6 +81,6 @@ ENV RESOLUTION_HEIGHT=1080
 RUN mkdir -p /var/log/supervisor
 COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

-EXPOSE 7788 6080 5900
+EXPOSE 7788 6080 5901

 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/Dockerfile.arm64
+++ b/Dockerfile.arm64
@@ -0,0 +1,85 @@
+FROM python:3.11-slim
+
+# Install system dependencies (X server, VNC, supervisor, fonts, Chromium runtime libraries)
+RUN apt-get update && apt-get install -y \
+    wget \
+    gnupg \
+    curl \
+    unzip \
+    xvfb \
+    libgconf-2-4 \
+    libxss1 \
+    libnss3 \
+    libnspr4 \
+    libasound2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    xdg-utils \
+    fonts-liberation \
+    dbus \
+    xauth \
+    x11vnc \
+    tigervnc-tools \
+    supervisor \
+    net-tools \
+    procps \
+    git \
+    python3-numpy \
+    fontconfig \
+    fonts-dejavu \
+    fonts-dejavu-core \
+    fonts-dejavu-extra \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install noVNC
+RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+
+# Set platform explicitly for ARM64
+ARG TARGETPLATFORM=linux/arm64
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements and install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install Playwright's Chromium, then link the binary at a fixed path (its install directory name is version-specific)
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+RUN PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 pip install playwright && \
+    playwright install --with-deps chromium && \
+    ln -s "$(find /ms-playwright/chromium-* -type f -name chrome -print -quit)" /usr/local/bin/playwright-chromium
+
+# Copy the application code
+COPY . .
+
+# Set environment variables (CHROME_PATH uses the fixed symlink because ENV does not expand globs such as chromium-*)
+ENV PYTHONUNBUFFERED=1
+ENV BROWSER_USE_LOGGING_LEVEL=info
+ENV CHROME_PATH=/usr/local/bin/playwright-chromium
+ENV ANONYMIZED_TELEMETRY=false
+ENV DISPLAY=:99
+ENV RESOLUTION=1920x1080x24
+ENV VNC_PASSWORD=vncpassword
+ENV CHROME_PERSISTENT_SESSION=true
+ENV RESOLUTION_WIDTH=1920
+ENV RESOLUTION_HEIGHT=1080
+
+# Set up supervisor configuration
+RUN mkdir -p /var/log/supervisor
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+EXPOSE 7788 6080 5901
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/README.md
+++ b/README.md
@@ -117,6 +117,7 @@ docker compose up --build
 CHROME_PERSISTENT_SESSION=true docker compose up --build
 ```

+
 4. Access the Application:
 - Web Interface: Open `http://localhost:7788` in your browser
 - VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
@@ -183,7 +184,11 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
     VNC_PASSWORD=your_vnc_password  # Optional, defaults to "vncpassword"
     ```

-2. **Browser Persistence Modes:**
+2. **Platform Support:**
+   - Supports both AMD64 and ARM64 architectures
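+   - For ARM64 systems (e.g., Apple Silicon Macs), select the ARM64 build by setting `DOCKERFILE=Dockerfile.arm64` and `TARGETPLATFORM=linux/arm64` before running `docker compose up --build` (both default to the AMD64 build)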
+
+3. **Browser Persistence Modes:**
   - **Default Mode (CHROME_PERSISTENT_SESSION=false):**
     - Browser opens and closes with each AI task
     - Clean state for each interaction
@@ -195,12 +200,13 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
     - Allows viewing previous AI interactions
     - Set in `.env` file or via environment variable when starting container

-3. **Viewing Browser Interactions:**
+4. **Viewing Browser Interactions:**
   - Access the noVNC viewer at `http://localhost:6080/vnc.html`
   - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
+   - Direct VNC access available on port 5901
   - You can now see all browser interactions in real-time

-4. **Container Management:**
+5. **Container Management:**
   ```bash
   # Start with persistent browser
   CHROME_PERSISTENT_SESSION=true docker compose up -d
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,11 +3,13 @@ services:
    platform: linux/amd64
    build:
      context: .
-      dockerfile: Dockerfile
+      dockerfile: ${DOCKERFILE:-Dockerfile}
+      args:
+        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
    ports:
      - "7788:7788"  # Gradio default port
      - "6080:6080"  # noVNC web interface
-      - "5900:5900"  # VNC port
+      - "5901:5901"  # VNC port
      - "9222:9222"  # Chrome remote debugging port
    environment:
      - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
@@ -42,7 +44,7 @@ services:
    tmpfs:
      - /tmp
    healthcheck:
-      test: ["CMD", "nc", "-z", "localhost", "5900"]
+      test: ["CMD", "nc", "-z", "localhost", "5901"]
      interval: 10s
      timeout: 5s
      retries: 3
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+# Replace this shell with supervisord (exec) and keep it in the foreground (-n) so it manages the child processes directly
+exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf
--- a/src/utils/deep_research.py
+++ b/src/utils/deep_research.py
@@ -269,6 +269,23 @@ Provide your output as a JSON formatted list. Each item in the list must adhere
        logger.info("\nFinish Searching, Start Generating Report...")

        # 5. Report Generation in Markdown (or JSON if you prefer)
+        return await generate_final_report(task, history_infos, save_dir, llm)
+
+    except Exception as e:
+        logger.error(f"Deep research Error: {e}")
+        return await generate_final_report(task, history_infos, save_dir, llm, str(e))
+    finally:
+        if browser:
+            await browser.close()
+        if browser_context:
+            await browser_context.close()
+        logger.info("Browser closed.")
+
+async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
+    """Generate report from collected information with error handling"""
+    try:
+        logger.info("\nAttempting to generate final report from collected data...")
+        
        writer_system_prompt = """
        You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing.

@@ -314,21 +331,21 @@ Provide your output as a JSON formatted list. Each item in the list must adhere
            logger.info(ai_report_msg.reasoning_content)
            logger.info("🤯 End Report Deep Thinking")
        report_content = ai_report_msg.content
-        # Remove ```markdown or ``` at the *very beginning* and ``` at the *very end*, with optional whitespace
        report_content = re.sub(r"^```\s*markdown\s*|^\s*```|```\s*$", "", report_content, flags=re.MULTILINE)
        report_content = report_content.strip()
+
+        # Add error notification to the report
+        if error_msg:
+            report_content = f"## ⚠️ Research Incomplete - Partial Results\n" \
+                            f"**The research process was interrupted by an error:** {error_msg}\n\n" \
+                            f"{report_content}"
+            
        report_file_path = os.path.join(save_dir, "final_report.md")
        with open(report_file_path, "w", encoding="utf-8") as f:
            f.write(report_content)
        logger.info(f"Save Report at: {report_file_path}")
        return report_content, report_file_path

-    except Exception as e:
-        logger.error(f"Deep research Error: {e}")
-        return "", None
-    finally:
-        if browser:
-            await browser.close()
-        if browser_context:
-            await browser_context.close()
-        logger.info("Browser closed.")
+    except Exception as report_error:
+        logger.error(f"Failed to generate partial report: {report_error}")
+        return f"Error generating report: {str(report_error)}", None
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -129,10 +129,11 @@ def get_llm_model(provider: str, **kwargs):
            base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
        else:
            base_url = kwargs.get("base_url")
+        api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
        return AzureChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o"),
            temperature=kwargs.get("temperature", 0.0),
-            api_version="2024-05-01-preview",
+            api_version=api_version,
            azure_endpoint=base_url,
            api_key=api_key,
        )
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -1,4 +1,5 @@
 [supervisord]
+user=root
 nodaemon=true
 logfile=/dev/stdout
 logfile_maxbytes=0
@@ -13,6 +14,8 @@ stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0
 priority=100
 startsecs=3
+stopsignal=TERM
+stopwaitsecs=10

 [program:vnc_setup]
 command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
@@ -25,28 +28,33 @@ stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0

 [program:x11vnc]
-command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
+command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
 autorestart=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0
 priority=200
-startretries=5
-startsecs=5
-depends_on=vnc_setup
+startretries=10
+startsecs=10
+stopsignal=TERM
+stopwaitsecs=10
+depends_on=vnc_setup,xvfb

 [program:x11vnc_log]
-command=tail -f /var/log/x11vnc.log
+command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
 autorestart=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0
 priority=250
+stopsignal=TERM
+stopwaitsecs=5
+depends_on=x11vnc

 [program:novnc]
-command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
+command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
 autorestart=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
@@ -58,15 +66,18 @@ startsecs=3
 depends_on=x11vnc

 [program:persistent_browser]
-command=bash -c 'mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: \#f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"'
+environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
+command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
 autorestart=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
 stderr_logfile_maxbytes=0
 priority=350
-startretries=3
-startsecs=3
+startretries=5
+startsecs=10
+stopsignal=TERM
+stopwaitsecs=15
 depends_on=novnc

 [program:webui]
@@ -80,4 +91,6 @@ stderr_logfile_maxbytes=0
 priority=400
 startretries=3
 startsecs=3
+stopsignal=TERM
+stopwaitsecs=10
 depends_on=persistent_browser