Upgrade to PyTorch 2.6.0 and enhance GPU reset script with Ollama management

- Upgrade PyTorch and torchaudio to 2.6.0 with CUDA 12.4 support - Update GPU reset script to gracefully stop/start Ollama via supervisorctl - Add Docker Compose configuration for both API and MCP server modes - Implement comprehensive Docker entrypoint for multi-mode deployment - Add GPU health check cleanup to prevent memory leaks - Fix transcription memory management with proper resource cleanup - Add filename security validation to prevent path traversal attacks - Include .dockerignore for optimized Docker builds - Remove deprecated supervisor configuration 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 23:01:22 +03:00
parent f6777b1488
commit fb1e5dceba
17 changed files with 1313 additions and 55 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,60 @@
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 .Python
 *.egg-info/
 dist/
 build/
 # Virtual environments
 venv/
 env/
 ENV/
 .venv
 # Project specific
 logs/
 outputs/
 models/
 *.log
 *.logs
 mcp.logs
 api.logs
 # Git
 .git/
 .gitignore
 .github/
 # IDE
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # Docker
 .dockerignore
 docker-compose.yml
 docker-compose.*.yml
 # Temporary files
 *.tmp
 *.temp
 .DS_Store
 Thumbs.db
 # Documentation (optional - uncomment if you want to exclude)
 # README.md
 # CLAUDE.md
 # IMPLEMENTATION_PLAN.md
 # Scripts (already in container)
 # reset_gpu.sh - NEEDED for GPU health checks
 run_api_server.sh
 run_mcp_server.sh
 # Supervisor config (not needed in container)
 supervisor/
--- a/93
+++ b/93
@@ -1,25 +1,34 @@
-# Use NVIDIA CUDA base image with Python
+# Multi-purpose Whisper Transcriptor Docker Image
-FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
+# Supports both MCP Server and REST API Server modes
 # Use SERVER_MODE environment variable to select: "mcp" or "api"
-# Install Python 3.12
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
 # Prevent interactive prompts during installation
 ENV DEBIAN_FRONTEND=noninteractive
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
    software-properties-common \
    curl \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y \
    python3.12 \
    python3.12-venv \
    python3.12-dev \
    python3-pip \
    ffmpeg \
    git \
    nginx \
    supervisor \
    && rm -rf /var/lib/apt/lists/*
 # Make python3.12 the default
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
-RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
-# Upgrade pip
+# Install pip using ensurepip (Python 3.12+ doesn't have distutils)
-RUN python -m pip install --upgrade pip
+RUN python -m ensurepip --upgrade && \
    python -m pip install --upgrade pip
 # Set working directory
 WORKDIR /app
@@ -27,30 +36,68 @@ WORKDIR /app
 # Copy requirements first for better caching
 COPY requirements.txt .
-# Install Python dependencies with CUDA support
+# Install Python dependencies with CUDA 12.4 support
 RUN pip install --no-cache-dir \
    torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124 && \
    pip install --no-cache-dir \
    torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124 && \
    pip install --no-cache-dir \
    faster-whisper \
-    torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121 \
+    fastapi>=0.115.0 \
-    torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121 \
+    uvicorn[standard]>=0.32.0 \
-    mcp[cli]
+    python-multipart>=0.0.9 \
    aiofiles>=23.0.0 \
    mcp[cli]>=1.2.0 \
    gTTS>=2.3.0 \
    pyttsx3>=2.90 \
    scipy>=1.10.0 \
    numpy>=1.24.0
 # Copy application code
 COPY src/ ./src/
 COPY pyproject.toml .
 COPY README.md .
-# Create directories for models and outputs
+# Copy test audio file for GPU health checks
-RUN mkdir -p /models /outputs
+COPY test.mp3 .
 # Copy nginx configuration
 COPY nginx/transcriptor.conf /etc/nginx/sites-available/transcriptor.conf
 # Copy entrypoint script and GPU reset script
 COPY docker-entrypoint.sh /docker-entrypoint.sh
 COPY reset_gpu.sh /app/reset_gpu.sh
 RUN chmod +x /docker-entrypoint.sh /app/reset_gpu.sh
 # Create directories for models, outputs, and logs
 RUN mkdir -p /models /outputs /logs /app/outputs/uploads /app/outputs/batch /app/outputs/jobs
 # Set Python path
 ENV PYTHONPATH=/app/src
-# Set environment variables for GPU
+# Default environment variables (can be overridden)
-ENV WHISPER_MODEL_DIR=/models
+ENV WHISPER_MODEL_DIR=/models \
-ENV TRANSCRIPTION_OUTPUT_DIR=/outputs
+    TRANSCRIPTION_OUTPUT_DIR=/outputs \
-ENV TRANSCRIPTION_MODEL=large-v3
+    TRANSCRIPTION_BATCH_OUTPUT_DIR=/outputs/batch \
-ENV TRANSCRIPTION_DEVICE=cuda
+    TRANSCRIPTION_MODEL=large-v3 \
-ENV TRANSCRIPTION_COMPUTE_TYPE=float16
+    TRANSCRIPTION_DEVICE=auto \
    TRANSCRIPTION_COMPUTE_TYPE=auto \
    TRANSCRIPTION_OUTPUT_FORMAT=txt \
    TRANSCRIPTION_BEAM_SIZE=5 \
    TRANSCRIPTION_TEMPERATURE=0.0 \
    API_HOST=127.0.0.1 \
    API_PORT=33767 \
    JOB_QUEUE_MAX_SIZE=5 \
    JOB_METADATA_DIR=/outputs/jobs \
    JOB_RETENTION_DAYS=7 \
    GPU_HEALTH_CHECK_ENABLED=true \
    GPU_HEALTH_CHECK_INTERVAL_MINUTES=10 \
    GPU_HEALTH_TEST_MODEL=tiny \
    GPU_HEALTH_TEST_AUDIO=/test-audio/test.mp3 \
    GPU_RESET_COOLDOWN_MINUTES=5 \
    SERVER_MODE=api
-# Run the server
+# Expose port 80 for nginx (API mode only)
-CMD ["python", "src/servers/whisper_server.py"]
+EXPOSE 80
 # Use entrypoint script to handle different server modes
 ENTRYPOINT ["/docker-entrypoint.sh"]
--- a/TRANSCRIPTOR_API_FIX.md
+++ b/TRANSCRIPTOR_API_FIX.md
@@ -0,0 +1,403 @@
 # Transcriptor API - Filename Validation Bug Fix
 ## Issue Summary
 The transcriptor API is rejecting valid audio files due to overly strict path validation. Files with `..` (double periods) anywhere in the filename are being rejected as potential path traversal attacks, even when they appear naturally in legitimate filenames.
 ## Current Behavior
 ### Error Observed
 ```json
 {
  "detail": {
    "error": "Upload failed",
    "message": "Audio file validation failed: Path traversal (..) is not allowed"
  }
 }
 ```
 ### HTTP Response
 - **Status Code**: 500
 - **Endpoint**: `POST /transcribe`
 - **Request**: File upload with filename containing `..`
 ### Example Failing Filename
 ```
 This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a
                                                      ^^^
                                            (Three dots, parsed as "..")
 ```
 ## Root Cause Analysis
 ### Current Validation Logic (Problematic)
 The API is likely checking for `..` anywhere in the filename string, which creates false positives:
 ```python
 # CURRENT (WRONG)
 if ".." in filename:
    raise ValidationError("Path traversal (..) is not allowed")
 ```
 This rejects legitimate filenames like:
 - `"video...mp4"` (ellipsis in title)
 - `"Part 1... Part 2.m4a"` (ellipsis separator)
 - `"Wait... what.mp4"` (dramatic pause)
 ### Actual Security Concern
 Path traversal attacks use `..` as **directory separators** to navigate up the filesystem:
 - `../../etc/passwd` (DANGEROUS)
 - `../../../secrets.txt` (DANGEROUS)
 - `video...mp4` (SAFE - just a filename)
 ## Recommended Fix
 ### Option 1: Path Component Validation (Recommended)
 Check for `..` only when it appears as a **complete path component**, not as part of the filename text.
 ```python
 import os
 from pathlib import Path
 def validate_filename(filename: str) -> bool:
    """
    Validate filename for path traversal attacks.
    Returns True if safe, raises ValidationError if dangerous.
    """
    # Normalize the path
    normalized = os.path.normpath(filename)
    # Check if normalization changed the path (indicates traversal)
    if normalized != filename:
        raise ValidationError(f"Path traversal detected: {filename}")
    # Check for absolute paths
    if os.path.isabs(filename):
        raise ValidationError(f"Absolute paths not allowed: {filename}")
    # Split into components and check for parent directory references
    parts = Path(filename).parts
    if ".." in parts:
        raise ValidationError(f"Parent directory references not allowed: {filename}")
    # Check for any path separators (should be basename only)
    if os.sep in filename or (os.altsep and os.altsep in filename):
        raise ValidationError(f"Path separators not allowed: {filename}")
    return True
 # Examples:
 validate_filename("video.mp4")                    # ✓ PASS
 validate_filename("video...mp4")                   # ✓ PASS (ellipsis)
 validate_filename("This is... a video.m4a")        # ✓ PASS
 validate_filename("../../../etc/passwd")           # ✗ FAIL (traversal)
 validate_filename("dir/../file.mp4")               # ✗ FAIL (traversal)
 validate_filename("/etc/passwd")                   # ✗ FAIL (absolute)
 ```
 ### Option 2: Basename-Only Validation (Simpler)
 Only accept basenames (no directory components at all):
 ```python
 import os
 def validate_filename(filename: str) -> bool:
    """
    Ensure filename contains no path components.
    """
    # Extract basename
    basename = os.path.basename(filename)
    # Must match original (no path components)
    if basename != filename:
        raise ValidationError(f"Filename must not contain path components: {filename}")
    # Additional check: no path separators
    if "/" in filename or "\\" in filename:
        raise ValidationError(f"Path separators not allowed: {filename}")
    return True
 # Examples:
 validate_filename("video.mp4")                    # ✓ PASS
 validate_filename("video...mp4")                   # ✓ PASS
 validate_filename("../file.mp4")                   # ✗ FAIL
 validate_filename("dir/file.mp4")                  # ✗ FAIL
 ```
 ### Option 3: Regex Pattern Matching (Most Strict)
 Use a whitelist approach for allowed characters:
 ```python
 import re
 def validate_filename(filename: str) -> bool:
    """
    Validate filename using whitelist of safe characters.
    """
    # Allow: letters, numbers, spaces, dots, hyphens, underscores
    # Length: 1-255 characters
    pattern = r'^[a-zA-Z0-9 .\-_]{1,255}\.[a-zA-Z0-9]{2,10}$'
    if not re.match(pattern, filename):
        raise ValidationError(f"Invalid filename format: {filename}")
    # Additional safety: reject if starts/ends with dot
    if filename.startswith('.') or filename.endswith('.'):
        raise ValidationError(f"Filename cannot start or end with dot: {filename}")
    return True
 # Examples:
 validate_filename("video.mp4")                    # ✓ PASS
 validate_filename("video...mp4")                   # ✓ PASS
 validate_filename("My Video... Part 2.m4a")        # ✓ PASS
 validate_filename("../file.mp4")                   # ✗ FAIL (starts with ..)
 validate_filename("file<>.mp4")                    # ✗ FAIL (invalid chars)
 ```
 ## Implementation Steps
 ### 1. Locate Current Validation Code
 Search for files containing the validation logic:
 ```bash
 grep -r "Path traversal" /path/to/transcriptor-api
 grep -r '".."' /path/to/transcriptor-api
 grep -r "normpath\|basename" /path/to/transcriptor-api
 ```
 ### 2. Update Validation Function
 Replace the current naive check with one of the recommended solutions above.
 **Priority Order:**
 1. **Option 1** (Path Component Validation) - Best security/usability balance
 2. **Option 2** (Basename-Only) - Simplest, very secure
 3. **Option 3** (Regex) - Most restrictive, may reject valid files
 ### 3. Test Cases
 Create comprehensive test suite:
 ```python
 import pytest
 def test_valid_filenames():
    """Test filenames that should be accepted."""
    valid_names = [
        "video.mp4",
        "audio.m4a",
        "This is... a test.mp4",
        "Part 1... Part 2.wav",
        "video...multiple...dots.mp3",
        "My-Video_2024.mp4",
        "song (remix).m4a",
    ]
    for filename in valid_names:
        assert validate_filename(filename), f"Should accept: {filename}"
 def test_dangerous_filenames():
    """Test filenames that should be rejected."""
    dangerous_names = [
        "../../../etc/passwd",
        "../../secrets.txt",
        "../file.mp4",
        "/etc/passwd",
        "C:\\Windows\\System32\\file.txt",
        "dir/../file.mp4",
        "file/../../etc/passwd",
    ]
    for filename in dangerous_names:
        with pytest.raises(ValidationError):
            validate_filename(filename)
 def test_edge_cases():
    """Test edge cases."""
    edge_cases = [
        (".", False),           # Current directory
        ("..", False),          # Parent directory
        ("...", True),          # Just dots (valid)
        ("....", True),         # Multiple dots (valid)
        (".hidden.mp4", True),  # Hidden file (valid on Unix)
        ("", False),            # Empty string
        ("a" * 256, False),     # Too long
    ]
    for filename, should_pass in edge_cases:
        if should_pass:
            assert validate_filename(filename)
        else:
            with pytest.raises(ValidationError):
                validate_filename(filename)
 ```
 ### 4. Update Error Response
 Provide clearer error messages:
 ```python
 # BAD (current)
 {"detail": {"error": "Upload failed", "message": "Audio file validation failed: Path traversal (..) is not allowed"}}
 # GOOD (improved)
 {
  "detail": {
    "error": "Invalid filename",
    "message": "Filename contains path traversal characters. Please use only the filename without directory paths.",
    "filename": "../../etc/passwd",
    "suggestion": "Use: passwd.txt"
  }
 }
 ```
 ## Testing the Fix
 ### Manual Testing
 1. **Test with problematic filename from bug report:**
   ```bash
   curl -X POST http://192.168.1.210:33767/transcribe \
     -F "file=@/path/to/This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a" \
     -F "model=medium"
   ```
   Expected: HTTP 200 (success)
 2. **Test with actual path traversal:**
   ```bash
   curl -X POST http://192.168.1.210:33767/transcribe \
     -F "file=@/tmp/test.m4a;filename=../../etc/passwd" \
     -F "model=medium"
   ```
   Expected: HTTP 400 (validation error)
 3. **Test with various ellipsis patterns:**
   - `"video...mp4"` → Should pass
   - `"Part 1... Part 2.m4a"` → Should pass
   - `"Wait... what!.mp4"` → Should pass
 ### Automated Testing
 ```python
 # integration_test.py
 import requests
 def test_ellipsis_filenames():
    """Test files with ellipsis in names."""
    test_cases = [
        "video...mp4",
        "This is... a test.m4a",
        "Wait... what.mp3",
    ]
    for filename in test_cases:
        response = requests.post(
            "http://192.168.1.210:33767/transcribe",
            files={"file": (filename, open("test_audio.m4a", "rb"))},
            data={"model": "medium"}
        )
        assert response.status_code == 200, f"Failed for: {filename}"
 ```
 ## Security Considerations
 ### What We're Protecting Against
 1. **Path Traversal**: `../../../sensitive/file`
 2. **Absolute Paths**: `/etc/passwd` or `C:\Windows\System32\`
 3. **Hidden Paths**: `./.git/config`
 ### What We're NOT Breaking
 1. **Ellipsis in titles**: `"Wait... what.mp4"`
 2. **Multiple extensions**: `"file.tar.gz"`
 3. **Special characters**: `"My Video (2024).mp4"`
 ### Additional Hardening (Optional)
 ```python
 def sanitize_and_validate_filename(filename: str) -> str:
    """
    Sanitize filename and validate for safety.
    Returns cleaned filename or raises error.
    """
    # Remove null bytes
    filename = filename.replace("\0", "")
    # Extract basename (strips any path components)
    filename = os.path.basename(filename)
    # Limit length
    max_length = 255
    if len(filename) > max_length:
        name, ext = os.path.splitext(filename)
        filename = name[:max_length-len(ext)] + ext
    # Validate
    validate_filename(filename)
    return filename
 ```
 ## Deployment Checklist
 - [ ] Update validation function with recommended fix
 - [ ] Add comprehensive test suite
 - [ ] Test with real-world filenames (including bug report case)
 - [ ] Test security: attempt path traversal attacks
 - [ ] Update API documentation
 - [ ] Review error messages for clarity
 - [ ] Deploy to staging environment
 - [ ] Run integration tests
 - [ ] Monitor logs for validation failures
 - [ ] Deploy to production
 - [ ] Verify bug reporter's file now works
 ## Contact & Context
 **Bug Report Date**: 2025-10-26
 **Affected Endpoint**: `POST /transcribe`
 **Error Code**: HTTP 500
 **Client Application**: yt-dlp-webui v3
 **Example Failing Request:**
 ```
 POST http://192.168.1.210:33767/transcribe
 Content-Type: multipart/form-data
 file: "This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a"
 model: "medium"
 ```
 **Current Behavior**: Returns 500 error with path traversal message
 **Expected Behavior**: Accepts file and processes transcription
 ---
 ## Quick Reference
 ### Files to Check
 - `/path/to/api/validators.py` or similar
 - `/path/to/api/upload_handler.py`
 - `/path/to/api/routes/transcribe.py`
 ### Search Commands
 ```bash
 # Find validation code
 rg "Path traversal" --type py
 rg '"\.\."' --type py
 rg "ValidationError.*filename" --type py
 # Find upload handlers
 rg "def.*upload|def.*transcribe" --type py
 ```
 ### Priority Fix
 Use **Option 1 (Path Component Validation)** - it provides the best balance of security and usability.
--- a/docker-build.sh
+++ b/docker-build.sh
@@ -0,0 +1,19 @@
 #!/bin/bash
 set -e
 datetime_prefix() {
  date "+[%Y-%m-%d %H:%M:%S]"
 }
 echo "$(datetime_prefix) Building Whisper Transcriptor Docker image..."
 # Build the Docker image
 docker build -t transcriptor-apimcp:latest .
 echo "$(datetime_prefix) Build complete!"
 echo "$(datetime_prefix) Image: transcriptor-apimcp:latest"
 echo ""
 echo "Usage:"
 echo "  API mode:  ./docker-run-api.sh"
 echo "  MCP mode:  ./docker-run-mcp.sh"
 echo "  Or use:    docker-compose up transcriptor-api"
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,106 @@
 version: '3.8'
 services:
  # API Server mode with nginx reverse proxy
  transcriptor-api:
    build:
      context: .
      dockerfile: Dockerfile
    image: transcriptor-apimcp:latest
    container_name: transcriptor-api
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: "0"
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
      SERVER_MODE: api
      API_HOST: 127.0.0.1
      API_PORT: 33767
      WHISPER_MODEL_DIR: /models
      TRANSCRIPTION_OUTPUT_DIR: /outputs
      TRANSCRIPTION_BATCH_OUTPUT_DIR: /outputs/batch
      TRANSCRIPTION_MODEL: large-v3
      TRANSCRIPTION_DEVICE: auto
      TRANSCRIPTION_COMPUTE_TYPE: auto
      TRANSCRIPTION_OUTPUT_FORMAT: txt
      TRANSCRIPTION_BEAM_SIZE: 5
      TRANSCRIPTION_TEMPERATURE: 0.0
      JOB_QUEUE_MAX_SIZE: 5
      JOB_METADATA_DIR: /outputs/jobs
      JOB_RETENTION_DAYS: 7
      GPU_HEALTH_CHECK_ENABLED: "true"
      GPU_HEALTH_CHECK_INTERVAL_MINUTES: 10
      GPU_HEALTH_TEST_MODEL: tiny
      GPU_HEALTH_TEST_AUDIO: /test-audio/test.mp3
      GPU_RESET_COOLDOWN_MINUTES: 5
      # Optional proxy settings (uncomment if needed)
      # HTTP_PROXY: http://192.168.1.212:8080
      # HTTPS_PROXY: http://192.168.1.212:8080
    ports:
      - "33767:80"  # Map host:33767 to container nginx:80
    volumes:
      - /home/uad/agents/tools/mcp-transcriptor/models:/models
      - /home/uad/agents/tools/mcp-transcriptor/outputs:/outputs
      - /home/uad/agents/tools/mcp-transcriptor/logs:/logs
      - /home/uad/agents/tools/mcp-transcriptor/data/test.mp3:/test-audio/test.mp3:ro
      - /etc/localtime:/etc/localtime:ro  # Sync container time with host
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped
    networks:
      - transcriptor-network
  # MCP Server mode (stdio based)
  transcriptor-mcp:
    build:
      context: .
      dockerfile: Dockerfile
    image: transcriptor-apimcp:latest
    container_name: transcriptor-mcp
    environment:
      SERVER_MODE: mcp
      WHISPER_MODEL_DIR: /models
      TRANSCRIPTION_OUTPUT_DIR: /outputs
      TRANSCRIPTION_BATCH_OUTPUT_DIR: /outputs/batch
      TRANSCRIPTION_MODEL: large-v3
      TRANSCRIPTION_DEVICE: auto
      TRANSCRIPTION_COMPUTE_TYPE: auto
      TRANSCRIPTION_OUTPUT_FORMAT: txt
      TRANSCRIPTION_BEAM_SIZE: 5
      TRANSCRIPTION_TEMPERATURE: 0.0
      JOB_QUEUE_MAX_SIZE: 100
      JOB_METADATA_DIR: /outputs/jobs
      JOB_RETENTION_DAYS: 7
      GPU_HEALTH_CHECK_ENABLED: "true"
      GPU_HEALTH_CHECK_INTERVAL_MINUTES: 10
      GPU_HEALTH_TEST_MODEL: tiny
      GPU_RESET_COOLDOWN_MINUTES: 5
      # Optional proxy settings (uncomment if needed)
      # HTTP_PROXY: http://192.168.1.212:8080
      # HTTPS_PROXY: http://192.168.1.212:8080
    volumes:
      - /home/uad/agents/tools/mcp-transcriptor/models:/models
      - /home/uad/agents/tools/mcp-transcriptor/outputs:/outputs
      - /home/uad/agents/tools/mcp-transcriptor/logs:/logs
      - /etc/localtime:/etc/localtime:ro
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    stdin_open: true  # Enable stdin for MCP stdio mode
    tty: true
    restart: unless-stopped
    networks:
      - transcriptor-network
    profiles:
      - mcp  # Only start when explicitly requested
 networks:
  transcriptor-network:
    driver: bridge
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -0,0 +1,67 @@
 #!/bin/bash
 set -e
 # Docker Entrypoint Script for Whisper Transcriptor
 # Supports both MCP and API server modes
 datetime_prefix() {
  date "+[%Y-%m-%d %H:%M:%S]"
 }
 echo "$(datetime_prefix) Starting Whisper Transcriptor in ${SERVER_MODE} mode..."
 # Ensure required directories exist
 mkdir -p "$WHISPER_MODEL_DIR"
 mkdir -p "$TRANSCRIPTION_OUTPUT_DIR"
 mkdir -p "$TRANSCRIPTION_BATCH_OUTPUT_DIR"
 mkdir -p "$JOB_METADATA_DIR"
 mkdir -p /app/outputs/uploads
 # Display GPU information
 if command -v nvidia-smi &> /dev/null; then
    echo "$(datetime_prefix) GPU Information:"
    nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader
 else
    echo "$(datetime_prefix) Warning: nvidia-smi not found. GPU may not be available."
 fi
 # Check server mode and start appropriate service
 case "${SERVER_MODE}" in
    "api")
        echo "$(datetime_prefix) Starting API Server mode with nginx reverse proxy"
        # Update nginx configuration to use correct backend
        sed -i "s/server 127.0.0.1:33767;/server ${API_HOST}:${API_PORT};/" /etc/nginx/sites-available/transcriptor.conf
        # Enable nginx site
        ln -sf /etc/nginx/sites-available/transcriptor.conf /etc/nginx/sites-enabled/
        rm -f /etc/nginx/sites-enabled/default
        # Test nginx configuration
        echo "$(datetime_prefix) Testing nginx configuration..."
        nginx -t
        # Start nginx in background
        echo "$(datetime_prefix) Starting nginx..."
        nginx
        # Start API server (foreground - this keeps container running)
        echo "$(datetime_prefix) Starting API server on ${API_HOST}:${API_PORT}"
        echo "$(datetime_prefix) API accessible via nginx on port 80"
        exec python -u /app/src/servers/api_server.py
        ;;
    "mcp")
        echo "$(datetime_prefix) Starting MCP Server mode (stdio)"
        echo "$(datetime_prefix) Model directory: $WHISPER_MODEL_DIR"
        # Start MCP server in stdio mode
        exec python -u /app/src/servers/whisper_server.py
        ;;
    *)
        echo "$(datetime_prefix) ERROR: Invalid SERVER_MODE: ${SERVER_MODE}"
        echo "$(datetime_prefix) Valid modes: 'api' or 'mcp'"
        exit 1
        ;;
 esac
--- a/docker-run-api.sh
+++ b/docker-run-api.sh
@@ -0,0 +1,62 @@
 #!/bin/bash
 set -e
 datetime_prefix() {
  date "+[%Y-%m-%d %H:%M:%S]"
 }
 echo "$(datetime_prefix) Starting Whisper Transcriptor in API mode with nginx..."
 # Check if image exists
 if ! docker image inspect transcriptor-apimcp:latest &> /dev/null; then
    echo "$(datetime_prefix) Image not found. Building first..."
    ./docker-build.sh
 fi
 # Stop and remove existing container if running
 if docker ps -a --format '{{.Names}}' | grep -q '^transcriptor-api$'; then
    echo "$(datetime_prefix) Stopping existing container..."
    docker stop transcriptor-api || true
    docker rm transcriptor-api || true
 fi
 # Run the container in API mode
 docker run -d \
    --name transcriptor-api \
    --gpus all \
    -p 33767:80 \
    -e SERVER_MODE=api \
    -e API_HOST=127.0.0.1 \
    -e API_PORT=33767 \
    -e CUDA_VISIBLE_DEVICES=0 \
    -e TRANSCRIPTION_MODEL=large-v3 \
    -e TRANSCRIPTION_DEVICE=auto \
    -e TRANSCRIPTION_COMPUTE_TYPE=auto \
    -e JOB_QUEUE_MAX_SIZE=5 \
    -v "$(pwd)/models:/models" \
    -v "$(pwd)/outputs:/outputs" \
    -v "$(pwd)/logs:/logs" \
    --restart unless-stopped \
    transcriptor-apimcp:latest
 echo "$(datetime_prefix) Container started!"
 echo ""
 echo "API Server running at: http://localhost:33767"
 echo ""
 echo "Useful commands:"
 echo "  Check logs:        docker logs -f transcriptor-api"
 echo "  Check status:      docker ps | grep transcriptor-api"
 echo "  Test health:       curl http://localhost:33767/health"
 echo "  Test GPU:          curl http://localhost:33767/health/gpu"
 echo "  Stop container:    docker stop transcriptor-api"
 echo "  Restart:           docker restart transcriptor-api"
 echo ""
 echo "$(datetime_prefix) Waiting for service to start..."
 sleep 5
 # Test health endpoint
 if curl -s http://localhost:33767/health > /dev/null 2>&1; then
    echo "$(datetime_prefix) ✓ Service is healthy!"
 else
    echo "$(datetime_prefix) ⚠ Service not responding yet. Check logs with: docker logs transcriptor-api"
 fi
--- a/docker-run-mcp.sh
+++ b/docker-run-mcp.sh
@@ -0,0 +1,40 @@
 #!/bin/bash
 set -e
 datetime_prefix() {
  date "+[%Y-%m-%d %H:%M:%S]"
 }
 echo "$(datetime_prefix) Starting Whisper Transcriptor in MCP mode..."
 # Check if image exists
 if ! docker image inspect transcriptor-apimcp:latest &> /dev/null; then
    echo "$(datetime_prefix) Image not found. Building first..."
    ./docker-build.sh
 fi
 # Stop and remove existing container if running
 if docker ps -a --format '{{.Names}}' | grep -q '^transcriptor-mcp$'; then
    echo "$(datetime_prefix) Stopping existing container..."
    docker stop transcriptor-mcp || true
    docker rm transcriptor-mcp || true
 fi
 # Run the container in MCP mode (interactive stdio)
 echo "$(datetime_prefix) Starting MCP server in stdio mode..."
 echo "$(datetime_prefix) Press Ctrl+C to stop"
 echo ""
 docker run -it --rm \
    --name transcriptor-mcp \
    --gpus all \
    -e SERVER_MODE=mcp \
    -e CUDA_VISIBLE_DEVICES=0 \
    -e TRANSCRIPTION_MODEL=large-v3 \
    -e TRANSCRIPTION_DEVICE=auto \
    -e TRANSCRIPTION_COMPUTE_TYPE=auto \
    -e JOB_QUEUE_MAX_SIZE=100 \
    -v "$(pwd)/models:/models" \
    -v "$(pwd)/outputs:/outputs" \
    -v "$(pwd)/logs:/logs" \
    transcriptor-apimcp:latest
--- a/reset_gpu.sh
+++ b/reset_gpu.sh
@@ -2,12 +2,28 @@
 # Script to reset NVIDIA GPU drivers without rebooting
 # This reloads kernel modules and restarts nvidia-persistenced service
 # Also handles stopping/starting Ollama to release GPU resources
 echo "============================================================"
 echo "NVIDIA GPU Driver Reset Script"
 echo "============================================================"
 echo ""
 # Stop Ollama via supervisorctl
 echo "Stopping Ollama service..."
 sudo supervisorctl stop ollama 2>/dev/null
 if [ $? -eq 0 ]; then
    echo "✓ Ollama stopped via supervisorctl"
    OLLAMA_WAS_RUNNING=true
 else
    echo "  Ollama not running or supervisorctl not available"
    OLLAMA_WAS_RUNNING=false
 fi
 echo ""
 # Give Ollama time to release GPU resources
 sleep 2
 # Stop nvidia-persistenced service
 echo "Stopping nvidia-persistenced service..."
 sudo systemctl stop nvidia-persistenced
@@ -65,6 +81,18 @@ else
 fi
 echo ""
 # Restart Ollama if it was running
 if [ "$OLLAMA_WAS_RUNNING" = true ]; then
    echo "Restarting Ollama service..."
    sudo supervisorctl start ollama
    if [ $? -eq 0 ]; then
        echo "✓ Ollama restarted"
    else
        echo "✗ Failed to restart Ollama"
    fi
    echo ""
 fi
 echo "============================================================"
 echo "GPU driver reset completed successfully"
 echo "============================================================"
--- a/src/core/gpu_health.py
+++ b/src/core/gpu_health.py
@@ -6,6 +6,7 @@ with strict failure handling to prevent silent CPU fallbacks.
 Includes circuit breaker pattern to prevent repeated failed checks.
 """
 import os
 import time
 import logging
 import threading
@@ -14,7 +15,6 @@ from datetime import datetime
 from typing import Optional, List
 import torch
 from utils.test_audio_generator import generate_test_audio
 from utils.circuit_breaker import CircuitBreaker, CircuitBreakerOpen
 logger = logging.getLogger(__name__)
@@ -109,8 +109,18 @@ def _check_gpu_health_internal(expected_device: str = "auto") -> GPUHealthStatus
            logger.warning(f"Failed to get GPU info: {e}")
    try:
-        # Generate test audio
+        # Get test audio path from environment variable
-        test_audio_path = generate_test_audio(duration_seconds=1.0)
+        test_audio_path = os.getenv("GPU_HEALTH_TEST_AUDIO")
        if not test_audio_path:
            raise ValueError("GPU_HEALTH_TEST_AUDIO environment variable not set")
        # Verify test audio file exists
        if not os.path.exists(test_audio_path):
            raise FileNotFoundError(
                f"Test audio file not found: {test_audio_path}. "
                f"Please ensure test audio exists before running GPU health checks."
            )
        # Import here to avoid circular dependencies
        from faster_whisper import WhisperModel
@@ -129,6 +139,7 @@ def _check_gpu_health_internal(expected_device: str = "auto") -> GPUHealthStatus
            gpu_memory_before = torch.cuda.memory_allocated(0)
        # Load tiny model and transcribe
        model = None
        try:
            model = WhisperModel(
                "tiny",
@@ -140,7 +151,7 @@ def _check_gpu_health_internal(expected_device: str = "auto") -> GPUHealthStatus
            segments, info = model.transcribe(test_audio_path, beam_size=1)
            # Consume segments (needed to actually run inference)
-            list(segments)
+            segments_list = list(segments)
            # Check if GPU was actually used
            # faster-whisper uses CTranslate2 which manages GPU memory separately
@@ -156,6 +167,19 @@ def _check_gpu_health_internal(expected_device: str = "auto") -> GPUHealthStatus
            actual_device = "cpu"
            gpu_working = False
            logger.error(f"GPU health check failed: {error_msg}")
        finally:
            # Clean up model resources to prevent GPU memory leak
            if model is not None:
                try:
                    del model
                    segments_list = None
                    # Force garbage collection and empty CUDA cache
                    import gc
                    gc.collect()
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                except Exception as cleanup_error:
                    logger.warning(f"Error cleaning up GPU health check model: {cleanup_error}")
    except Exception as e:
        error_msg = f"Health check setup failed: {str(e)}"
--- a/src/core/transcriber.py
+++ b/src/core/transcriber.py
@@ -129,6 +129,9 @@ def transcribe_audio(
            logger.info("Using standard model for transcription...")
            segments, info = model_instance['model'].transcribe(audio_source, **options)
        # Convert segments generator to list to release model resources
        segments = list(segments)
        # Determine output directory and path early
        audio_dir = os.path.dirname(audio_path)
        audio_filename = os.path.splitext(os.path.basename(audio_path))[0]
@@ -253,6 +256,19 @@ def transcribe_audio(
        logger.error(f"Transcription failed: {str(e)}")
        return f"Error occurred during transcription: {str(e)}"
    finally:
        # Force GPU memory cleanup after transcription to prevent accumulation
        if device == "cuda":
            import torch
            import gc
            # Clear segments list to free memory
            segments = None
            # Force garbage collection
            gc.collect()
            # Empty CUDA cache
            torch.cuda.empty_cache()
            logger.debug("GPU memory cleaned up after transcription")
 def batch_transcribe(
    audio_folder: str,
--- a/src/servers/api_server.py
+++ b/src/servers/api_server.py
@@ -33,7 +33,8 @@ from utils.input_validation import (
    validate_model_name,
    validate_device,
    validate_compute_type,
-    validate_output_format
+    validate_output_format,
    validate_filename_safe
 )
 # Logging configuration
@@ -218,6 +219,8 @@ async def transcribe_upload(
    try:
        # Validate form parameters early
        try:
            # Validate filename for security (basename-only, no path traversal)
            validate_filename_safe(file.filename)
            model = validate_model_name(model)
            output_format = validate_output_format(output_format)
            beam_size = validate_beam_size(beam_size)
@@ -663,9 +666,11 @@ if __name__ == "__main__":
    # Perform startup GPU health check
    from utils.startup import perform_startup_gpu_check
    # Disable auto_reset in Docker (sudo not available, GPU reset won't work)
    in_docker = os.path.exists('/.dockerenv')
    perform_startup_gpu_check(
        required_device="cuda",
-        auto_reset=True,
+        auto_reset=not in_docker,
        exit_on_failure=True
    )
--- a/src/utils/input_validation.py
+++ b/src/utils/input_validation.py
@@ -88,6 +88,72 @@ def sanitize_error_message(error_msg: str, sanitize_paths: bool = True) -> str:
    return sanitized
 def validate_filename_safe(filename: str) -> str:
    """
    Validate uploaded filename for security (basename-only validation).
    This function is specifically for validating uploaded filenames to ensure
    they don't contain path traversal attempts. It enforces that the filename:
    - Contains no directory separators (/, \)
    - Has no path components (must be basename only)
    - Contains no null bytes
    - Has a valid audio file extension
    Args:
        filename: Filename to validate (should be basename only, not full path)
    Returns:
        Validated filename (unchanged if valid)
    Raises:
        ValidationError: If filename is invalid or empty
        PathTraversalError: If filename contains path components or traversal attempts
        InvalidFileTypeError: If file extension is not allowed
    Examples:
        validate_filename_safe("video.mp4")                    # ✓ PASS
        validate_filename_safe("audio...mp3")                  # ✓ PASS (ellipsis OK)
        validate_filename_safe("Wait... what.m4a")             # ✓ PASS
        validate_filename_safe("../../../etc/passwd")          # ✗ FAIL (traversal)
        validate_filename_safe("dir/file.mp4")                 # ✗ FAIL (path separator)
        validate_filename_safe("/etc/passwd")                  # ✗ FAIL (absolute path)
    """
    if not filename:
        raise ValidationError("Filename cannot be empty")
    # Check for null bytes
    if "\x00" in filename:
        logger.warning(f"Null byte in filename detected: {filename}")
        raise PathTraversalError("Null bytes in filename are not allowed")
    # Extract basename - if it differs from original, filename contained path components
    basename = os.path.basename(filename)
    if basename != filename:
        logger.warning(f"Filename contains path components: {filename}")
        raise PathTraversalError(
            "Filename must not contain path components. "
            f"Use only the filename: {basename}"
        )
    # Additional check: explicitly reject any path separators
    if "/" in filename or "\\" in filename:
        logger.warning(f"Path separators in filename: {filename}")
        raise PathTraversalError("Path separators (/ or \\) are not allowed in filename")
    # Check file extension (case-insensitive)
    file_ext = Path(filename).suffix.lower()
    if not file_ext:
        raise InvalidFileTypeError("Filename must have a file extension")
    if file_ext not in ALLOWED_AUDIO_EXTENSIONS:
        raise InvalidFileTypeError(
            f"Unsupported audio format: {file_ext}. "
            f"Supported: {', '.join(sorted(ALLOWED_AUDIO_EXTENSIONS))}"
        )
    return filename
 def validate_path_safe(file_path: str, allowed_dirs: Optional[List[str]] = None) -> Path:
    """
    Validate and sanitize a file path to prevent directory traversal attacks.
--- a/supervisor/transcriptor-api.conf
+++ b/supervisor/transcriptor-api.conf
@@ -1,26 +0,0 @@
 [program:transcriptor-api]
 command=/home/uad/agents/tools/mcp-transcriptor/venv/bin/python /home/uad/agents/tools/mcp-transcriptor/src/servers/api_server.py
 directory=/home/uad/agents/tools/mcp-transcriptor
 user=uad
 autostart=true
 autorestart=true
 redirect_stderr=true
 stdout_logfile=/home/uad/agents/tools/mcp-transcriptor/logs/transcriptor-api.log
 stdout_logfile_maxbytes=50MB
 stdout_logfile_backups=10
 environment=
    PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src",
    CUDA_VISIBLE_DEVICES="0",
    API_HOST="0.0.0.0",
    API_PORT="33767",
    WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/models",
    TRANSCRIPTION_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs",
    TRANSCRIPTION_BATCH_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs/batch",
    TRANSCRIPTION_MODEL="large-v3",
    TRANSCRIPTION_DEVICE="auto",
    TRANSCRIPTION_COMPUTE_TYPE="auto",
    TRANSCRIPTION_OUTPUT_FORMAT="txt",
    HTTP_PROXY=http://192.168.1.212:8080,
    HTTPS_PROXY=http://192.168.1.212:8080
 stopwaitsecs=10
 stopsignal=TERM
--- a/test.mp3
+++ b/test.mp3
--- a/test_filename_fix.py
+++ b/test_filename_fix.py
@@ -0,0 +1,60 @@
 #!/usr/bin/env python3
 """
 Quick manual test to verify the filename validation fix.
 Tests the exact case from the bug report.
 """
 import sys
 sys.path.insert(0, 'src')
 from utils.input_validation import validate_filename_safe, PathTraversalError
 print("\n" + "="*70)
 print("FILENAME VALIDATION FIX - MANUAL TEST")
 print("="*70 + "\n")
 # Bug report case
 bug_report_filename = "This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a"
 print(f"Testing bug report filename:")
 print(f"  '{bug_report_filename}'")
 print()
 try:
    result = validate_filename_safe(bug_report_filename)
    print(f"✅ SUCCESS: Filename accepted!")
    print(f"   Returned: '{result}'")
 except PathTraversalError as e:
    print(f"❌ FAILED: {e}")
    sys.exit(1)
 except Exception as e:
    print(f"❌ ERROR: {e}")
    sys.exit(1)
 print()
 # Test that security still works
 print("Verifying security (path traversal should still be blocked):")
 dangerous_filenames = [
    "../../../etc/passwd",
    "../../secrets.txt",
    "dir/file.m4a",
 ]
 for dangerous in dangerous_filenames:
    try:
        validate_filename_safe(dangerous)
        print(f"❌ SECURITY ISSUE: '{dangerous}' was accepted (should be blocked!)")
        sys.exit(1)
    except PathTraversalError:
        print(f"✅ '{dangerous}' correctly blocked")
 print()
 print("="*70)
 print("ALL TESTS PASSED! ✅")
 print("="*70)
 print()
 print("The fix is working correctly:")
 print("  ✓ Filenames with ellipsis (...) are now accepted")
 print("  ✓ Path traversal attacks are still blocked")
 print()
--- a/tests/test_input_validation.py
+++ b/tests/test_input_validation.py
@@ -0,0 +1,281 @@
 #!/usr/bin/env python3
 """
 Tests for input validation module, specifically filename validation.
 Tests the security-critical validate_filename_safe() function to ensure
 it correctly blocks path traversal attacks while allowing legitimate filenames.
 """
 import sys
 import os
 import pytest
 # Add src to path (go up one level from tests/ to root)
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 from src.utils.input_validation import (
    validate_filename_safe,
    ValidationError,
    PathTraversalError,
    InvalidFileTypeError,
    ALLOWED_AUDIO_EXTENSIONS
 )
 class TestValidFilenameSafe:
    """Test validate_filename_safe() function with various inputs."""
    def test_simple_valid_filenames(self):
        """Test that simple, valid filenames are accepted."""
        valid_names = [
            "audio.m4a",
            "song.wav",
            "podcast.mp3",
            "recording.flac",
            "music.ogg",
            "voice.aac",
        ]
        for filename in valid_names:
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept: {filename}"
    def test_filenames_with_ellipsis(self):
        """Test filenames with ellipsis (multiple dots) are accepted."""
        # This is the key test case from the bug report
        ellipsis_names = [
            "audio...mp3",
            "This is... a test.m4a",
            "Part 1... Part 2.wav",
            "Wait... what.m4a",
            "video...multiple...dots.mp3",
            "This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a",  # Bug report case
        ]
        for filename in ellipsis_names:
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept filename with ellipsis: {filename}"
    def test_filenames_with_special_chars(self):
        """Test filenames with various special characters."""
        special_char_names = [
            "My-Video_2024.m4a",
            "song (remix).m4a",
            "audio [final].wav",
            "test file with spaces.mp3",
            "file-name_with-symbols.flac",
        ]
        for filename in special_char_names:
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept: {filename}"
    def test_multiple_extensions(self):
        """Test filenames that look like they have multiple extensions."""
        multi_ext_names = [
            "backup.tar.gz.mp3",  # .mp3 is valid
            "file.old.wav",       # .wav is valid
            "audio.2024.m4a",     # .m4a is valid
        ]
        for filename in multi_ext_names:
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept: {filename}"
    def test_path_traversal_attempts(self):
        """Test that path traversal attempts are rejected."""
        dangerous_names = [
            "../../../etc/passwd",
            "../../secrets.txt",
            "../file.mp4",
            "dir/../file.mp4",
            "file/../../etc/passwd",
        ]
        for filename in dangerous_names:
            with pytest.raises(PathTraversalError) as exc_info:
                validate_filename_safe(filename)
            assert "path" in str(exc_info.value).lower(), f"Should reject path traversal: {filename}"
    def test_absolute_paths(self):
        """Test that absolute paths are rejected."""
        absolute_paths = [
            "/etc/passwd",
            "/tmp/file.mp4",
            "/home/user/audio.wav",
            "C:\\Windows\\System32\\file.mp3",  # Windows path
            "\\\\server\\share\\file.m4a",       # UNC path
        ]
        for filename in absolute_paths:
            with pytest.raises(PathTraversalError) as exc_info:
                validate_filename_safe(filename)
            assert "path" in str(exc_info.value).lower(), f"Should reject absolute path: {filename}"
    def test_path_separators(self):
        """Test that filenames with path separators are rejected."""
        paths_with_separators = [
            "dir/file.mp4",
            "folder\\file.wav",
            "path/to/audio.m4a",
            "a/b/c/d.mp3",
        ]
        for filename in paths_with_separators:
            with pytest.raises(PathTraversalError) as exc_info:
                validate_filename_safe(filename)
            assert "separator" in str(exc_info.value).lower() or "path" in str(exc_info.value).lower(), \
                f"Should reject path with separators: {filename}"
    def test_null_bytes(self):
        """Test that filenames with null bytes are rejected."""
        null_byte_names = [
            "file\x00.mp4",
            "\x00malicious.wav",
            "audio\x00evil.m4a",
        ]
        for filename in null_byte_names:
            with pytest.raises(PathTraversalError) as exc_info:
                validate_filename_safe(filename)
            assert "null" in str(exc_info.value).lower(), f"Should reject null bytes: {repr(filename)}"
    def test_empty_filename(self):
        """Test that empty filename is rejected."""
        with pytest.raises(ValidationError) as exc_info:
            validate_filename_safe("")
        assert "empty" in str(exc_info.value).lower()
    def test_no_extension(self):
        """Test that filenames without extensions are rejected."""
        no_ext_names = [
            "filename",
            "noextension",
        ]
        for filename in no_ext_names:
            with pytest.raises(InvalidFileTypeError) as exc_info:
                validate_filename_safe(filename)
            assert "extension" in str(exc_info.value).lower(), f"Should reject no extension: {filename}"
    def test_invalid_extensions(self):
        """Test that unsupported file extensions are rejected."""
        invalid_ext_names = [
            "document.pdf",
            "image.png",
            "video.avi",
            "script.sh",
            "executable.exe",
            "text.txt",
        ]
        for filename in invalid_ext_names:
            with pytest.raises(InvalidFileTypeError) as exc_info:
                validate_filename_safe(filename)
            assert "unsupported" in str(exc_info.value).lower() or "format" in str(exc_info.value).lower(), \
                f"Should reject invalid extension: {filename}"
    def test_case_insensitive_extensions(self):
        """Test that file extensions are case-insensitive."""
        case_variations = [
            "audio.MP3",
            "sound.WAV",
            "music.M4A",
            "podcast.FLAC",
            "voice.AAC",
        ]
        for filename in case_variations:
            # Should not raise exception
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept case variation: {filename}"
    def test_edge_cases(self):
        """Test various edge cases."""
        # Just dots (but with valid extension) - should pass
        assert validate_filename_safe("...mp3") == "...mp3"
        assert validate_filename_safe("....wav") == "....wav"
        # Filenames starting with dot (hidden files on Unix)
        assert validate_filename_safe(".hidden.m4a") == ".hidden.m4a"
        # Very long filename (but valid)
        long_name = "a" * 200 + ".mp3"
        assert validate_filename_safe(long_name) == long_name
    def test_allowed_extensions_comprehensive(self):
        """Test all allowed extensions from ALLOWED_AUDIO_EXTENSIONS."""
        for ext in ALLOWED_AUDIO_EXTENSIONS:
            filename = f"test{ext}"
            result = validate_filename_safe(filename)
            assert result == filename, f"Should accept allowed extension: {ext}"
 class TestBugReportCase:
    """Specific test for the bug report case."""
    def test_bug_report_filename(self):
        """
        Test the exact filename from the bug report that was failing.
        Bug: "This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a"
        was being rejected due to "..." being parsed as ".."
        """
        filename = "This Weird FPV Drone only takes one kind of Battery... Rekon 35 V2.m4a"
        # Should NOT raise any exception
        result = validate_filename_safe(filename)
        assert result == filename
    def test_various_ellipsis_patterns(self):
        """Test various ellipsis patterns that should all be accepted."""
        patterns = [
            "...",       # Three dots
            "....",      # Four dots
            ".....",     # Five dots
            "file...end.mp3",
            "start...middle...end.wav",
        ]
        for pattern in patterns:
            if not pattern.endswith(tuple(f"{ext}" for ext in ALLOWED_AUDIO_EXTENSIONS)):
                pattern += ".mp3"  # Add valid extension
            result = validate_filename_safe(pattern)
            assert result == pattern
 class TestSecurityBoundary:
    """Test the security boundary between safe and dangerous filenames."""
    def test_just_two_dots_vs_path_separator(self):
        """
        Test the critical distinction:
        - "file..mp3" (two dots in filename) = SAFE
        - "../file.mp3" (two dots as path component) = DANGEROUS
        """
        # Safe: dots within filename
        safe_filenames = [
            "file..mp3",
            "..file.mp3",
            "file...mp3",
            "f..i..l..e.mp3",
        ]
        for filename in safe_filenames:
            result = validate_filename_safe(filename)
            assert result == filename, f"Should be safe: {filename}"
        # Dangerous: dots as directory reference
        dangerous_filenames = [
            "../file.mp3",
            "../../file.mp3",
            "dir/../file.mp3",
        ]
        for filename in dangerous_filenames:
            with pytest.raises(PathTraversalError):
                validate_filename_safe(filename)
 if __name__ == "__main__":
    pytest.main([__file__, "-v"])