# drone-footage-manager/backend/filesystem_health.py

import asyncio
import errno
import inspect
import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Configuration constants
HEALTH_CHECK_INTERVAL_SECONDS = 3600  # Delay between periodic health checks (3600 s = 60 minutes)
TEST_FILE_PREFIX = ".write_test_"  # Name prefix of the temporary probe file; a millisecond timestamp is appended


class FilesystemHealthChecker:
    """
    Monitors filesystem write permissions by periodically attempting to write a test file.
    Tracks health status and provides real-time updates to the application.

    Attributes:
        base_path: Resolved directory in which the probe file is created.
        is_healthy: Outcome of the most recent check (True until a check fails).
        last_check_time: Naive local time at which the most recent check
            started, or None if no check has run yet.
        error_message: Description of the most recent failure, or None.
    """

    def __init__(self, base_path: Path):
        """Create a checker for ``base_path``; no check runs until requested."""
        self.base_path = base_path.resolve()
        self.is_healthy = True
        self.last_check_time: Optional[datetime] = None
        self.error_message: Optional[str] = None
        self._monitoring_task: Optional[asyncio.Task] = None
        self._status_change_callbacks = []

    def add_status_change_callback(self, callback):
        """
        Register a callback to be notified when health status changes.

        The callback receives the dict returned by ``get_status()``. It may be
        a plain callable or return an awaitable, which is then awaited.
        """
        self._status_change_callbacks.append(callback)

    async def _notify_status_change(self):
        """
        Notify all registered callbacks of status change.

        Exceptions raised by a callback are logged and do not prevent the
        remaining callbacks from running.
        """
        # Iterate over a snapshot so a callback may register further callbacks
        # without disturbing this loop.
        for callback in list(self._status_change_callbacks):
            try:
                result = callback(self.get_status())
                # Inspect the returned value rather than the callable: this
                # also awaits callable objects with ``async def __call__`` and
                # lambdas that return a coroutine.
                if inspect.isawaitable(result):
                    await result
            except Exception as e:
                logger.error("Error in status change callback: %s", e)

    def _probe_write(self) -> Optional[str]:
        """
        Synchronously write, read back and delete a probe file in ``base_path``.

        Returns None when every step succeeds, otherwise a human-readable
        description of the failure. Does not modify instance state, so it can
        safely run in a worker thread.
        """
        test_file_path = None
        try:
            # Generate unique test file name with timestamp
            timestamp = int(time.time() * 1000)
            test_file_path = self.base_path / f"{TEST_FILE_PREFIX}{timestamp}"

            logger.debug("Testing write permission: %s", test_file_path)

            # Attempt to write test file
            test_file_path.write_text(f"Health check at {datetime.now().isoformat()}\n")

            # Verify file exists and is readable
            if not test_file_path.exists():
                raise IOError("Test file was not created successfully")
            if not test_file_path.read_text():
                raise IOError("Test file is empty after write")

            # Clean up test file immediately; a failure to delete counts as a
            # failed check, exactly like a failure to write.
            test_file_path.unlink()

            logger.debug("Write permission test passed")
            return None

        except PermissionError as e:
            logger.error("Permission denied writing to %s: %s", self.base_path, e)
            return f"Permission denied: {e}"

        except OSError as e:
            logger.error("OS error writing to %s: %s", self.base_path, e)
            # errno is locale-independent; the message match is kept as a
            # fallback for errors that carry no errno.
            if e.errno == errno.EROFS or "Read-only file system" in str(e):
                return "Filesystem is mounted as read-only"
            return f"OS error: {e}"

        except Exception as e:
            logger.error("Unexpected error during write test: %s", e, exc_info=True)
            return f"Unexpected error: {e}"

        finally:
            # Ensure cleanup even if error occurs. Unlink directly instead of
            # checking exists() first, which would be racy.
            if test_file_path is not None:
                try:
                    test_file_path.unlink()
                    logger.debug("Cleaned up test file: %s", test_file_path)
                except FileNotFoundError:
                    pass  # Already removed, or never created
                except Exception as e:
                    logger.warning("Failed to clean up test file %s: %s", test_file_path, e)

    async def check_write_permission(self) -> bool:
        """
        Attempt to write a test file to verify write permissions.

        The blocking file I/O runs in the event loop's default executor so a
        slow or hung filesystem does not stall the rest of the application.

        Returns True if write successful, False otherwise. On failure,
        ``error_message`` is set to a description of the problem.
        """
        loop = asyncio.get_running_loop()
        error = await loop.run_in_executor(None, self._probe_write)
        if error is None:
            return True
        self.error_message = error
        return False

    async def perform_health_check(self) -> Dict:
        """
        Perform a single health check and update status.

        Registered callbacks are notified only when the healthy/unhealthy
        state differs from the state before this check.

        Returns the current health status (see ``get_status``).
        """
        previous_health = self.is_healthy
        self.last_check_time = datetime.now()

        can_write = await self.check_write_permission()

        if can_write:
            self.is_healthy = True
            self.error_message = None
            logger.info(
                "Filesystem health check PASSED at %s", self.last_check_time.isoformat()
            )
        else:
            self.is_healthy = False
            logger.error(
                "Filesystem health check FAILED at %s: %s",
                self.last_check_time.isoformat(),
                self.error_message,
            )

        # Notify if status changed
        if previous_health != self.is_healthy:
            await self._notify_status_change()

        return self.get_status()

    def get_status(self) -> Dict:
        """
        Get current health status.

        Returns a dict with keys ``healthy`` (bool), ``last_check`` (ISO-8601
        string or None), ``error`` (str or None) and ``base_path`` (str).
        """
        return {
            "healthy": self.is_healthy,
            "last_check": self.last_check_time.isoformat() if self.last_check_time else None,
            "error": self.error_message,
            "base_path": str(self.base_path),
        }

    async def _monitoring_loop(self):
        """
        Background task that periodically checks filesystem health.

        Runs a check, then sleeps HEALTH_CHECK_INTERVAL_SECONDS, until the
        task is cancelled. Unexpected errors from a check are logged and the
        loop continues after the usual interval.
        """
        interval_minutes = HEALTH_CHECK_INTERVAL_SECONDS / 60
        logger.info(
            "Starting filesystem health monitoring for %s (interval: %.0f minutes)",
            self.base_path,
            interval_minutes,
        )

        # A single outer handler covers cancellation at any await point,
        # including the sleep that follows a failed check.
        try:
            while True:
                try:
                    await self.perform_health_check()
                except asyncio.CancelledError:
                    # On Python 3.7 CancelledError derives from Exception, so
                    # it must be re-raised before the generic handler below.
                    raise
                except Exception as e:
                    logger.error("Error in health monitoring loop: %s", e, exc_info=True)
                await asyncio.sleep(HEALTH_CHECK_INTERVAL_SECONDS)
        except asyncio.CancelledError:
            logger.info("Filesystem health monitoring stopped")

    def start_monitoring(self):
        """
        Start the background health monitoring task.

        Must be called while an event loop is running, since the task is
        created with ``asyncio.create_task``. Does nothing except log a
        warning if a monitoring task is already active.
        """
        if self._monitoring_task is None or self._monitoring_task.done():
            self._monitoring_task = asyncio.create_task(self._monitoring_loop())
            logger.info("Filesystem health monitoring started")
        else:
            logger.warning("Monitoring task already running")

    def stop_monitoring(self):
        """Stop the background health monitoring task if it is still running."""
        if self._monitoring_task and not self._monitoring_task.done():
            self._monitoring_task.cancel()
            logger.info("Filesystem health monitoring stopped")