- Implement periodic filesystem write permission checks (60-minute intervals) - Add real-time health status monitoring with SSE endpoints - Display system health banner when storage issues detected - Limit compression to 1 concurrent job with queue support - Add max queue limit of 10 pending jobs - Show queue positions for pending compression jobs - Update button text dynamically (Start/Queue Compression) - Enable write access to footage mount in Docker - Add comprehensive logging for health checks and compression Co-Authored-By: Alihan <alihan@example.com>
171 lines
6.3 KiB
Python
171 lines
6.3 KiB
Python
import asyncio
import errno
import inspect
import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
|
|
|
|
# Configure logging
logger = logging.getLogger(__name__)

# Configuration constants
HEALTH_CHECK_INTERVAL_SECONDS = 3600  # Check every 60 minutes
TEST_FILE_PREFIX = ".write_test_"  # Probe files are named <prefix><epoch milliseconds>


class FilesystemHealthChecker:
    """
    Monitors filesystem write permissions by periodically attempting to write a test file.
    Tracks health status and provides real-time updates to the application.

    Public state:
        base_path: resolved directory in which probe files are created.
        is_healthy: result of the most recent check (True until a check fails).
        last_check_time: local time at which the most recent check started, or None.
        error_message: description of the most recent failure, or None.
    """

    def __init__(self, base_path: Path):
        """Create a checker for ``base_path``; no I/O happens until a check runs."""
        self.base_path = base_path.resolve()
        self.is_healthy = True
        self.last_check_time: Optional[datetime] = None
        self.error_message: Optional[str] = None
        self._monitoring_task: Optional[asyncio.Task] = None
        self._status_change_callbacks = []

    def add_status_change_callback(self, callback):
        """Register a callback to be notified when health status changes.

        The callback receives the dict produced by ``get_status()``. It may be
        a plain callable or return an awaitable, which is then awaited.
        """
        self._status_change_callbacks.append(callback)

    async def _notify_status_change(self):
        """Notify all registered callbacks of status change.

        A failing callback is logged and does not prevent the remaining
        callbacks from running.
        """
        # Iterate over a snapshot so callbacks registered while we are
        # suspended in an await do not affect this round of notifications.
        for callback in tuple(self._status_change_callbacks):
            try:
                outcome = callback(self.get_status())
                # Inspect the result rather than the callable: lambdas,
                # wrappers and callable objects that return a coroutine are
                # not reported as coroutine functions but still need awaiting.
                if inspect.isawaitable(outcome):
                    await outcome
            except asyncio.CancelledError:
                # Never swallow cancellation (it subclasses Exception on 3.7).
                raise
            except Exception as e:
                logger.error(f"Error in status change callback: {e}", exc_info=True)

    def _probe_write(self) -> Optional[str]:
        """Write, read back and delete a probe file under ``base_path``.

        Performs blocking I/O and is meant to run outside the event loop.
        Returns None on success, otherwise a human-readable error message.
        Never raises: every failure is converted into a message.
        """
        test_file_path = None
        try:
            # Generate unique test file name with timestamp
            timestamp = int(time.time() * 1000)
            test_file_path = self.base_path / f"{TEST_FILE_PREFIX}{timestamp}"

            logger.debug(f"Testing write permission: {test_file_path}")

            # Attempt to write test file
            test_file_path.write_text(f"Health check at {datetime.now().isoformat()}\n")

            # Verify file exists and is readable
            if not test_file_path.exists():
                raise IOError("Test file was not created successfully")
            if not test_file_path.read_text():
                raise IOError("Test file is empty after write")

            # Clean up test file immediately
            test_file_path.unlink()

            logger.debug("Write permission test passed")
            return None

        except PermissionError as e:
            logger.error(f"Permission denied writing to {self.base_path}: {e}")
            return f"Permission denied: {str(e)}"

        except OSError as e:
            logger.error(f"OS error writing to {self.base_path}: {e}")
            # The errno is authoritative; the text match is kept as a fallback
            # because strerror wording varies by platform and locale.
            if e.errno == errno.EROFS or "Read-only file system" in str(e):
                return "Filesystem is mounted as read-only"
            return f"OS error: {str(e)}"

        except Exception as e:
            logger.error(f"Unexpected error during write test: {e}", exc_info=True)
            return f"Unexpected error: {str(e)}"

        finally:
            # Ensure cleanup even if error occurs. The existence test sits
            # inside the try as well: on a failing filesystem the stat itself
            # can raise, and that must not mask the result computed above.
            if test_file_path is not None:
                try:
                    if test_file_path.exists():
                        test_file_path.unlink()
                        logger.debug(f"Cleaned up test file: {test_file_path}")
                except Exception as e:
                    logger.warning(f"Failed to clean up test file {test_file_path}: {e}")

    async def check_write_permission(self) -> bool:
        """
        Attempt to write a test file to verify write permissions.
        Returns True if write successful, False otherwise.

        On failure ``error_message`` is set to a description of the problem;
        on success it is left untouched.
        """
        # File I/O blocks; run it in the default executor so a slow or hung
        # mount cannot stall the event loop.
        loop = asyncio.get_running_loop()
        failure = await loop.run_in_executor(None, self._probe_write)
        if failure is not None:
            self.error_message = failure
            return False
        return True

    async def perform_health_check(self) -> Dict:
        """
        Perform a single health check and update status.
        Returns the current health status.

        Registered callbacks are notified only when the healthy/unhealthy
        state differs from the state before this check.
        """
        previous_health = self.is_healthy
        self.last_check_time = datetime.now()

        can_write = await self.check_write_permission()

        if can_write:
            self.is_healthy = True
            self.error_message = None
            logger.info(f"Filesystem health check PASSED at {self.last_check_time.isoformat()}")
        else:
            self.is_healthy = False
            logger.error(
                f"Filesystem health check FAILED at {self.last_check_time.isoformat()}: "
                f"{self.error_message}"
            )

        # Notify if status changed
        if previous_health != self.is_healthy:
            await self._notify_status_change()

        return self.get_status()

    def get_status(self) -> Dict:
        """Get current health status.

        Returns a new dict with keys ``healthy``, ``last_check`` (ISO-8601
        string or None), ``error`` and ``base_path``.
        """
        return {
            "healthy": self.is_healthy,
            "last_check": self.last_check_time.isoformat() if self.last_check_time else None,
            "error": self.error_message,
            "base_path": str(self.base_path),
        }

    async def _monitoring_loop(self):
        """Background task that periodically checks filesystem health.

        Runs a check immediately, then one every HEALTH_CHECK_INTERVAL_SECONDS.
        Unexpected errors are logged and the loop continues; cancellation ends
        the loop and the task finishes normally.
        """
        interval_minutes = HEALTH_CHECK_INTERVAL_SECONDS / 60
        logger.info(
            f"Starting filesystem health monitoring for {self.base_path} "
            f"(interval: {interval_minutes:.0f} minutes)"
        )

        # A single outer handler covers cancellation at every await point,
        # including the sleep that follows a failed check.
        try:
            while True:
                try:
                    await self.perform_health_check()
                except asyncio.CancelledError:
                    raise
                except Exception as e:
                    logger.error(f"Error in health monitoring loop: {e}", exc_info=True)
                await asyncio.sleep(HEALTH_CHECK_INTERVAL_SECONDS)
        except asyncio.CancelledError:
            logger.info("Filesystem health monitoring stopped")

    def start_monitoring(self):
        """Start the background health monitoring task.

        Must be called while an event loop is running, because
        ``asyncio.create_task`` raises RuntimeError otherwise. Does nothing
        except log a warning when a monitoring task is already active.
        """
        if self._monitoring_task is None or self._monitoring_task.done():
            self._monitoring_task = asyncio.create_task(self._monitoring_loop())
            logger.info("Filesystem health monitoring started")
        else:
            logger.warning("Monitoring task already running")

    def stop_monitoring(self):
        """Stop the background health monitoring task.

        Requests cancellation and returns immediately; it does not wait for
        the task to finish. Safe to call when monitoring is not running.
        """
        if self._monitoring_task and not self._monitoring_task.done():
            self._monitoring_task.cancel()
            logger.info("Filesystem health monitoring stopped")
|