#!/usr/bin/env python3
"""
Health check system for water monitoring application
"""

import time
import threading
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional, List, Callable
from dataclasses import dataclass
from enum import Enum
import logging

logger = logging.getLogger(__name__)


class HealthStatus(Enum):
    """Possible outcomes of a health check, from best to worst."""

    HEALTHY = "healthy"
    DEGRADED = "degraded"
    UNHEALTHY = "unhealthy"


@dataclass
class HealthCheckResult:
    """Result of a health check"""

    name: str
    status: HealthStatus
    message: str
    timestamp: datetime
    response_time_ms: Optional[float] = None
    details: Optional[Dict[str, Any]] = None


class HealthCheck:
    """Base health check class.

    Subclasses implement ``_perform_check`` and return a dict with the
    optional keys ``status``, ``message`` and ``details``; ``check`` wraps
    that dict in a ``HealthCheckResult`` and adds timing information.
    """

    def __init__(self, name: str, timeout_seconds: int = 30):
        self.name = name
        self.timeout_seconds = timeout_seconds

    def check(self) -> HealthCheckResult:
        """Perform the health check.

        Returns:
            A ``HealthCheckResult``. Any exception raised by
            ``_perform_check`` (or by reading its return value) is logged
            and converted into an UNHEALTHY result instead of propagating.
        """
        # perf_counter is monotonic, so the measured duration is not
        # affected by wall-clock adjustments during the check.
        started = time.perf_counter()
        try:
            result = self._perform_check()
            elapsed_ms = (time.perf_counter() - started) * 1000
            return HealthCheckResult(
                name=self.name,
                status=result.get('status', HealthStatus.HEALTHY),
                message=result.get('message', 'OK'),
                timestamp=datetime.now(),
                response_time_ms=elapsed_ms,
                details=result.get('details'),
            )
        except Exception as e:
            elapsed_ms = (time.perf_counter() - started) * 1000
            logger.error("Health check %s failed: %s", self.name, e)
            return HealthCheckResult(
                name=self.name,
                status=HealthStatus.UNHEALTHY,
                message=f"Check failed: {str(e)}",
                timestamp=datetime.now(),
                response_time_ms=elapsed_ms,
            )

    def _perform_check(self) -> Dict[str, Any]:
        """Override this method to implement the actual check"""
        raise NotImplementedError


class DatabaseHealthCheck(HealthCheck):
    """Health check for database connectivity and data freshness.

    Args:
        db_adapter: Object exposing ``get_latest_measurements(limit=...)``
            and, optionally, ``connect()``.
        name: Name reported in the result.
        max_data_age: Age beyond which the newest measurement makes the
            check report DEGRADED (defaults to two hours).
    """

    def __init__(
        self,
        db_adapter,
        name: str = "database",
        max_data_age: timedelta = timedelta(hours=2),
    ):
        super().__init__(name)
        self.db_adapter = db_adapter
        self.max_data_age = max_data_age

    def _perform_check(self) -> Dict[str, Any]:
        if not self.db_adapter:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': 'Database adapter not initialized'
            }

        try:
            # Try to connect (adapters without connect() are used as-is)
            if hasattr(self.db_adapter, 'connect'):
                connected = self.db_adapter.connect()
                if not connected:
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': 'Database connection failed'
                    }

            # Try to get latest data
            latest_data = self.db_adapter.get_latest_measurements(limit=1)

            if latest_data:
                raw_timestamp = latest_data[0].get('timestamp')
                latest_timestamp = raw_timestamp
                if isinstance(latest_timestamp, str):
                    # fromisoformat() on older Pythons rejects a trailing
                    # 'Z', so spell the UTC offset out explicitly.
                    latest_timestamp = datetime.fromisoformat(
                        latest_timestamp.replace('Z', '+00:00')
                    )

                if not isinstance(latest_timestamp, datetime):
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': (
                            'Database check failed: latest measurement has '
                            f'no usable timestamp ({raw_timestamp!r})'
                        )
                    }

                # Check if data is recent
                if self._age_of(latest_timestamp) > self.max_data_age:
                    return {
                        'status': HealthStatus.DEGRADED,
                        'message': f'Latest data is old: {latest_timestamp}',
                        'details': {
                            'latest_data_timestamp': str(latest_timestamp)
                        }
                    }

            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Database connection OK',
                'details': {
                    'latest_data_count': len(latest_data),
                    'latest_timestamp': (
                        str(latest_data[0].get('timestamp'))
                        if latest_data else None
                    )
                }
            }

        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Database check failed: {str(e)}'
            }

    @staticmethod
    def _age_of(timestamp: datetime) -> timedelta:
        """Return how long ago ``timestamp`` was.

        Timezone-aware timestamps are compared against an aware UTC "now"
        so their offset is honoured; naive timestamps are compared against
        the naive local clock. Dropping the offset and comparing against
        local time would skew the age by the difference between the two
        offsets.
        """
        if timestamp.utcoffset() is not None:
            return datetime.now(timezone.utc) - timestamp
        return datetime.now() - timestamp


class APIHealthCheck(HealthCheck):
    """Health check for external API connectivity.

    Args:
        api_url: URL requested with a GET.
        session: Object exposing ``get(url, timeout=...)`` whose response
            has ``status_code`` and ``content`` attributes.
        name: Name reported in the result.
        timeout_seconds: Timeout passed to ``session.get``.
    """

    def __init__(
        self,
        api_url: str,
        session,
        name: str = "api",
        timeout_seconds: int = 30,
    ):
        super().__init__(name, timeout_seconds)
        self.api_url = api_url
        self.session = session

    def _perform_check(self) -> Dict[str, Any]:
        try:
            # Simple GET request to check API availability
            response = self.session.get(
                self.api_url, timeout=self.timeout_seconds
            )

            if response.status_code == 200:
                return {
                    'status': HealthStatus.HEALTHY,
                    'message': 'API connection OK',
                    'details': {
                        'status_code': response.status_code,
                        'response_size': len(response.content)
                    }
                }

            # Reachable but not answering 200: report as degraded
            return {
                'status': HealthStatus.DEGRADED,
                'message': f'API returned status {response.status_code}',
                'details': {'status_code': response.status_code}
            }

        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'API check failed: {str(e)}'
            }


class MemoryHealthCheck(HealthCheck):
    """Health check for memory usage of the current process.

    Args:
        max_memory_mb: Resident set size, in MB, above which the check
            reports DEGRADED.
        name: Name reported in the result.
    """

    def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
        super().__init__(name)
        self.max_memory_mb = max_memory_mb

    def _perform_check(self) -> Dict[str, Any]:
        try:
            # Imported lazily: psutil is optional and its absence only
            # disables this check.
            import psutil

            process = psutil.Process()
            memory_info = process.memory_info()
            memory_mb = memory_info.rss / 1024 / 1024

            if memory_mb > self.max_memory_mb:
                return {
                    'status': HealthStatus.DEGRADED,
                    'message': f'High memory usage: {memory_mb:.1f}MB',
                    'details': {
                        'memory_mb': memory_mb,
                        'max_memory_mb': self.max_memory_mb
                    }
                }

            return {
                'status': HealthStatus.HEALTHY,
                'message': f'Memory usage OK: {memory_mb:.1f}MB',
                'details': {'memory_mb': memory_mb}
            }

        except ImportError:
            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Memory check skipped (psutil not available)'
            }
        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Memory check failed: {str(e)}'
            }


class HealthCheckManager:
    """Manages multiple health checks.

    ``checks`` and ``last_results`` are guarded by an internal lock; the
    checks themselves run outside the lock so a slow check does not block
    readers of the last known results.
    """

    def __init__(self):
        self.checks: List[HealthCheck] = []
        self.last_results: Dict[str, HealthCheckResult] = {}
        self._lock = threading.Lock()

    def add_check(self, health_check: HealthCheck):
        """Add a health check"""
        with self._lock:
            self.checks.append(health_check)

    def run_all_checks(self) -> Dict[str, HealthCheckResult]:
        """Run all health checks.

        Returns:
            Mapping of check name to its result. A check whose ``check()``
            raises is reported as UNHEALTHY; that failure is also stored
            in ``last_results`` so the overall status reflects it.
        """
        # Iterate over a snapshot so add_check() may run concurrently.
        with self._lock:
            checks = list(self.checks)

        results = {}
        for check in checks:
            try:
                result = check.check()
            except Exception as e:
                logger.error(
                    "Error running health check %s: %s", check.name, e
                )
                result = HealthCheckResult(
                    name=check.name,
                    status=HealthStatus.UNHEALTHY,
                    message=f"Check execution failed: {str(e)}",
                    timestamp=datetime.now(),
                )

            results[check.name] = result
            with self._lock:
                self.last_results[check.name] = result

        return results

    def get_overall_status(self) -> HealthStatus:
        """Get overall system health status.

        Returns:
            The worst status among the last results, or UNHEALTHY when no
            check has produced a result yet.
        """
        return self._aggregate(self._snapshot_results())

    def get_health_summary(self) -> Dict[str, Any]:
        """Get a summary of system health.

        Returns:
            Dict with ``overall_status``, the ``timestamp`` of the summary
            and the per-check ``checks`` details.
        """
        # One snapshot feeds both the overall status and the per-check
        # details, so the two can never disagree.
        snapshot = self._snapshot_results()

        return {
            'overall_status': self._aggregate(snapshot).value,
            'timestamp': datetime.now().isoformat(),
            'checks': {
                name: {
                    'status': result.status.value,
                    'message': result.message,
                    'response_time_ms': result.response_time_ms,
                    'timestamp': result.timestamp.isoformat()
                }
                for name, result in snapshot.items()
            }
        }

    def _snapshot_results(self) -> Dict[str, HealthCheckResult]:
        """Return a copy of ``last_results`` taken under the lock."""
        with self._lock:
            return dict(self.last_results)

    @staticmethod
    def _aggregate(results: Dict[str, HealthCheckResult]) -> HealthStatus:
        """Reduce per-check results to the single worst status."""
        if not results:
            return HealthStatus.UNHEALTHY

        statuses = {result.status for result in results.values()}
        if HealthStatus.UNHEALTHY in statuses:
            return HealthStatus.UNHEALTHY
        if HealthStatus.DEGRADED in statuses:
            return HealthStatus.DEGRADED
        return HealthStatus.HEALTHY