Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Features: - Real-time water level monitoring for Ping River Basin (16 stations) - Coverage from Chiang Dao to Nakhon Sawan in Northern Thailand - FastAPI web interface with interactive dashboard and station management - Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics) - Comprehensive monitoring with health checks and metrics collection - Docker deployment with Grafana integration - Production-ready architecture with enterprise-grade observability CI/CD & Automation: - Complete Gitea Actions workflows for CI/CD, security, and releases - Multi-Python version testing (3.9-3.12) - Multi-architecture Docker builds (amd64, arm64) - Daily security scanning and dependency monitoring - Automated documentation generation - Performance testing and validation Production Ready: - Type safety with Pydantic models and comprehensive type hints - Data validation layer with range checking and error handling - Rate limiting and request tracking for API protection - Enhanced logging with rotation, colors, and performance metrics - Station management API for dynamic CRUD operations - Comprehensive documentation and deployment guides Technical Stack: - Python 3.9+ with FastAPI and Pydantic - Multi-database architecture with adapter pattern - Docker containerization with multi-stage builds - Grafana dashboards for visualization - Gitea Actions for CI/CD automation - Enterprise monitoring and alerting Ready for deployment to B4L infrastructure!
265 lines
9.2 KiB
Python
265 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Health check system for water monitoring application
|
|
"""
|
|
|
|
import time
|
|
import threading
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Any, Optional, List, Callable
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
class HealthStatus(Enum):
    """Health classification attached to every health check result.

    The string values are what gets serialized when a status is reported
    via ``.value``.
    """

    # The check passed.
    HEALTHY = "healthy"

    # The check ran, but found a condition worth attention.
    DEGRADED = "degraded"

    # The check failed.
    UNHEALTHY = "unhealthy"
|
|
|
|
@dataclass
class HealthCheckResult:
    """Outcome of a single health check run.

    The first four fields are required; the timing and details fields are
    optional and default to ``None``.
    """

    # Name of the check that produced this result.
    name: str

    # Health classification for this run.
    status: HealthStatus

    # Human-readable explanation of the status.
    message: str

    # When the result was created.
    timestamp: datetime

    # How long the check took, in milliseconds (None when not measured).
    response_time_ms: Optional[float] = None

    # Free-form extra data supplied by the check (None when absent).
    details: Optional[Dict[str, Any]] = None
|
|
|
|
class HealthCheck:
    """Base class for a single named health check.

    Subclasses implement :meth:`_perform_check`. :meth:`check` wraps that
    call with timing and error handling so that callers always receive a
    ``HealthCheckResult`` instead of an exception.
    """

    def __init__(self, name: str, timeout_seconds: int = 30):
        """Store the check's identity and timeout.

        Args:
            name: Identifier used in results and log messages.
            timeout_seconds: Timeout value in seconds. This base class only
                stores it; nothing here enforces it, so a subclass has to
                apply it to whatever operation it performs.
        """
        self.name = name
        self.timeout_seconds = timeout_seconds

    @staticmethod
    def _elapsed_ms(started: float) -> float:
        """Return milliseconds elapsed since a ``time.perf_counter()`` reading."""
        return (time.perf_counter() - started) * 1000

    def check(self) -> HealthCheckResult:
        """Run the check and package the outcome.

        Returns:
            A ``HealthCheckResult`` built from the mapping returned by
            :meth:`_perform_check`. Missing keys fall back to
            ``HealthStatus.HEALTHY`` for ``'status'`` and ``'OK'`` for
            ``'message'``. If anything raises an ``Exception``, the failure
            is logged and an ``UNHEALTHY`` result carrying the error text is
            returned instead.
        """
        # perf_counter() is monotonic, so the measured duration cannot jump
        # or go negative when the system clock is adjusted mid-check.
        started = time.perf_counter()

        try:
            outcome = self._perform_check()
            response_time = self._elapsed_ms(started)

            return HealthCheckResult(
                name=self.name,
                status=outcome.get('status', HealthStatus.HEALTHY),
                message=outcome.get('message', 'OK'),
                timestamp=datetime.now(),
                response_time_ms=response_time,
                details=outcome.get('details'),
            )

        except Exception as e:
            response_time = self._elapsed_ms(started)
            logger.error("Health check %s failed: %s", self.name, e)

            return HealthCheckResult(
                name=self.name,
                status=HealthStatus.UNHEALTHY,
                message=f"Check failed: {str(e)}",
                timestamp=datetime.now(),
                response_time_ms=response_time,
            )

    def _perform_check(self) -> Dict[str, Any]:
        """Perform the actual check; must be overridden by subclasses.

        Returns:
            A mapping that may contain ``'status'``, ``'message'`` and
            ``'details'`` keys, which :meth:`check` reads.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|
|
|
|
class DatabaseHealthCheck(HealthCheck):
    """Health check for database connectivity and data freshness.

    The check connects (when the adapter exposes ``connect``), fetches the
    most recent measurement and reports ``DEGRADED`` when that measurement
    is older than ``max_data_age``.
    """

    def __init__(self, db_adapter, name: str = "database",
                 max_data_age: timedelta = timedelta(hours=2)):
        """Create the check.

        Args:
            db_adapter: Adapter object to probe. It must provide
                ``get_latest_measurements(limit=...)``; ``connect()`` is
                called only if present.
            name: Identifier used in results and log messages.
            max_data_age: Maximum age of the latest measurement before the
                database is reported as degraded. Defaults to two hours.
        """
        super().__init__(name)
        self.db_adapter = db_adapter
        self.max_data_age = max_data_age

    @staticmethod
    def _parse_timestamp(raw):
        """Convert an ISO-8601 string to ``datetime``; pass other values through."""
        if isinstance(raw, str):
            # A trailing 'Z' is rewritten as an explicit UTC offset before
            # parsing.
            return datetime.fromisoformat(raw.replace('Z', '+00:00'))
        return raw

    @staticmethod
    def _data_age(timestamp: datetime) -> timedelta:
        """Return how long ago ``timestamp`` was.

        An aware timestamp is compared against an aware "now" in the same
        time zone, so the UTC offset is honoured. A naive timestamp is
        compared against naive local time.
        """
        if timestamp.tzinfo is not None and timestamp.utcoffset() is not None:
            return datetime.now(timestamp.tzinfo) - timestamp
        return datetime.now() - timestamp

    def _perform_check(self) -> Dict[str, Any]:
        """Probe the database and classify its health.

        Returns:
            A mapping with ``'status'`` and ``'message'`` and, for some
            outcomes, ``'details'``:

            * ``UNHEALTHY`` - no adapter, connection failed, the latest row
              has no timestamp, or any exception was raised.
            * ``DEGRADED`` - the latest measurement is older than
              ``max_data_age``.
            * ``HEALTHY`` - otherwise, including when no rows are returned.
        """
        if not self.db_adapter:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': 'Database adapter not initialized'
            }

        try:
            # Try to connect
            if hasattr(self.db_adapter, 'connect'):
                connected = self.db_adapter.connect()
                if not connected:
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': 'Database connection failed'
                    }

            # Try to get latest data
            latest_data = self.db_adapter.get_latest_measurements(limit=1)

            raw_timestamp = None
            if latest_data:
                raw_timestamp = latest_data[0].get('timestamp')
                if raw_timestamp is None:
                    # Freshness cannot be judged without a timestamp; say so
                    # explicitly rather than failing on None further down.
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': 'Latest measurement has no timestamp'
                    }

                latest_timestamp = self._parse_timestamp(raw_timestamp)

                # Check if data is recent enough
                if self._data_age(latest_timestamp) > self.max_data_age:
                    return {
                        'status': HealthStatus.DEGRADED,
                        'message': f'Latest data is old: {latest_timestamp}',
                        'details': {'latest_data_timestamp': str(latest_timestamp)}
                    }

            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Database connection OK',
                'details': {
                    'latest_data_count': len(latest_data),
                    'latest_timestamp': str(raw_timestamp) if latest_data else None
                }
            }

        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Database check failed: {str(e)}'
            }
|
|
|
|
class APIHealthCheck(HealthCheck):
    """Health check that probes an external HTTP API with a GET request."""

    def __init__(self, api_url: str, session, name: str = "api"):
        """Remember the URL to probe and the session used to reach it.

        Args:
            api_url: URL passed to ``session.get``.
            session: Object exposing ``get(url, timeout=...)``.
            name: Identifier used in results and log messages.
        """
        super().__init__(name)
        self.api_url = api_url
        self.session = session

    def _perform_check(self) -> Dict[str, Any]:
        """Issue the GET request and classify the response.

        Returns:
            ``HEALTHY`` for HTTP 200, ``DEGRADED`` for any other status
            code, and ``UNHEALTHY`` when the request raises.
        """
        try:
            # The inherited timeout_seconds bounds the request.
            reply = self.session.get(self.api_url, timeout=self.timeout_seconds)
            code = reply.status_code

            # Anything other than a plain 200 counts as degraded.
            if code != 200:
                return {
                    'status': HealthStatus.DEGRADED,
                    'message': f'API returned status {code}',
                    'details': {'status_code': code}
                }

            success_details = {
                'status_code': code,
                'response_size': len(reply.content)
            }
            return {
                'status': HealthStatus.HEALTHY,
                'message': 'API connection OK',
                'details': success_details
            }

        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'API check failed: {str(e)}'
            }
|
|
|
|
class MemoryHealthCheck(HealthCheck):
    """Health check that compares this process's memory use to a limit."""

    def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
        """Configure the memory threshold.

        Args:
            max_memory_mb: Resident memory, in MB, above which the check
                reports ``DEGRADED``.
            name: Identifier used in results and log messages.
        """
        super().__init__(name)
        self.max_memory_mb = max_memory_mb

    def _perform_check(self) -> Dict[str, Any]:
        """Measure the current process's RSS and classify it.

        Returns:
            ``DEGRADED`` when usage exceeds ``max_memory_mb``; ``HEALTHY``
            when it does not, or when ``psutil`` cannot be imported (the
            check is then skipped); ``UNHEALTHY`` on any other exception.
        """
        try:
            # Imported here so a missing psutil skips the check rather than
            # breaking module import.
            import psutil

            rss_bytes = psutil.Process().memory_info().rss
            memory_mb = rss_bytes / 1024 / 1024

            if memory_mb <= self.max_memory_mb:
                return {
                    'status': HealthStatus.HEALTHY,
                    'message': f'Memory usage OK: {memory_mb:.1f}MB',
                    'details': {'memory_mb': memory_mb}
                }

            return {
                'status': HealthStatus.DEGRADED,
                'message': f'High memory usage: {memory_mb:.1f}MB',
                'details': {'memory_mb': memory_mb, 'max_memory_mb': self.max_memory_mb}
            }

        except ImportError:
            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Memory check skipped (psutil not available)'
            }
        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Memory check failed: {str(e)}'
            }
|
|
|
|
class HealthCheckManager:
    """Registers health checks, runs them and summarizes their results.

    ``checks`` and ``last_results`` are guarded by an internal lock: every
    read and write in this class happens while holding it. The checks
    themselves are executed outside the lock.
    """

    def __init__(self):
        """Start with no registered checks and no recorded results."""
        self.checks: List[HealthCheck] = []
        self.last_results: Dict[str, HealthCheckResult] = {}
        self._lock = threading.Lock()

    def add_check(self, health_check: HealthCheck):
        """Register a health check to be run by :meth:`run_all_checks`."""
        with self._lock:
            self.checks.append(health_check)

    def run_all_checks(self) -> Dict[str, HealthCheckResult]:
        """Run every registered check and record each result.

        Returns:
            A mapping from check name to the result of this run. If a
            check's ``check()`` call raises, an ``UNHEALTHY`` result is
            synthesized for it.
        """
        # Iterate over a snapshot so a concurrent add_check() cannot mutate
        # the list mid-iteration.
        with self._lock:
            pending = list(self.checks)

        results = {}

        for check in pending:
            try:
                result = check.check()
            except Exception as e:
                logger.error(f"Error running health check {check.name}: {e}")
                result = HealthCheckResult(
                    name=check.name,
                    status=HealthStatus.UNHEALTHY,
                    message=f"Check execution failed: {str(e)}",
                    timestamp=datetime.now()
                )

            results[check.name] = result

            # Record failures too; otherwise a stale earlier result would
            # keep feeding the overall status after the check broke.
            with self._lock:
                self.last_results[check.name] = result

        return results

    @staticmethod
    def _aggregate_status(statuses: List[HealthStatus]) -> HealthStatus:
        """Reduce individual statuses to the worst one; empty means UNHEALTHY."""
        if not statuses:
            return HealthStatus.UNHEALTHY
        if HealthStatus.UNHEALTHY in statuses:
            return HealthStatus.UNHEALTHY
        if HealthStatus.DEGRADED in statuses:
            return HealthStatus.DEGRADED
        return HealthStatus.HEALTHY

    def get_overall_status(self) -> HealthStatus:
        """Get overall system health from the most recent results.

        Returns:
            ``UNHEALTHY`` when no results are recorded or any check is
            unhealthy; otherwise ``DEGRADED`` when any check is degraded;
            otherwise ``HEALTHY``.
        """
        with self._lock:
            statuses = [result.status for result in self.last_results.values()]
        return self._aggregate_status(statuses)

    def get_health_summary(self) -> Dict[str, Any]:
        """Get a serializable summary of system health.

        Returns:
            A dict with ``'overall_status'`` (status value string),
            ``'timestamp'`` (ISO-8601 string of the current time) and
            ``'checks'`` (per-check status, message, response time and
            timestamp).
        """
        # Take one snapshot so the overall status and the per-check entries
        # describe the same set of results.
        with self._lock:
            snapshot = dict(self.last_results)

        overall_status = self._aggregate_status(
            [result.status for result in snapshot.values()]
        )

        return {
            'overall_status': overall_status.value,
            'timestamp': datetime.now().isoformat(),
            'checks': {
                name: {
                    'status': result.status.value,
                    'message': result.message,
                    'response_time_ms': result.response_time_ms,
                    'timestamp': result.timestamp.isoformat()
                }
                for name, result in snapshot.items()
            }
        }