Files
Northern-Thailand-Ping-Rive…/src/health_check.py
grabowski af62cfef0b
Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Initial commit: Northern Thailand Ping River Monitor v3.1.0
Features:
- Real-time water level monitoring for Ping River Basin (16 stations)
- Coverage from Chiang Dao to Nakhon Sawan in Northern Thailand
- FastAPI web interface with interactive dashboard and station management
- Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics)
- Comprehensive monitoring with health checks and metrics collection
- Docker deployment with Grafana integration
- Production-ready architecture with enterprise-grade observability

 CI/CD & Automation:
- Complete Gitea Actions workflows for CI/CD, security, and releases
- Multi-Python version testing (3.9-3.12)
- Multi-architecture Docker builds (amd64, arm64)
- Daily security scanning and dependency monitoring
- Automated documentation generation
- Performance testing and validation

 Production Ready:
- Type safety with Pydantic models and comprehensive type hints
- Data validation layer with range checking and error handling
- Rate limiting and request tracking for API protection
- Enhanced logging with rotation, colors, and performance metrics
- Station management API for dynamic CRUD operations
- Comprehensive documentation and deployment guides

 Technical Stack:
- Python 3.9+ with FastAPI and Pydantic
- Multi-database architecture with adapter pattern
- Docker containerization with multi-stage builds
- Grafana dashboards for visualization
- Gitea Actions for CI/CD automation
- Enterprise monitoring and alerting

 Ready for deployment to B4L infrastructure!
2025-08-12 15:40:24 +07:00

265 lines
9.2 KiB
Python

#!/usr/bin/env python3
"""
Health check system for water monitoring application
"""
import time
import threading
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List, Callable
from dataclasses import dataclass
from enum import Enum
import logging
logger = logging.getLogger(__name__)
class HealthStatus(Enum):
    """Severity levels a health check can report.

    Each member's value is the lowercase string form of the level, which is
    what callers get from ``.value`` when serializing a status.
    """

    # Check passed.
    HEALTHY = "healthy"

    # Check ran but found a non-fatal problem.
    DEGRADED = "degraded"

    # Check failed.
    UNHEALTHY = "unhealthy"
@dataclass
class HealthCheckResult:
    """Outcome of a single health check run."""

    # Name of the check that produced this result.
    name: str

    # Severity level the check reported.
    status: HealthStatus

    # Human-readable explanation of the status.
    message: str

    # When the result was created.
    timestamp: datetime

    # How long the check took, in milliseconds; None when not measured.
    response_time_ms: Optional[float] = None

    # Optional extra key/value data supplied by the check.
    details: Optional[Dict[str, Any]] = None
class HealthCheck:
    """Base class for a single named health check.

    Subclasses implement :meth:`_perform_check`; :meth:`check` wraps that
    call with timing, error handling and result construction.
    """

    def __init__(self, name: str, timeout_seconds: int = 30):
        """Store the check's identity and timeout.

        Args:
            name: Identifier copied into every result this check produces.
            timeout_seconds: Timeout value for the check. The base class only
                stores it; it is up to subclasses to apply it.
        """
        self.name = name
        self.timeout_seconds = timeout_seconds

    def check(self) -> HealthCheckResult:
        """Run the check and package its outcome.

        Returns:
            A ``HealthCheckResult`` built from the dict returned by
            ``_perform_check`` (missing keys default to ``HEALTHY`` / ``'OK'``).
            If ``_perform_check`` raises, or its return value cannot be read,
            the error is logged and an ``UNHEALTHY`` result is returned
            instead of propagating the exception.
        """
        # Use a monotonic clock for the duration: time.time() follows the
        # wall clock and can jump (NTP/manual adjustment), which would yield
        # negative or inflated response times.
        started = time.perf_counter()

        try:
            result = self._perform_check()
            response_time = (time.perf_counter() - started) * 1000

            return HealthCheckResult(
                name=self.name,
                status=result.get('status', HealthStatus.HEALTHY),
                message=result.get('message', 'OK'),
                timestamp=datetime.now(),
                response_time_ms=response_time,
                details=result.get('details'),
            )
        except Exception as e:
            response_time = (time.perf_counter() - started) * 1000
            logger.error("Health check %s failed: %s", self.name, e)

            return HealthCheckResult(
                name=self.name,
                status=HealthStatus.UNHEALTHY,
                message=f"Check failed: {str(e)}",
                timestamp=datetime.now(),
                response_time_ms=response_time,
            )

    def _perform_check(self) -> Dict[str, Any]:
        """Perform the actual check; must be overridden by subclasses.

        Returns:
            A dict with optional keys ``'status'`` (a ``HealthStatus``),
            ``'message'`` (str) and ``'details'`` (dict).

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError
class DatabaseHealthCheck(HealthCheck):
    """Health check for database connectivity and data freshness."""

    def __init__(self, db_adapter, name: str = "database",
                 max_data_age: timedelta = timedelta(hours=2)):
        """Create the check.

        Args:
            db_adapter: Adapter object to probe. It must provide
                ``get_latest_measurements(limit=...)``; a ``connect()``
                method is called first when present.
            name: Name reported in results.
            max_data_age: Age beyond which the latest measurement is
                considered stale and the check reports ``DEGRADED``.
        """
        super().__init__(name)
        self.db_adapter = db_adapter
        self.max_data_age = max_data_age

    def _perform_check(self) -> Dict[str, Any]:
        """Probe the adapter and judge the freshness of its newest row.

        Returns:
            A result dict with ``'status'``, ``'message'`` and, where
            available, ``'details'``:

            * ``UNHEALTHY`` - no adapter, ``connect()`` returned a falsy
              value, the newest row has no usable timestamp, or any
              exception was raised.
            * ``DEGRADED`` - the newest row is older than ``max_data_age``.
            * ``HEALTHY`` - otherwise (including when no rows are returned).
        """
        if not self.db_adapter:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': 'Database adapter not initialized'
            }

        try:
            # Try to connect
            if hasattr(self.db_adapter, 'connect'):
                connected = self.db_adapter.connect()
                if not connected:
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': 'Database connection failed'
                    }

            # Try to get latest data
            latest_data = self.db_adapter.get_latest_measurements(limit=1)

            if latest_data:
                latest_timestamp = latest_data[0].get('timestamp')
                if isinstance(latest_timestamp, str):
                    # fromisoformat() on older Pythons rejects a trailing 'Z'.
                    latest_timestamp = datetime.fromisoformat(
                        latest_timestamp.replace('Z', '+00:00')
                    )

                if not isinstance(latest_timestamp, datetime):
                    # Missing or unsupported timestamp: report it explicitly
                    # rather than failing later with an attribute error.
                    return {
                        'status': HealthStatus.UNHEALTHY,
                        'message': (
                            'Latest measurement has no usable timestamp: '
                            f'{latest_timestamp!r}'
                        )
                    }

                # Compare like with like. Stripping tzinfo from an aware
                # timestamp and comparing it to local naive time skews the
                # age by the local UTC offset, so aware timestamps are
                # compared against an aware "now" in the same zone.
                if latest_timestamp.utcoffset() is not None:
                    now = datetime.now(latest_timestamp.tzinfo)
                else:
                    now = datetime.now()

                # Check if data is recent (within max_data_age)
                if now - latest_timestamp > self.max_data_age:
                    return {
                        'status': HealthStatus.DEGRADED,
                        'message': f'Latest data is old: {latest_timestamp}',
                        'details': {'latest_data_timestamp': str(latest_timestamp)}
                    }

            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Database connection OK',
                'details': {
                    'latest_data_count': len(latest_data),
                    'latest_timestamp': str(latest_data[0].get('timestamp')) if latest_data else None
                }
            }

        except Exception as e:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Database check failed: {str(e)}'
            }
class APIHealthCheck(HealthCheck):
    """Health check that probes an external API with a GET request."""

    def __init__(self, api_url: str, session, name: str = "api"):
        """Remember the URL to probe and the session used to reach it.

        Args:
            api_url: URL requested on every check.
            session: Object whose ``get(url, timeout=...)`` performs the
                request and returns a response exposing ``status_code``
                and ``content``.
            name: Name reported in results.
        """
        super().__init__(name)
        self.api_url = api_url
        self.session = session

    def _perform_check(self) -> Dict[str, Any]:
        """Issue one GET request and classify the outcome.

        Returns:
            ``HEALTHY`` for status 200, ``DEGRADED`` for any other status
            code, and ``UNHEALTHY`` when the request raises.
        """
        try:
            # A plain GET is enough to tell whether the API is reachable.
            reply = self.session.get(self.api_url, timeout=self.timeout_seconds)
            code = reply.status_code

            if code != 200:
                return {
                    'status': HealthStatus.DEGRADED,
                    'message': f'API returned status {code}',
                    'details': {'status_code': code},
                }

            return {
                'status': HealthStatus.HEALTHY,
                'message': 'API connection OK',
                'details': {
                    'status_code': code,
                    'response_size': len(reply.content),
                },
            }
        except Exception as exc:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'API check failed: {str(exc)}',
            }
class MemoryHealthCheck(HealthCheck):
    """Health check that compares this process's memory use to a limit."""

    def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
        """Configure the memory threshold.

        Args:
            max_memory_mb: Resident memory, in MB, above which the check
                reports ``DEGRADED``.
            name: Name reported in results.
        """
        super().__init__(name)
        self.max_memory_mb = max_memory_mb

    def _perform_check(self) -> Dict[str, Any]:
        """Measure the current process's RSS via psutil.

        Returns:
            ``DEGRADED`` when usage exceeds ``max_memory_mb``, ``HEALTHY``
            when it does not or when psutil cannot be imported, and
            ``UNHEALTHY`` on any other error.
        """
        try:
            # Imported lazily so the check degrades gracefully without psutil.
            import psutil

            rss_bytes = psutil.Process().memory_info().rss
            memory_mb = rss_bytes / 1024 / 1024
            limit_mb = self.max_memory_mb

            if memory_mb > limit_mb:
                return {
                    'status': HealthStatus.DEGRADED,
                    'message': f'High memory usage: {memory_mb:.1f}MB',
                    'details': {'memory_mb': memory_mb, 'max_memory_mb': limit_mb},
                }

            return {
                'status': HealthStatus.HEALTHY,
                'message': f'Memory usage OK: {memory_mb:.1f}MB',
                'details': {'memory_mb': memory_mb},
            }
        except ImportError:
            return {
                'status': HealthStatus.HEALTHY,
                'message': 'Memory check skipped (psutil not available)',
            }
        except Exception as exc:
            return {
                'status': HealthStatus.UNHEALTHY,
                'message': f'Memory check failed: {str(exc)}',
            }
class HealthCheckManager:
    """Registers health checks, runs them, and reports on the latest results.

    ``checks`` and ``last_results`` are guarded by an internal lock; every
    method takes a snapshot under the lock and works on that snapshot, so
    the lock is never held while a check is executing.
    """

    def __init__(self):
        """Create a manager with no checks and no recorded results."""
        self.checks: List[HealthCheck] = []
        self.last_results: Dict[str, HealthCheckResult] = {}
        self._lock = threading.Lock()

    def add_check(self, health_check: HealthCheck):
        """Register a health check to be run by ``run_all_checks``."""
        with self._lock:
            self.checks.append(health_check)

    def run_all_checks(self) -> Dict[str, HealthCheckResult]:
        """Run every registered check and record the results.

        Returns:
            Mapping of check name to its result for this run. A check whose
            ``check()`` call raises is reported as ``UNHEALTHY``.
        """
        # Snapshot so checks added concurrently do not affect this run.
        with self._lock:
            checks = list(self.checks)

        results = {}
        for check in checks:
            try:
                result = check.check()
            except Exception as e:
                logger.error("Error running health check %s: %s", check.name, e)
                result = HealthCheckResult(
                    name=check.name,
                    status=HealthStatus.UNHEALTHY,
                    message=f"Check execution failed: {str(e)}",
                    timestamp=datetime.now()
                )

            results[check.name] = result
            # Record failures too; otherwise a stale earlier result would
            # keep feeding the overall status and the summary.
            with self._lock:
                self.last_results[check.name] = result

        return results

    @staticmethod
    def _combine_statuses(statuses: List[HealthStatus]) -> HealthStatus:
        """Reduce individual statuses to one: the worst wins, empty is UNHEALTHY."""
        if not statuses:
            return HealthStatus.UNHEALTHY
        if HealthStatus.UNHEALTHY in statuses:
            return HealthStatus.UNHEALTHY
        if HealthStatus.DEGRADED in statuses:
            return HealthStatus.DEGRADED
        return HealthStatus.HEALTHY

    def get_overall_status(self) -> HealthStatus:
        """Get overall system health status from the last recorded results.

        Returns:
            ``UNHEALTHY`` if no results are recorded or any check is
            unhealthy, ``DEGRADED`` if any check is degraded, otherwise
            ``HEALTHY``.
        """
        with self._lock:
            statuses = [result.status for result in self.last_results.values()]
        return self._combine_statuses(statuses)

    def get_health_summary(self) -> Dict[str, Any]:
        """Get a summary of system health.

        Returns:
            A dict with ``'overall_status'`` (status value string),
            ``'timestamp'`` (ISO string for now) and ``'checks'`` (per-check
            status, message, response time and timestamp).
        """
        # One snapshot feeds both the overall status and the per-check list,
        # so the two cannot disagree when results change concurrently.
        with self._lock:
            snapshot = dict(self.last_results)

        overall_status = self._combine_statuses(
            [result.status for result in snapshot.values()]
        )

        return {
            'overall_status': overall_status.value,
            'timestamp': datetime.now().isoformat(),
            'checks': {
                name: {
                    'status': result.status.value,
                    'message': result.message,
                    'response_time_ms': result.response_time_ms,
                    'timestamp': result.timestamp.isoformat()
                }
                for name, result in snapshot.items()
            }
        }