Add comprehensive Matrix alerting system with Grafana integration

- Implement custom Python alerting system (src/alerting.py) with water level monitoring, data freshness checks, and Matrix notifications
- Add complete Grafana Matrix alerting setup guide (docs/GRAFANA_MATRIX_SETUP.md) with webhook configuration, alert rules, and notification policies
- Create Matrix quick start guide (docs/MATRIX_QUICK_START.md) for rapid deployment
- Integrate alerting commands into main application (--alert-check, --alert-test)
- Add Matrix configuration to environment variables (.env.example)
- Update Makefile with alerting targets (alert-check, alert-test)
- Enhance status command to show Matrix notification status
- Support station-specific water level thresholds and escalation rules
- Provide dual alerting approach: native Grafana alerts and custom Python system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-26 16:18:02 +07:00
parent 6c7c128b4d
commit ca730e484b
7 changed files with 1062 additions and 9 deletions

View File

@@ -180,22 +180,81 @@ def run_web_api():
logger.error(f"Web API failed: {e}")
return False
def run_alert_check():
    """Run one pass of the water level alert system and log a summary.

    Returns:
        True when the alert system returned a result without an 'error'
        key; False when that key is present or when any exception is
        raised while importing, constructing, or running the alert system.
    """
    logger.info("Running water level alert check...")

    try:
        # Function-scope import: the alerting module is only loaded when
        # this command is actually invoked.
        from .alerting import WaterLevelAlertSystem

        alert_system = WaterLevelAlertSystem()
        outcome = alert_system.run_alert_check()

        if 'error' not in outcome:
            # Success path: report each counter from the result mapping.
            logger.info("✅ Alert check completed:")
            logger.info(f" • Water level alerts: {outcome['water_alerts']}")
            logger.info(f" • Data freshness alerts: {outcome['data_alerts']}")
            logger.info(f" • Total alerts generated: {outcome['total_alerts']}")
            logger.info(f" • Alerts sent: {outcome['sent']}")
            return True

        logger.error("❌ Alert check failed due to database connection")
        return False

    except Exception as exc:
        # Top-level command boundary: log the failure and signal it via
        # the return value instead of propagating.
        logger.error(f"❌ Alert check failed: {exc}")
        return False
def run_alert_test():
    """Send a fixed test message through the configured Matrix notifier.

    Returns:
        False when the alert system has no Matrix notifier or when any
        exception is raised; otherwise the value returned by the
        notifier's ``send_message`` call.
    """
    logger.info("Sending test alert message...")

    try:
        # Function-scope import: the alerting module is only loaded when
        # this command is actually invoked.
        from .alerting import WaterLevelAlertSystem

        alert_system = WaterLevelAlertSystem()

        # Without a notifier there is nothing to test; tell the user which
        # settings are expected.
        if not alert_system.matrix_notifier:
            logger.error("❌ Matrix notifier not configured")
            logger.info("Please set MATRIX_ACCESS_TOKEN and MATRIX_ROOM_ID in your .env file")
            return False

        # Adjacent literals concatenate into a single message string.
        payload = (
            "🧪 **Test Alert**\n\n"
            "This is a test message from the Northern Thailand Ping River Monitor.\n\n"
            "If you received this, Matrix notifications are working correctly!"
        )
        delivered = alert_system.matrix_notifier.send_message(payload)

        if not delivered:
            logger.error("❌ Test alert message failed to send")
        else:
            logger.info("✅ Test alert message sent successfully")

        return delivered

    except Exception as exc:
        # Top-level command boundary: log the failure and signal it via
        # the return value instead of propagating.
        logger.error(f"❌ Test alert failed: {exc}")
        return False
def show_status():
"""Show current system status"""
logger.info("=== Northern Thailand Ping River Monitor Status ===")
try:
# Show configuration
Config.print_settings()
# Test database connection
logger.info("\n=== Database Connection Test ===")
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
if scraper.db_adapter:
logger.info("✅ Database connection successful")
# Show latest data
latest_data = scraper.get_latest_data(3)
if latest_data:
@@ -209,19 +268,33 @@ def show_status():
logger.info("No data found in database")
else:
logger.error("❌ Database connection failed")
# Test alerting system
logger.info("\n=== Alerting System Status ===")
try:
from .alerting import WaterLevelAlertSystem
alerting = WaterLevelAlertSystem()
if alerting.matrix_notifier:
logger.info("✅ Matrix notifications configured")
else:
logger.warning("⚠️ Matrix notifications not configured")
logger.info("Set MATRIX_ACCESS_TOKEN and MATRIX_ROOM_ID in .env file")
except Exception as e:
logger.error(f"❌ Alerting system error: {e}")
# Show metrics if available
metrics_collector = get_metrics_collector()
metrics = metrics_collector.get_all_metrics()
if any(metrics.values()):
logger.info("\n=== Metrics Summary ===")
for metric_type, values in metrics.items():
if values:
logger.info(f"{metric_type.title()}: {len(values)} metrics")
return True
except Exception as e:
logger.error(f"Status check failed: {e}")
return False
@@ -239,6 +312,8 @@ Examples:
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
%(prog)s --update-data 2 # Update existing data for last 2 days
%(prog)s --status # Show system status
%(prog)s --alert-check # Check water levels and send alerts
%(prog)s --alert-test # Send test Matrix message
"""
)
@@ -273,7 +348,19 @@ Examples:
action="store_true",
help="Show current system status"
)
parser.add_argument(
"--alert-check",
action="store_true",
help="Run water level alert check"
)
parser.add_argument(
"--alert-test",
action="store_true",
help="Send test alert message to Matrix"
)
parser.add_argument(
"--log-level",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
@@ -314,6 +401,10 @@ Examples:
success = run_data_update(args.update_data)
elif args.status:
success = show_status()
elif args.alert_check:
success = run_alert_check()
elif args.alert_test:
success = run_alert_test()
else:
success = run_continuous_monitoring()