Add comprehensive Matrix alerting system with Grafana integration
- Implement custom Python alerting system (src/alerting.py) with water level monitoring, data freshness checks, and Matrix notifications
- Add complete Grafana Matrix alerting setup guide (docs/GRAFANA_MATRIX_SETUP.md) with webhook configuration, alert rules, and notification policies
- Create Matrix quick start guide (docs/MATRIX_QUICK_START.md) for rapid deployment
- Integrate alerting commands into main application (--alert-check, --alert-test)
- Add Matrix configuration to environment variables (.env.example)
- Update Makefile with alerting targets (alert-check, alert-test)
- Enhance status command to show Matrix notification status
- Support station-specific water level thresholds and escalation rules
- Provide dual alerting approach: native Grafana alerts and custom Python system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
109
src/main.py
109
src/main.py
@@ -180,22 +180,81 @@ def run_web_api():
|
||||
logger.error(f"Web API failed: {e}")
|
||||
return False
|
||||
|
||||
def run_alert_check():
    """Run one water level alert check and log a summary of the outcome.

    Creates a ``WaterLevelAlertSystem``, calls its ``run_alert_check()``
    method and logs the counters found in the returned mapping
    (``water_alerts``, ``data_alerts``, ``total_alerts``, ``sent``).

    Returns:
        bool: ``True`` when the check completed and the summary was logged;
        ``False`` when the result contains an ``'error'`` key or when any
        exception was raised while importing, constructing or running the
        alerting system.
    """
    logger.info("Running water level alert check...")

    try:
        # Imported inside the try block so that a failing import is logged
        # and turned into a False return like any other failure.
        from .alerting import WaterLevelAlertSystem

        # Initialize alerting system
        alerting = WaterLevelAlertSystem()

        # Run alert check
        results = alerting.run_alert_check()

        if 'error' in results:
            logger.error("❌ Alert check failed due to database connection")
            # Surface what the alerting system actually reported instead of
            # discarding it; the message above only assumes the cause.
            # NOTE(review): the shape of results['error'] is defined in
            # src/alerting.py, which is not visible here - confirm.
            logger.error(f"Reported error: {results['error']}")
            return False

        # Plain string: there is nothing to interpolate on this line.
        logger.info("✅ Alert check completed:")
        logger.info(f" • Water level alerts: {results['water_alerts']}")
        logger.info(f" • Data freshness alerts: {results['data_alerts']}")
        logger.info(f" • Total alerts generated: {results['total_alerts']}")
        logger.info(f" • Alerts sent: {results['sent']}")

        return True

    except Exception as e:
        # Top-level command boundary: report the failure and signal it to
        # the caller through the return value rather than propagating.
        logger.error(f"❌ Alert check failed: {e}")
        return False
|
||||
|
||||
def run_alert_test():
    """Send a fixed test message through the Matrix notifier.

    Creates a ``WaterLevelAlertSystem`` and, when its ``matrix_notifier``
    attribute is truthy, passes a test message to the notifier's
    ``send_message()``.

    Returns:
        ``False`` when no notifier is configured or when an exception is
        raised; otherwise whatever ``send_message()`` returned.
    """
    logger.info("Sending test alert message...")

    try:
        from .alerting import WaterLevelAlertSystem

        # Build the alerting system; the notifier is read from it below.
        alert_system = WaterLevelAlertSystem()

        # Guard clause: without a notifier there is nothing to send.
        if not alert_system.matrix_notifier:
            logger.error("❌ Matrix notifier not configured")
            logger.info("Please set MATRIX_ACCESS_TOKEN and MATRIX_ROOM_ID in your .env file")
            return False

        # Adjacent literals are concatenated at compile time into the same
        # single message string.
        body = (
            "🧪 **Test Alert**\n\n"
            "This is a test message from the Northern Thailand Ping River Monitor.\n\n"
            "If you received this, Matrix notifications are working correctly!"
        )
        sent = alert_system.matrix_notifier.send_message(body)

        if not sent:
            logger.error("❌ Test alert message failed to send")
        else:
            logger.info("✅ Test alert message sent successfully")

        return sent

    except Exception as e:
        logger.error(f"❌ Test alert failed: {e}")
        return False
|
||||
|
||||
def show_status():
|
||||
"""Show current system status"""
|
||||
logger.info("=== Northern Thailand Ping River Monitor Status ===")
|
||||
|
||||
|
||||
try:
|
||||
# Show configuration
|
||||
Config.print_settings()
|
||||
|
||||
|
||||
# Test database connection
|
||||
logger.info("\n=== Database Connection Test ===")
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
|
||||
if scraper.db_adapter:
|
||||
logger.info("✅ Database connection successful")
|
||||
|
||||
|
||||
# Show latest data
|
||||
latest_data = scraper.get_latest_data(3)
|
||||
if latest_data:
|
||||
@@ -209,19 +268,33 @@ def show_status():
|
||||
logger.info("No data found in database")
|
||||
else:
|
||||
logger.error("❌ Database connection failed")
|
||||
|
||||
|
||||
# Test alerting system
|
||||
logger.info("\n=== Alerting System Status ===")
|
||||
try:
|
||||
from .alerting import WaterLevelAlertSystem
|
||||
alerting = WaterLevelAlertSystem()
|
||||
|
||||
if alerting.matrix_notifier:
|
||||
logger.info("✅ Matrix notifications configured")
|
||||
else:
|
||||
logger.warning("⚠️ Matrix notifications not configured")
|
||||
logger.info("Set MATRIX_ACCESS_TOKEN and MATRIX_ROOM_ID in .env file")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Alerting system error: {e}")
|
||||
|
||||
# Show metrics if available
|
||||
metrics_collector = get_metrics_collector()
|
||||
metrics = metrics_collector.get_all_metrics()
|
||||
|
||||
|
||||
if any(metrics.values()):
|
||||
logger.info("\n=== Metrics Summary ===")
|
||||
for metric_type, values in metrics.items():
|
||||
if values:
|
||||
logger.info(f"{metric_type.title()}: {len(values)} metrics")
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Status check failed: {e}")
|
||||
return False
|
||||
@@ -239,6 +312,8 @@ Examples:
|
||||
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
||||
%(prog)s --update-data 2 # Update existing data for last 2 days
|
||||
%(prog)s --status # Show system status
|
||||
%(prog)s --alert-check # Check water levels and send alerts
|
||||
%(prog)s --alert-test # Send test Matrix message
|
||||
"""
|
||||
)
|
||||
|
||||
@@ -273,7 +348,19 @@ Examples:
|
||||
action="store_true",
|
||||
help="Show current system status"
|
||||
)
|
||||
|
||||
|
||||
parser.add_argument(
|
||||
"--alert-check",
|
||||
action="store_true",
|
||||
help="Run water level alert check"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--alert-test",
|
||||
action="store_true",
|
||||
help="Send test alert message to Matrix"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||
@@ -314,6 +401,10 @@ Examples:
|
||||
success = run_data_update(args.update_data)
|
||||
elif args.status:
|
||||
success = show_status()
|
||||
elif args.alert_check:
|
||||
success = run_alert_check()
|
||||
elif args.alert_test:
|
||||
success = run_alert_test()
|
||||
else:
|
||||
success = run_continuous_monitoring()
|
||||
|
||||
|
Reference in New Issue
Block a user