Initial commit: Northern Thailand Ping River Monitor v3.1.0
Checks (Security & Dependency Updates workflow): 5 passed, 1 failed
- Dependency Security Scan: successful (29s)
- Docker Security Scan: failing (53s)
- License Compliance: successful (13s)
- Check for Dependency Updates: successful (19s)
- Code Quality Metrics: successful (11s)
- Security Summary: successful (7s)

Features:
- Real-time water level monitoring for Ping River Basin (16 stations)
- Coverage from Chiang Dao to Nakhon Sawan in Northern Thailand
- FastAPI web interface with interactive dashboard and station management
- Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics)
- Comprehensive monitoring with health checks and metrics collection
- Docker deployment with Grafana integration
- Production-ready architecture with enterprise-grade observability

CI/CD & Automation:
- Complete Gitea Actions workflows for CI/CD, security, and releases
- Multi-Python version testing (3.9-3.12)
- Multi-architecture Docker builds (amd64, arm64)
- Daily security scanning and dependency monitoring
- Automated documentation generation
- Performance testing and validation

Production Ready:
- Type safety with Pydantic models and comprehensive type hints
- Data validation layer with range checking and error handling
- Rate limiting and request tracking for API protection
- Enhanced logging with rotation, colors, and performance metrics
- Station management API for dynamic CRUD operations
- Comprehensive documentation and deployment guides

Technical Stack:
- Python 3.9+ with FastAPI and Pydantic
- Multi-database architecture with adapter pattern
- Docker containerization with multi-stage builds
- Grafana dashboards for visualization
- Gitea Actions for CI/CD automation
- Enterprise monitoring and alerting

Ready for deployment to B4L infrastructure!
commit af62cfef0b
Date: 2025-08-12 15:37:09 +07:00
60 changed files with 13267 additions and 0 deletions

src/__init__.py (new file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
Northern Thailand Ping River Monitor Package
A comprehensive real-time water level monitoring system for the Ping River Basin
in Northern Thailand, covering Royal Irrigation Department (RID) stations.
"""
__version__ = "3.1.0"
__author__ = "Ping River Monitor Team"
__description__ = "Northern Thailand Ping River Monitoring System"
from .water_scraper_v3 import EnhancedWaterMonitorScraper
from .database_adapters import create_database_adapter, DatabaseAdapter
from .config import Config
from .models import WaterMeasurement, StationInfo, DatabaseConfig
from .exceptions import (
WaterMonitorException,
DatabaseConnectionError,
APIConnectionError,
DataValidationError,
ConfigurationError
)
__all__ = [
'EnhancedWaterMonitorScraper',
'create_database_adapter',
'DatabaseAdapter',
'Config',
'WaterMeasurement',
'StationInfo',
'DatabaseConfig',
'WaterMonitorException',
'DatabaseConnectionError',
'APIConnectionError',
'DataValidationError',
'ConfigurationError'
]
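
A minimal usage sketch of the exported API (an editorial addition, not part of the committed file); it assumes the package is importable as src and that the default SQLite backend is configured:

from src import Config, create_database_adapter

Config.validate_config()                    # raises ConfigurationError on invalid settings
db_config = Config.get_database_config()    # e.g. {'type': 'sqlite', 'connection_string': 'sqlite:///water_levels.db'}
adapter = create_database_adapter(db_config.pop('type'), **db_config)

if adapter.connect():
    for row in adapter.get_latest_measurements(limit=5):
        print(row['station_code'], row['water_level'])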

src/config.py (new file, 191 lines)
@@ -0,0 +1,191 @@
import os
from typing import Dict, Any, Optional
try:
from .exceptions import ConfigurationError
from .models import DatabaseType, DatabaseConfig
except ImportError:
# Handle case when running as standalone script
class ConfigurationError(Exception):
pass
from enum import Enum
class DatabaseType(Enum):
SQLITE = "sqlite"
MYSQL = "mysql"
POSTGRESQL = "postgresql"
INFLUXDB = "influxdb"
VICTORIAMETRICS = "victoriametrics"
class Config:
"""Configuration class for the Water Level Monitor"""
# Database settings
DATABASE_PATH = os.getenv('WATER_DB_PATH', 'water_levels.db')
# Website settings
TARGET_URL = "https://hyd-app-db.rid.go.th/hydro1h.html"
API_URL = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
REQUEST_TIMEOUT = int(os.getenv('REQUEST_TIMEOUT', '30'))
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# Database configuration
DB_TYPE = os.getenv('DB_TYPE', 'sqlite').lower()
# VictoriaMetrics settings
VM_HOST = os.getenv('VM_HOST', 'vm.newedge.house')
VM_PORT = int(os.getenv('VM_PORT', '443'))
# Support for HTTPS URLs (e.g., behind reverse proxy)
VM_URL = os.getenv('VM_URL') # Full URL override (e.g., https://vm.example.com)
# InfluxDB settings
INFLUX_HOST = os.getenv('INFLUX_HOST', 'localhost')
INFLUX_PORT = int(os.getenv('INFLUX_PORT', '8086'))
INFLUX_DATABASE = os.getenv('INFLUX_DATABASE', 'water_monitoring')
INFLUX_USERNAME = os.getenv('INFLUX_USERNAME')
INFLUX_PASSWORD = os.getenv('INFLUX_PASSWORD')
# PostgreSQL settings
POSTGRES_CONNECTION_STRING = os.getenv('POSTGRES_CONNECTION_STRING')
# MySQL settings
MYSQL_CONNECTION_STRING = os.getenv('MYSQL_CONNECTION_STRING')
# Scheduler settings
SCRAPING_INTERVAL_HOURS = int(os.getenv('SCRAPING_INTERVAL_HOURS', '1'))
# Logging settings
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
LOG_FILE = os.getenv('LOG_FILE', 'water_monitor.log')
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
# Data retention
DATA_RETENTION_DAYS = int(os.getenv('DATA_RETENTION_DAYS', '365'))
# Retry settings
MAX_RETRIES = int(os.getenv('MAX_RETRIES', '3'))
RETRY_DELAY_SECONDS = int(os.getenv('RETRY_DELAY_SECONDS', '60'))
@classmethod
def validate_config(cls) -> bool:
"""Validate configuration settings"""
errors = []
# Validate database type
try:
DatabaseType(cls.DB_TYPE)
except ValueError:
errors.append(f"Invalid DB_TYPE: {cls.DB_TYPE}")
# Validate database-specific settings
if cls.DB_TYPE == 'victoriametrics':
if not cls.VM_HOST:
errors.append("VM_HOST is required for VictoriaMetrics")
if not isinstance(cls.VM_PORT, int) or cls.VM_PORT <= 0:
errors.append("VM_PORT must be a positive integer")
elif cls.DB_TYPE == 'influxdb':
if not cls.INFLUX_HOST:
errors.append("INFLUX_HOST is required for InfluxDB")
if not cls.INFLUX_DATABASE:
errors.append("INFLUX_DATABASE is required for InfluxDB")
elif cls.DB_TYPE in ['postgresql', 'mysql']:
connection_string = (cls.POSTGRES_CONNECTION_STRING if cls.DB_TYPE == 'postgresql'
else cls.MYSQL_CONNECTION_STRING)
if not connection_string:
errors.append(f"Connection string is required for {cls.DB_TYPE.upper()}")
# Validate numeric settings
if cls.SCRAPING_INTERVAL_HOURS <= 0:
errors.append("SCRAPING_INTERVAL_HOURS must be positive")
if cls.DATA_RETENTION_DAYS <= 0:
errors.append("DATA_RETENTION_DAYS must be positive")
if errors:
raise ConfigurationError(f"Configuration errors: {'; '.join(errors)}")
return True
@classmethod
def get_database_config(cls) -> Dict[str, Any]:
"""Returns database configuration based on DB_TYPE"""
if cls.DB_TYPE == 'victoriametrics':
return {
'type': 'victoriametrics',
'host': cls.VM_HOST,
'port': cls.VM_PORT
}
elif cls.DB_TYPE == 'influxdb':
return {
'type': 'influxdb',
'host': cls.INFLUX_HOST,
'port': cls.INFLUX_PORT,
'database': cls.INFLUX_DATABASE,
'username': cls.INFLUX_USERNAME,
'password': cls.INFLUX_PASSWORD
}
elif cls.DB_TYPE == 'postgresql':
return {
'type': 'postgresql',
'connection_string': cls.POSTGRES_CONNECTION_STRING or
'postgresql://postgres:password@localhost:5432/water_monitoring'
}
elif cls.DB_TYPE == 'mysql':
return {
'type': 'mysql',
'connection_string': cls.MYSQL_CONNECTION_STRING or
'mysql://root:password@localhost:3306/water_monitoring'
}
else: # sqlite
return {
'type': 'sqlite',
'connection_string': f'sqlite:///{cls.DATABASE_PATH}'
}
@classmethod
def get_all_settings(cls) -> Dict[str, Any]:
"""Returns all configuration settings"""
return {
'DB_TYPE': cls.DB_TYPE,
'DATABASE_PATH': cls.DATABASE_PATH,
'TARGET_URL': cls.TARGET_URL,
'API_URL': cls.API_URL,
'REQUEST_TIMEOUT': cls.REQUEST_TIMEOUT,
'SCRAPING_INTERVAL_HOURS': cls.SCRAPING_INTERVAL_HOURS,
'LOG_LEVEL': cls.LOG_LEVEL,
'LOG_FILE': cls.LOG_FILE,
'DATA_RETENTION_DAYS': cls.DATA_RETENTION_DAYS,
'MAX_RETRIES': cls.MAX_RETRIES,
'RETRY_DELAY_SECONDS': cls.RETRY_DELAY_SECONDS,
'VM_HOST': cls.VM_HOST,
'VM_PORT': cls.VM_PORT,
'INFLUX_HOST': cls.INFLUX_HOST,
'INFLUX_PORT': cls.INFLUX_PORT,
'INFLUX_DATABASE': cls.INFLUX_DATABASE
}
@classmethod
def print_settings(cls):
"""Prints all current settings"""
print("=== Water Level Monitor Configuration ===")
for key, value in cls.get_all_settings().items():
# Hide sensitive information
if 'PASSWORD' in key and value:
value = '*' * len(str(value))
print(f"{key}: {value}")
print("=" * 45)
print("\nDatabase Configuration:")
db_config = cls.get_database_config()
for key, value in db_config.items():
if 'password' in key and value:
value = '*' * len(str(value))
print(f" {key}: {value}")
print("=" * 45)
if __name__ == "__main__":
Config.print_settings()
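
A short sketch of environment-driven configuration (editorial, not part of the commit); the connection string and values below are illustrative placeholders:

import os

# Environment must be set before Config is imported, because the class
# attributes read os.getenv() at import time.
os.environ['DB_TYPE'] = 'postgresql'
os.environ['POSTGRES_CONNECTION_STRING'] = 'postgresql://monitor:secret@db.internal:5432/water_monitoring'
os.environ['SCRAPING_INTERVAL_HOURS'] = '1'

from src.config import Config

Config.validate_config()               # fails fast if a required setting is missing
print(Config.get_database_config())    # {'type': 'postgresql', 'connection_string': '...'}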

src/database_adapters.py (new file, 663 lines)
@@ -0,0 +1,663 @@
#!/usr/bin/env python3
"""
Database adapters for different storage backends
"""
import datetime
import logging
from typing import List, Dict, Optional, Any
from abc import ABC, abstractmethod
# Base adapter interface
class DatabaseAdapter(ABC):
@abstractmethod
def connect(self):
pass
@abstractmethod
def save_measurements(self, measurements: List[Dict]) -> bool:
pass
@abstractmethod
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
pass
@abstractmethod
def get_measurements_by_timerange(self, start_time: datetime.datetime,
end_time: datetime.datetime,
station_codes: Optional[List[str]] = None) -> List[Dict]:
pass
# InfluxDB Adapter
class InfluxDBAdapter(DatabaseAdapter):
def __init__(self, host: str = "localhost", port: int = 8086,
database: str = "water_monitoring", username: str = None, password: str = None):
self.host = host
self.port = port
self.database = database
self.username = username
self.password = password
self.client = None
def connect(self):
try:
from influxdb import InfluxDBClient
self.client = InfluxDBClient(
host=self.host,
port=self.port,
username=self.username,
password=self.password,
database=self.database
)
# Create database if it doesn't exist
databases = self.client.get_list_database()
if not any(db['name'] == self.database for db in databases):
self.client.create_database(self.database)
logging.info(f"Created InfluxDB database: {self.database}")
# Create retention policy (keep data for 2 years, downsample after 30 days)
retention_policies = self.client.get_list_retention_policies(self.database)
if not any(rp['name'] == 'water_data_policy' for rp in retention_policies):
self.client.create_retention_policy(
'water_data_policy',
'730d', # 2 years
'1', # replication factor
database=self.database,
default=True
)
logging.info("Connected to InfluxDB successfully")
return True
except ImportError:
logging.error("InfluxDB client not installed. Run: pip install influxdb")
return False
except Exception as e:
logging.error(f"Failed to connect to InfluxDB: {e}")
return False
def save_measurements(self, measurements: List[Dict]) -> bool:
if not self.client:
logging.error("InfluxDB client not connected")
return False
try:
points = []
for measurement in measurements:
point = {
"measurement": "water_data",
"tags": {
"station_code": measurement['station_code'],
"station_name_en": measurement['station_name_en'],
"station_name_th": measurement['station_name_th']
},
"time": measurement['timestamp'].isoformat(),
"fields": {
"water_level": float(measurement['water_level']),
"discharge": float(measurement['discharge']),
"discharge_percent": float(measurement['discharge_percent']) if measurement['discharge_percent'] else None
}
}
points.append(point)
success = self.client.write_points(points)
if success:
logging.info(f"Successfully wrote {len(points)} points to InfluxDB")
return success
except Exception as e:
logging.error(f"Error writing to InfluxDB: {e}")
return False
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
if not self.client:
return []
try:
query = f"""
SELECT last("water_level") as water_level,
last("discharge") as discharge,
last("discharge_percent") as discharge_percent
FROM "water_data"
GROUP BY "station_code", "station_name_en", "station_name_th"
LIMIT {limit}
"""
result = self.client.query(query)
measurements = []
for point in result.get_points():
measurements.append({
'timestamp': point['time'],
'station_code': point.get('station_code'),
'station_name_en': point.get('station_name_en'),
'station_name_th': point.get('station_name_th'),
'water_level': point.get('water_level'),
'discharge': point.get('discharge'),
'discharge_percent': point.get('discharge_percent')
})
return measurements
except Exception as e:
logging.error(f"Error querying InfluxDB: {e}")
return []
def get_measurements_by_timerange(self, start_time: datetime.datetime,
end_time: datetime.datetime,
station_codes: Optional[List[str]] = None) -> List[Dict]:
if not self.client:
return []
try:
where_clause = f"time >= '{start_time.isoformat()}' AND time <= '{end_time.isoformat()}'"
if station_codes:
station_filter = "'" + "','".join(station_codes) + "'"
where_clause += f" AND station_code IN ({station_filter})"
query = f"""
SELECT "water_level", "discharge", "discharge_percent", "station_code", "station_name_en", "station_name_th"
FROM "water_data"
WHERE {where_clause}
ORDER BY time DESC
"""
result = self.client.query(query)
measurements = []
for point in result.get_points():
measurements.append({
'timestamp': point['time'],
'station_code': point.get('station_code'),
'station_name_en': point.get('station_name_en'),
'station_name_th': point.get('station_name_th'),
'water_level': point.get('water_level'),
'discharge': point.get('discharge'),
'discharge_percent': point.get('discharge_percent')
})
return measurements
except Exception as e:
logging.error(f"Error querying InfluxDB: {e}")
return []
# MySQL/PostgreSQL Adapter
class SQLAdapter(DatabaseAdapter):
def __init__(self, connection_string: str, db_type: str = "mysql"):
self.connection_string = connection_string
self.db_type = db_type.lower()
self.engine = None
# Add SQLite-specific connection parameters for better concurrency
if self.db_type == "sqlite":
if "?" not in connection_string:
self.connection_string += "?timeout=30&check_same_thread=False"
else:
self.connection_string += "&timeout=30&check_same_thread=False"
def connect(self):
try:
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
self.engine = create_engine(self.connection_string, pool_pre_ping=True)
# Create tables
self._create_tables()
logging.info(f"Connected to {self.db_type.upper()} successfully")
return True
except ImportError:
logging.error("SQLAlchemy not installed. Run: pip install sqlalchemy pymysql")
return False
except Exception as e:
logging.error(f"Failed to connect to {self.db_type.upper()}: {e}")
return False
def _create_tables(self):
from sqlalchemy import text
# Stations table - adjust for different databases
if self.db_type == "sqlite":
stations_sql = """
CREATE TABLE IF NOT EXISTS stations (
id INTEGER PRIMARY KEY,
station_code TEXT UNIQUE NOT NULL,
thai_name TEXT NOT NULL,
english_name TEXT NOT NULL,
latitude REAL,
longitude REAL,
geohash TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
measurements_sql = """
CREATE TABLE IF NOT EXISTS water_measurements (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp DATETIME NOT NULL,
station_id INTEGER NOT NULL,
water_level REAL,
discharge REAL,
discharge_percent REAL,
status TEXT DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (station_id) REFERENCES stations(id),
UNIQUE(timestamp, station_id)
)
"""
# Create indexes separately for SQLite
index_sql = [
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp)"
]
elif self.db_type == "postgresql":
stations_sql = """
CREATE TABLE IF NOT EXISTS stations (
id SERIAL PRIMARY KEY,
station_code VARCHAR(10) UNIQUE NOT NULL,
thai_name VARCHAR(255) NOT NULL,
english_name VARCHAR(255) NOT NULL,
latitude DECIMAL(10,8),
longitude DECIMAL(11,8),
geohash VARCHAR(20),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
measurements_sql = """
CREATE TABLE IF NOT EXISTS water_measurements (
id BIGSERIAL PRIMARY KEY,
timestamp TIMESTAMP NOT NULL,
station_id INTEGER NOT NULL,
water_level NUMERIC(10,3),
discharge NUMERIC(10,2),
discharge_percent NUMERIC(5,2),
status VARCHAR(20) DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (station_id) REFERENCES stations(id),
UNIQUE(timestamp, station_id)
)
"""
index_sql = [
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp DESC)"
]
else: # MySQL
stations_sql = """
CREATE TABLE IF NOT EXISTS stations (
id INT PRIMARY KEY,
station_code VARCHAR(10) UNIQUE NOT NULL,
thai_name VARCHAR(255) NOT NULL,
english_name VARCHAR(255) NOT NULL,
latitude DECIMAL(10,8),
longitude DECIMAL(11,8),
geohash VARCHAR(20),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
)
"""
measurements_sql = """
CREATE TABLE IF NOT EXISTS water_measurements (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
timestamp DATETIME NOT NULL,
station_id INT NOT NULL,
water_level DECIMAL(10,3),
discharge DECIMAL(10,2),
discharge_percent DECIMAL(5,2),
status VARCHAR(20) DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (station_id) REFERENCES stations(id),
UNIQUE KEY unique_measurement (timestamp, station_id),
INDEX idx_timestamp (timestamp),
INDEX idx_station_timestamp (station_id, timestamp)
)
"""
index_sql = []
with self.engine.begin() as conn:
conn.execute(text(stations_sql))
conn.execute(text(measurements_sql))
# Create indexes for SQLite and PostgreSQL
for index in index_sql:
conn.execute(text(index))
# Transaction is automatically committed when context manager exits
def save_measurements(self, measurements: List[Dict]) -> bool:
if not self.engine:
return False
try:
from sqlalchemy import text
with self.engine.begin() as conn:
# Insert/update stations
for measurement in measurements:
if self.db_type == "sqlite":
station_sql = """
INSERT OR REPLACE INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, CURRENT_TIMESTAMP)
"""
elif self.db_type == "postgresql":
station_sql = """
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
ON CONFLICT (id) DO UPDATE SET
thai_name = EXCLUDED.thai_name,
english_name = EXCLUDED.english_name,
latitude = EXCLUDED.latitude,
longitude = EXCLUDED.longitude,
geohash = EXCLUDED.geohash,
updated_at = NOW()
"""
else: # MySQL
station_sql = """
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
ON DUPLICATE KEY UPDATE
thai_name = VALUES(thai_name),
english_name = VALUES(english_name),
latitude = VALUES(latitude),
longitude = VALUES(longitude),
geohash = VALUES(geohash),
updated_at = NOW()
"""
conn.execute(text(station_sql), {
'station_id': measurement['station_id'],
'station_code': measurement['station_code'],
'thai_name': measurement['station_name_th'],
'english_name': measurement['station_name_en'],
'latitude': measurement.get('latitude'),
'longitude': measurement.get('longitude'),
'geohash': measurement.get('geohash')
})
# Insert measurements
for measurement in measurements:
if self.db_type == "sqlite":
measurement_sql = """
INSERT OR REPLACE INTO water_measurements
(timestamp, station_id, water_level, discharge, discharge_percent, status)
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
"""
elif self.db_type == "postgresql":
measurement_sql = """
INSERT INTO water_measurements
(timestamp, station_id, water_level, discharge, discharge_percent, status)
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
ON CONFLICT (timestamp, station_id) DO UPDATE SET
water_level = EXCLUDED.water_level,
discharge = EXCLUDED.discharge,
discharge_percent = EXCLUDED.discharge_percent,
status = EXCLUDED.status
"""
else: # MySQL
measurement_sql = """
INSERT INTO water_measurements
(timestamp, station_id, water_level, discharge, discharge_percent, status)
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
ON DUPLICATE KEY UPDATE
water_level = VALUES(water_level),
discharge = VALUES(discharge),
discharge_percent = VALUES(discharge_percent),
status = VALUES(status)
"""
conn.execute(text(measurement_sql), {
'timestamp': measurement['timestamp'],
'station_id': measurement['station_id'],
'water_level': measurement['water_level'],
'discharge': measurement['discharge'],
'discharge_percent': measurement['discharge_percent'],
'status': measurement['status']
})
# Transaction is automatically committed when context manager exits
logging.info(f"Successfully saved {len(measurements)} measurements to {self.db_type.upper()}")
return True
except Exception as e:
logging.error(f"Error saving to {self.db_type.upper()}: {e}")
return False
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
if not self.engine:
return []
try:
from sqlalchemy import text
query = """
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
m.water_level, m.discharge, m.discharge_percent, m.status
FROM water_measurements m
JOIN stations s ON m.station_id = s.id
INNER JOIN (
SELECT station_id, MAX(timestamp) as max_timestamp
FROM water_measurements
GROUP BY station_id
) latest ON m.station_id = latest.station_id AND m.timestamp = latest.max_timestamp
ORDER BY s.station_code
LIMIT :limit
"""
with self.engine.connect() as conn:
result = conn.execute(text(query), {'limit': limit})
measurements = []
for row in result:
measurements.append({
'timestamp': row[0],
'station_code': row[1],
'station_name_en': row[2],
'station_name_th': row[3],
'water_level': float(row[4]) if row[4] is not None else None,
'discharge': float(row[5]) if row[5] is not None else None,
'discharge_percent': float(row[6]) if row[6] is not None else None,
'status': row[7]
})
return measurements
except Exception as e:
logging.error(f"Error querying {self.db_type.upper()}: {e}")
return []
def get_measurements_by_timerange(self, start_time: datetime.datetime,
end_time: datetime.datetime,
station_codes: Optional[List[str]] = None) -> List[Dict]:
if not self.engine:
return []
try:
from sqlalchemy import text
where_clause = "m.timestamp BETWEEN :start_time AND :end_time"
params = {'start_time': start_time, 'end_time': end_time}
if station_codes:
placeholders = ','.join([f':station_{i}' for i in range(len(station_codes))])
where_clause += f" AND s.station_code IN ({placeholders})"
for i, code in enumerate(station_codes):
params[f'station_{i}'] = code
query = f"""
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
m.water_level, m.discharge, m.discharge_percent, m.status
FROM water_measurements m
JOIN stations s ON m.station_id = s.id
WHERE {where_clause}
ORDER BY m.timestamp DESC, s.station_code
"""
with self.engine.connect() as conn:
result = conn.execute(text(query), params)
measurements = []
for row in result:
measurements.append({
'timestamp': row[0],
'station_code': row[1],
'station_name_en': row[2],
'station_name_th': row[3],
'water_level': float(row[4]) if row[4] is not None else None,
'discharge': float(row[5]) if row[5] is not None else None,
'discharge_percent': float(row[6]) if row[6] is not None else None,
'status': row[7]
})
return measurements
except Exception as e:
logging.error(f"Error querying {self.db_type.upper()}: {e}")
return []
# VictoriaMetrics Adapter (using Prometheus format)
class VictoriaMetricsAdapter(DatabaseAdapter):
def __init__(self, host: str = "localhost", port: int = 8428):
self.host = host
self.port = port
# Handle HTTPS URLs and reverse proxy configurations
if host.startswith(('http://', 'https://')):
self.base_url = host
if port != 80 and port != 443 and not host.endswith(f':{port}'):
# Only add port if it's not standard and not already in URL
if '://' in host and ':' not in host.split('://')[1]:
self.base_url = f"{host}:{port}"
else:
# Default to HTTP for localhost, HTTPS for remote hosts
protocol = "https" if host != "localhost" and not host.startswith("127.") else "http"
if (protocol == "https" and port == 443) or (protocol == "http" and port == 80):
self.base_url = f"{protocol}://{host}"
else:
self.base_url = f"{protocol}://{host}:{port}"
def connect(self):
try:
import requests
# Test connection with SSL verification and timeout
response = requests.get(
f"{self.base_url}/api/v1/status/config",
timeout=10,
verify=True # Enable SSL verification for HTTPS
)
if response.status_code == 200:
logging.info(f"Connected to VictoriaMetrics successfully at {self.base_url}")
return True
else:
logging.error(f"VictoriaMetrics connection failed: {response.status_code}")
return False
except requests.exceptions.SSLError as e:
logging.error(f"SSL error connecting to VictoriaMetrics: {e}")
return False
except requests.exceptions.ConnectionError as e:
logging.error(f"Connection error to VictoriaMetrics: {e}")
return False
except Exception as e:
logging.error(f"Failed to connect to VictoriaMetrics: {e}")
return False
def save_measurements(self, measurements: List[Dict]) -> bool:
try:
import requests
# Convert to Prometheus format
metrics_data = []
timestamp_ms = int(datetime.datetime.now().timestamp() * 1000)
for measurement in measurements:
# Water level metric
metrics_data.append(
f'water_level{{station_code="{measurement["station_code"]}",'
f'station_name_en="{measurement["station_name_en"]}",'
f'station_name_th="{measurement["station_name_th"]}"}} '
f'{measurement["water_level"]} {timestamp_ms}'
)
# Discharge metric
metrics_data.append(
f'water_discharge{{station_code="{measurement["station_code"]}",'
f'station_name_en="{measurement["station_name_en"]}",'
f'station_name_th="{measurement["station_name_th"]}"}} '
f'{measurement["discharge"]} {timestamp_ms}'
)
# Discharge percentage metric
if measurement["discharge_percent"]:
metrics_data.append(
f'water_discharge_percent{{station_code="{measurement["station_code"]}",'
f'station_name_en="{measurement["station_name_en"]}",'
f'station_name_th="{measurement["station_name_th"]}"}} '
f'{measurement["discharge_percent"]} {timestamp_ms}'
)
# Send to VictoriaMetrics
data = '\n'.join(metrics_data)
response = requests.post(
f"{self.base_url}/api/v1/import/prometheus",
data=data,
headers={'Content-Type': 'text/plain'},
timeout=30
)
if response.status_code == 204:
logging.info(f"Successfully sent {len(measurements)} measurements to VictoriaMetrics")
return True
else:
logging.error(f"VictoriaMetrics import failed: {response.status_code} - {response.text}")
return False
except Exception as e:
logging.error(f"Error sending to VictoriaMetrics: {e}")
return False
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
# VictoriaMetrics queries would be implemented here
# This is a simplified version
logging.warning("get_latest_measurements not fully implemented for VictoriaMetrics")
return []
def get_measurements_by_timerange(self, start_time: datetime.datetime,
end_time: datetime.datetime,
station_codes: Optional[List[str]] = None) -> List[Dict]:
# VictoriaMetrics range queries would be implemented here
logging.warning("get_measurements_by_timerange not fully implemented for VictoriaMetrics")
return []
# Factory function to create appropriate adapter
def create_database_adapter(db_type: str, **kwargs) -> DatabaseAdapter:
"""
Factory function to create database adapter
Args:
db_type: 'influxdb', 'mysql', 'postgresql', 'sqlite', or 'victoriametrics'
**kwargs: Database-specific connection parameters
"""
db_type = db_type.lower()
if db_type == 'influxdb':
return InfluxDBAdapter(**kwargs)
elif db_type == 'mysql':
return SQLAdapter(db_type='mysql', **kwargs)
elif db_type == 'postgresql':
return SQLAdapter(db_type='postgresql', **kwargs)
elif db_type == 'sqlite':
return SQLAdapter(db_type='sqlite', **kwargs)
elif db_type == 'victoriametrics':
return VictoriaMetricsAdapter(**kwargs)
else:
raise ValueError(f"Unsupported database type: {db_type}")
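
A hedged sketch of the factory in use (editorial, not part of the commit); the station and measurement values are invented for illustration, but the dictionary keys mirror what save_measurements() reads above:

import datetime
from src.database_adapters import create_database_adapter

adapter = create_database_adapter('sqlite', connection_string='sqlite:///demo_water.db')
if adapter.connect():
    sample = [{
        'timestamp': datetime.datetime.now(),
        'station_id': 1,
        'station_code': 'P.1',
        'station_name_en': 'Nawarat Bridge',
        'station_name_th': 'สะพานนวรัฐ',
        'water_level': 2.45,        # metres
        'discharge': 120.0,         # cubic metres per second
        'discharge_percent': 35.0,
        'status': 'active',
    }]
    adapter.save_measurements(sample)
    print(adapter.get_latest_measurements(limit=1))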

src/demo_databases.py (new file, 331 lines)
@@ -0,0 +1,331 @@
#!/usr/bin/env python3
"""
Demo script showing different database backend options for water monitoring
"""
import os
import sys
import datetime
from water_scraper_v3 import EnhancedWaterMonitorScraper
def demo_sqlite():
"""Demo with SQLite (local development)"""
print("=" * 60)
print("🗄️ SQLite Demo (Local Development)")
print("=" * 60)
config = {
'type': 'sqlite',
'connection_string': 'sqlite:///demo_water_sqlite.db'
}
try:
scraper = EnhancedWaterMonitorScraper(config)
# Fetch and save data
print("Fetching data from API...")
data = scraper.fetch_water_data()
if data:
print(f"✓ Fetched {len(data)} data points")
success = scraper.save_to_database(data)
if success:
print("✓ Data saved to SQLite database")
# Show latest data
latest = scraper.get_latest_data(5)
print(f"\nLatest 5 measurements:")
for measurement in latest:
print(f"{measurement['station_code']} ({measurement['station_name_en']}): "
f"{measurement['water_level']:.2f}m, {measurement['discharge']:.1f} cms")
else:
print("✗ Failed to save data")
else:
print("✗ No data fetched")
except Exception as e:
print(f"Error: {e}")
def demo_influxdb():
"""Demo with InfluxDB (requires InfluxDB running)"""
print("\n" + "=" * 60)
print("📊 InfluxDB Demo (Time-Series Database)")
print("=" * 60)
config = {
'type': 'influxdb',
'host': 'localhost',
'port': 8086,
'database': 'water_monitoring_demo',
'username': None, # Set if authentication is enabled
'password': None
}
try:
scraper = EnhancedWaterMonitorScraper(config)
if scraper.db_adapter and scraper.db_adapter.client:
print("✓ Connected to InfluxDB")
# Fetch and save data
print("Fetching data from API...")
data = scraper.fetch_water_data()
if data:
print(f"✓ Fetched {len(data)} data points")
success = scraper.save_to_database(data)
if success:
print("✓ Data saved to InfluxDB")
print("💡 You can now query this data in Grafana or InfluxDB CLI")
print(" Example query: SELECT * FROM water_data ORDER BY time DESC LIMIT 10")
else:
print("✗ Failed to save data")
else:
print("✗ No data fetched")
else:
print("✗ Could not connect to InfluxDB")
print("💡 Make sure InfluxDB is running: docker run -p 8086:8086 influxdb:1.8")
except Exception as e:
print(f"Error: {e}")
print("💡 InfluxDB might not be running or accessible")
def demo_postgresql():
"""Demo with PostgreSQL (requires PostgreSQL running)"""
print("\n" + "=" * 60)
print("🐘 PostgreSQL Demo (Relational Database)")
print("=" * 60)
config = {
'type': 'postgresql',
'connection_string': 'postgresql://postgres:password@localhost:5432/water_monitoring'
}
try:
scraper = EnhancedWaterMonitorScraper(config)
if scraper.db_adapter and scraper.db_adapter.engine:
print("✓ Connected to PostgreSQL")
# Fetch and save data
print("Fetching data from API...")
data = scraper.fetch_water_data()
if data:
print(f"✓ Fetched {len(data)} data points")
success = scraper.save_to_database(data)
if success:
print("✓ Data saved to PostgreSQL")
print("💡 You can now query this data with SQL")
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
else:
print("✗ Failed to save data")
else:
print("✗ No data fetched")
else:
print("✗ Could not connect to PostgreSQL")
print("💡 Make sure PostgreSQL is running with correct credentials")
except Exception as e:
print(f"Error: {e}")
print("💡 PostgreSQL might not be running or credentials might be wrong")
def demo_mysql():
"""Demo with MySQL (requires MySQL running)"""
print("\n" + "=" * 60)
print("🐬 MySQL Demo (Relational Database)")
print("=" * 60)
config = {
'type': 'mysql',
'connection_string': 'mysql://root:password@localhost:3306/water_monitoring'
}
try:
scraper = EnhancedWaterMonitorScraper(config)
if scraper.db_adapter and scraper.db_adapter.engine:
print("✓ Connected to MySQL")
# Fetch and save data
print("Fetching data from API...")
data = scraper.fetch_water_data()
if data:
print(f"✓ Fetched {len(data)} data points")
success = scraper.save_to_database(data)
if success:
print("✓ Data saved to MySQL")
print("💡 You can now query this data with SQL")
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
else:
print("✗ Failed to save data")
else:
print("✗ No data fetched")
else:
print("✗ Could not connect to MySQL")
print("💡 Make sure MySQL is running with correct credentials")
except Exception as e:
print(f"Error: {e}")
print("💡 MySQL might not be running or credentials might be wrong")
def demo_victoriametrics():
"""Demo with VictoriaMetrics (supports both local and HTTPS configurations)"""
print("\n" + "=" * 60)
print("⚡ VictoriaMetrics Demo (High-Performance Metrics)")
print("=" * 60)
# Use configuration from environment or config.py
from config import Config
db_config = Config.get_database_config()
if db_config['type'] != 'victoriametrics':
# Fallback to default local configuration
config = {
'type': 'victoriametrics',
'host': 'vm.newedge.house',
'port': 443
}
else:
config = db_config
print(f"Connecting to: {config['host']}:{config['port']}")
try:
scraper = EnhancedWaterMonitorScraper(config)
if scraper.db_adapter:
# Test connection using the adapter's connect method
if scraper.db_adapter.connect():
print("✓ Connected to VictoriaMetrics")
# Fetch and save data
print("Fetching data from API...")
data = scraper.fetch_water_data()
if data:
print(f"✓ Fetched {len(data)} data points")
success = scraper.save_to_database(data)
if success:
print("✓ Data saved to VictoriaMetrics")
print("💡 You can now query this data via Prometheus API")
# Show appropriate query URL based on configuration
base_url = scraper.db_adapter.base_url
print(f" Example: {base_url}/api/v1/query?query=water_level")
print(f" Health check: {base_url}/health")
else:
print("✗ Failed to save data")
else:
print("✗ No data fetched")
else:
print("✗ Could not connect to VictoriaMetrics")
if config['host'] == 'localhost':
print("💡 Make sure VictoriaMetrics is running locally:")
print(" docker run -p 8428:8428 victoriametrics/victoria-metrics")
else:
print(f"💡 Check if VictoriaMetrics is accessible at {config['host']}:{config['port']}")
print("💡 Verify HTTPS configuration and network connectivity")
else:
print("✗ Failed to initialize VictoriaMetrics adapter")
except Exception as e:
print(f"Error: {e}")
print("💡 Check your VictoriaMetrics configuration and network connectivity")
def show_recommendations():
"""Show database recommendations"""
print("\n" + "=" * 60)
print("🏆 Database Recommendations")
print("=" * 60)
recommendations = [
{
'name': 'InfluxDB',
'best_for': 'Time-series data, Grafana dashboards',
'pros': ['Purpose-built for time-series', 'Great compression', 'Built-in retention'],
'cons': ['Learning curve', 'Less flexible for complex queries'],
'use_case': 'Recommended for most water monitoring deployments'
},
{
'name': 'PostgreSQL + TimescaleDB',
'best_for': 'Complex queries, existing PostgreSQL infrastructure',
'pros': ['Mature ecosystem', 'SQL compatibility', 'ACID compliance'],
'cons': ['More complex setup', 'Higher resource usage'],
'use_case': 'Best for organizations already using PostgreSQL'
},
{
'name': 'VictoriaMetrics',
'best_for': 'High-performance metrics, Prometheus compatibility',
'pros': ['Extremely fast', 'Low resource usage', 'Better compression'],
'cons': ['Newer ecosystem', 'Less tooling'],
'use_case': 'Best for high-volume, performance-critical deployments'
},
{
'name': 'MySQL',
'best_for': 'Existing MySQL infrastructure, familiar SQL',
'pros': ['Familiar', 'Mature', 'Wide support'],
'cons': ['Not optimized for time-series', 'Manual optimization needed'],
'use_case': 'Good for organizations with existing MySQL expertise'
}
]
for rec in recommendations:
print(f"\n📊 {rec['name']}")
print(f" Best for: {rec['best_for']}")
print(f" Pros: {', '.join(rec['pros'])}")
print(f" Cons: {', '.join(rec['cons'])}")
print(f" 💡 {rec['use_case']}")
def main():
"""Main demo function"""
print("🌊 Thailand Water Monitor - Database Backend Demo")
print("This demo shows how to use different database backends")
# Always run SQLite demo (no external dependencies)
demo_sqlite()
# Check for command line arguments to run specific demos
if len(sys.argv) > 1:
db_type = sys.argv[1].lower()
if db_type == 'influxdb':
demo_influxdb()
elif db_type == 'postgresql':
demo_postgresql()
elif db_type == 'mysql':
demo_mysql()
elif db_type == 'victoriametrics':
demo_victoriametrics()
elif db_type == 'all':
demo_influxdb()
demo_postgresql()
demo_mysql()
demo_victoriametrics()
else:
print(f"\nUnknown database type: {db_type}")
print("Available options: influxdb, postgresql, mysql, victoriametrics, all")
else:
print("\n💡 To test other databases, run:")
print(" python demo_databases.py influxdb")
print(" python demo_databases.py postgresql")
print(" python demo_databases.py mysql")
print(" python demo_databases.py victoriametrics")
print(" python demo_databases.py all")
# Show recommendations
show_recommendations()
print("\n" + "=" * 60)
print("✅ Demo completed!")
print("📖 See DATABASE_DEPLOYMENT_GUIDE.md for production setup instructions")
print("=" * 60)
if __name__ == "__main__":
main()

src/exceptions.py (new file, 32 lines)
@@ -0,0 +1,32 @@
#!/usr/bin/env python3
"""
Custom exceptions for water monitoring system
"""
class WaterMonitorException(Exception):
"""Base exception for water monitoring system"""
pass
class DatabaseConnectionError(WaterMonitorException):
"""Raised when database connection fails"""
pass
class APIConnectionError(WaterMonitorException):
"""Raised when API connection fails"""
pass
class DataValidationError(WaterMonitorException):
"""Raised when data validation fails"""
pass
class ConfigurationError(WaterMonitorException):
"""Raised when configuration is invalid"""
pass
class DataParsingError(WaterMonitorException):
"""Raised when data parsing fails"""
pass
class RetryExhaustedError(WaterMonitorException):
"""Raised when all retry attempts are exhausted"""
pass
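
Because every custom error derives from WaterMonitorException, callers can catch the base class and still branch on specific failures. A small sketch (editorial, not part of the commit; fetch_water_data and save_to_database are the scraper methods used elsewhere in this commit):

from src.exceptions import (
    WaterMonitorException,
    APIConnectionError,
    DatabaseConnectionError,
)

def fetch_and_store(scraper):
    try:
        data = scraper.fetch_water_data()
        scraper.save_to_database(data)
    except APIConnectionError:
        print("RID API unreachable; will retry on the next cycle")
    except DatabaseConnectionError:
        print("Database is down; data will be re-fetched later")
    except WaterMonitorException as exc:
        print(f"Unhandled monitor error: {exc}")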

src/health_check.py (new file, 265 lines)
@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Health check system for water monitoring application
"""
import time
import threading
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List, Callable
from dataclasses import dataclass
from enum import Enum
import logging
logger = logging.getLogger(__name__)
class HealthStatus(Enum):
HEALTHY = "healthy"
DEGRADED = "degraded"
UNHEALTHY = "unhealthy"
@dataclass
class HealthCheckResult:
"""Result of a health check"""
name: str
status: HealthStatus
message: str
timestamp: datetime
response_time_ms: Optional[float] = None
details: Optional[Dict[str, Any]] = None
class HealthCheck:
"""Base health check class"""
def __init__(self, name: str, timeout_seconds: int = 30):
self.name = name
self.timeout_seconds = timeout_seconds
def check(self) -> HealthCheckResult:
"""Perform the health check"""
start_time = time.time()
try:
result = self._perform_check()
response_time = (time.time() - start_time) * 1000
return HealthCheckResult(
name=self.name,
status=result.get('status', HealthStatus.HEALTHY),
message=result.get('message', 'OK'),
timestamp=datetime.now(),
response_time_ms=response_time,
details=result.get('details')
)
except Exception as e:
response_time = (time.time() - start_time) * 1000
logger.error(f"Health check {self.name} failed: {e}")
return HealthCheckResult(
name=self.name,
status=HealthStatus.UNHEALTHY,
message=f"Check failed: {str(e)}",
timestamp=datetime.now(),
response_time_ms=response_time
)
def _perform_check(self) -> Dict[str, Any]:
"""Override this method to implement the actual check"""
raise NotImplementedError
class DatabaseHealthCheck(HealthCheck):
"""Health check for database connectivity"""
def __init__(self, db_adapter, name: str = "database"):
super().__init__(name)
self.db_adapter = db_adapter
def _perform_check(self) -> Dict[str, Any]:
if not self.db_adapter:
return {
'status': HealthStatus.UNHEALTHY,
'message': 'Database adapter not initialized'
}
try:
# Try to connect
if hasattr(self.db_adapter, 'connect'):
connected = self.db_adapter.connect()
if not connected:
return {
'status': HealthStatus.UNHEALTHY,
'message': 'Database connection failed'
}
# Try to get latest data
latest_data = self.db_adapter.get_latest_measurements(limit=1)
if latest_data:
latest_timestamp = latest_data[0].get('timestamp')
if isinstance(latest_timestamp, str):
latest_timestamp = datetime.fromisoformat(latest_timestamp.replace('Z', '+00:00'))
# Check if data is recent (within last 2 hours)
if datetime.now() - latest_timestamp.replace(tzinfo=None) > timedelta(hours=2):
return {
'status': HealthStatus.DEGRADED,
'message': f'Latest data is old: {latest_timestamp}',
'details': {'latest_data_timestamp': str(latest_timestamp)}
}
return {
'status': HealthStatus.HEALTHY,
'message': 'Database connection OK',
'details': {
'latest_data_count': len(latest_data),
'latest_timestamp': str(latest_data[0].get('timestamp')) if latest_data else None
}
}
except Exception as e:
return {
'status': HealthStatus.UNHEALTHY,
'message': f'Database check failed: {str(e)}'
}
class APIHealthCheck(HealthCheck):
"""Health check for external API connectivity"""
def __init__(self, api_url: str, session, name: str = "api"):
super().__init__(name)
self.api_url = api_url
self.session = session
def _perform_check(self) -> Dict[str, Any]:
try:
# Simple GET request to check API availability
response = self.session.get(self.api_url, timeout=self.timeout_seconds)
if response.status_code == 200:
return {
'status': HealthStatus.HEALTHY,
'message': 'API connection OK',
'details': {
'status_code': response.status_code,
'response_size': len(response.content)
}
}
else:
return {
'status': HealthStatus.DEGRADED,
'message': f'API returned status {response.status_code}',
'details': {'status_code': response.status_code}
}
except Exception as e:
return {
'status': HealthStatus.UNHEALTHY,
'message': f'API check failed: {str(e)}'
}
class MemoryHealthCheck(HealthCheck):
"""Health check for memory usage"""
def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
super().__init__(name)
self.max_memory_mb = max_memory_mb
def _perform_check(self) -> Dict[str, Any]:
try:
import psutil
process = psutil.Process()
memory_info = process.memory_info()
memory_mb = memory_info.rss / 1024 / 1024
if memory_mb > self.max_memory_mb:
return {
'status': HealthStatus.DEGRADED,
'message': f'High memory usage: {memory_mb:.1f}MB',
'details': {'memory_mb': memory_mb, 'max_memory_mb': self.max_memory_mb}
}
return {
'status': HealthStatus.HEALTHY,
'message': f'Memory usage OK: {memory_mb:.1f}MB',
'details': {'memory_mb': memory_mb}
}
except ImportError:
return {
'status': HealthStatus.HEALTHY,
'message': 'Memory check skipped (psutil not available)'
}
except Exception as e:
return {
'status': HealthStatus.UNHEALTHY,
'message': f'Memory check failed: {str(e)}'
}
class HealthCheckManager:
"""Manages multiple health checks"""
def __init__(self):
self.checks: List[HealthCheck] = []
self.last_results: Dict[str, HealthCheckResult] = {}
self._lock = threading.Lock()
def add_check(self, health_check: HealthCheck):
"""Add a health check"""
with self._lock:
self.checks.append(health_check)
def run_all_checks(self) -> Dict[str, HealthCheckResult]:
"""Run all health checks"""
results = {}
for check in self.checks:
try:
result = check.check()
results[check.name] = result
with self._lock:
self.last_results[check.name] = result
except Exception as e:
logger.error(f"Error running health check {check.name}: {e}")
results[check.name] = HealthCheckResult(
name=check.name,
status=HealthStatus.UNHEALTHY,
message=f"Check execution failed: {str(e)}",
timestamp=datetime.now()
)
return results
def get_overall_status(self) -> HealthStatus:
"""Get overall system health status"""
if not self.last_results:
return HealthStatus.UNHEALTHY
statuses = [result.status for result in self.last_results.values()]
if any(status == HealthStatus.UNHEALTHY for status in statuses):
return HealthStatus.UNHEALTHY
elif any(status == HealthStatus.DEGRADED for status in statuses):
return HealthStatus.DEGRADED
else:
return HealthStatus.HEALTHY
def get_health_summary(self) -> Dict[str, Any]:
"""Get a summary of system health"""
overall_status = self.get_overall_status()
return {
'overall_status': overall_status.value,
'timestamp': datetime.now().isoformat(),
'checks': {
name: {
'status': result.status.value,
'message': result.message,
'response_time_ms': result.response_time_ms,
'timestamp': result.timestamp.isoformat()
}
for name, result in self.last_results.items()
}
}
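
A wiring sketch for the manager (editorial, not part of the commit); it assumes the default SQLite database file and outbound network access for the API check:

import requests

from src.config import Config
from src.database_adapters import create_database_adapter
from src.health_check import (
    HealthCheckManager, DatabaseHealthCheck, APIHealthCheck, MemoryHealthCheck,
)

adapter = create_database_adapter('sqlite', connection_string='sqlite:///water_levels.db')

manager = HealthCheckManager()
manager.add_check(DatabaseHealthCheck(adapter))
manager.add_check(APIHealthCheck(Config.API_URL, requests.Session()))
manager.add_check(MemoryHealthCheck(max_memory_mb=500))

manager.run_all_checks()
print(manager.get_overall_status().value)   # "healthy", "degraded", or "unhealthy"
print(manager.get_health_summary())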

src/logging_config.py (new file, 135 lines)
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
"""
Centralized logging configuration for water monitoring system
"""
import logging
import logging.handlers
import os
from datetime import datetime
from typing import Optional
class ColoredFormatter(logging.Formatter):
"""Colored console formatter"""
COLORS = {
'DEBUG': '\033[36m', # Cyan
'INFO': '\033[32m', # Green
'WARNING': '\033[33m', # Yellow
'ERROR': '\033[31m', # Red
'CRITICAL': '\033[35m', # Magenta
'RESET': '\033[0m' # Reset
}
def format(self, record):
if hasattr(record, 'levelname'):
color = self.COLORS.get(record.levelname, self.COLORS['RESET'])
record.levelname = f"{color}{record.levelname}{self.COLORS['RESET']}"
return super().format(record)
def setup_logging(
log_level: str = "INFO",
log_file: Optional[str] = None,
max_file_size: int = 10 * 1024 * 1024, # 10MB
backup_count: int = 5,
enable_console: bool = True,
enable_colors: bool = True
) -> logging.Logger:
"""
Setup comprehensive logging configuration
Args:
log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
log_file: Path to log file (optional)
max_file_size: Maximum size of log file before rotation
backup_count: Number of backup files to keep
enable_console: Whether to enable console logging
enable_colors: Whether to enable colored console output
Returns:
Configured logger instance
"""
# Create logs directory if it doesn't exist
if log_file:
log_dir = os.path.dirname(log_file)
if log_dir and not os.path.exists(log_dir):
os.makedirs(log_dir)
# Configure root logger
logger = logging.getLogger()
logger.setLevel(getattr(logging, log_level.upper()))
# Clear existing handlers
logger.handlers.clear()
# Create formatters
detailed_formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
simple_formatter = logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s',
datefmt='%H:%M:%S'
)
# Console handler
if enable_console:
console_handler = logging.StreamHandler()
if enable_colors and os.name != 'nt': # Don't use colors on Windows
console_formatter = ColoredFormatter(
'%(asctime)s - %(levelname)s - %(message)s',
datefmt='%H:%M:%S'
)
else:
console_formatter = simple_formatter
console_handler.setFormatter(console_formatter)
console_handler.setLevel(getattr(logging, log_level.upper()))
logger.addHandler(console_handler)
# File handler with rotation
if log_file:
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=max_file_size,
backupCount=backup_count,
encoding='utf-8'
)
file_handler.setFormatter(detailed_formatter)
file_handler.setLevel(logging.DEBUG) # Always log everything to file
logger.addHandler(file_handler)
# Add performance logger for metrics
perf_logger = logging.getLogger('performance')
if log_file:
perf_file = log_file.replace('.log', '_performance.log')
perf_handler = logging.handlers.RotatingFileHandler(
perf_file,
maxBytes=max_file_size,
backupCount=backup_count,
encoding='utf-8'
)
perf_formatter = logging.Formatter(
'%(asctime)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
perf_handler.setFormatter(perf_formatter)
perf_logger.addHandler(perf_handler)
perf_logger.setLevel(logging.INFO)
perf_logger.propagate = False
return logger
def log_performance_metric(operation: str, duration: float, additional_info: Optional[str] = None):
"""Log performance metrics"""
perf_logger = logging.getLogger('performance')
message = f"PERF: {operation} took {duration:.3f}s"
if additional_info:
message += f" - {additional_info}"
perf_logger.info(message)
def get_logger(name: str) -> logging.Logger:
"""Get a logger with the specified name"""
return logging.getLogger(name)
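
A usage sketch (editorial, not part of the commit): initialise logging once at startup, then time an operation with the dedicated performance logger:

import time

from src.logging_config import setup_logging, get_logger, log_performance_metric

setup_logging(log_level="INFO", log_file="logs/water_monitor.log", enable_colors=True)
logger = get_logger(__name__)

start = time.time()
logger.info("Starting scraping cycle")
# ... scraping work would happen here ...
log_performance_metric("scraping_cycle", time.time() - start, additional_info="16 stations")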

src/main.py (new file, 337 lines)
@@ -0,0 +1,337 @@
#!/usr/bin/env python3
"""
Main entry point for the Thailand Water Monitor system
"""
import argparse
import asyncio
import sys
import signal
from datetime import datetime
from typing import Optional
from .config import Config
from .water_scraper_v3 import EnhancedWaterMonitorScraper
from .logging_config import setup_logging, get_logger
from .exceptions import ConfigurationError, DatabaseConnectionError
from .metrics import get_metrics_collector
logger = get_logger(__name__)
def setup_signal_handlers(scraper: Optional[EnhancedWaterMonitorScraper] = None):
"""Setup signal handlers for graceful shutdown"""
def signal_handler(signum, frame):
logger.info(f"Received signal {signum}, shutting down gracefully...")
if scraper:
logger.info("Stopping scraper...")
sys.exit(0)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
def run_test_cycle():
"""Run a single test cycle"""
logger.info("Running test cycle...")
try:
# Validate configuration
Config.validate_config()
# Initialize scraper
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
# Run single scraping cycle
result = scraper.run_scraping_cycle()
if result:
logger.info("✅ Test cycle completed successfully")
# Show some statistics
latest_data = scraper.get_latest_data(5)
if latest_data:
logger.info(f"Latest data points: {len(latest_data)}")
for data in latest_data[:3]: # Show first 3
logger.info(f"{data['station_code']}: {data['water_level']:.2f}m, {data['discharge']:.1f} cms")
else:
logger.warning("⚠️ Test cycle completed but no new data was found")
return True
except Exception as e:
logger.error(f"❌ Test cycle failed: {e}")
return False
def run_continuous_monitoring():
"""Run continuous monitoring with scheduling"""
logger.info("Starting continuous monitoring...")
try:
# Validate configuration
Config.validate_config()
# Initialize scraper
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
# Setup signal handlers
setup_signal_handlers(scraper)
logger.info(f"Monitoring started with {Config.SCRAPING_INTERVAL_HOURS}h interval")
logger.info("Press Ctrl+C to stop")
# Run initial cycle
logger.info("Running initial data collection...")
scraper.run_scraping_cycle()
# Start scheduled monitoring
import schedule
import time  # needed for the polling sleep in the loop below
schedule.every(Config.SCRAPING_INTERVAL_HOURS).hours.do(scraper.run_scraping_cycle)
while True:
schedule.run_pending()
time.sleep(60) # Check every minute
except KeyboardInterrupt:
logger.info("Monitoring stopped by user")
except Exception as e:
logger.error(f"Monitoring failed: {e}")
return False
return True
def run_gap_filling(days_back: int):
"""Run gap filling for missing data"""
logger.info(f"Checking for data gaps in the last {days_back} days...")
try:
# Validate configuration
Config.validate_config()
# Initialize scraper
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
# Fill gaps
filled_count = scraper.fill_data_gaps(days_back)
if filled_count > 0:
logger.info(f"✅ Filled {filled_count} missing data points")
else:
logger.info("✅ No data gaps found")
return True
except Exception as e:
logger.error(f"❌ Gap filling failed: {e}")
return False
def run_data_update(days_back: int):
"""Update existing data with latest values"""
logger.info(f"Updating existing data for the last {days_back} days...")
try:
# Validate configuration
Config.validate_config()
# Initialize scraper
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
# Update data
updated_count = scraper.update_existing_data(days_back)
if updated_count > 0:
logger.info(f"✅ Updated {updated_count} data points")
else:
logger.info("✅ No data updates needed")
return True
except Exception as e:
logger.error(f"❌ Data update failed: {e}")
return False
def run_web_api():
"""Run the FastAPI web interface"""
logger.info("Starting web API server...")
try:
import uvicorn
from .web_api import app
# Validate configuration
Config.validate_config()
# Run the server
uvicorn.run(
app,
host="0.0.0.0",
port=8000,
log_config=None # Use our custom logging
)
except ImportError:
logger.error("FastAPI not installed. Run: pip install fastapi uvicorn")
return False
except Exception as e:
logger.error(f"Web API failed: {e}")
return False
def show_status():
"""Show current system status"""
logger.info("=== Northern Thailand Ping River Monitor Status ===")
try:
# Show configuration
Config.print_settings()
# Test database connection
logger.info("\n=== Database Connection Test ===")
db_config = Config.get_database_config()
scraper = EnhancedWaterMonitorScraper(db_config)
if scraper.db_adapter:
logger.info("✅ Database connection successful")
# Show latest data
latest_data = scraper.get_latest_data(3)
if latest_data:
logger.info(f"\n=== Latest Data ({len(latest_data)} points) ===")
for data in latest_data:
timestamp = data['timestamp']
if isinstance(timestamp, str):
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
logger.info(f"{data['station_code']} ({timestamp}): {data['water_level']:.2f}m")
else:
logger.info("No data found in database")
else:
logger.error("❌ Database connection failed")
# Show metrics if available
metrics_collector = get_metrics_collector()
metrics = metrics_collector.get_all_metrics()
if any(metrics.values()):
logger.info("\n=== Metrics Summary ===")
for metric_type, values in metrics.items():
if values:
logger.info(f"{metric_type.title()}: {len(values)} metrics")
return True
except Exception as e:
logger.error(f"Status check failed: {e}")
return False
def main():
"""Main entry point"""
parser = argparse.ArgumentParser(
description="Northern Thailand Ping River Monitor",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --test # Run single test cycle
%(prog)s # Run continuous monitoring
%(prog)s --web-api # Start web API server
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
%(prog)s --update-data 2 # Update existing data for last 2 days
%(prog)s --status # Show system status
"""
)
parser.add_argument(
"--test",
action="store_true",
help="Run a single test cycle"
)
parser.add_argument(
"--web-api",
action="store_true",
help="Start the web API server"
)
parser.add_argument(
"--fill-gaps",
type=int,
metavar="DAYS",
help="Fill missing data gaps for the specified number of days back"
)
parser.add_argument(
"--update-data",
type=int,
metavar="DAYS",
help="Update existing data for the specified number of days back"
)
parser.add_argument(
"--status",
action="store_true",
help="Show current system status"
)
parser.add_argument(
"--log-level",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
default=Config.LOG_LEVEL,
help="Set logging level"
)
parser.add_argument(
"--log-file",
default=Config.LOG_FILE,
help="Log file path"
)
args = parser.parse_args()
# Setup logging
setup_logging(
log_level=args.log_level,
log_file=args.log_file,
enable_console=True,
enable_colors=True
)
logger.info("🏔️ Northern Thailand Ping River Monitor starting...")
logger.info(f"Version: 3.1.0")
logger.info(f"Log level: {args.log_level}")
try:
success = False
if args.test:
success = run_test_cycle()
elif args.web_api:
success = run_web_api()
elif args.fill_gaps is not None:
success = run_gap_filling(args.fill_gaps)
elif args.update_data is not None:
success = run_data_update(args.update_data)
elif args.status:
success = show_status()
else:
success = run_continuous_monitoring()
if success:
logger.info("✅ Operation completed successfully")
sys.exit(0)
else:
logger.error("❌ Operation failed")
sys.exit(1)
except ConfigurationError as e:
logger.error(f"Configuration error: {e}")
sys.exit(1)
except KeyboardInterrupt:
logger.info("Operation cancelled by user")
sys.exit(0)
except Exception as e:
logger.error(f"Unexpected error: {e}")
sys.exit(1)
if __name__ == "__main__":
main()
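
A sketch of invoking the entry point programmatically (editorial, not part of the commit); it is equivalent to running "python -m src.main --test --log-level DEBUG" from the shell:

import sys

from src.main import main

sys.argv = ["ping-river-monitor", "--test", "--log-level", "DEBUG"]
main()   # exits via sys.exit(0) on success, sys.exit(1) on failure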

src/metrics.py (new file, 171 lines)
@@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""
Metrics collection and monitoring for water monitoring system
"""
import time
import threading
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from dataclasses import dataclass, field
from collections import defaultdict, deque
import logging
logger = logging.getLogger(__name__)
@dataclass
class MetricPoint:
"""Single metric data point"""
timestamp: datetime
value: float
labels: Dict[str, str] = field(default_factory=dict)
class MetricsCollector:
"""Collects and manages application metrics"""
def __init__(self, retention_hours: int = 24):
self.retention_hours = retention_hours
self.metrics: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))
self.counters: Dict[str, float] = defaultdict(float)
self.gauges: Dict[str, float] = defaultdict(float)
self.histograms: Dict[str, List[float]] = defaultdict(list)
self._lock = threading.Lock()
# Start cleanup thread
self._cleanup_thread = threading.Thread(target=self._cleanup_old_metrics, daemon=True)
self._cleanup_thread.start()
def increment_counter(self, name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
"""Increment a counter metric"""
with self._lock:
key = self._make_key(name, labels)
self.counters[key] += value
self.metrics[key].append(MetricPoint(datetime.now(), self.counters[key], labels or {}))
def set_gauge(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
"""Set a gauge metric"""
with self._lock:
key = self._make_key(name, labels)
self.gauges[key] = value
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
def record_histogram(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
"""Record a histogram value"""
with self._lock:
key = self._make_key(name, labels)
self.histograms[key].append(value)
# Keep only recent values
if len(self.histograms[key]) > 1000:
self.histograms[key] = self.histograms[key][-1000:]
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
def get_counter(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
"""Get current counter value"""
key = self._make_key(name, labels)
return self.counters.get(key, 0.0)
def get_gauge(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
"""Get current gauge value"""
key = self._make_key(name, labels)
return self.gauges.get(key, 0.0)
def get_histogram_stats(self, name: str, labels: Optional[Dict[str, str]] = None) -> Dict[str, float]:
"""Get histogram statistics"""
key = self._make_key(name, labels)
values = self.histograms.get(key, [])
if not values:
return {'count': 0, 'sum': 0, 'avg': 0, 'min': 0, 'max': 0}
return {
'count': len(values),
'sum': sum(values),
'avg': sum(values) / len(values),
'min': min(values),
'max': max(values)
}
def get_all_metrics(self) -> Dict[str, Any]:
"""Get all current metrics"""
with self._lock:
return {
'counters': dict(self.counters),
'gauges': dict(self.gauges),
'histograms': {k: self.get_histogram_stats(k) for k in self.histograms}
}
def _make_key(self, name: str, labels: Optional[Dict[str, str]]) -> str:
"""Create a unique key for metric with labels"""
if not labels:
return name
label_str = ','.join(f"{k}={v}" for k, v in sorted(labels.items()))
return f"{name}{{{label_str}}}"
def _cleanup_old_metrics(self):
"""Clean up old metric data points"""
while True:
try:
cutoff_time = datetime.now() - timedelta(hours=self.retention_hours)
with self._lock:
for metric_name, points in self.metrics.items():
# Remove old points
while points and points[0].timestamp < cutoff_time:
points.popleft()
time.sleep(3600) # Run cleanup every hour
except Exception as e:
logger.error(f"Error in metrics cleanup: {e}")
time.sleep(60) # Wait a minute before retrying
# Global metrics collector instance
_metrics_collector = None
def get_metrics_collector() -> MetricsCollector:
"""Get the global metrics collector instance"""
global _metrics_collector
if _metrics_collector is None:
_metrics_collector = MetricsCollector()
return _metrics_collector
# Convenience functions
def increment_counter(name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
"""Increment a counter metric"""
get_metrics_collector().increment_counter(name, value, labels)
def set_gauge(name: str, value: float, labels: Optional[Dict[str, str]] = None):
"""Set a gauge metric"""
get_metrics_collector().set_gauge(name, value, labels)
def record_histogram(name: str, value: float, labels: Optional[Dict[str, str]] = None):
"""Record a histogram value"""
get_metrics_collector().record_histogram(name, value, labels)
class Timer:
"""Context manager for timing operations"""
def __init__(self, metric_name: str, labels: Optional[Dict[str, str]] = None):
self.metric_name = metric_name
self.labels = labels
self.start_time = None
def __enter__(self):
self.start_time = time.time()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if self.start_time:
duration = time.time() - self.start_time
record_histogram(self.metric_name, duration, self.labels)
def timer(metric_name: str, labels: Optional[Dict[str, str]] = None):
"""Decorator for timing function execution"""
def decorator(func):
def wrapper(*args, **kwargs):
with Timer(metric_name, labels):
return func(*args, **kwargs)
return wrapper
return decorator
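# Usage sketch (illustrative only; nothing below runs on import, and do_query is a
# hypothetical callable standing in for any operation worth timing):
#
#   from src.metrics import Timer, increment_counter, set_gauge, get_metrics_collector
#
#   increment_counter("api_requests", labels={"endpoint": "health"})
#   set_gauge("stations_active", 16)
#   with Timer("db_query_duration", labels={"db": "sqlite"}):
#       do_query()
#   print(get_metrics_collector().get_all_metrics())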

107
src/models.py Normal file

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""
Data models for water monitoring system
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, List, Dict, Any
from enum import Enum
class DatabaseType(Enum):
SQLITE = "sqlite"
MYSQL = "mysql"
POSTGRESQL = "postgresql"
INFLUXDB = "influxdb"
VICTORIAMETRICS = "victoriametrics"
class StationStatus(Enum):
ACTIVE = "active"
INACTIVE = "inactive"
MAINTENANCE = "maintenance"
ERROR = "error"
@dataclass
class StationInfo:
"""Station information model"""
station_id: int
station_code: str
thai_name: str
english_name: str
latitude: Optional[float] = None
longitude: Optional[float] = None
geohash: Optional[str] = None
status: StationStatus = StationStatus.ACTIVE
@dataclass
class WaterMeasurement:
"""Water measurement data model"""
timestamp: datetime
station_info: StationInfo
water_level: float
discharge: float
water_level_unit: str = "m"
discharge_unit: str = "cms"
discharge_percent: Optional[float] = None
status: StationStatus = StationStatus.ACTIVE
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for database storage"""
return {
'timestamp': self.timestamp,
'station_id': self.station_info.station_id,
'station_code': self.station_info.station_code,
'station_name_en': self.station_info.english_name,
'station_name_th': self.station_info.thai_name,
'latitude': self.station_info.latitude,
'longitude': self.station_info.longitude,
'geohash': self.station_info.geohash,
'water_level': self.water_level,
'water_level_unit': self.water_level_unit,
'discharge': self.discharge,
'discharge_unit': self.discharge_unit,
'discharge_percent': self.discharge_percent,
'status': self.status.value
}
@dataclass
class DatabaseConfig:
"""Database configuration model"""
db_type: DatabaseType
connection_string: Optional[str] = None
host: Optional[str] = None
port: Optional[int] = None
database: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None
additional_params: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ScrapingResult:
"""Result of a scraping operation"""
success: bool
measurements_count: int
error_message: Optional[str] = None
timestamp: datetime = field(default_factory=datetime.now)
processing_time_seconds: Optional[float] = None
@dataclass
class StationCreateRequest:
"""Request model for creating a new station"""
station_code: str
thai_name: str
english_name: str
latitude: Optional[float] = None
longitude: Optional[float] = None
geohash: Optional[str] = None
status: StationStatus = StationStatus.ACTIVE
@dataclass
class StationUpdateRequest:
"""Request model for updating an existing station"""
thai_name: Optional[str] = None
english_name: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
geohash: Optional[str] = None
status: Optional[StationStatus] = None
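# Usage sketch (illustrative): build a measurement and flatten it for storage.
# The station values mirror P.1 (Nawarat Bridge); water_level and discharge are made up.
#
#   station = StationInfo(station_id=8, station_code="P.1",
#                         thai_name="สะพานนวรัฐ", english_name="Nawarat Bridge",
#                         latitude=18.7875, longitude=99.0045)
#   m = WaterMeasurement(timestamp=datetime.now(), station_info=station,
#                        water_level=2.34, discharge=120.5)
#   row = m.to_dict()  # dict ready for a database adapter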

167
src/rate_limiter.py Normal file

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""
Rate limiting utilities for API requests
"""
import time
import threading
from typing import Any, Dict, Optional
from collections import deque
from datetime import datetime, timedelta
import logging
logger = logging.getLogger(__name__)
class RateLimiter:
"""Token bucket rate limiter"""
def __init__(self, max_requests: int, time_window_seconds: int):
"""
Initialize rate limiter
Args:
max_requests: Maximum number of requests allowed
time_window_seconds: Time window in seconds
"""
self.max_requests = max_requests
self.time_window = time_window_seconds
self.requests = deque()
self._lock = threading.Lock()
def is_allowed(self) -> bool:
"""Check if a request is allowed"""
with self._lock:
now = time.time()
# Remove old requests outside the time window
while self.requests and self.requests[0] <= now - self.time_window:
self.requests.popleft()
# Check if we can make a new request
if len(self.requests) < self.max_requests:
self.requests.append(now)
return True
return False
def wait_time(self) -> float:
"""Get the time to wait before next request is allowed"""
with self._lock:
if len(self.requests) < self.max_requests:
return 0.0
# Time until the oldest request expires
oldest_request = self.requests[0]
return max(0.0, (oldest_request + self.time_window) - time.time())
def wait_if_needed(self):
"""Block until a request is allowed"""
wait_time = self.wait_time()
if wait_time > 0:
logger.info(f"Rate limit reached, waiting {wait_time:.2f} seconds")
time.sleep(wait_time)
class AdaptiveRateLimiter:
"""Adaptive rate limiter that adjusts based on response times"""
def __init__(self, initial_rate: float = 1.0, min_rate: float = 0.1, max_rate: float = 10.0):
"""
Initialize adaptive rate limiter
Args:
initial_rate: Initial requests per second
min_rate: Minimum requests per second
max_rate: Maximum requests per second
"""
self.current_rate = initial_rate
self.min_rate = min_rate
self.max_rate = max_rate
self.last_request_time = 0.0
self.response_times = deque(maxlen=10)
self._lock = threading.Lock()
def wait_and_record(self, response_time: Optional[float] = None):
"""Wait for rate limit and record response time"""
with self._lock:
now = time.time()
# Calculate wait time based on current rate
time_since_last = now - self.last_request_time
min_interval = 1.0 / self.current_rate
if time_since_last < min_interval:
wait_time = min_interval - time_since_last
time.sleep(wait_time)
now = time.time()
self.last_request_time = now
# Record response time and adjust rate
if response_time is not None:
self.response_times.append(response_time)
self._adjust_rate()
def _adjust_rate(self):
"""Adjust rate based on recent response times"""
if len(self.response_times) < 3:
return
avg_response_time = sum(self.response_times) / len(self.response_times)
# Decrease rate if responses are slow
if avg_response_time > 5.0: # 5 seconds
self.current_rate = max(self.min_rate, self.current_rate * 0.8)
logger.info(f"Decreased rate to {self.current_rate:.2f} req/s due to slow responses")
# Increase rate if responses are fast
elif avg_response_time < 1.0: # 1 second
self.current_rate = min(self.max_rate, self.current_rate * 1.1)
logger.debug(f"Increased rate to {self.current_rate:.2f} req/s")
class RequestTracker:
"""Track API request statistics"""
def __init__(self):
self.total_requests = 0
self.successful_requests = 0
self.failed_requests = 0
self.total_response_time = 0.0
self.last_request_time = None
self.error_count_by_type = {}
self._lock = threading.Lock()
def record_request(self, success: bool, response_time: float, error_type: Optional[str] = None):
"""Record a request"""
with self._lock:
self.total_requests += 1
self.total_response_time += response_time
self.last_request_time = datetime.now()
if success:
self.successful_requests += 1
else:
self.failed_requests += 1
if error_type:
self.error_count_by_type[error_type] = self.error_count_by_type.get(error_type, 0) + 1
def get_stats(self) -> Dict[str, Any]:
"""Get request statistics"""
with self._lock:
if self.total_requests == 0:
return {
'total_requests': 0,
'success_rate': 0.0,
'average_response_time': 0.0,
'last_request_time': None,
'error_breakdown': {}
}
return {
'total_requests': self.total_requests,
'successful_requests': self.successful_requests,
'failed_requests': self.failed_requests,
'success_rate': self.successful_requests / self.total_requests,
'average_response_time': self.total_response_time / self.total_requests,
'last_request_time': self.last_request_time.isoformat() if self.last_request_time else None,
'error_breakdown': dict(self.error_count_by_type)
}
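# Usage sketch (illustrative; call_external_api is a hypothetical request function):
#
#   limiter = RateLimiter(max_requests=10, time_window_seconds=60)
#   tracker = RequestTracker()
#   limiter.wait_if_needed()
#   started = time.time()
#   ok = call_external_api()
#   tracker.record_request(ok, time.time() - started)
#   print(tracker.get_stats())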

116
src/validators.py Normal file

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
Data validation utilities for water monitoring system
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
import logging
from .exceptions import DataValidationError
from .models import WaterMeasurement, StationInfo
logger = logging.getLogger(__name__)
class DataValidator:
"""Validates water measurement data"""
# Reasonable ranges for water measurements
WATER_LEVEL_MIN = -10.0 # meters
WATER_LEVEL_MAX = 50.0 # meters
DISCHARGE_MIN = 0.0 # cms
DISCHARGE_MAX = 10000.0 # cms
DISCHARGE_PERCENT_MIN = 0.0
DISCHARGE_PERCENT_MAX = 200.0 # Allow some overflow
@classmethod
def validate_measurement(cls, measurement: Dict[str, Any]) -> bool:
"""Validate a single measurement"""
try:
# Check required fields
required_fields = ['timestamp', 'station_id', 'water_level', 'discharge']
for field in required_fields:
if field not in measurement:
logger.warning(f"Missing required field: {field}")
return False
# Validate timestamp
if not isinstance(measurement['timestamp'], datetime):
logger.warning(f"Invalid timestamp type: {type(measurement['timestamp'])}")
return False
# Validate water level
water_level = float(measurement['water_level'])
if not (cls.WATER_LEVEL_MIN <= water_level <= cls.WATER_LEVEL_MAX):
logger.warning(f"Water level out of range: {water_level}")
return False
# Validate discharge
discharge = float(measurement['discharge'])
if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX):
logger.warning(f"Discharge out of range: {discharge}")
return False
# Validate discharge percent if present
if measurement.get('discharge_percent') is not None:
discharge_percent = float(measurement['discharge_percent'])
if not (cls.DISCHARGE_PERCENT_MIN <= discharge_percent <= cls.DISCHARGE_PERCENT_MAX):
logger.warning(f"Discharge percent out of range: {discharge_percent}")
return False
# Validate station ID
station_id = measurement['station_id']
if not isinstance(station_id, int) or station_id < 1 or station_id > 16:
logger.warning(f"Invalid station ID: {station_id}")
return False
return True
except (ValueError, TypeError) as e:
logger.warning(f"Data validation error: {e}")
return False
@classmethod
def validate_measurements(cls, measurements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Validate and filter a list of measurements"""
valid_measurements = []
invalid_count = 0
for measurement in measurements:
if cls.validate_measurement(measurement):
valid_measurements.append(measurement)
else:
invalid_count += 1
if invalid_count > 0:
logger.warning(f"Filtered out {invalid_count} invalid measurements")
return valid_measurements
@classmethod
def validate_station_info(cls, station_info: Dict[str, Any]) -> bool:
"""Validate station information"""
try:
required_fields = ['station_id', 'station_code', 'thai_name', 'english_name']
for field in required_fields:
if field not in station_info or not station_info[field]:
logger.warning(f"Missing or empty station field: {field}")
return False
# Validate coordinates if present
if station_info.get('latitude') is not None:
lat = float(station_info['latitude'])
if not (-90 <= lat <= 90):
logger.warning(f"Invalid latitude: {lat}")
return False
if station_info.get('longitude') is not None:
lon = float(station_info['longitude'])
if not (-180 <= lon <= 180):
logger.warning(f"Invalid longitude: {lon}")
return False
return True
except (ValueError, TypeError) as e:
logger.warning(f"Station validation error: {e}")
return False
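# Usage sketch (illustrative): filter scraped rows before persisting them.
# raw_rows is assumed to be the list of measurement dicts produced by the scraper.
#
#   clean_rows = DataValidator.validate_measurements(raw_rows or [])
#   logger.info(f"Kept {len(clean_rows)} of {len(raw_rows or [])} rows")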

539
src/water_scraper_v3.py Normal file

@@ -0,0 +1,539 @@
#!/usr/bin/env python3
"""
Enhanced Water Monitor Scraper with multiple database backend support
"""
import requests
import datetime
import time
import schedule
import json
import os
from typing import List, Dict, Optional
try:
from .database_adapters import create_database_adapter, DatabaseAdapter
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationStatus
from .validators import DataValidator
from .exceptions import APIConnectionError, DataValidationError, DatabaseConnectionError
from .metrics import increment_counter, set_gauge, record_histogram, Timer
from .rate_limiter import RateLimiter, RequestTracker
from .logging_config import get_logger
except ImportError:
# Handle case when running as standalone script
from database_adapters import create_database_adapter, DatabaseAdapter
import logging
def get_logger(name):
return logging.getLogger(name)
def increment_counter(*args, **kwargs):
pass
def set_gauge(*args, **kwargs):
pass
def record_histogram(*args, **kwargs):
pass
class Timer:
def __init__(self, *args, **kwargs):
pass
def __enter__(self):
return self
def __exit__(self, *args):
pass
class RateLimiter:
def __init__(self, *args, **kwargs):
pass
def wait_if_needed(self):
pass
class RequestTracker:
def __init__(self):
pass
def record_request(self, *args, **kwargs):
pass
class DataValidator:
@staticmethod
def validate_measurements(measurements):
return measurements
# Get logger instance
logger = get_logger(__name__)
class EnhancedWaterMonitorScraper:
def __init__(self, db_config: Dict):
"""
Initialize scraper with database configuration
Args:
db_config: Database configuration dictionary
"""
self.api_url = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
self.db_config = db_config.copy() # Make a copy to avoid modifying original
self.db_adapter = None
# Scheduler state tracking
self.last_successful_update = None
self.retry_mode = False
self.next_hourly_check = None
# Rate limiting and request tracking
self.rate_limiter = RateLimiter(max_requests=10, time_window_seconds=60)
self.request_tracker = RequestTracker()
# HTTP session for API requests
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Accept': 'application/json, text/javascript, */*; q=0.01',
'X-Requested-With': 'XMLHttpRequest'
})
# Station mapping with correct names and geolocation data
self.station_mapping = {
'1': {
'code': 'P.20',
'thai_name': 'บ้านเชียงดาว',
'english_name': 'Ban Chiang Dao',
'latitude': 19.36731448032191,
'longitude': 98.9688487015384,
'geohash': None
},
'2': {
'code': 'P.75',
'thai_name': 'บ้านช่อแล',
'english_name': 'Ban Chai Lat',
'latitude': 19.145972935976225,
'longitude': 99.00735727149247,
'geohash': None
},
'3': {
'code': 'P.92',
'thai_name': 'บ้านเมืองกึ๊ด',
'english_name': 'Ban Muang Aut',
'latitude': 19.220518985435646,
'longitude': 98.84733127007874,
'geohash': None
},
'4': {
'code': 'P.4A',
'thai_name': 'บ้านแม่แตง',
'english_name': 'Ban Mae Taeng',
'latitude': 19.1222679952378,
'longitude': 98.94437462084075,
'geohash': None
},
'5': {
'code': 'P.67',
'thai_name': 'บ้านแม่แต',
'english_name': 'Ban Tae',
'latitude': 19.009762080002453,
'longitude': 98.95978297135508,
'geohash': None
},
'6': {
'code': 'P.21',
'thai_name': 'บ้านริมใต้',
'english_name': 'Ban Rim Tai',
'latitude': 18.917459157963293,
'longitude': 98.97018092996231,
'geohash': None
},
'7': {
'code': 'P.103',
'thai_name': 'สะพานวงแหวนรอบ 3',
'english_name': 'Ring Bridge 3',
'latitude': 18.86664807441675,
'longitude': 98.9781107622432,
'geohash': None
},
'8': {
'code': 'P.1',
'thai_name': 'สะพานนวรัฐ',
'english_name': 'Nawarat Bridge',
'latitude': 18.7875,
'longitude': 99.0045,
'geohash': 'w5q6uuhvfcfp25'
},
'9': {
'code': 'P.82',
'thai_name': 'บ้านสบวิน',
'english_name': 'Ban Sob win',
'latitude': 18.6519444,
'longitude': 98.69,
'geohash': None
},
'10': {
'code': 'P.84',
'thai_name': 'บ้านพันตน',
'english_name': 'Ban Panton',
'latitude': 18.591315274591334,
'longitude': 98.79657058508496,
'geohash': None
},
'11': {
'code': 'P.81',
'thai_name': 'บ้านโป่ง',
'english_name': 'Ban Pong',
'latitude': 13.805661820610888,
'longitude': 99.87174946122846,
'geohash': None
},
'12': {
'code': 'P.5',
'thai_name': 'สะพานท่านาง',
'english_name': 'Tha Nang Bridge',
'latitude': 18.580269437546555,
'longitude': 99.01021397084362,
'geohash': None
},
'13': {
'code': 'P.77',
'thai_name': 'บ้านสบแม่สะป๊วด',
'english_name': 'Baan Sop Mae Sapuord',
'latitude': 18.433347475179602,
'longitude': 99.08510036666527,
'geohash': None
},
'14': {
'code': 'P.87',
'thai_name': 'บ้านป่าซาง',
'english_name': 'Ban Pa Sang',
'latitude': 18.519121825282486,
'longitude': 98.94224374138238,
'geohash': None
},
'15': {
'code': 'P.76',
'thai_name': 'บ้านแม่อีไฮ',
'english_name': 'Ban Mae I Hai',
'latitude': 18.141465831254404,
'longitude': 98.89642508267181,
'geohash': None
},
'16': {
'code': 'P.85',
'thai_name': 'บ้านหล่ายแก้ว',
'english_name': 'Baan Lai Kaew',
'latitude': 18.17856361002219,
'longitude': 98.63023114782287,
'geohash': None
}
}
self.init_database()
def init_database(self):
"""Initialize database connection"""
try:
# Extract db_type and pass remaining config as kwargs
db_config_copy = self.db_config.copy()
db_type = db_config_copy.pop('type')
self.db_adapter = create_database_adapter(db_type, **db_config_copy)
success = self.db_adapter.connect()
if success:
logger.info(f"Successfully connected to {db_type.upper()} database")
set_gauge("database_connected", 1)
increment_counter("database_connections_successful")
else:
logger.error(f"Failed to connect to {db_type.upper()} database")
set_gauge("database_connected", 0)
increment_counter("database_connections_failed")
except Exception as e:
logger.error(f"Error initializing database: {e}")
set_gauge("database_connected", 0)
increment_counter("database_connections_failed")
self.db_adapter = None
def fetch_water_data_for_date(self, target_date: datetime.datetime) -> Optional[List[Dict]]:
"""Fetch water levels and discharge data from API for a specific date"""
with Timer("api_request_duration"):
try:
logger.info(f"Starting data fetch from API for date: {target_date.strftime('%Y-%m-%d')}")
# Rate limiting
self.rate_limiter.wait_if_needed()
# Create Thai format date (Buddhist calendar)
thai_year = target_date.year + 543
thai_date = f"{target_date.day:02d}/{target_date.month:02d}/{thai_year}"
# API parameters
payload = {
'DW[UtokID]': '1',
'DW[BasinID]': '6',
'DW[TimeCurrent]': thai_date,
'_search': 'false',
'nd': str(int(time.time() * 1000)),
'rows': '100',
'page': '1',
'sidx': 'indexhourly',
'sord': 'asc'
}
logger.debug(f"API parameters: {payload}")
# POST request to API
start_time = time.time()
response = self.session.post(self.api_url, data=payload, timeout=30)
response_time = time.time() - start_time
response.raise_for_status()
# Record successful request
self.request_tracker.record_request(True, response_time)
increment_counter("api_requests_successful")
record_histogram("api_response_time", response_time)
# Parse JSON response
try:
json_data = response.json()
logger.debug(f"API response received: {len(str(json_data))} characters")
except ValueError as e:
logger.error(f"Error parsing JSON response: {e}")
self.request_tracker.record_request(False, response_time, "json_parse_error")
increment_counter("api_requests_failed")
return None
water_data = []
# Parse JSON data
if json_data and isinstance(json_data, dict) and 'rows' in json_data:
for row in json_data['rows']:
try:
# Parse timestamp
time_str = row.get('hourlytime', '')
if not time_str:
continue
try:
# Format: "1.00", "2.00", ..., "24.00"
api_hour = int(float(time_str))
if api_hour < 1 or api_hour > 24:
continue
if api_hour == 24:
# Hour 24 = midnight (00:00) of the next day
data_time = target_date.replace(hour=0, minute=0, second=0, microsecond=0)
data_time = data_time + datetime.timedelta(days=1)
else:
# Hours 1-23 = 01:00-23:00 of the same day
data_time = target_date.replace(hour=api_hour, minute=0, second=0, microsecond=0)
except (ValueError, IndexError):
logger.warning(f"Could not parse timestamp: {time_str}")
continue
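# Example: hourlytime "13.00" on 2025-08-12 maps to 2025-08-12 13:00,
# while "24.00" on 2025-08-12 maps to 2025-08-13 00:00.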
# Parse all water levels and discharge values
station_count = 0
for station_num in range(1, 17): # Stations 1-16
wl_key = f'wlvalues{station_num}'
q_key = f'qvalues{station_num}'
qp_key = f'QPercent{station_num}'
# Check if both water level and discharge data exist
if wl_key in row and q_key in row:
try:
water_level = row[wl_key]
discharge = row[q_key]
discharge_percent = row.get(qp_key)
# Skip if values are None or invalid
if water_level is None or discharge is None:
continue
# Convert to float
water_level = float(water_level)
discharge = float(discharge)
discharge_percent = float(discharge_percent) if discharge_percent is not None else None
station_info = self.station_mapping.get(str(station_num), {
'code': f'P.{19+station_num}',
'thai_name': f'Station {station_num}',
'english_name': f'Station {station_num}'
})
water_data.append({
'timestamp': data_time,
'station_id': station_num,
'station_code': station_info['code'],
'station_name_en': station_info['english_name'],
'station_name_th': station_info['thai_name'],
'latitude': station_info.get('latitude'),
'longitude': station_info.get('longitude'),
'geohash': station_info.get('geohash'),
'water_level': water_level,
'water_level_unit': 'm',
'discharge': discharge,
'discharge_unit': 'cms',
'discharge_percent': discharge_percent,
'status': 'active'
})
station_count += 1
except (ValueError, TypeError) as e:
logger.warning(f"Could not parse data for station {station_num}: {e}")
continue
logger.debug(f"Processed {station_count} stations for time {time_str}")
except Exception as e:
logger.warning(f"Error processing data row: {e}")
continue
# Validate data
water_data = DataValidator.validate_measurements(water_data)
logger.info(f"Successfully fetched {len(water_data)} data points from API for {target_date.strftime('%Y-%m-%d')}")
return water_data
except requests.RequestException as e:
logger.error(f"Network error fetching API data: {e}")
self.request_tracker.record_request(False, 0, "network_error")
increment_counter("api_requests_failed")
return None
except Exception as e:
logger.error(f"Unexpected error fetching API data: {e}")
self.request_tracker.record_request(False, 0, "unexpected_error")
increment_counter("api_requests_failed")
return None
def fetch_water_data(self) -> Optional[List[Dict]]:
"""Fetch water levels and discharge data from API for current date"""
current_date = datetime.datetime.now()
return self.fetch_water_data_for_date(current_date)
def save_to_database(self, water_data: List[Dict], max_retries: int = 3) -> bool:
"""Save water measurements to database with retry logic"""
if not self.db_adapter:
logger.error("Database adapter not initialized")
return False
if not water_data:
logger.warning("No data to save")
return False
for attempt in range(max_retries):
try:
success = self.db_adapter.save_measurements(water_data)
if success:
logger.info(f"Successfully saved {len(water_data)} measurements to database")
increment_counter("database_saves_successful")
set_gauge("last_save_timestamp", time.time())
return True
else:
logger.warning(f"Save attempt {attempt + 1} failed, retrying...")
except Exception as e:
if "database is locked" in str(e).lower() and attempt < max_retries - 1:
logger.warning(f"Database locked on attempt {attempt + 1}, retrying in {2 ** attempt} seconds...")
time.sleep(2 ** attempt) # Exponential backoff
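# Example: with max_retries=3, a locked database is retried after 1 s and then 2 s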
continue
else:
logger.error(f"Error saving to database (attempt {attempt + 1}): {e}")
if attempt == max_retries - 1:
increment_counter("database_saves_failed")
return False
return False
def get_latest_data(self, limit: int = 100) -> List[Dict]:
"""Get latest data from database"""
if not self.db_adapter:
return []
try:
return self.db_adapter.get_latest_measurements(limit=limit)
except Exception as e:
logger.error(f"Error getting latest data: {e}")
return []
def run_scraping_cycle(self) -> bool:
"""Run a complete scraping cycle"""
logger.info("Starting scraping cycle...")
try:
# Fetch current data
water_data = self.fetch_water_data()
if water_data:
success = self.save_to_database(water_data)
if success:
logger.info("Scraping cycle completed successfully")
increment_counter("scraping_cycles_successful")
return True
else:
logger.error("Failed to save data")
increment_counter("scraping_cycles_failed")
return False
else:
logger.warning("No data fetched")
increment_counter("scraping_cycles_failed")
return False
except Exception as e:
logger.error(f"Scraping cycle failed: {e}")
increment_counter("scraping_cycles_failed")
return False
# Main execution for standalone usage
if __name__ == "__main__":
import argparse
import sys
# Configure basic logging for standalone usage
import logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('water_monitor.log'),
logging.StreamHandler()
]
)
parser = argparse.ArgumentParser(description="Thailand Water Monitor")
parser.add_argument("--test", action="store_true", help="Run single test cycle")
args = parser.parse_args()
# Default SQLite configuration
db_config = {
'type': 'sqlite',
'connection_string': 'sqlite:///water_levels.db'
}
try:
scraper = EnhancedWaterMonitorScraper(db_config)
if args.test:
logger.info("Running test cycle...")
result = scraper.run_scraping_cycle()
if result:
logger.info("✅ Test completed successfully")
sys.exit(0)
else:
logger.error("❌ Test failed")
sys.exit(1)
else:
logger.info("Starting continuous monitoring...")
schedule.every(1).hours.do(scraper.run_scraping_cycle)
# Run initial cycle
scraper.run_scraping_cycle()
while True:
schedule.run_pending()
time.sleep(60)
except KeyboardInterrupt:
logger.info("Monitoring stopped by user")
except Exception as e:
logger.error(f"Error: {e}")
sys.exit(1)

620
src/web_api.py Normal file

@@ -0,0 +1,620 @@
#!/usr/bin/env python3
"""
FastAPI web interface for water monitoring system
"""
import asyncio
import threading
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from .water_scraper_v3 import EnhancedWaterMonitorScraper
from .config import Config
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationCreateRequest, StationUpdateRequest, StationStatus
from .health_check import HealthCheckManager, DatabaseHealthCheck, APIHealthCheck, MemoryHealthCheck
from .metrics import get_metrics_collector, increment_counter, set_gauge
from .logging_config import setup_logging, get_logger
logger = get_logger(__name__)
# Pydantic models for API responses
class StationResponse(BaseModel):
station_id: int
station_code: str
thai_name: str
english_name: str
latitude: Optional[float] = None
longitude: Optional[float] = None
geohash: Optional[str] = None
status: str = "active"
class StationCreateModel(BaseModel):
station_code: str = Field(..., description="Station code (e.g., P.1, P.20)")
thai_name: str = Field(..., description="Thai name of the station")
english_name: str = Field(..., description="English name of the station")
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
geohash: Optional[str] = Field(None, description="Geohash for the location")
status: str = Field("active", description="Station status")
class StationUpdateModel(BaseModel):
thai_name: Optional[str] = Field(None, description="Thai name of the station")
english_name: Optional[str] = Field(None, description="English name of the station")
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
geohash: Optional[str] = Field(None, description="Geohash for the location")
status: Optional[str] = Field(None, description="Station status")
class MeasurementResponse(BaseModel):
timestamp: datetime
station_code: str
station_name_en: str
station_name_th: str
water_level: float
discharge: float
discharge_percent: Optional[float] = None
status: str = "active"
class HealthResponse(BaseModel):
overall_status: str
timestamp: str
checks: Dict[str, Dict[str, Any]]
class MetricsResponse(BaseModel):
counters: Dict[str, float]
gauges: Dict[str, float]
histograms: Dict[str, Dict[str, float]]
class ScrapingStatusResponse(BaseModel):
is_running: bool
last_run: Optional[datetime] = None
next_run: Optional[datetime] = None
total_runs: int = 0
successful_runs: int = 0
failed_runs: int = 0
# Global application state
app_state = {
"scraper": None,
"health_manager": None,
"scraping_task": None,
"is_scraping": False,
"scraping_stats": {
"total_runs": 0,
"successful_runs": 0,
"failed_runs": 0,
"last_run": None,
"next_run": None
}
}
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan manager"""
# Startup
logger.info("Starting Water Monitor API...")
# Initialize configuration
try:
Config.validate_config()
logger.info("Configuration validated successfully")
except Exception as e:
logger.error(f"Configuration validation failed: {e}")
raise
# Initialize scraper
db_config = Config.get_database_config()
app_state["scraper"] = EnhancedWaterMonitorScraper(db_config)
# Initialize health checks
health_manager = HealthCheckManager()
health_manager.add_check(DatabaseHealthCheck(app_state["scraper"].db_adapter))
health_manager.add_check(APIHealthCheck(Config.API_URL, app_state["scraper"].session))
health_manager.add_check(MemoryHealthCheck(max_memory_mb=1000))
app_state["health_manager"] = health_manager
# Start background scraping task
app_state["scraping_task"] = asyncio.create_task(background_scraping_task())
logger.info("Water Monitor API started successfully")
yield
# Shutdown
logger.info("Shutting down Water Monitor API...")
if app_state["scraping_task"]:
app_state["scraping_task"].cancel()
try:
await app_state["scraping_task"]
except asyncio.CancelledError:
pass
logger.info("Water Monitor API shutdown complete")
# Create FastAPI app
app = FastAPI(
title="Northern Thailand Ping River Monitor API",
description="Real-time water level monitoring system for Northern Thailand's Ping River Basin stations",
version="3.1.0",
lifespan=lifespan
)
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Configure appropriately for production
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def background_scraping_task():
"""Background task for periodic data scraping"""
while True:
try:
if not app_state["is_scraping"]:
app_state["is_scraping"] = True
# Run scraping cycle
scraper = app_state["scraper"]
if scraper:
logger.info("Starting background scraping cycle")
start_time = datetime.now()
try:
result = scraper.run_scraping_cycle()
# Update stats
app_state["scraping_stats"]["total_runs"] += 1
app_state["scraping_stats"]["last_run"] = start_time
if result:
app_state["scraping_stats"]["successful_runs"] += 1
increment_counter("scraping_cycles_successful")
logger.info("Background scraping cycle completed successfully")
else:
app_state["scraping_stats"]["failed_runs"] += 1
increment_counter("scraping_cycles_failed")
logger.warning("Background scraping cycle completed with no new data")
# Update metrics
set_gauge("last_scraping_timestamp", start_time.timestamp())
except Exception as e:
app_state["scraping_stats"]["failed_runs"] += 1
increment_counter("scraping_cycles_failed")
logger.error(f"Background scraping cycle failed: {e}")
app_state["is_scraping"] = False
# Calculate next run time
interval_seconds = Config.SCRAPING_INTERVAL_HOURS * 3600
app_state["scraping_stats"]["next_run"] = datetime.now() + timedelta(seconds=interval_seconds)
# Wait for next cycle
await asyncio.sleep(interval_seconds)
except asyncio.CancelledError:
logger.info("Background scraping task cancelled")
break
except Exception as e:
logger.error(f"Error in background scraping task: {e}")
await asyncio.sleep(60) # Wait a minute before retrying
# API Routes
@app.get("/", response_class=HTMLResponse)
async def root():
"""Root endpoint with basic dashboard"""
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Northern Thailand Ping River Monitor</title>
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
.header { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
.section { margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }
.status-healthy { color: #27ae60; }
.status-degraded { color: #f39c12; }
.status-unhealthy { color: #e74c3c; }
.endpoint { background: #f8f9fa; padding: 10px; margin: 5px 0; border-radius: 3px; }
.endpoint code { color: #2c3e50; }
</style>
</head>
<body>
<div class="header">
<h1>🏔️ Northern Thailand Ping River Monitor API</h1>
<p>Real-time water level monitoring system for the Ping River Basin in Northern Thailand</p>
</div>
<div class="section">
<h2>📊 Quick Status</h2>
<p>API is running and monitoring 16 water stations along the Ping River</p>
<p>Coverage: From Chiang Dao to Nakhon Sawan</p>
<p>Data collection interval: Every hour</p>
</div>
<div class="section">
<h2>🔗 API Endpoints</h2>
<div class="endpoint"><code>GET /health</code> - System health status</div>
<div class="endpoint"><code>GET /metrics</code> - Application metrics</div>
<div class="endpoint"><code>GET /stations</code> - List all monitoring stations</div>
<div class="endpoint"><code>POST /stations</code> - Add new monitoring station</div>
<div class="endpoint"><code>PUT /stations/{station_id}</code> - Update station information</div>
<div class="endpoint"><code>GET /measurements/latest</code> - Latest measurements</div>
<div class="endpoint"><code>GET /measurements/station/{station_code}</code> - Station-specific data</div>
<div class="endpoint"><code>POST /scrape/trigger</code> - Trigger manual data collection</div>
<div class="endpoint"><code>GET /scraping/status</code> - Scraping status</div>
<div class="endpoint"><code>GET /docs</code> - Interactive API documentation</div>
</div>
<div class="section">
<h2>📈 Monitoring</h2>
<p>• Grafana dashboards available for data visualization</p>
<p>• Health checks monitor database, API, and system resources</p>
<p>• Metrics collection for performance monitoring</p>
</div>
</body>
</html>
"""
return HTMLResponse(content=html_content)
@app.get("/health", response_model=HealthResponse)
async def get_health():
"""Get system health status"""
increment_counter("api_requests", labels={"endpoint": "health"})
health_manager = app_state["health_manager"]
if not health_manager:
raise HTTPException(status_code=503, detail="Health manager not initialized")
# Run health checks
health_manager.run_all_checks()
summary = health_manager.get_health_summary()
return HealthResponse(**summary)
@app.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
"""Get application metrics"""
increment_counter("api_requests", labels={"endpoint": "metrics"})
metrics_collector = get_metrics_collector()
metrics = metrics_collector.get_all_metrics()
return MetricsResponse(**metrics)
@app.get("/stations", response_model=List[StationResponse])
async def get_stations():
"""Get list of all monitoring stations"""
increment_counter("api_requests", labels={"endpoint": "stations"})
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
stations = []
for station_id, station_info in scraper.station_mapping.items():
stations.append(StationResponse(
station_id=int(station_id),
station_code=station_info["code"],
thai_name=station_info["thai_name"],
english_name=station_info["english_name"],
latitude=station_info.get("latitude"),
longitude=station_info.get("longitude"),
status="active"
))
return stations
@app.post("/stations", response_model=StationResponse)
async def create_station(station: StationCreateModel):
"""Create a new monitoring station"""
increment_counter("api_requests", labels={"endpoint": "create_station"})
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
try:
# Find next available station ID
existing_ids = [int(sid) for sid in scraper.station_mapping.keys()]
new_station_id = max(existing_ids) + 1 if existing_ids else 1
# Add to station mapping
scraper.station_mapping[str(new_station_id)] = {
'code': station.station_code,
'thai_name': station.thai_name,
'english_name': station.english_name,
'latitude': station.latitude,
'longitude': station.longitude,
'geohash': station.geohash
}
logger.info(f"Created new station: {station.station_code} ({station.english_name})")
return StationResponse(
station_id=new_station_id,
station_code=station.station_code,
thai_name=station.thai_name,
english_name=station.english_name,
latitude=station.latitude,
longitude=station.longitude,
geohash=station.geohash,
status=station.status
)
except Exception as e:
logger.error(f"Error creating station: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.put("/stations/{station_id}", response_model=StationResponse)
async def update_station(station_id: int, updates: StationUpdateModel):
"""Update an existing monitoring station"""
increment_counter("api_requests", labels={"endpoint": "update_station"})
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
station_key = str(station_id)
if station_key not in scraper.station_mapping:
raise HTTPException(status_code=404, detail="Station not found")
try:
station_info = scraper.station_mapping[station_key]
# Update fields if provided
if updates.thai_name is not None:
station_info['thai_name'] = updates.thai_name
if updates.english_name is not None:
station_info['english_name'] = updates.english_name
if updates.latitude is not None:
station_info['latitude'] = updates.latitude
if updates.longitude is not None:
station_info['longitude'] = updates.longitude
if updates.geohash is not None:
station_info['geohash'] = updates.geohash
logger.info(f"Updated station {station_id}: {station_info['code']}")
return StationResponse(
station_id=station_id,
station_code=station_info['code'],
thai_name=station_info['thai_name'],
english_name=station_info['english_name'],
latitude=station_info.get('latitude'),
longitude=station_info.get('longitude'),
geohash=station_info.get('geohash'),
status=updates.status or "active"
)
except Exception as e:
logger.error(f"Error updating station {station_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.delete("/stations/{station_id}")
async def delete_station(station_id: int):
"""Delete a monitoring station"""
increment_counter("api_requests", labels={"endpoint": "delete_station"})
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
station_key = str(station_id)
if station_key not in scraper.station_mapping:
raise HTTPException(status_code=404, detail="Station not found")
try:
station_info = scraper.station_mapping.pop(station_key)
logger.info(f"Deleted station {station_id}: {station_info['code']}")
return {"message": f"Station {station_info['code']} deleted successfully"}
except Exception as e:
logger.error(f"Error deleting station {station_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/stations/{station_id}", response_model=StationResponse)
async def get_station(station_id: int):
"""Get details of a specific monitoring station"""
increment_counter("api_requests", labels={"endpoint": "get_station"})
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
station_key = str(station_id)
if station_key not in scraper.station_mapping:
raise HTTPException(status_code=404, detail="Station not found")
station_info = scraper.station_mapping[station_key]
return StationResponse(
station_id=station_id,
station_code=station_info['code'],
thai_name=station_info['thai_name'],
english_name=station_info['english_name'],
latitude=station_info.get('latitude'),
longitude=station_info.get('longitude'),
geohash=station_info.get('geohash'),
status="active"
)
@app.get("/measurements/latest", response_model=List[MeasurementResponse])
async def get_latest_measurements(limit: int = 100):
"""Get latest measurements from all stations"""
increment_counter("api_requests", labels={"endpoint": "measurements_latest"})
scraper = app_state["scraper"]
if not scraper or not scraper.db_adapter:
raise HTTPException(status_code=503, detail="Database not available")
try:
measurements = scraper.get_latest_data(limit=limit)
response = []
for measurement in measurements:
response.append(MeasurementResponse(
timestamp=measurement["timestamp"],
station_code=measurement["station_code"],
station_name_en=measurement["station_name_en"],
station_name_th=measurement["station_name_th"],
water_level=measurement["water_level"],
discharge=measurement["discharge"],
discharge_percent=measurement.get("discharge_percent"),
status=measurement.get("status", "active")
))
return response
except Exception as e:
logger.error(f"Error fetching latest measurements: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/measurements/station/{station_code}", response_model=List[MeasurementResponse])
async def get_station_measurements(
station_code: str,
hours: int = 24,
limit: int = 1000
):
"""Get measurements for a specific station"""
increment_counter("api_requests", labels={"endpoint": "measurements_station"})
scraper = app_state["scraper"]
if not scraper or not scraper.db_adapter:
raise HTTPException(status_code=503, detail="Database not available")
try:
# Get measurements for the specified time range
end_time = datetime.now()
start_time = end_time - timedelta(hours=hours)
measurements = scraper.db_adapter.get_measurements_by_timerange(
start_time, end_time, station_codes=[station_code]
)
# Limit results
measurements = measurements[:limit]
response = []
for measurement in measurements:
response.append(MeasurementResponse(
timestamp=measurement["timestamp"],
station_code=measurement["station_code"],
station_name_en=measurement["station_name_en"],
station_name_th=measurement["station_name_th"],
water_level=measurement["water_level"],
discharge=measurement["discharge"],
discharge_percent=measurement.get("discharge_percent"),
status=measurement.get("status", "active")
))
return response
except Exception as e:
logger.error(f"Error fetching station measurements: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/scrape/trigger")
async def trigger_scraping(background_tasks: BackgroundTasks):
"""Trigger manual data scraping"""
increment_counter("api_requests", labels={"endpoint": "scrape_trigger"})
if app_state["is_scraping"]:
raise HTTPException(status_code=409, detail="Scraping already in progress")
scraper = app_state["scraper"]
if not scraper:
raise HTTPException(status_code=503, detail="Scraper not initialized")
def run_scraping():
"""Background task to run scraping"""
try:
app_state["is_scraping"] = True
logger.info("Manual scraping triggered via API")
result = scraper.run_scraping_cycle()
# Update stats
app_state["scraping_stats"]["total_runs"] += 1
app_state["scraping_stats"]["last_run"] = datetime.now()
if result:
app_state["scraping_stats"]["successful_runs"] += 1
increment_counter("manual_scraping_successful")
else:
app_state["scraping_stats"]["failed_runs"] += 1
increment_counter("manual_scraping_failed")
except Exception as e:
app_state["scraping_stats"]["failed_runs"] += 1
increment_counter("manual_scraping_failed")
logger.error(f"Manual scraping failed: {e}")
finally:
app_state["is_scraping"] = False
background_tasks.add_task(run_scraping)
return {"message": "Scraping triggered", "status": "started"}
@app.get("/scraping/status", response_model=ScrapingStatusResponse)
async def get_scraping_status():
"""Get current scraping status"""
increment_counter("api_requests", labels={"endpoint": "scraping_status"})
stats = app_state["scraping_stats"]
return ScrapingStatusResponse(
is_running=app_state["is_scraping"],
last_run=stats["last_run"],
next_run=stats["next_run"],
total_runs=stats["total_runs"],
successful_runs=stats["successful_runs"],
failed_runs=stats["failed_runs"]
)
@app.get("/config")
async def get_config():
"""Get current configuration (sensitive data masked)"""
increment_counter("api_requests", labels={"endpoint": "config"})
config = Config.get_all_settings()
# Mask sensitive information
for key in config:
if 'password' in key.lower() or 'secret' in key.lower():
if config[key]:
config[key] = '*' * 8
return config
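# Client usage sketch (illustrative; assumes the server is reachable on localhost:8000):
#
#   import requests
#   base = "http://localhost:8000"
#   print(requests.get(f"{base}/health").json())
#   print(requests.get(f"{base}/measurements/latest", params={"limit": 5}).json())
#   requests.post(f"{base}/scrape/trigger")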
if __name__ == "__main__":
import uvicorn
# Setup logging
setup_logging(
log_level=Config.LOG_LEVEL,
log_file=Config.LOG_FILE,
enable_console=True,
enable_colors=True
)
# Run the API server
uvicorn.run(
"web_api:app",
host="0.0.0.0",
port=8000,
reload=False,
log_config=None # Use our custom logging
)