Initial commit: Northern Thailand Ping River Monitor v3.1.0
Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Features: - Real-time water level monitoring for Ping River Basin (16 stations) - Coverage from Chiang Dao to Nakhon Sawan in Northern Thailand - FastAPI web interface with interactive dashboard and station management - Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics) - Comprehensive monitoring with health checks and metrics collection - Docker deployment with Grafana integration - Production-ready architecture with enterprise-grade observability CI/CD & Automation: - Complete Gitea Actions workflows for CI/CD, security, and releases - Multi-Python version testing (3.9-3.12) - Multi-architecture Docker builds (amd64, arm64) - Daily security scanning and dependency monitoring - Automated documentation generation - Performance testing and validation Production Ready: - Type safety with Pydantic models and comprehensive type hints - Data validation layer with range checking and error handling - Rate limiting and request tracking for API protection - Enhanced logging with rotation, colors, and performance metrics - Station management API for dynamic CRUD operations - Comprehensive documentation and deployment guides Technical Stack: - Python 3.9+ with FastAPI and Pydantic - Multi-database architecture with adapter pattern - Docker containerization with multi-stage builds - Grafana dashboards for visualization - Gitea Actions for CI/CD automation - Enterprise monitoring and alerting Ready for deployment to B4L infrastructure!
This commit is contained in:
38
src/__init__.py
Normal file
38
src/__init__.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Northern Thailand Ping River Monitor Package
|
||||
|
||||
A comprehensive real-time water level monitoring system for the Ping River Basin
|
||||
in Northern Thailand, covering Royal Irrigation Department (RID) stations.
|
||||
"""
|
||||
|
||||
__version__ = "3.1.0"
|
||||
__author__ = "Ping River Monitor Team"
|
||||
__description__ = "Northern Thailand Ping River Monitoring System"
|
||||
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .database_adapters import create_database_adapter, DatabaseAdapter
|
||||
from .config import Config
|
||||
from .models import WaterMeasurement, StationInfo, DatabaseConfig
|
||||
from .exceptions import (
|
||||
WaterMonitorException,
|
||||
DatabaseConnectionError,
|
||||
APIConnectionError,
|
||||
DataValidationError,
|
||||
ConfigurationError
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'EnhancedWaterMonitorScraper',
|
||||
'create_database_adapter',
|
||||
'DatabaseAdapter',
|
||||
'Config',
|
||||
'WaterMeasurement',
|
||||
'StationInfo',
|
||||
'DatabaseConfig',
|
||||
'WaterMonitorException',
|
||||
'DatabaseConnectionError',
|
||||
'APIConnectionError',
|
||||
'DataValidationError',
|
||||
'ConfigurationError'
|
||||
]
|
191
src/config.py
Normal file
191
src/config.py
Normal file
@@ -0,0 +1,191 @@
|
||||
import os
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
try:
|
||||
from .exceptions import ConfigurationError
|
||||
from .models import DatabaseType, DatabaseConfig
|
||||
except ImportError:
|
||||
# Handle case when running as standalone script
|
||||
class ConfigurationError(Exception):
|
||||
pass
|
||||
|
||||
from enum import Enum
|
||||
|
||||
class DatabaseType(Enum):
|
||||
SQLITE = "sqlite"
|
||||
MYSQL = "mysql"
|
||||
POSTGRESQL = "postgresql"
|
||||
INFLUXDB = "influxdb"
|
||||
VICTORIAMETRICS = "victoriametrics"
|
||||
|
||||
class Config:
|
||||
"""Configuration class for the Water Level Monitor"""
|
||||
|
||||
# Database settings
|
||||
DATABASE_PATH = os.getenv('WATER_DB_PATH', 'water_levels.db')
|
||||
|
||||
# Website settings
|
||||
TARGET_URL = "https://hyd-app-db.rid.go.th/hydro1h.html"
|
||||
API_URL = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
|
||||
REQUEST_TIMEOUT = int(os.getenv('REQUEST_TIMEOUT', '30'))
|
||||
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
|
||||
# Database configuration
|
||||
DB_TYPE = os.getenv('DB_TYPE', 'sqlite').lower()
|
||||
|
||||
# VictoriaMetrics settings
|
||||
VM_HOST = os.getenv('VM_HOST', 'vm.newedge.house')
|
||||
VM_PORT = int(os.getenv('VM_PORT', '443'))
|
||||
|
||||
# Support for HTTPS URLs (e.g., behind reverse proxy)
|
||||
VM_URL = os.getenv('VM_URL') # Full URL override (e.g., https://vm.example.com)
|
||||
|
||||
# InfluxDB settings
|
||||
INFLUX_HOST = os.getenv('INFLUX_HOST', 'localhost')
|
||||
INFLUX_PORT = int(os.getenv('INFLUX_PORT', '8086'))
|
||||
INFLUX_DATABASE = os.getenv('INFLUX_DATABASE', 'water_monitoring')
|
||||
INFLUX_USERNAME = os.getenv('INFLUX_USERNAME')
|
||||
INFLUX_PASSWORD = os.getenv('INFLUX_PASSWORD')
|
||||
|
||||
# PostgreSQL settings
|
||||
POSTGRES_CONNECTION_STRING = os.getenv('POSTGRES_CONNECTION_STRING')
|
||||
|
||||
# MySQL settings
|
||||
MYSQL_CONNECTION_STRING = os.getenv('MYSQL_CONNECTION_STRING')
|
||||
|
||||
# Scheduler settings
|
||||
SCRAPING_INTERVAL_HOURS = int(os.getenv('SCRAPING_INTERVAL_HOURS', '1'))
|
||||
|
||||
# Logging settings
|
||||
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
|
||||
LOG_FILE = os.getenv('LOG_FILE', 'water_monitor.log')
|
||||
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
|
||||
|
||||
# Data retention
|
||||
DATA_RETENTION_DAYS = int(os.getenv('DATA_RETENTION_DAYS', '365'))
|
||||
|
||||
# Retry settings
|
||||
MAX_RETRIES = int(os.getenv('MAX_RETRIES', '3'))
|
||||
RETRY_DELAY_SECONDS = int(os.getenv('RETRY_DELAY_SECONDS', '60'))
|
||||
|
||||
@classmethod
|
||||
def validate_config(cls) -> bool:
|
||||
"""Validate configuration settings"""
|
||||
errors = []
|
||||
|
||||
# Validate database type
|
||||
try:
|
||||
DatabaseType(cls.DB_TYPE)
|
||||
except ValueError:
|
||||
errors.append(f"Invalid DB_TYPE: {cls.DB_TYPE}")
|
||||
|
||||
# Validate database-specific settings
|
||||
if cls.DB_TYPE == 'victoriametrics':
|
||||
if not cls.VM_HOST:
|
||||
errors.append("VM_HOST is required for VictoriaMetrics")
|
||||
if not isinstance(cls.VM_PORT, int) or cls.VM_PORT <= 0:
|
||||
errors.append("VM_PORT must be a positive integer")
|
||||
|
||||
elif cls.DB_TYPE == 'influxdb':
|
||||
if not cls.INFLUX_HOST:
|
||||
errors.append("INFLUX_HOST is required for InfluxDB")
|
||||
if not cls.INFLUX_DATABASE:
|
||||
errors.append("INFLUX_DATABASE is required for InfluxDB")
|
||||
|
||||
elif cls.DB_TYPE in ['postgresql', 'mysql']:
|
||||
connection_string = (cls.POSTGRES_CONNECTION_STRING if cls.DB_TYPE == 'postgresql'
|
||||
else cls.MYSQL_CONNECTION_STRING)
|
||||
if not connection_string:
|
||||
errors.append(f"Connection string is required for {cls.DB_TYPE.upper()}")
|
||||
|
||||
# Validate numeric settings
|
||||
if cls.SCRAPING_INTERVAL_HOURS <= 0:
|
||||
errors.append("SCRAPING_INTERVAL_HOURS must be positive")
|
||||
|
||||
if cls.DATA_RETENTION_DAYS <= 0:
|
||||
errors.append("DATA_RETENTION_DAYS must be positive")
|
||||
|
||||
if errors:
|
||||
raise ConfigurationError(f"Configuration errors: {'; '.join(errors)}")
|
||||
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def get_database_config(cls) -> Dict[str, Any]:
|
||||
"""Returns database configuration based on DB_TYPE"""
|
||||
if cls.DB_TYPE == 'victoriametrics':
|
||||
return {
|
||||
'type': 'victoriametrics',
|
||||
'host': cls.VM_HOST,
|
||||
'port': cls.VM_PORT
|
||||
}
|
||||
elif cls.DB_TYPE == 'influxdb':
|
||||
return {
|
||||
'type': 'influxdb',
|
||||
'host': cls.INFLUX_HOST,
|
||||
'port': cls.INFLUX_PORT,
|
||||
'database': cls.INFLUX_DATABASE,
|
||||
'username': cls.INFLUX_USERNAME,
|
||||
'password': cls.INFLUX_PASSWORD
|
||||
}
|
||||
elif cls.DB_TYPE == 'postgresql':
|
||||
return {
|
||||
'type': 'postgresql',
|
||||
'connection_string': cls.POSTGRES_CONNECTION_STRING or
|
||||
'postgresql://postgres:password@localhost:5432/water_monitoring'
|
||||
}
|
||||
elif cls.DB_TYPE == 'mysql':
|
||||
return {
|
||||
'type': 'mysql',
|
||||
'connection_string': cls.MYSQL_CONNECTION_STRING or
|
||||
'mysql://root:password@localhost:3306/water_monitoring'
|
||||
}
|
||||
else: # sqlite
|
||||
return {
|
||||
'type': 'sqlite',
|
||||
'connection_string': f'sqlite:///{cls.DATABASE_PATH}'
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_all_settings(cls) -> Dict[str, Any]:
|
||||
"""Returns all configuration settings"""
|
||||
return {
|
||||
'DB_TYPE': cls.DB_TYPE,
|
||||
'DATABASE_PATH': cls.DATABASE_PATH,
|
||||
'TARGET_URL': cls.TARGET_URL,
|
||||
'API_URL': cls.API_URL,
|
||||
'REQUEST_TIMEOUT': cls.REQUEST_TIMEOUT,
|
||||
'SCRAPING_INTERVAL_HOURS': cls.SCRAPING_INTERVAL_HOURS,
|
||||
'LOG_LEVEL': cls.LOG_LEVEL,
|
||||
'LOG_FILE': cls.LOG_FILE,
|
||||
'DATA_RETENTION_DAYS': cls.DATA_RETENTION_DAYS,
|
||||
'MAX_RETRIES': cls.MAX_RETRIES,
|
||||
'RETRY_DELAY_SECONDS': cls.RETRY_DELAY_SECONDS,
|
||||
'VM_HOST': cls.VM_HOST,
|
||||
'VM_PORT': cls.VM_PORT,
|
||||
'INFLUX_HOST': cls.INFLUX_HOST,
|
||||
'INFLUX_PORT': cls.INFLUX_PORT,
|
||||
'INFLUX_DATABASE': cls.INFLUX_DATABASE
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def print_settings(cls):
|
||||
"""Prints all current settings"""
|
||||
print("=== Water Level Monitor Configuration ===")
|
||||
for key, value in cls.get_all_settings().items():
|
||||
# Hide sensitive information
|
||||
if 'PASSWORD' in key and value:
|
||||
value = '*' * len(str(value))
|
||||
print(f"{key}: {value}")
|
||||
print("=" * 45)
|
||||
|
||||
print("\nDatabase Configuration:")
|
||||
db_config = cls.get_database_config()
|
||||
for key, value in db_config.items():
|
||||
if 'password' in key and value:
|
||||
value = '*' * len(str(value))
|
||||
print(f" {key}: {value}")
|
||||
print("=" * 45)
|
||||
|
||||
if __name__ == "__main__":
|
||||
Config.print_settings()
|
663
src/database_adapters.py
Normal file
663
src/database_adapters.py
Normal file
@@ -0,0 +1,663 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Database adapters for different storage backends
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from typing import List, Dict, Optional, Any
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
# Base adapter interface
|
||||
class DatabaseAdapter(ABC):
|
||||
@abstractmethod
|
||||
def connect(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
pass
|
||||
|
||||
# InfluxDB Adapter
|
||||
class InfluxDBAdapter(DatabaseAdapter):
|
||||
def __init__(self, host: str = "localhost", port: int = 8086,
|
||||
database: str = "water_monitoring", username: str = None, password: str = None):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.database = database
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.client = None
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
from influxdb import InfluxDBClient
|
||||
self.client = InfluxDBClient(
|
||||
host=self.host,
|
||||
port=self.port,
|
||||
username=self.username,
|
||||
password=self.password,
|
||||
database=self.database
|
||||
)
|
||||
|
||||
# Create database if it doesn't exist
|
||||
databases = self.client.get_list_database()
|
||||
if not any(db['name'] == self.database for db in databases):
|
||||
self.client.create_database(self.database)
|
||||
logging.info(f"Created InfluxDB database: {self.database}")
|
||||
|
||||
# Create retention policy (keep data for 2 years, downsample after 30 days)
|
||||
retention_policies = self.client.get_list_retention_policies(self.database)
|
||||
if not any(rp['name'] == 'water_data_policy' for rp in retention_policies):
|
||||
self.client.create_retention_policy(
|
||||
'water_data_policy',
|
||||
'730d', # 2 years
|
||||
'1', # replication factor
|
||||
database=self.database,
|
||||
default=True
|
||||
)
|
||||
|
||||
logging.info("Connected to InfluxDB successfully")
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("InfluxDB client not installed. Run: pip install influxdb")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to InfluxDB: {e}")
|
||||
return False
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
if not self.client:
|
||||
logging.error("InfluxDB client not connected")
|
||||
return False
|
||||
|
||||
try:
|
||||
points = []
|
||||
for measurement in measurements:
|
||||
point = {
|
||||
"measurement": "water_data",
|
||||
"tags": {
|
||||
"station_code": measurement['station_code'],
|
||||
"station_name_en": measurement['station_name_en'],
|
||||
"station_name_th": measurement['station_name_th']
|
||||
},
|
||||
"time": measurement['timestamp'].isoformat(),
|
||||
"fields": {
|
||||
"water_level": float(measurement['water_level']),
|
||||
"discharge": float(measurement['discharge']),
|
||||
"discharge_percent": float(measurement['discharge_percent']) if measurement['discharge_percent'] else None
|
||||
}
|
||||
}
|
||||
points.append(point)
|
||||
|
||||
success = self.client.write_points(points)
|
||||
if success:
|
||||
logging.info(f"Successfully wrote {len(points)} points to InfluxDB")
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error writing to InfluxDB: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
if not self.client:
|
||||
return []
|
||||
|
||||
try:
|
||||
query = f"""
|
||||
SELECT last("water_level") as water_level,
|
||||
last("discharge") as discharge,
|
||||
last("discharge_percent") as discharge_percent
|
||||
FROM "water_data"
|
||||
GROUP BY "station_code", "station_name_en", "station_name_th"
|
||||
LIMIT {limit}
|
||||
"""
|
||||
|
||||
result = self.client.query(query)
|
||||
measurements = []
|
||||
|
||||
for point in result.get_points():
|
||||
measurements.append({
|
||||
'timestamp': point['time'],
|
||||
'station_code': point.get('station_code'),
|
||||
'station_name_en': point.get('station_name_en'),
|
||||
'station_name_th': point.get('station_name_th'),
|
||||
'water_level': point.get('water_level'),
|
||||
'discharge': point.get('discharge'),
|
||||
'discharge_percent': point.get('discharge_percent')
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying InfluxDB: {e}")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
if not self.client:
|
||||
return []
|
||||
|
||||
try:
|
||||
where_clause = f"time >= '{start_time.isoformat()}' AND time <= '{end_time.isoformat()}'"
|
||||
if station_codes:
|
||||
station_filter = "'" + "','".join(station_codes) + "'"
|
||||
where_clause += f" AND station_code IN ({station_filter})"
|
||||
|
||||
query = f"""
|
||||
SELECT "water_level", "discharge", "discharge_percent", "station_code", "station_name_en", "station_name_th"
|
||||
FROM "water_data"
|
||||
WHERE {where_clause}
|
||||
ORDER BY time DESC
|
||||
"""
|
||||
|
||||
result = self.client.query(query)
|
||||
measurements = []
|
||||
|
||||
for point in result.get_points():
|
||||
measurements.append({
|
||||
'timestamp': point['time'],
|
||||
'station_code': point.get('station_code'),
|
||||
'station_name_en': point.get('station_name_en'),
|
||||
'station_name_th': point.get('station_name_th'),
|
||||
'water_level': point.get('water_level'),
|
||||
'discharge': point.get('discharge'),
|
||||
'discharge_percent': point.get('discharge_percent')
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying InfluxDB: {e}")
|
||||
return []
|
||||
|
||||
# MySQL/PostgreSQL Adapter
|
||||
class SQLAdapter(DatabaseAdapter):
|
||||
def __init__(self, connection_string: str, db_type: str = "mysql"):
|
||||
self.connection_string = connection_string
|
||||
self.db_type = db_type.lower()
|
||||
self.engine = None
|
||||
|
||||
# Add SQLite-specific connection parameters for better concurrency
|
||||
if self.db_type == "sqlite":
|
||||
if "?" not in connection_string:
|
||||
self.connection_string += "?timeout=30&check_same_thread=False"
|
||||
else:
|
||||
self.connection_string += "&timeout=30&check_same_thread=False"
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
self.engine = create_engine(self.connection_string, pool_pre_ping=True)
|
||||
|
||||
# Create tables
|
||||
self._create_tables()
|
||||
|
||||
logging.info(f"Connected to {self.db_type.upper()} successfully")
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("SQLAlchemy not installed. Run: pip install sqlalchemy pymysql")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to {self.db_type.upper()}: {e}")
|
||||
return False
|
||||
|
||||
def _create_tables(self):
|
||||
from sqlalchemy import text
|
||||
|
||||
# Stations table - adjust for different databases
|
||||
if self.db_type == "sqlite":
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id INTEGER PRIMARY KEY,
|
||||
station_code TEXT UNIQUE NOT NULL,
|
||||
thai_name TEXT NOT NULL,
|
||||
english_name TEXT NOT NULL,
|
||||
latitude REAL,
|
||||
longitude REAL,
|
||||
geohash TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp DATETIME NOT NULL,
|
||||
station_id INTEGER NOT NULL,
|
||||
water_level REAL,
|
||||
discharge REAL,
|
||||
discharge_percent REAL,
|
||||
status TEXT DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE(timestamp, station_id)
|
||||
)
|
||||
"""
|
||||
|
||||
# Create indexes separately for SQLite
|
||||
index_sql = [
|
||||
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp)"
|
||||
]
|
||||
|
||||
elif self.db_type == "postgresql":
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
station_code VARCHAR(10) UNIQUE NOT NULL,
|
||||
thai_name VARCHAR(255) NOT NULL,
|
||||
english_name VARCHAR(255) NOT NULL,
|
||||
latitude DECIMAL(10,8),
|
||||
longitude DECIMAL(11,8),
|
||||
geohash VARCHAR(20),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP NOT NULL,
|
||||
station_id INTEGER NOT NULL,
|
||||
water_level NUMERIC(10,3),
|
||||
discharge NUMERIC(10,2),
|
||||
discharge_percent NUMERIC(5,2),
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE(timestamp, station_id)
|
||||
)
|
||||
"""
|
||||
|
||||
index_sql = [
|
||||
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp DESC)"
|
||||
]
|
||||
|
||||
else: # MySQL
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id INT PRIMARY KEY,
|
||||
station_code VARCHAR(10) UNIQUE NOT NULL,
|
||||
thai_name VARCHAR(255) NOT NULL,
|
||||
english_name VARCHAR(255) NOT NULL,
|
||||
latitude DECIMAL(10,8),
|
||||
longitude DECIMAL(11,8),
|
||||
geohash VARCHAR(20),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
||||
timestamp DATETIME NOT NULL,
|
||||
station_id INT NOT NULL,
|
||||
water_level DECIMAL(10,3),
|
||||
discharge DECIMAL(10,2),
|
||||
discharge_percent DECIMAL(5,2),
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE KEY unique_measurement (timestamp, station_id),
|
||||
INDEX idx_timestamp (timestamp),
|
||||
INDEX idx_station_timestamp (station_id, timestamp)
|
||||
)
|
||||
"""
|
||||
index_sql = []
|
||||
|
||||
with self.engine.begin() as conn:
|
||||
conn.execute(text(stations_sql))
|
||||
conn.execute(text(measurements_sql))
|
||||
|
||||
# Create indexes for SQLite and PostgreSQL
|
||||
for index in index_sql:
|
||||
conn.execute(text(index))
|
||||
|
||||
# Transaction is automatically committed when context manager exits
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
if not self.engine:
|
||||
return False
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
with self.engine.begin() as conn:
|
||||
# Insert/update stations
|
||||
for measurement in measurements:
|
||||
if self.db_type == "sqlite":
|
||||
station_sql = """
|
||||
INSERT OR REPLACE INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, CURRENT_TIMESTAMP)
|
||||
"""
|
||||
elif self.db_type == "postgresql":
|
||||
station_sql = """
|
||||
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
thai_name = EXCLUDED.thai_name,
|
||||
english_name = EXCLUDED.english_name,
|
||||
latitude = EXCLUDED.latitude,
|
||||
longitude = EXCLUDED.longitude,
|
||||
geohash = EXCLUDED.geohash,
|
||||
updated_at = NOW()
|
||||
"""
|
||||
else: # MySQL
|
||||
station_sql = """
|
||||
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
|
||||
ON DUPLICATE KEY UPDATE
|
||||
thai_name = VALUES(thai_name),
|
||||
english_name = VALUES(english_name),
|
||||
latitude = VALUES(latitude),
|
||||
longitude = VALUES(longitude),
|
||||
geohash = VALUES(geohash),
|
||||
updated_at = NOW()
|
||||
"""
|
||||
|
||||
conn.execute(text(station_sql), {
|
||||
'station_id': measurement['station_id'],
|
||||
'station_code': measurement['station_code'],
|
||||
'thai_name': measurement['station_name_th'],
|
||||
'english_name': measurement['station_name_en'],
|
||||
'latitude': measurement.get('latitude'),
|
||||
'longitude': measurement.get('longitude'),
|
||||
'geohash': measurement.get('geohash')
|
||||
})
|
||||
|
||||
# Insert measurements
|
||||
for measurement in measurements:
|
||||
if self.db_type == "sqlite":
|
||||
measurement_sql = """
|
||||
INSERT OR REPLACE INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
"""
|
||||
elif self.db_type == "postgresql":
|
||||
measurement_sql = """
|
||||
INSERT INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
ON CONFLICT (timestamp, station_id) DO UPDATE SET
|
||||
water_level = EXCLUDED.water_level,
|
||||
discharge = EXCLUDED.discharge,
|
||||
discharge_percent = EXCLUDED.discharge_percent,
|
||||
status = EXCLUDED.status
|
||||
"""
|
||||
else: # MySQL
|
||||
measurement_sql = """
|
||||
INSERT INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
water_level = VALUES(water_level),
|
||||
discharge = VALUES(discharge),
|
||||
discharge_percent = VALUES(discharge_percent),
|
||||
status = VALUES(status)
|
||||
"""
|
||||
|
||||
conn.execute(text(measurement_sql), {
|
||||
'timestamp': measurement['timestamp'],
|
||||
'station_id': measurement['station_id'],
|
||||
'water_level': measurement['water_level'],
|
||||
'discharge': measurement['discharge'],
|
||||
'discharge_percent': measurement['discharge_percent'],
|
||||
'status': measurement['status']
|
||||
})
|
||||
|
||||
# Transaction is automatically committed when context manager exits
|
||||
logging.info(f"Successfully saved {len(measurements)} measurements to {self.db_type.upper()}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error saving to {self.db_type.upper()}: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
if not self.engine:
|
||||
return []
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
query = """
|
||||
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
|
||||
m.water_level, m.discharge, m.discharge_percent, m.status
|
||||
FROM water_measurements m
|
||||
JOIN stations s ON m.station_id = s.id
|
||||
INNER JOIN (
|
||||
SELECT station_id, MAX(timestamp) as max_timestamp
|
||||
FROM water_measurements
|
||||
GROUP BY station_id
|
||||
) latest ON m.station_id = latest.station_id AND m.timestamp = latest.max_timestamp
|
||||
ORDER BY s.station_code
|
||||
LIMIT :limit
|
||||
"""
|
||||
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(text(query), {'limit': limit})
|
||||
measurements = []
|
||||
|
||||
for row in result:
|
||||
measurements.append({
|
||||
'timestamp': row[0],
|
||||
'station_code': row[1],
|
||||
'station_name_en': row[2],
|
||||
'station_name_th': row[3],
|
||||
'water_level': float(row[4]) if row[4] else None,
|
||||
'discharge': float(row[5]) if row[5] else None,
|
||||
'discharge_percent': float(row[6]) if row[6] else None,
|
||||
'status': row[7]
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying {self.db_type.upper()}: {e}")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
if not self.engine:
|
||||
return []
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
where_clause = "m.timestamp BETWEEN :start_time AND :end_time"
|
||||
params = {'start_time': start_time, 'end_time': end_time}
|
||||
|
||||
if station_codes:
|
||||
placeholders = ','.join([f':station_{i}' for i in range(len(station_codes))])
|
||||
where_clause += f" AND s.station_code IN ({placeholders})"
|
||||
for i, code in enumerate(station_codes):
|
||||
params[f'station_{i}'] = code
|
||||
|
||||
query = f"""
|
||||
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
|
||||
m.water_level, m.discharge, m.discharge_percent, m.status
|
||||
FROM water_measurements m
|
||||
JOIN stations s ON m.station_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY m.timestamp DESC, s.station_code
|
||||
"""
|
||||
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(text(query), params)
|
||||
measurements = []
|
||||
|
||||
for row in result:
|
||||
measurements.append({
|
||||
'timestamp': row[0],
|
||||
'station_code': row[1],
|
||||
'station_name_en': row[2],
|
||||
'station_name_th': row[3],
|
||||
'water_level': float(row[4]) if row[4] else None,
|
||||
'discharge': float(row[5]) if row[5] else None,
|
||||
'discharge_percent': float(row[6]) if row[6] else None,
|
||||
'status': row[7]
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying {self.db_type.upper()}: {e}")
|
||||
return []
|
||||
|
||||
# VictoriaMetrics Adapter (using Prometheus format)
|
||||
class VictoriaMetricsAdapter(DatabaseAdapter):
|
||||
def __init__(self, host: str = "localhost", port: int = 8428):
|
||||
self.host = host
|
||||
self.port = port
|
||||
|
||||
# Handle HTTPS URLs and reverse proxy configurations
|
||||
if host.startswith(('http://', 'https://')):
|
||||
self.base_url = host
|
||||
if port != 80 and port != 443 and not host.endswith(f':{port}'):
|
||||
# Only add port if it's not standard and not already in URL
|
||||
if '://' in host and ':' not in host.split('://')[1]:
|
||||
self.base_url = f"{host}:{port}"
|
||||
else:
|
||||
# Default to HTTP for localhost, HTTPS for remote hosts
|
||||
protocol = "https" if host != "localhost" and not host.startswith("127.") else "http"
|
||||
if (protocol == "https" and port == 443) or (protocol == "http" and port == 80):
|
||||
self.base_url = f"{protocol}://{host}"
|
||||
else:
|
||||
self.base_url = f"{protocol}://{host}:{port}"
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
import requests
|
||||
# Test connection with SSL verification and timeout
|
||||
response = requests.get(
|
||||
f"{self.base_url}/api/v1/status/config",
|
||||
timeout=10,
|
||||
verify=True # Enable SSL verification for HTTPS
|
||||
)
|
||||
if response.status_code == 200:
|
||||
logging.info(f"Connected to VictoriaMetrics successfully at {self.base_url}")
|
||||
return True
|
||||
else:
|
||||
logging.error(f"VictoriaMetrics connection failed: {response.status_code}")
|
||||
return False
|
||||
except requests.exceptions.SSLError as e:
|
||||
logging.error(f"SSL error connecting to VictoriaMetrics: {e}")
|
||||
return False
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
logging.error(f"Connection error to VictoriaMetrics: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to VictoriaMetrics: {e}")
|
||||
return False
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
try:
|
||||
import requests
|
||||
|
||||
# Convert to Prometheus format
|
||||
metrics_data = []
|
||||
timestamp_ms = int(datetime.datetime.now().timestamp() * 1000)
|
||||
|
||||
for measurement in measurements:
|
||||
# Water level metric
|
||||
metrics_data.append(
|
||||
f'water_level{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["water_level"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Discharge metric
|
||||
metrics_data.append(
|
||||
f'water_discharge{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["discharge"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Discharge percentage metric
|
||||
if measurement["discharge_percent"]:
|
||||
metrics_data.append(
|
||||
f'water_discharge_percent{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["discharge_percent"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Send to VictoriaMetrics
|
||||
data = '\n'.join(metrics_data)
|
||||
response = requests.post(
|
||||
f"{self.base_url}/api/v1/import/prometheus",
|
||||
data=data,
|
||||
headers={'Content-Type': 'text/plain'},
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if response.status_code == 204:
|
||||
logging.info(f"Successfully sent {len(measurements)} measurements to VictoriaMetrics")
|
||||
return True
|
||||
else:
|
||||
logging.error(f"VictoriaMetrics import failed: {response.status_code} - {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error sending to VictoriaMetrics: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
# VictoriaMetrics queries would be implemented here
|
||||
# This is a simplified version
|
||||
logging.warning("get_latest_measurements not fully implemented for VictoriaMetrics")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
# VictoriaMetrics range queries would be implemented here
|
||||
logging.warning("get_measurements_by_timerange not fully implemented for VictoriaMetrics")
|
||||
return []
|
||||
|
||||
# Factory function to create appropriate adapter
|
||||
def create_database_adapter(db_type: str, **kwargs) -> DatabaseAdapter:
|
||||
"""
|
||||
Factory function to create database adapter
|
||||
|
||||
Args:
|
||||
db_type: 'influxdb', 'mysql', 'postgresql', 'sqlite', or 'victoriametrics'
|
||||
**kwargs: Database-specific connection parameters
|
||||
"""
|
||||
db_type = db_type.lower()
|
||||
|
||||
if db_type == 'influxdb':
|
||||
return InfluxDBAdapter(**kwargs)
|
||||
elif db_type == 'mysql':
|
||||
return SQLAdapter(db_type='mysql', **kwargs)
|
||||
elif db_type == 'postgresql':
|
||||
return SQLAdapter(db_type='postgresql', **kwargs)
|
||||
elif db_type == 'sqlite':
|
||||
return SQLAdapter(db_type='sqlite', **kwargs)
|
||||
elif db_type == 'victoriametrics':
|
||||
return VictoriaMetricsAdapter(**kwargs)
|
||||
else:
|
||||
raise ValueError(f"Unsupported database type: {db_type}")
|
331
src/demo_databases.py
Normal file
331
src/demo_databases.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Demo script showing different database backend options for water monitoring
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
from water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
|
||||
def demo_sqlite():
|
||||
"""Demo with SQLite (local development)"""
|
||||
print("=" * 60)
|
||||
print("🗄️ SQLite Demo (Local Development)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'sqlite',
|
||||
'connection_string': 'sqlite:///demo_water_sqlite.db'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to SQLite database")
|
||||
|
||||
# Show latest data
|
||||
latest = scraper.get_latest_data(5)
|
||||
print(f"\nLatest 5 measurements:")
|
||||
for measurement in latest:
|
||||
print(f" • {measurement['station_code']} ({measurement['station_name_en']}): "
|
||||
f"{measurement['water_level']:.2f}m, {measurement['discharge']:.1f} cms")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
def demo_influxdb():
|
||||
"""Demo with InfluxDB (requires InfluxDB running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("📊 InfluxDB Demo (Time-Series Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'influxdb',
|
||||
'host': 'localhost',
|
||||
'port': 8086,
|
||||
'database': 'water_monitoring_demo',
|
||||
'username': None, # Set if authentication is enabled
|
||||
'password': None
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.client:
|
||||
print("✓ Connected to InfluxDB")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to InfluxDB")
|
||||
print("💡 You can now query this data in Grafana or InfluxDB CLI")
|
||||
print(" Example query: SELECT * FROM water_data ORDER BY time DESC LIMIT 10")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to InfluxDB")
|
||||
print("💡 Make sure InfluxDB is running: docker run -p 8086:8086 influxdb:1.8")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 InfluxDB might not be running or accessible")
|
||||
|
||||
def demo_postgresql():
|
||||
"""Demo with PostgreSQL (requires PostgreSQL running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🐘 PostgreSQL Demo (Relational Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'postgresql',
|
||||
'connection_string': 'postgresql://postgres:password@localhost:5432/water_monitoring'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.engine:
|
||||
print("✓ Connected to PostgreSQL")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to PostgreSQL")
|
||||
print("💡 You can now query this data with SQL")
|
||||
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to PostgreSQL")
|
||||
print("💡 Make sure PostgreSQL is running with correct credentials")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 PostgreSQL might not be running or credentials might be wrong")
|
||||
|
||||
def demo_mysql():
|
||||
"""Demo with MySQL (requires MySQL running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🐬 MySQL Demo (Relational Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'mysql',
|
||||
'connection_string': 'mysql://root:password@localhost:3306/water_monitoring'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.engine:
|
||||
print("✓ Connected to MySQL")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to MySQL")
|
||||
print("💡 You can now query this data with SQL")
|
||||
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to MySQL")
|
||||
print("💡 Make sure MySQL is running with correct credentials")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 MySQL might not be running or credentials might be wrong")
|
||||
|
||||
def demo_victoriametrics():
|
||||
"""Demo with VictoriaMetrics (supports both local and HTTPS configurations)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("⚡ VictoriaMetrics Demo (High-Performance Metrics)")
|
||||
print("=" * 60)
|
||||
|
||||
# Use configuration from environment or config.py
|
||||
from config import Config
|
||||
db_config = Config.get_database_config()
|
||||
|
||||
if db_config['type'] != 'victoriametrics':
|
||||
# Fallback to default local configuration
|
||||
config = {
|
||||
'type': 'victoriametrics',
|
||||
'host': 'vm.newedge.house',
|
||||
'port': 443
|
||||
}
|
||||
else:
|
||||
config = db_config
|
||||
|
||||
print(f"Connecting to: {config['host']}:{config['port']}")
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter:
|
||||
# Test connection using the adapter's connect method
|
||||
if scraper.db_adapter.connect():
|
||||
print("✓ Connected to VictoriaMetrics")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to VictoriaMetrics")
|
||||
print("💡 You can now query this data via Prometheus API")
|
||||
|
||||
# Show appropriate query URL based on configuration
|
||||
base_url = scraper.db_adapter.base_url
|
||||
print(f" Example: {base_url}/api/v1/query?query=water_level")
|
||||
print(f" Health check: {base_url}/health")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to VictoriaMetrics")
|
||||
if config['host'] == 'localhost':
|
||||
print("💡 Make sure VictoriaMetrics is running locally:")
|
||||
print(" docker run -p 8428:8428 victoriametrics/victoria-metrics")
|
||||
else:
|
||||
print(f"💡 Check if VictoriaMetrics is accessible at {config['host']}:{config['port']}")
|
||||
print("💡 Verify HTTPS configuration and network connectivity")
|
||||
else:
|
||||
print("✗ Failed to initialize VictoriaMetrics adapter")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 Check your VictoriaMetrics configuration and network connectivity")
|
||||
|
||||
def show_recommendations():
|
||||
"""Show database recommendations"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🏆 Database Recommendations")
|
||||
print("=" * 60)
|
||||
|
||||
recommendations = [
|
||||
{
|
||||
'name': 'InfluxDB',
|
||||
'best_for': 'Time-series data, Grafana dashboards',
|
||||
'pros': ['Purpose-built for time-series', 'Great compression', 'Built-in retention'],
|
||||
'cons': ['Learning curve', 'Less flexible for complex queries'],
|
||||
'use_case': 'Recommended for most water monitoring deployments'
|
||||
},
|
||||
{
|
||||
'name': 'PostgreSQL + TimescaleDB',
|
||||
'best_for': 'Complex queries, existing PostgreSQL infrastructure',
|
||||
'pros': ['Mature ecosystem', 'SQL compatibility', 'ACID compliance'],
|
||||
'cons': ['More complex setup', 'Higher resource usage'],
|
||||
'use_case': 'Best for organizations already using PostgreSQL'
|
||||
},
|
||||
{
|
||||
'name': 'VictoriaMetrics',
|
||||
'best_for': 'High-performance metrics, Prometheus compatibility',
|
||||
'pros': ['Extremely fast', 'Low resource usage', 'Better compression'],
|
||||
'cons': ['Newer ecosystem', 'Less tooling'],
|
||||
'use_case': 'Best for high-volume, performance-critical deployments'
|
||||
},
|
||||
{
|
||||
'name': 'MySQL',
|
||||
'best_for': 'Existing MySQL infrastructure, familiar SQL',
|
||||
'pros': ['Familiar', 'Mature', 'Wide support'],
|
||||
'cons': ['Not optimized for time-series', 'Manual optimization needed'],
|
||||
'use_case': 'Good for organizations with existing MySQL expertise'
|
||||
}
|
||||
]
|
||||
|
||||
for rec in recommendations:
|
||||
print(f"\n📊 {rec['name']}")
|
||||
print(f" Best for: {rec['best_for']}")
|
||||
print(f" Pros: {', '.join(rec['pros'])}")
|
||||
print(f" Cons: {', '.join(rec['cons'])}")
|
||||
print(f" 💡 {rec['use_case']}")
|
||||
|
||||
def main():
|
||||
"""Main demo function"""
|
||||
print("🌊 Thailand Water Monitor - Database Backend Demo")
|
||||
print("This demo shows how to use different database backends")
|
||||
|
||||
# Always run SQLite demo (no external dependencies)
|
||||
demo_sqlite()
|
||||
|
||||
# Check for command line arguments to run specific demos
|
||||
if len(sys.argv) > 1:
|
||||
db_type = sys.argv[1].lower()
|
||||
|
||||
if db_type == 'influxdb':
|
||||
demo_influxdb()
|
||||
elif db_type == 'postgresql':
|
||||
demo_postgresql()
|
||||
elif db_type == 'mysql':
|
||||
demo_mysql()
|
||||
elif db_type == 'victoriametrics':
|
||||
demo_victoriametrics()
|
||||
elif db_type == 'all':
|
||||
demo_influxdb()
|
||||
demo_postgresql()
|
||||
demo_mysql()
|
||||
demo_victoriametrics()
|
||||
else:
|
||||
print(f"\nUnknown database type: {db_type}")
|
||||
print("Available options: influxdb, postgresql, mysql, victoriametrics, all")
|
||||
else:
|
||||
print("\n💡 To test other databases, run:")
|
||||
print(" python demo_databases.py influxdb")
|
||||
print(" python demo_databases.py postgresql")
|
||||
print(" python demo_databases.py mysql")
|
||||
print(" python demo_databases.py victoriametrics")
|
||||
print(" python demo_databases.py all")
|
||||
|
||||
# Show recommendations
|
||||
show_recommendations()
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("✅ Demo completed!")
|
||||
print("📖 See DATABASE_DEPLOYMENT_GUIDE.md for production setup instructions")
|
||||
print("=" * 60)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
32
src/exceptions.py
Normal file
32
src/exceptions.py
Normal file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Custom exceptions for water monitoring system
|
||||
"""
|
||||
|
||||
class WaterMonitorException(Exception):
|
||||
"""Base exception for water monitoring system"""
|
||||
pass
|
||||
|
||||
class DatabaseConnectionError(WaterMonitorException):
|
||||
"""Raised when database connection fails"""
|
||||
pass
|
||||
|
||||
class APIConnectionError(WaterMonitorException):
|
||||
"""Raised when API connection fails"""
|
||||
pass
|
||||
|
||||
class DataValidationError(WaterMonitorException):
|
||||
"""Raised when data validation fails"""
|
||||
pass
|
||||
|
||||
class ConfigurationError(WaterMonitorException):
|
||||
"""Raised when configuration is invalid"""
|
||||
pass
|
||||
|
||||
class DataParsingError(WaterMonitorException):
|
||||
"""Raised when data parsing fails"""
|
||||
pass
|
||||
|
||||
class RetryExhaustedError(WaterMonitorException):
|
||||
"""Raised when all retry attempts are exhausted"""
|
||||
pass
|
265
src/health_check.py
Normal file
265
src/health_check.py
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Health check system for water monitoring application
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List, Callable
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class HealthStatus(Enum):
|
||||
HEALTHY = "healthy"
|
||||
DEGRADED = "degraded"
|
||||
UNHEALTHY = "unhealthy"
|
||||
|
||||
@dataclass
|
||||
class HealthCheckResult:
|
||||
"""Result of a health check"""
|
||||
name: str
|
||||
status: HealthStatus
|
||||
message: str
|
||||
timestamp: datetime
|
||||
response_time_ms: Optional[float] = None
|
||||
details: Optional[Dict[str, Any]] = None
|
||||
|
||||
class HealthCheck:
|
||||
"""Base health check class"""
|
||||
|
||||
def __init__(self, name: str, timeout_seconds: int = 30):
|
||||
self.name = name
|
||||
self.timeout_seconds = timeout_seconds
|
||||
|
||||
def check(self) -> HealthCheckResult:
|
||||
"""Perform the health check"""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
result = self._perform_check()
|
||||
response_time = (time.time() - start_time) * 1000
|
||||
|
||||
return HealthCheckResult(
|
||||
name=self.name,
|
||||
status=result.get('status', HealthStatus.HEALTHY),
|
||||
message=result.get('message', 'OK'),
|
||||
timestamp=datetime.now(),
|
||||
response_time_ms=response_time,
|
||||
details=result.get('details')
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
response_time = (time.time() - start_time) * 1000
|
||||
logger.error(f"Health check {self.name} failed: {e}")
|
||||
|
||||
return HealthCheckResult(
|
||||
name=self.name,
|
||||
status=HealthStatus.UNHEALTHY,
|
||||
message=f"Check failed: {str(e)}",
|
||||
timestamp=datetime.now(),
|
||||
response_time_ms=response_time
|
||||
)
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
"""Override this method to implement the actual check"""
|
||||
raise NotImplementedError
|
||||
|
||||
class DatabaseHealthCheck(HealthCheck):
|
||||
"""Health check for database connectivity"""
|
||||
|
||||
def __init__(self, db_adapter, name: str = "database"):
|
||||
super().__init__(name)
|
||||
self.db_adapter = db_adapter
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
if not self.db_adapter:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': 'Database adapter not initialized'
|
||||
}
|
||||
|
||||
try:
|
||||
# Try to connect
|
||||
if hasattr(self.db_adapter, 'connect'):
|
||||
connected = self.db_adapter.connect()
|
||||
if not connected:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': 'Database connection failed'
|
||||
}
|
||||
|
||||
# Try to get latest data
|
||||
latest_data = self.db_adapter.get_latest_measurements(limit=1)
|
||||
|
||||
if latest_data:
|
||||
latest_timestamp = latest_data[0].get('timestamp')
|
||||
if isinstance(latest_timestamp, str):
|
||||
latest_timestamp = datetime.fromisoformat(latest_timestamp.replace('Z', '+00:00'))
|
||||
|
||||
# Check if data is recent (within last 2 hours)
|
||||
if datetime.now() - latest_timestamp.replace(tzinfo=None) > timedelta(hours=2):
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'Latest data is old: {latest_timestamp}',
|
||||
'details': {'latest_data_timestamp': str(latest_timestamp)}
|
||||
}
|
||||
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'Database connection OK',
|
||||
'details': {
|
||||
'latest_data_count': len(latest_data),
|
||||
'latest_timestamp': str(latest_data[0].get('timestamp')) if latest_data else None
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'Database check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class APIHealthCheck(HealthCheck):
|
||||
"""Health check for external API connectivity"""
|
||||
|
||||
def __init__(self, api_url: str, session, name: str = "api"):
|
||||
super().__init__(name)
|
||||
self.api_url = api_url
|
||||
self.session = session
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
try:
|
||||
# Simple GET request to check API availability
|
||||
response = self.session.get(self.api_url, timeout=self.timeout_seconds)
|
||||
|
||||
if response.status_code == 200:
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'API connection OK',
|
||||
'details': {
|
||||
'status_code': response.status_code,
|
||||
'response_size': len(response.content)
|
||||
}
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'API returned status {response.status_code}',
|
||||
'details': {'status_code': response.status_code}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'API check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class MemoryHealthCheck(HealthCheck):
|
||||
"""Health check for memory usage"""
|
||||
|
||||
def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
|
||||
super().__init__(name)
|
||||
self.max_memory_mb = max_memory_mb
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
memory_info = process.memory_info()
|
||||
memory_mb = memory_info.rss / 1024 / 1024
|
||||
|
||||
if memory_mb > self.max_memory_mb:
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'High memory usage: {memory_mb:.1f}MB',
|
||||
'details': {'memory_mb': memory_mb, 'max_memory_mb': self.max_memory_mb}
|
||||
}
|
||||
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': f'Memory usage OK: {memory_mb:.1f}MB',
|
||||
'details': {'memory_mb': memory_mb}
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'Memory check skipped (psutil not available)'
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'Memory check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class HealthCheckManager:
|
||||
"""Manages multiple health checks"""
|
||||
|
||||
def __init__(self):
|
||||
self.checks: List[HealthCheck] = []
|
||||
self.last_results: Dict[str, HealthCheckResult] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def add_check(self, health_check: HealthCheck):
|
||||
"""Add a health check"""
|
||||
with self._lock:
|
||||
self.checks.append(health_check)
|
||||
|
||||
def run_all_checks(self) -> Dict[str, HealthCheckResult]:
|
||||
"""Run all health checks"""
|
||||
results = {}
|
||||
|
||||
for check in self.checks:
|
||||
try:
|
||||
result = check.check()
|
||||
results[check.name] = result
|
||||
|
||||
with self._lock:
|
||||
self.last_results[check.name] = result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running health check {check.name}: {e}")
|
||||
results[check.name] = HealthCheckResult(
|
||||
name=check.name,
|
||||
status=HealthStatus.UNHEALTHY,
|
||||
message=f"Check execution failed: {str(e)}",
|
||||
timestamp=datetime.now()
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def get_overall_status(self) -> HealthStatus:
|
||||
"""Get overall system health status"""
|
||||
if not self.last_results:
|
||||
return HealthStatus.UNHEALTHY
|
||||
|
||||
statuses = [result.status for result in self.last_results.values()]
|
||||
|
||||
if any(status == HealthStatus.UNHEALTHY for status in statuses):
|
||||
return HealthStatus.UNHEALTHY
|
||||
elif any(status == HealthStatus.DEGRADED for status in statuses):
|
||||
return HealthStatus.DEGRADED
|
||||
else:
|
||||
return HealthStatus.HEALTHY
|
||||
|
||||
def get_health_summary(self) -> Dict[str, Any]:
|
||||
"""Get a summary of system health"""
|
||||
overall_status = self.get_overall_status()
|
||||
|
||||
return {
|
||||
'overall_status': overall_status.value,
|
||||
'timestamp': datetime.now().isoformat(),
|
||||
'checks': {
|
||||
name: {
|
||||
'status': result.status.value,
|
||||
'message': result.message,
|
||||
'response_time_ms': result.response_time_ms,
|
||||
'timestamp': result.timestamp.isoformat()
|
||||
}
|
||||
for name, result in self.last_results.items()
|
||||
}
|
||||
}
|
135
src/logging_config.py
Normal file
135
src/logging_config.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Centralized logging configuration for water monitoring system
|
||||
"""
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
"""Colored console formatter"""
|
||||
|
||||
COLORS = {
|
||||
'DEBUG': '\033[36m', # Cyan
|
||||
'INFO': '\033[32m', # Green
|
||||
'WARNING': '\033[33m', # Yellow
|
||||
'ERROR': '\033[31m', # Red
|
||||
'CRITICAL': '\033[35m', # Magenta
|
||||
'RESET': '\033[0m' # Reset
|
||||
}
|
||||
|
||||
def format(self, record):
|
||||
if hasattr(record, 'levelname'):
|
||||
color = self.COLORS.get(record.levelname, self.COLORS['RESET'])
|
||||
record.levelname = f"{color}{record.levelname}{self.COLORS['RESET']}"
|
||||
return super().format(record)
|
||||
|
||||
def setup_logging(
|
||||
log_level: str = "INFO",
|
||||
log_file: Optional[str] = None,
|
||||
max_file_size: int = 10 * 1024 * 1024, # 10MB
|
||||
backup_count: int = 5,
|
||||
enable_console: bool = True,
|
||||
enable_colors: bool = True
|
||||
) -> logging.Logger:
|
||||
"""
|
||||
Setup comprehensive logging configuration
|
||||
|
||||
Args:
|
||||
log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
|
||||
log_file: Path to log file (optional)
|
||||
max_file_size: Maximum size of log file before rotation
|
||||
backup_count: Number of backup files to keep
|
||||
enable_console: Whether to enable console logging
|
||||
enable_colors: Whether to enable colored console output
|
||||
|
||||
Returns:
|
||||
Configured logger instance
|
||||
"""
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
if log_file:
|
||||
log_dir = os.path.dirname(log_file)
|
||||
if log_dir and not os.path.exists(log_dir):
|
||||
os.makedirs(log_dir)
|
||||
|
||||
# Configure root logger
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(getattr(logging, log_level.upper()))
|
||||
|
||||
# Clear existing handlers
|
||||
logger.handlers.clear()
|
||||
|
||||
# Create formatters
|
||||
detailed_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
|
||||
simple_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(levelname)s - %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
|
||||
# Console handler
|
||||
if enable_console:
|
||||
console_handler = logging.StreamHandler()
|
||||
if enable_colors and os.name != 'nt': # Don't use colors on Windows
|
||||
console_formatter = ColoredFormatter(
|
||||
'%(asctime)s - %(levelname)s - %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
else:
|
||||
console_formatter = simple_formatter
|
||||
|
||||
console_handler.setFormatter(console_formatter)
|
||||
console_handler.setLevel(getattr(logging, log_level.upper()))
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# File handler with rotation
|
||||
if log_file:
|
||||
file_handler = logging.handlers.RotatingFileHandler(
|
||||
log_file,
|
||||
maxBytes=max_file_size,
|
||||
backupCount=backup_count,
|
||||
encoding='utf-8'
|
||||
)
|
||||
file_handler.setFormatter(detailed_formatter)
|
||||
file_handler.setLevel(logging.DEBUG) # Always log everything to file
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# Add performance logger for metrics
|
||||
perf_logger = logging.getLogger('performance')
|
||||
if log_file:
|
||||
perf_file = log_file.replace('.log', '_performance.log')
|
||||
perf_handler = logging.handlers.RotatingFileHandler(
|
||||
perf_file,
|
||||
maxBytes=max_file_size,
|
||||
backupCount=backup_count,
|
||||
encoding='utf-8'
|
||||
)
|
||||
perf_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
perf_handler.setFormatter(perf_formatter)
|
||||
perf_logger.addHandler(perf_handler)
|
||||
perf_logger.setLevel(logging.INFO)
|
||||
perf_logger.propagate = False
|
||||
|
||||
return logger
|
||||
|
||||
def log_performance_metric(operation: str, duration: float, additional_info: Optional[str] = None):
|
||||
"""Log performance metrics"""
|
||||
perf_logger = logging.getLogger('performance')
|
||||
message = f"PERF: {operation} took {duration:.3f}s"
|
||||
if additional_info:
|
||||
message += f" - {additional_info}"
|
||||
perf_logger.info(message)
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""Get a logger with the specified name"""
|
||||
return logging.getLogger(name)
|
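A minimal usage sketch for the logging setup above, assuming the src.logging_config import path; the logs/monitor.log path is only an example:

from src.logging_config import setup_logging, get_logger, log_performance_metric

setup_logging(log_level="INFO", log_file="logs/monitor.log")    # rotating file + colored console
logger = get_logger(__name__)
logger.info("Logging configured")
log_performance_metric("scraping_cycle", 1.234, "16 stations")  # written to logs/monitor_performance.log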
337
src/main.py
Normal file
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Main entry point for the Northern Thailand Ping River Monitor system
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
import signal
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from .config import Config
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .logging_config import setup_logging, get_logger
|
||||
from .exceptions import ConfigurationError, DatabaseConnectionError
|
||||
from .metrics import get_metrics_collector
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
def setup_signal_handlers(scraper: Optional[EnhancedWaterMonitorScraper] = None):
|
||||
"""Setup signal handlers for graceful shutdown"""
|
||||
def signal_handler(signum, frame):
|
||||
logger.info(f"Received signal {signum}, shutting down gracefully...")
|
||||
if scraper:
|
||||
logger.info("Stopping scraper...")
|
||||
sys.exit(0)
|
||||
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
def run_test_cycle():
|
||||
"""Run a single test cycle"""
|
||||
logger.info("Running test cycle...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Run single scraping cycle
|
||||
result = scraper.run_scraping_cycle()
|
||||
|
||||
if result:
|
||||
logger.info("✅ Test cycle completed successfully")
|
||||
|
||||
# Show some statistics
|
||||
latest_data = scraper.get_latest_data(5)
|
||||
if latest_data:
|
||||
logger.info(f"Latest data points: {len(latest_data)}")
|
||||
for data in latest_data[:3]: # Show first 3
|
||||
logger.info(f" • {data['station_code']}: {data['water_level']:.2f}m, {data['discharge']:.1f} cms")
|
||||
else:
|
||||
logger.warning("⚠️ Test cycle completed but no new data was found")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Test cycle failed: {e}")
|
||||
return False
|
||||
|
||||
def run_continuous_monitoring():
|
||||
"""Run continuous monitoring with scheduling"""
|
||||
logger.info("Starting continuous monitoring...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Setup signal handlers
|
||||
setup_signal_handlers(scraper)
|
||||
|
||||
logger.info(f"Monitoring started with {Config.SCRAPING_INTERVAL_HOURS}h interval")
|
||||
logger.info("Press Ctrl+C to stop")
|
||||
|
||||
# Run initial cycle
|
||||
logger.info("Running initial data collection...")
|
||||
scraper.run_scraping_cycle()
|
||||
|
||||
# Start scheduled monitoring
|
||||
import schedule
import time  # used by the scheduler loop below
|
||||
|
||||
schedule.every(Config.SCRAPING_INTERVAL_HOURS).hours.do(scraper.run_scraping_cycle)
|
||||
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(60) # Check every minute
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Monitoring stopped by user")
|
||||
except Exception as e:
|
||||
logger.error(f"Monitoring failed: {e}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def run_gap_filling(days_back: int):
|
||||
"""Run gap filling for missing data"""
|
||||
logger.info(f"Checking for data gaps in the last {days_back} days...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Fill gaps
|
||||
filled_count = scraper.fill_data_gaps(days_back)
|
||||
|
||||
if filled_count > 0:
|
||||
logger.info(f"✅ Filled {filled_count} missing data points")
|
||||
else:
|
||||
logger.info("✅ No data gaps found")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Gap filling failed: {e}")
|
||||
return False
|
||||
|
||||
def run_data_update(days_back: int):
|
||||
"""Update existing data with latest values"""
|
||||
logger.info(f"Updating existing data for the last {days_back} days...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Update data
|
||||
updated_count = scraper.update_existing_data(days_back)
|
||||
|
||||
if updated_count > 0:
|
||||
logger.info(f"✅ Updated {updated_count} data points")
|
||||
else:
|
||||
logger.info("✅ No data updates needed")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Data update failed: {e}")
|
||||
return False
|
||||
|
||||
def run_web_api():
|
||||
"""Run the FastAPI web interface"""
|
||||
logger.info("Starting web API server...")
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
from .web_api import app
|
||||
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Run the server
|
||||
uvicorn.run(
|
||||
app,
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
log_config=None # Use our custom logging
|
||||
)
|
||||
|
||||
except ImportError:
|
||||
logger.error("FastAPI not installed. Run: pip install fastapi uvicorn")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Web API failed: {e}")
|
||||
return False
|
||||
|
||||
def show_status():
|
||||
"""Show current system status"""
|
||||
logger.info("=== Northern Thailand Ping River Monitor Status ===")
|
||||
|
||||
try:
|
||||
# Show configuration
|
||||
Config.print_settings()
|
||||
|
||||
# Test database connection
|
||||
logger.info("\n=== Database Connection Test ===")
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
if scraper.db_adapter:
|
||||
logger.info("✅ Database connection successful")
|
||||
|
||||
# Show latest data
|
||||
latest_data = scraper.get_latest_data(3)
|
||||
if latest_data:
|
||||
logger.info(f"\n=== Latest Data ({len(latest_data)} points) ===")
|
||||
for data in latest_data:
|
||||
timestamp = data['timestamp']
|
||||
if isinstance(timestamp, str):
|
||||
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
|
||||
logger.info(f" • {data['station_code']} ({timestamp}): {data['water_level']:.2f}m")
|
||||
else:
|
||||
logger.info("No data found in database")
|
||||
else:
|
||||
logger.error("❌ Database connection failed")
|
||||
|
||||
# Show metrics if available
|
||||
metrics_collector = get_metrics_collector()
|
||||
metrics = metrics_collector.get_all_metrics()
|
||||
|
||||
if any(metrics.values()):
|
||||
logger.info("\n=== Metrics Summary ===")
|
||||
for metric_type, values in metrics.items():
|
||||
if values:
|
||||
logger.info(f"{metric_type.title()}: {len(values)} metrics")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Status check failed: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
"""Main entry point"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Northern Thailand Ping River Monitor",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
%(prog)s --test # Run single test cycle
|
||||
%(prog)s # Run continuous monitoring
|
||||
%(prog)s --web-api # Start web API server
|
||||
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
||||
%(prog)s --update-data 2 # Update existing data for last 2 days
|
||||
%(prog)s --status # Show system status
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
action="store_true",
|
||||
help="Run a single test cycle"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--web-api",
|
||||
action="store_true",
|
||||
help="Start the web API server"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--fill-gaps",
|
||||
type=int,
|
||||
metavar="DAYS",
|
||||
help="Fill missing data gaps for the specified number of days back"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--update-data",
|
||||
type=int,
|
||||
metavar="DAYS",
|
||||
help="Update existing data for the specified number of days back"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--status",
|
||||
action="store_true",
|
||||
help="Show current system status"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||
default=Config.LOG_LEVEL,
|
||||
help="Set logging level"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-file",
|
||||
default=Config.LOG_FILE,
|
||||
help="Log file path"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
setup_logging(
|
||||
log_level=args.log_level,
|
||||
log_file=args.log_file,
|
||||
enable_console=True,
|
||||
enable_colors=True
|
||||
)
|
||||
|
||||
logger.info("🏔️ Northern Thailand Ping River Monitor starting...")
|
||||
logger.info(f"Version: 3.1.0")
|
||||
logger.info(f"Log level: {args.log_level}")
|
||||
|
||||
try:
|
||||
success = False
|
||||
|
||||
if args.test:
|
||||
success = run_test_cycle()
|
||||
elif args.web_api:
|
||||
success = run_web_api()
|
||||
elif args.fill_gaps is not None:
|
||||
success = run_gap_filling(args.fill_gaps)
|
||||
elif args.update_data is not None:
|
||||
success = run_data_update(args.update_data)
|
||||
elif args.status:
|
||||
success = show_status()
|
||||
else:
|
||||
success = run_continuous_monitoring()
|
||||
|
||||
if success:
|
||||
logger.info("✅ Operation completed successfully")
|
||||
sys.exit(0)
|
||||
else:
|
||||
logger.error("❌ Operation failed")
|
||||
sys.exit(1)
|
||||
|
||||
except ConfigurationError as e:
|
||||
logger.error(f"Configuration error: {e}")
|
||||
sys.exit(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Operation cancelled by user")
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
171
src/metrics.py
Normal file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Metrics collection and monitoring for the water monitoring system
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List
|
||||
from dataclasses import dataclass, field
|
||||
from collections import defaultdict, deque
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
|
||||
class MetricPoint:
|
||||
"""Single metric data point"""
|
||||
timestamp: datetime
|
||||
value: float
|
||||
labels: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
class MetricsCollector:
|
||||
"""Collects and manages application metrics"""
|
||||
|
||||
def __init__(self, retention_hours: int = 24):
|
||||
self.retention_hours = retention_hours
|
||||
self.metrics: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))
|
||||
self.counters: Dict[str, float] = defaultdict(float)
|
||||
self.gauges: Dict[str, float] = defaultdict(float)
|
||||
self.histograms: Dict[str, List[float]] = defaultdict(list)
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# Start cleanup thread
|
||||
self._cleanup_thread = threading.Thread(target=self._cleanup_old_metrics, daemon=True)
|
||||
self._cleanup_thread.start()
|
||||
|
||||
def increment_counter(self, name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
|
||||
"""Increment a counter metric"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.counters[key] += value
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), self.counters[key], labels or {}))
|
||||
|
||||
def set_gauge(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Set a gauge metric"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.gauges[key] = value
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
|
||||
|
||||
def record_histogram(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Record a histogram value"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.histograms[key].append(value)
|
||||
# Keep only recent values
|
||||
if len(self.histograms[key]) > 1000:
|
||||
self.histograms[key] = self.histograms[key][-1000:]
|
||||
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
|
||||
|
||||
def get_counter(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
|
||||
"""Get current counter value"""
|
||||
key = self._make_key(name, labels)
|
||||
return self.counters.get(key, 0.0)
|
||||
|
||||
def get_gauge(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
|
||||
"""Get current gauge value"""
|
||||
key = self._make_key(name, labels)
|
||||
return self.gauges.get(key, 0.0)
|
||||
|
||||
def get_histogram_stats(self, name: str, labels: Optional[Dict[str, str]] = None) -> Dict[str, float]:
|
||||
"""Get histogram statistics"""
|
||||
key = self._make_key(name, labels)
|
||||
values = self.histograms.get(key, [])
|
||||
|
||||
if not values:
|
||||
return {'count': 0, 'sum': 0, 'avg': 0, 'min': 0, 'max': 0}
|
||||
|
||||
return {
|
||||
'count': len(values),
|
||||
'sum': sum(values),
|
||||
'avg': sum(values) / len(values),
|
||||
'min': min(values),
|
||||
'max': max(values)
|
||||
}
|
||||
|
||||
def get_all_metrics(self) -> Dict[str, Any]:
|
||||
"""Get all current metrics"""
|
||||
with self._lock:
|
||||
return {
|
||||
'counters': dict(self.counters),
|
||||
'gauges': dict(self.gauges),
|
||||
'histograms': {k: self.get_histogram_stats(k) for k in self.histograms}
|
||||
}
|
||||
|
||||
def _make_key(self, name: str, labels: Optional[Dict[str, str]]) -> str:
|
||||
"""Create a unique key for metric with labels"""
|
||||
if not labels:
|
||||
return name
|
||||
|
||||
label_str = ','.join(f"{k}={v}" for k, v in sorted(labels.items()))
|
||||
return f"{name}{{{label_str}}}"
|
||||
|
||||
def _cleanup_old_metrics(self):
|
||||
"""Clean up old metric data points"""
|
||||
while True:
|
||||
try:
|
||||
cutoff_time = datetime.now() - timedelta(hours=self.retention_hours)
|
||||
|
||||
with self._lock:
|
||||
for metric_name, points in self.metrics.items():
|
||||
# Remove old points
|
||||
while points and points[0].timestamp < cutoff_time:
|
||||
points.popleft()
|
||||
|
||||
time.sleep(3600) # Run cleanup every hour
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in metrics cleanup: {e}")
|
||||
time.sleep(60) # Wait a minute before retrying
|
||||
|
||||
# Global metrics collector instance
|
||||
_metrics_collector = None
|
||||
|
||||
def get_metrics_collector() -> MetricsCollector:
|
||||
"""Get the global metrics collector instance"""
|
||||
global _metrics_collector
|
||||
if _metrics_collector is None:
|
||||
_metrics_collector = MetricsCollector()
|
||||
return _metrics_collector
|
||||
|
||||
# Convenience functions
|
||||
def increment_counter(name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
|
||||
"""Increment a counter metric"""
|
||||
get_metrics_collector().increment_counter(name, value, labels)
|
||||
|
||||
def set_gauge(name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Set a gauge metric"""
|
||||
get_metrics_collector().set_gauge(name, value, labels)
|
||||
|
||||
def record_histogram(name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Record a histogram value"""
|
||||
get_metrics_collector().record_histogram(name, value, labels)
|
||||
|
||||
class Timer:
|
||||
"""Context manager for timing operations"""
|
||||
|
||||
def __init__(self, metric_name: str, labels: Optional[Dict[str, str]] = None):
|
||||
self.metric_name = metric_name
|
||||
self.labels = labels
|
||||
self.start_time = None
|
||||
|
||||
def __enter__(self):
|
||||
self.start_time = time.time()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if self.start_time:
|
||||
duration = time.time() - self.start_time
|
||||
record_histogram(self.metric_name, duration, self.labels)
|
||||
|
||||
def timer(metric_name: str, labels: Optional[Dict[str, str]] = None):
|
||||
"""Decorator for timing function execution"""
|
||||
def decorator(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
with Timer(metric_name, labels):
|
||||
return func(*args, **kwargs)
|
||||
return wrapper
|
||||
return decorator
|
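A minimal sketch of how the metrics helpers above might be used, assuming the src.metrics import path; the sleeps stand in for real work:

import time
from src.metrics import Timer, timer, increment_counter, set_gauge, get_metrics_collector

increment_counter("api_requests_successful")
set_gauge("database_connected", 1)

with Timer("api_response_time"):     # records the elapsed time into a histogram
    time.sleep(0.05)                 # stand-in for real work

@timer("scraping_cycle_duration")
def run_cycle():
    time.sleep(0.05)

run_cycle()
print(get_metrics_collector().get_all_metrics())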
107
src/models.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data models for the water monitoring system
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
class DatabaseType(Enum):
|
||||
SQLITE = "sqlite"
|
||||
MYSQL = "mysql"
|
||||
POSTGRESQL = "postgresql"
|
||||
INFLUXDB = "influxdb"
|
||||
VICTORIAMETRICS = "victoriametrics"
|
||||
|
||||
class StationStatus(Enum):
|
||||
ACTIVE = "active"
|
||||
INACTIVE = "inactive"
|
||||
MAINTENANCE = "maintenance"
|
||||
ERROR = "error"
|
||||
|
||||
@dataclass
|
||||
class StationInfo:
|
||||
"""Station information model"""
|
||||
station_id: int
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
@dataclass
|
||||
class WaterMeasurement:
|
||||
"""Water measurement data model"""
|
||||
timestamp: datetime
|
||||
station_info: StationInfo
|
||||
water_level: float
|
||||
discharge: float
|
||||
water_level_unit: str = "m"
|
||||
discharge_unit: str = "cms"
|
||||
discharge_percent: Optional[float] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for database storage"""
|
||||
return {
|
||||
'timestamp': self.timestamp,
|
||||
'station_id': self.station_info.station_id,
|
||||
'station_code': self.station_info.station_code,
|
||||
'station_name_en': self.station_info.english_name,
|
||||
'station_name_th': self.station_info.thai_name,
|
||||
'latitude': self.station_info.latitude,
|
||||
'longitude': self.station_info.longitude,
|
||||
'geohash': self.station_info.geohash,
|
||||
'water_level': self.water_level,
|
||||
'water_level_unit': self.water_level_unit,
|
||||
'discharge': self.discharge,
|
||||
'discharge_unit': self.discharge_unit,
|
||||
'discharge_percent': self.discharge_percent,
|
||||
'status': self.status.value
|
||||
}
|
||||
|
||||
@dataclass
|
||||
class DatabaseConfig:
|
||||
"""Database configuration model"""
|
||||
db_type: DatabaseType
|
||||
connection_string: Optional[str] = None
|
||||
host: Optional[str] = None
|
||||
port: Optional[int] = None
|
||||
database: Optional[str] = None
|
||||
username: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
additional_params: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@dataclass
|
||||
class ScrapingResult:
|
||||
"""Result of a scraping operation"""
|
||||
success: bool
|
||||
measurements_count: int
|
||||
error_message: Optional[str] = None
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
processing_time_seconds: Optional[float] = None
|
||||
|
||||
@dataclass
|
||||
class StationCreateRequest:
|
||||
"""Request model for creating a new station"""
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
@dataclass
|
||||
class StationUpdateRequest:
|
||||
"""Request model for updating an existing station"""
|
||||
thai_name: Optional[str] = None
|
||||
english_name: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: Optional[StationStatus] = None
|
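A minimal sketch of constructing the models above, assuming the src.models import path; the water level and discharge values are illustrative:

from datetime import datetime
from src.models import StationInfo, WaterMeasurement

station = StationInfo(station_id=8, station_code="P.1",
                      thai_name="สะพานนวรัฐ", english_name="Nawarat Bridge")
measurement = WaterMeasurement(timestamp=datetime.now(), station_info=station,
                               water_level=2.35, discharge=120.5)  # illustrative values
row = measurement.to_dict()  # flat dict in the shape the database adapters expect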
167
src/rate_limiter.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rate limiting utilities for API requests
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from typing import Any, Dict, Optional
|
||||
from collections import deque
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RateLimiter:
|
||||
"""Token bucket rate limiter"""
|
||||
|
||||
def __init__(self, max_requests: int, time_window_seconds: int):
|
||||
"""
|
||||
Initialize rate limiter
|
||||
|
||||
Args:
|
||||
max_requests: Maximum number of requests allowed
|
||||
time_window_seconds: Time window in seconds
|
||||
"""
|
||||
self.max_requests = max_requests
|
||||
self.time_window = time_window_seconds
|
||||
self.requests = deque()
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def is_allowed(self) -> bool:
|
||||
"""Check if a request is allowed"""
|
||||
with self._lock:
|
||||
now = time.time()
|
||||
|
||||
# Remove old requests outside the time window
|
||||
while self.requests and self.requests[0] <= now - self.time_window:
|
||||
self.requests.popleft()
|
||||
|
||||
# Check if we can make a new request
|
||||
if len(self.requests) < self.max_requests:
|
||||
self.requests.append(now)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def wait_time(self) -> float:
|
||||
"""Get the time to wait before next request is allowed"""
|
||||
with self._lock:
|
||||
if len(self.requests) < self.max_requests:
|
||||
return 0.0
|
||||
|
||||
# Time until the oldest request expires
|
||||
oldest_request = self.requests[0]
|
||||
return max(0.0, (oldest_request + self.time_window) - time.time())
|
||||
|
||||
def wait_if_needed(self):
|
||||
"""Block until a request is allowed"""
|
||||
wait_time = self.wait_time()
|
||||
if wait_time > 0:
|
||||
logger.info(f"Rate limit reached, waiting {wait_time:.2f} seconds")
|
||||
time.sleep(wait_time)
|
||||
|
||||
class AdaptiveRateLimiter:
|
||||
"""Adaptive rate limiter that adjusts based on response times"""
|
||||
|
||||
def __init__(self, initial_rate: float = 1.0, min_rate: float = 0.1, max_rate: float = 10.0):
|
||||
"""
|
||||
Initialize adaptive rate limiter
|
||||
|
||||
Args:
|
||||
initial_rate: Initial requests per second
|
||||
min_rate: Minimum requests per second
|
||||
max_rate: Maximum requests per second
|
||||
"""
|
||||
self.current_rate = initial_rate
|
||||
self.min_rate = min_rate
|
||||
self.max_rate = max_rate
|
||||
self.last_request_time = 0.0
|
||||
self.response_times = deque(maxlen=10)
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def wait_and_record(self, response_time: Optional[float] = None):
|
||||
"""Wait for rate limit and record response time"""
|
||||
with self._lock:
|
||||
now = time.time()
|
||||
|
||||
# Calculate wait time based on current rate
|
||||
time_since_last = now - self.last_request_time
|
||||
min_interval = 1.0 / self.current_rate
|
||||
|
||||
if time_since_last < min_interval:
|
||||
wait_time = min_interval - time_since_last
|
||||
time.sleep(wait_time)
|
||||
now = time.time()
|
||||
|
||||
self.last_request_time = now
|
||||
|
||||
# Record response time and adjust rate
|
||||
if response_time is not None:
|
||||
self.response_times.append(response_time)
|
||||
self._adjust_rate()
|
||||
|
||||
def _adjust_rate(self):
|
||||
"""Adjust rate based on recent response times"""
|
||||
if len(self.response_times) < 3:
|
||||
return
|
||||
|
||||
avg_response_time = sum(self.response_times) / len(self.response_times)
|
||||
|
||||
# Decrease rate if responses are slow
|
||||
if avg_response_time > 5.0: # 5 seconds
|
||||
self.current_rate = max(self.min_rate, self.current_rate * 0.8)
|
||||
logger.info(f"Decreased rate to {self.current_rate:.2f} req/s due to slow responses")
|
||||
|
||||
# Increase rate if responses are fast
|
||||
elif avg_response_time < 1.0: # 1 second
|
||||
self.current_rate = min(self.max_rate, self.current_rate * 1.1)
|
||||
logger.debug(f"Increased rate to {self.current_rate:.2f} req/s")
|
||||
|
||||
class RequestTracker:
|
||||
"""Track API request statistics"""
|
||||
|
||||
def __init__(self):
|
||||
self.total_requests = 0
|
||||
self.successful_requests = 0
|
||||
self.failed_requests = 0
|
||||
self.total_response_time = 0.0
|
||||
self.last_request_time = None
|
||||
self.error_count_by_type = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def record_request(self, success: bool, response_time: float, error_type: Optional[str] = None):
|
||||
"""Record a request"""
|
||||
with self._lock:
|
||||
self.total_requests += 1
|
||||
self.total_response_time += response_time
|
||||
self.last_request_time = datetime.now()
|
||||
|
||||
if success:
|
||||
self.successful_requests += 1
|
||||
else:
|
||||
self.failed_requests += 1
|
||||
if error_type:
|
||||
self.error_count_by_type[error_type] = self.error_count_by_type.get(error_type, 0) + 1
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get request statistics"""
|
||||
with self._lock:
|
||||
if self.total_requests == 0:
|
||||
return {
|
||||
'total_requests': 0,
|
||||
'success_rate': 0.0,
|
||||
'average_response_time': 0.0,
|
||||
'last_request_time': None,
|
||||
'error_breakdown': {}
|
||||
}
|
||||
|
||||
return {
|
||||
'total_requests': self.total_requests,
|
||||
'successful_requests': self.successful_requests,
|
||||
'failed_requests': self.failed_requests,
|
||||
'success_rate': self.successful_requests / self.total_requests,
|
||||
'average_response_time': self.total_response_time / self.total_requests,
|
||||
'last_request_time': self.last_request_time.isoformat() if self.last_request_time else None,
|
||||
'error_breakdown': dict(self.error_count_by_type)
|
||||
}
|
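A minimal usage sketch for the rate limiter and request tracker above, assuming the src.rate_limiter import path; the success flag stands in for the outcome of a real API call:

import time
from src.rate_limiter import RateLimiter, RequestTracker

limiter = RateLimiter(max_requests=10, time_window_seconds=60)  # same budget the scraper uses
tracker = RequestTracker()

limiter.wait_if_needed()            # blocks until a request slot is free
start = time.time()
success = True                      # stand-in for a real request outcome
tracker.record_request(success, time.time() - start)
print(tracker.get_stats()["success_rate"])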
116
src/validators.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data validation utilities for the water monitoring system
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
import logging
|
||||
from .exceptions import DataValidationError
|
||||
from .models import WaterMeasurement, StationInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DataValidator:
|
||||
"""Validates water measurement data"""
|
||||
|
||||
# Reasonable ranges for water measurements
|
||||
WATER_LEVEL_MIN = -10.0 # meters
|
||||
WATER_LEVEL_MAX = 50.0 # meters
|
||||
DISCHARGE_MIN = 0.0 # cms
|
||||
DISCHARGE_MAX = 10000.0 # cms
|
||||
DISCHARGE_PERCENT_MIN = 0.0
|
||||
DISCHARGE_PERCENT_MAX = 200.0 # Allow some overflow
|
||||
|
||||
@classmethod
|
||||
def validate_measurement(cls, measurement: Dict[str, Any]) -> bool:
|
||||
"""Validate a single measurement"""
|
||||
try:
|
||||
# Check required fields
|
||||
required_fields = ['timestamp', 'station_id', 'water_level', 'discharge']
|
||||
for field in required_fields:
|
||||
if field not in measurement:
|
||||
logger.warning(f"Missing required field: {field}")
|
||||
return False
|
||||
|
||||
# Validate timestamp
|
||||
if not isinstance(measurement['timestamp'], datetime):
|
||||
logger.warning(f"Invalid timestamp type: {type(measurement['timestamp'])}")
|
||||
return False
|
||||
|
||||
# Validate water level
|
||||
water_level = float(measurement['water_level'])
|
||||
if not (cls.WATER_LEVEL_MIN <= water_level <= cls.WATER_LEVEL_MAX):
|
||||
logger.warning(f"Water level out of range: {water_level}")
|
||||
return False
|
||||
|
||||
# Validate discharge
|
||||
discharge = float(measurement['discharge'])
|
||||
if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX):
|
||||
logger.warning(f"Discharge out of range: {discharge}")
|
||||
return False
|
||||
|
||||
# Validate discharge percent if present
|
||||
if measurement.get('discharge_percent') is not None:
|
||||
discharge_percent = float(measurement['discharge_percent'])
|
||||
if not (cls.DISCHARGE_PERCENT_MIN <= discharge_percent <= cls.DISCHARGE_PERCENT_MAX):
|
||||
logger.warning(f"Discharge percent out of range: {discharge_percent}")
|
||||
return False
|
||||
|
||||
# Validate station ID
|
||||
station_id = measurement['station_id']
|
||||
if not isinstance(station_id, int) or station_id < 1 or station_id > 16:
|
||||
logger.warning(f"Invalid station ID: {station_id}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Data validation error: {e}")
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def validate_measurements(cls, measurements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Validate and filter a list of measurements"""
|
||||
valid_measurements = []
|
||||
invalid_count = 0
|
||||
|
||||
for measurement in measurements:
|
||||
if cls.validate_measurement(measurement):
|
||||
valid_measurements.append(measurement)
|
||||
else:
|
||||
invalid_count += 1
|
||||
|
||||
if invalid_count > 0:
|
||||
logger.warning(f"Filtered out {invalid_count} invalid measurements")
|
||||
|
||||
return valid_measurements
|
||||
|
||||
@classmethod
|
||||
def validate_station_info(cls, station_info: Dict[str, Any]) -> bool:
|
||||
"""Validate station information"""
|
||||
try:
|
||||
required_fields = ['station_id', 'station_code', 'thai_name', 'english_name']
|
||||
for field in required_fields:
|
||||
if field not in station_info or not station_info[field]:
|
||||
logger.warning(f"Missing or empty station field: {field}")
|
||||
return False
|
||||
|
||||
# Validate coordinates if present
|
||||
if station_info.get('latitude') is not None:
|
||||
lat = float(station_info['latitude'])
|
||||
if not (-90 <= lat <= 90):
|
||||
logger.warning(f"Invalid latitude: {lat}")
|
||||
return False
|
||||
|
||||
if station_info.get('longitude') is not None:
|
||||
lon = float(station_info['longitude'])
|
||||
if not (-180 <= lon <= 180):
|
||||
logger.warning(f"Invalid longitude: {lon}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Station validation error: {e}")
|
||||
return False
|
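A minimal sketch of validating a single measurement with the DataValidator above, assuming the src.validators import path; the values are illustrative:

from datetime import datetime
from src.validators import DataValidator

measurement = {
    "timestamp": datetime.now(),
    "station_id": 8,
    "water_level": 2.35,   # metres, inside the -10..50 range checked above
    "discharge": 120.5,    # cms, inside the 0..10000 range checked above
}
print(DataValidator.validate_measurement(measurement))        # True
valid_rows = DataValidator.validate_measurements([measurement])  # filters out invalid rows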
539
src/water_scraper_v3.py
Normal file
@@ -0,0 +1,539 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced Water Monitor Scraper with multiple database backend support
|
||||
"""
|
||||
|
||||
import requests
|
||||
import datetime
|
||||
import time
|
||||
import schedule
|
||||
import json
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
try:
|
||||
from .database_adapters import create_database_adapter, DatabaseAdapter
|
||||
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationStatus
|
||||
from .validators import DataValidator
|
||||
from .exceptions import APIConnectionError, DataValidationError, DatabaseConnectionError
|
||||
from .metrics import increment_counter, set_gauge, record_histogram, Timer
|
||||
from .rate_limiter import RateLimiter, RequestTracker
|
||||
from .logging_config import get_logger
|
||||
except ImportError:
|
||||
# Handle case when running as standalone script
|
||||
from database_adapters import create_database_adapter, DatabaseAdapter
|
||||
import logging
|
||||
|
||||
def get_logger(name):
|
||||
return logging.getLogger(name)
|
||||
|
||||
def increment_counter(*args, **kwargs):
|
||||
pass
|
||||
|
||||
def set_gauge(*args, **kwargs):
|
||||
pass
|
||||
|
||||
def record_histogram(*args, **kwargs):
|
||||
pass
|
||||
|
||||
class Timer:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *args):
|
||||
pass
|
||||
|
||||
class RateLimiter:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
def wait_if_needed(self):
|
||||
pass
|
||||
|
||||
class RequestTracker:
|
||||
def __init__(self):
|
||||
pass
|
||||
def record_request(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
class DataValidator:
|
||||
@staticmethod
|
||||
def validate_measurements(measurements):
|
||||
return measurements
|
||||
|
||||
# Get logger instance
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class EnhancedWaterMonitorScraper:
|
||||
def __init__(self, db_config: Dict):
|
||||
"""
|
||||
Initialize scraper with database configuration
|
||||
|
||||
Args:
|
||||
db_config: Database configuration dictionary
|
||||
"""
|
||||
self.api_url = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
|
||||
self.db_config = db_config.copy() # Make a copy to avoid modifying original
|
||||
self.db_adapter = None
|
||||
|
||||
# Scheduler state tracking
|
||||
self.last_successful_update = None
|
||||
self.retry_mode = False
|
||||
self.next_hourly_check = None
|
||||
|
||||
# Rate limiting and request tracking
|
||||
self.rate_limiter = RateLimiter(max_requests=10, time_window_seconds=60)
|
||||
self.request_tracker = RequestTracker()
|
||||
|
||||
# HTTP session for API requests
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
# Station mapping with correct names and geolocation data
|
||||
self.station_mapping = {
|
||||
'1': {
|
||||
'code': 'P.20',
|
||||
'thai_name': 'บ้านเชียงดาว',
|
||||
'english_name': 'Ban Chiang Dao',
|
||||
'latitude': 19.36731448032191,
|
||||
'longitude': 98.9688487015384,
|
||||
'geohash': None
|
||||
},
|
||||
'2': {
|
||||
'code': 'P.75',
|
||||
'thai_name': 'บ้านช่อแล',
|
||||
'english_name': 'Ban Chai Lat',
|
||||
'latitude': 19.145972935976225,
|
||||
'longitude': 99.00735727149247,
|
||||
'geohash': None
|
||||
},
|
||||
'3': {
|
||||
'code': 'P.92',
|
||||
'thai_name': 'บ้านเมืองกึ๊ด',
|
||||
'english_name': 'Ban Muang Aut',
|
||||
'latitude': 19.220518985435646,
|
||||
'longitude': 98.84733127007874,
|
||||
'geohash': None
|
||||
},
|
||||
'4': {
|
||||
'code': 'P.4A',
|
||||
'thai_name': 'บ้านแม่แตง',
|
||||
'english_name': 'Ban Mae Taeng',
|
||||
'latitude': 19.1222679952378,
|
||||
'longitude': 98.94437462084075,
|
||||
'geohash': None
|
||||
},
|
||||
'5': {
|
||||
'code': 'P.67',
|
||||
'thai_name': 'บ้านแม่แต',
|
||||
'english_name': 'Ban Tae',
|
||||
'latitude': 19.009762080002453,
|
||||
'longitude': 98.95978297135508,
|
||||
'geohash': None
|
||||
},
|
||||
'6': {
|
||||
'code': 'P.21',
|
||||
'thai_name': 'บ้านริมใต้',
|
||||
'english_name': 'Ban Rim Tai',
|
||||
'latitude': 18.917459157963293,
|
||||
'longitude': 98.97018092996231,
|
||||
'geohash': None
|
||||
},
|
||||
'7': {
|
||||
'code': 'P.103',
|
||||
'thai_name': 'สะพานวงแหวนรอบ 3',
|
||||
'english_name': 'Ring Bridge 3',
|
||||
'latitude': 18.86664807441675,
|
||||
'longitude': 98.9781107622432,
|
||||
'geohash': None
|
||||
},
|
||||
'8': {
|
||||
'code': 'P.1',
|
||||
'thai_name': 'สะพานนวรัฐ',
|
||||
'english_name': 'Nawarat Bridge',
|
||||
'latitude': 18.7875,
|
||||
'longitude': 99.0045,
|
||||
'geohash': 'w5q6uuhvfcfp25'
|
||||
},
|
||||
'9': {
|
||||
'code': 'P.82',
|
||||
'thai_name': 'บ้านสบวิน',
|
||||
'english_name': 'Ban Sob win',
|
||||
'latitude': 18.6519444,
|
||||
'longitude': 98.69,
|
||||
'geohash': None
|
||||
},
|
||||
'10': {
|
||||
'code': 'P.84',
|
||||
'thai_name': 'บ้านพันตน',
|
||||
'english_name': 'Ban Panton',
|
||||
'latitude': 18.591315274591334,
|
||||
'longitude': 98.79657058508496,
|
||||
'geohash': None
|
||||
},
|
||||
'11': {
|
||||
'code': 'P.81',
|
||||
'thai_name': 'บ้านโป่ง',
|
||||
'english_name': 'Ban Pong',
|
||||
'latitude': 13.805661820610888,
|
||||
'longitude': 99.87174946122846,
|
||||
'geohash': None
|
||||
},
|
||||
'12': {
|
||||
'code': 'P.5',
|
||||
'thai_name': 'สะพานท่านาง',
|
||||
'english_name': 'Tha Nang Bridge',
|
||||
'latitude': 18.580269437546555,
|
||||
'longitude': 99.01021397084362,
|
||||
'geohash': None
|
||||
},
|
||||
'13': {
|
||||
'code': 'P.77',
|
||||
'thai_name': 'บ้านสบแม่สะป๊วด',
|
||||
'english_name': 'Baan Sop Mae Sapuord',
|
||||
'latitude': 18.433347475179602,
|
||||
'longitude': 99.08510036666527,
|
||||
'geohash': None
|
||||
},
|
||||
'14': {
|
||||
'code': 'P.87',
|
||||
'thai_name': 'บ้านป่าซาง',
|
||||
'english_name': 'Ban Pa Sang',
|
||||
'latitude': 18.519121825282486,
|
||||
'longitude': 98.94224374138238,
|
||||
'geohash': None
|
||||
},
|
||||
'15': {
|
||||
'code': 'P.76',
|
||||
'thai_name': 'บ้านแม่อีไฮ',
|
||||
'english_name': 'Ban Mae I Hai',
|
||||
'latitude': 18.141465831254404,
|
||||
'longitude': 98.89642508267181,
|
||||
'geohash': None
|
||||
},
|
||||
'16': {
|
||||
'code': 'P.85',
|
||||
'thai_name': 'บ้านหล่ายแก้ว',
|
||||
'english_name': 'Baan Lai Kaew',
|
||||
'latitude': 18.17856361002219,
|
||||
'longitude': 98.63023114782287,
|
||||
'geohash': None
|
||||
}
|
||||
}
|
||||
|
||||
self.init_database()
|
||||
|
||||
def init_database(self):
|
||||
"""Initialize database connection"""
|
||||
try:
|
||||
# Extract db_type and pass remaining config as kwargs
|
||||
db_config_copy = self.db_config.copy()
|
||||
db_type = db_config_copy.pop('type')
|
||||
self.db_adapter = create_database_adapter(db_type, **db_config_copy)
|
||||
success = self.db_adapter.connect()
|
||||
|
||||
if success:
|
||||
logger.info(f"Successfully connected to {db_type.upper()} database")
|
||||
set_gauge("database_connected", 1)
|
||||
increment_counter("database_connections_successful")
|
||||
else:
|
||||
logger.error(f"Failed to connect to {db_type.upper()} database")
|
||||
set_gauge("database_connected", 0)
|
||||
increment_counter("database_connections_failed")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing database: {e}")
|
||||
set_gauge("database_connected", 0)
|
||||
increment_counter("database_connections_failed")
|
||||
self.db_adapter = None
|
||||
|
||||
def fetch_water_data_for_date(self, target_date: datetime.datetime) -> Optional[List[Dict]]:
|
||||
"""Fetch water levels and discharge data from API for a specific date"""
|
||||
with Timer("api_request_duration"):
|
||||
try:
|
||||
logger.info(f"Starting data fetch from API for date: {target_date.strftime('%Y-%m-%d')}")
|
||||
|
||||
# Rate limiting
|
||||
self.rate_limiter.wait_if_needed()
|
||||
|
||||
# Create Thai format date (Buddhist calendar)
|
||||
thai_year = target_date.year + 543
|
||||
thai_date = f"{target_date.day:02d}/{target_date.month:02d}/{thai_year}"
|
||||
|
||||
# API parameters
|
||||
payload = {
|
||||
'DW[UtokID]': '1',
|
||||
'DW[BasinID]': '6',
|
||||
'DW[TimeCurrent]': thai_date,
|
||||
'_search': 'false',
|
||||
'nd': str(int(time.time() * 1000)),
|
||||
'rows': '100',
|
||||
'page': '1',
|
||||
'sidx': 'indexhourly',
|
||||
'sord': 'asc'
|
||||
}
|
||||
|
||||
logger.debug(f"API parameters: {payload}")
|
||||
|
||||
# POST request to API
|
||||
start_time = time.time()
|
||||
response = self.session.post(self.api_url, data=payload, timeout=30)
|
||||
response_time = time.time() - start_time
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
# Record successful request
|
||||
self.request_tracker.record_request(True, response_time)
|
||||
increment_counter("api_requests_successful")
|
||||
record_histogram("api_response_time", response_time)
|
||||
|
||||
# Parse JSON response
|
||||
try:
|
||||
json_data = response.json()
|
||||
logger.debug(f"API response received: {len(str(json_data))} characters")
|
||||
except ValueError as e:
|
||||
logger.error(f"Error parsing JSON response: {e}")
|
||||
self.request_tracker.record_request(False, response_time, "json_parse_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
|
||||
water_data = []
|
||||
|
||||
# Parse JSON data
|
||||
if json_data and isinstance(json_data, dict) and 'rows' in json_data:
|
||||
for row in json_data['rows']:
|
||||
try:
|
||||
# Parse timestamp
|
||||
time_str = row.get('hourlytime', '')
|
||||
if not time_str:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Format: "1.00", "2.00", ..., "24.00"
|
||||
api_hour = int(float(time_str))
|
||||
if api_hour < 1 or api_hour > 24:
|
||||
continue
|
||||
|
||||
if api_hour == 24:
|
||||
# Hour 24 = midnight (00:00) of the next day
|
||||
data_time = target_date.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
data_time = data_time + datetime.timedelta(days=1)
|
||||
else:
|
||||
# Hours 1-23 = 01:00-23:00 of the same day
|
||||
data_time = target_date.replace(hour=api_hour, minute=0, second=0, microsecond=0)
|
||||
|
||||
except (ValueError, IndexError):
|
||||
logger.warning(f"Could not parse timestamp: {time_str}")
|
||||
continue
|
||||
|
||||
# Parse all water levels and discharge values
|
||||
station_count = 0
|
||||
for station_num in range(1, 17): # Stations 1-16
|
||||
wl_key = f'wlvalues{station_num}'
|
||||
q_key = f'qvalues{station_num}'
|
||||
qp_key = f'QPercent{station_num}'
|
||||
|
||||
# Check if both water level and discharge data exist
|
||||
if wl_key in row and q_key in row:
|
||||
try:
|
||||
water_level = row[wl_key]
|
||||
discharge = row[q_key]
|
||||
discharge_percent = row.get(qp_key)
|
||||
|
||||
# Skip if values are None or invalid
|
||||
if water_level is None or discharge is None:
|
||||
continue
|
||||
|
||||
# Convert to float
|
||||
water_level = float(water_level)
|
||||
discharge = float(discharge)
|
||||
discharge_percent = float(discharge_percent) if discharge_percent is not None else None
|
||||
|
||||
station_info = self.station_mapping.get(str(station_num), {
|
||||
'code': f'P.{19+station_num}',
|
||||
'thai_name': f'Station {station_num}',
|
||||
'english_name': f'Station {station_num}'
|
||||
})
|
||||
|
||||
water_data.append({
|
||||
'timestamp': data_time,
|
||||
'station_id': station_num,
|
||||
'station_code': station_info['code'],
|
||||
'station_name_en': station_info['english_name'],
|
||||
'station_name_th': station_info['thai_name'],
|
||||
'latitude': station_info.get('latitude'),
|
||||
'longitude': station_info.get('longitude'),
|
||||
'geohash': station_info.get('geohash'),
|
||||
'water_level': water_level,
|
||||
'water_level_unit': 'm',
|
||||
'discharge': discharge,
|
||||
'discharge_unit': 'cms',
|
||||
'discharge_percent': discharge_percent,
|
||||
'status': 'active'
|
||||
})
|
||||
|
||||
station_count += 1
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Could not parse data for station {station_num}: {e}")
|
||||
continue
|
||||
|
||||
logger.debug(f"Processed {station_count} stations for time {time_str}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing data row: {e}")
|
||||
continue
|
||||
|
||||
# Validate data
|
||||
water_data = DataValidator.validate_measurements(water_data)
|
||||
|
||||
logger.info(f"Successfully fetched {len(water_data)} data points from API for {target_date.strftime('%Y-%m-%d')}")
|
||||
return water_data
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Network error fetching API data: {e}")
|
||||
self.request_tracker.record_request(False, 0, "network_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error fetching API data: {e}")
|
||||
self.request_tracker.record_request(False, 0, "unexpected_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
|
||||
def fetch_water_data(self) -> Optional[List[Dict]]:
|
||||
"""Fetch water levels and discharge data from API for current date"""
|
||||
current_date = datetime.datetime.now()
|
||||
return self.fetch_water_data_for_date(current_date)
|
||||
|
||||
def save_to_database(self, water_data: List[Dict], max_retries: int = 3) -> bool:
|
||||
"""Save water measurements to database with retry logic"""
|
||||
if not self.db_adapter:
|
||||
logger.error("Database adapter not initialized")
|
||||
return False
|
||||
|
||||
if not water_data:
|
||||
logger.warning("No data to save")
|
||||
return False
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
success = self.db_adapter.save_measurements(water_data)
|
||||
if success:
|
||||
logger.info(f"Successfully saved {len(water_data)} measurements to database")
|
||||
increment_counter("database_saves_successful")
|
||||
set_gauge("last_save_timestamp", time.time())
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Save attempt {attempt + 1} failed, retrying...")
|
||||
|
||||
except Exception as e:
|
||||
if "database is locked" in str(e).lower() and attempt < max_retries - 1:
|
||||
logger.warning(f"Database locked on attempt {attempt + 1}, retrying in {2 ** attempt} seconds...")
|
||||
time.sleep(2 ** attempt) # Exponential backoff
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Error saving to database (attempt {attempt + 1}): {e}")
|
||||
if attempt == max_retries - 1:
|
||||
increment_counter("database_saves_failed")
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def get_latest_data(self, limit: int = 100) -> List[Dict]:
|
||||
"""Get latest data from database"""
|
||||
if not self.db_adapter:
|
||||
return []
|
||||
|
||||
try:
|
||||
return self.db_adapter.get_latest_measurements(limit=limit)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting latest data: {e}")
|
||||
return []
|
||||
|
||||
def run_scraping_cycle(self) -> bool:
|
||||
"""Run a complete scraping cycle"""
|
||||
logger.info("Starting scraping cycle...")
|
||||
|
||||
try:
|
||||
# Fetch current data
|
||||
water_data = self.fetch_water_data()
|
||||
if water_data:
|
||||
success = self.save_to_database(water_data)
|
||||
if success:
|
||||
logger.info("Scraping cycle completed successfully")
|
||||
increment_counter("scraping_cycles_successful")
|
||||
return True
|
||||
else:
|
||||
logger.error("Failed to save data")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
else:
|
||||
logger.warning("No data fetched")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Scraping cycle failed: {e}")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
|
||||
# Main execution for standalone usage
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
# Configure basic logging for standalone usage
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler('water_monitor.log'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Northern Thailand Ping River Monitor")
|
||||
parser.add_argument("--test", action="store_true", help="Run single test cycle")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Default SQLite configuration
|
||||
db_config = {
|
||||
'type': 'sqlite',
|
||||
'connection_string': 'sqlite:///water_levels.db'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
if args.test:
|
||||
logger.info("Running test cycle...")
|
||||
result = scraper.run_scraping_cycle()
|
||||
if result:
|
||||
logger.info("✅ Test completed successfully")
|
||||
sys.exit(0)
|
||||
else:
|
||||
logger.error("❌ Test failed")
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.info("Starting continuous monitoring...")
|
||||
schedule.every(1).hours.do(scraper.run_scraping_cycle)
|
||||
|
||||
# Run initial cycle
|
||||
scraper.run_scraping_cycle()
|
||||
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(60)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Monitoring stopped by user")
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
sys.exit(1)
|
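A minimal sketch of driving the scraper above directly, using the same SQLite configuration as the standalone block; keys in the returned rows follow the to_dict() layout from src/models.py:

from src.water_scraper_v3 import EnhancedWaterMonitorScraper

db_config = {"type": "sqlite", "connection_string": "sqlite:///water_levels.db"}
scraper = EnhancedWaterMonitorScraper(db_config)

if scraper.run_scraping_cycle():                  # fetch from the RID API, validate, save
    for row in scraper.get_latest_data(3):
        print(row["station_code"], row["water_level"], row["discharge"])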
620
src/web_api.py
Normal file
@@ -0,0 +1,620 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FastAPI web interface for the water monitoring system
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict, Any, Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .config import Config
|
||||
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationCreateRequest, StationUpdateRequest, StationStatus
|
||||
from .health_check import HealthCheckManager, DatabaseHealthCheck, APIHealthCheck, MemoryHealthCheck
|
||||
from .metrics import get_metrics_collector, increment_counter, set_gauge
|
||||
from .logging_config import setup_logging, get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Pydantic models for API responses
|
||||
class StationResponse(BaseModel):
|
||||
station_id: int
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: str = "active"
|
||||
|
||||
class StationCreateModel(BaseModel):
|
||||
station_code: str = Field(..., description="Station code (e.g., P.1, P.20)")
|
||||
thai_name: str = Field(..., description="Thai name of the station")
|
||||
english_name: str = Field(..., description="English name of the station")
|
||||
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
|
||||
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
|
||||
geohash: Optional[str] = Field(None, description="Geohash for the location")
|
||||
status: str = Field("active", description="Station status")
|
||||
|
||||
class StationUpdateModel(BaseModel):
|
||||
thai_name: Optional[str] = Field(None, description="Thai name of the station")
|
||||
english_name: Optional[str] = Field(None, description="English name of the station")
|
||||
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
|
||||
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
|
||||
geohash: Optional[str] = Field(None, description="Geohash for the location")
|
||||
status: Optional[str] = Field(None, description="Station status")
|
||||
|
||||
class MeasurementResponse(BaseModel):
|
||||
timestamp: datetime
|
||||
station_code: str
|
||||
station_name_en: str
|
||||
station_name_th: str
|
||||
water_level: float
|
||||
discharge: float
|
||||
discharge_percent: Optional[float] = None
|
||||
status: str = "active"
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
overall_status: str
|
||||
timestamp: str
|
||||
checks: Dict[str, Dict[str, Any]]
|
||||
|
||||
class MetricsResponse(BaseModel):
|
||||
counters: Dict[str, float]
|
||||
gauges: Dict[str, float]
|
||||
histograms: Dict[str, Dict[str, float]]
|
||||
|
||||
class ScrapingStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
last_run: Optional[datetime] = None
|
||||
next_run: Optional[datetime] = None
|
||||
total_runs: int = 0
|
||||
successful_runs: int = 0
|
||||
failed_runs: int = 0
|
||||
|
||||
# Global application state
|
||||
app_state = {
|
||||
"scraper": None,
|
||||
"health_manager": None,
|
||||
"scraping_task": None,
|
||||
"is_scraping": False,
|
||||
"scraping_stats": {
|
||||
"total_runs": 0,
|
||||
"successful_runs": 0,
|
||||
"failed_runs": 0,
|
||||
"last_run": None,
|
||||
"next_run": None
|
||||
}
|
||||
}
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager"""
    # Startup
    logger.info("Starting Water Monitor API...")

    # Initialize configuration
    try:
        Config.validate_config()
        logger.info("Configuration validated successfully")
    except Exception as e:
        logger.error(f"Configuration validation failed: {e}")
        raise

    # Initialize scraper
    db_config = Config.get_database_config()
    app_state["scraper"] = EnhancedWaterMonitorScraper(db_config)

    # Initialize health checks
    health_manager = HealthCheckManager()
    health_manager.add_check(DatabaseHealthCheck(app_state["scraper"].db_adapter))
    health_manager.add_check(APIHealthCheck(Config.API_URL, app_state["scraper"].session))
    health_manager.add_check(MemoryHealthCheck(max_memory_mb=1000))
    app_state["health_manager"] = health_manager

    # Start background scraping task
    app_state["scraping_task"] = asyncio.create_task(background_scraping_task())

    logger.info("Water Monitor API started successfully")

    yield

    # Shutdown
    logger.info("Shutting down Water Monitor API...")

    if app_state["scraping_task"]:
        app_state["scraping_task"].cancel()
        try:
            await app_state["scraping_task"]
        except asyncio.CancelledError:
            pass

    logger.info("Water Monitor API shutdown complete")

# Create FastAPI app
app = FastAPI(
    title="Northern Thailand Ping River Monitor API",
    description="Real-time water level monitoring system for Northern Thailand's Ping River Basin stations",
    version="3.1.0",
    lifespan=lifespan
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

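# Note: the wildcard CORS settings above are convenient for development. A
# hardened deployment would typically pin the allowed origins and methods down,
# along these lines (the origin URL is a hypothetical example):
#
#   app.add_middleware(
#       CORSMiddleware,
#       allow_origins=["https://monitor.example.org"],
#       allow_credentials=True,
#       allow_methods=["GET", "POST", "PUT", "DELETE"],
#       allow_headers=["*"],
#   )
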
async def background_scraping_task():
    """Background task for periodic data scraping"""
    while True:
        try:
            if not app_state["is_scraping"]:
                app_state["is_scraping"] = True

                # Run scraping cycle
                scraper = app_state["scraper"]
                if scraper:
                    logger.info("Starting background scraping cycle")
                    start_time = datetime.now()

                    try:
                        result = scraper.run_scraping_cycle()

                        # Update stats
                        app_state["scraping_stats"]["total_runs"] += 1
                        app_state["scraping_stats"]["last_run"] = start_time

                        if result:
                            app_state["scraping_stats"]["successful_runs"] += 1
                            increment_counter("scraping_cycles_successful")
                            logger.info("Background scraping cycle completed successfully")
                        else:
                            app_state["scraping_stats"]["failed_runs"] += 1
                            increment_counter("scraping_cycles_failed")
                            logger.warning("Background scraping cycle completed with no new data")

                        # Update metrics
                        set_gauge("last_scraping_timestamp", start_time.timestamp())

                    except Exception as e:
                        app_state["scraping_stats"]["failed_runs"] += 1
                        increment_counter("scraping_cycles_failed")
                        logger.error(f"Background scraping cycle failed: {e}")

                app_state["is_scraping"] = False

            # Calculate next run time
            interval_seconds = Config.SCRAPING_INTERVAL_HOURS * 3600
            app_state["scraping_stats"]["next_run"] = datetime.now() + timedelta(seconds=interval_seconds)

            # Wait for next cycle
            await asyncio.sleep(interval_seconds)

        except asyncio.CancelledError:
            logger.info("Background scraping task cancelled")
            break
        except Exception as e:
            logger.error(f"Error in background scraping task: {e}")
            await asyncio.sleep(60)  # Wait a minute before retrying

# API Routes

@app.get("/", response_class=HTMLResponse)
async def root():
    """Root endpoint with basic dashboard"""
    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Northern Thailand Ping River Monitor</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            .header { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
            .section { margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }
            .status-healthy { color: #27ae60; }
            .status-degraded { color: #f39c12; }
            .status-unhealthy { color: #e74c3c; }
            .endpoint { background: #f8f9fa; padding: 10px; margin: 5px 0; border-radius: 3px; }
            .endpoint code { color: #2c3e50; }
        </style>
    </head>
    <body>
        <div class="header">
            <h1>🏔️ Northern Thailand Ping River Monitor API</h1>
            <p>Real-time water level monitoring system for the Ping River Basin in Northern Thailand</p>
        </div>

        <div class="section">
            <h2>📊 Quick Status</h2>
            <p>API is running and monitoring 16 water stations along the Ping River</p>
            <p>Coverage: From Chiang Dao to Nakhon Sawan</p>
            <p>Data collection interval: Every hour</p>
        </div>

        <div class="section">
            <h2>🔗 API Endpoints</h2>
            <div class="endpoint"><code>GET /health</code> - System health status</div>
            <div class="endpoint"><code>GET /metrics</code> - Application metrics</div>
            <div class="endpoint"><code>GET /stations</code> - List all monitoring stations</div>
            <div class="endpoint"><code>POST /stations</code> - Add new monitoring station</div>
            <div class="endpoint"><code>PUT /stations/{station_id}</code> - Update station information</div>
            <div class="endpoint"><code>GET /measurements/latest</code> - Latest measurements</div>
            <div class="endpoint"><code>GET /measurements/station/{station_code}</code> - Station-specific data</div>
            <div class="endpoint"><code>POST /scrape/trigger</code> - Trigger manual data collection</div>
            <div class="endpoint"><code>GET /scraping/status</code> - Scraping status</div>
            <div class="endpoint"><code>GET /docs</code> - Interactive API documentation</div>
        </div>

        <div class="section">
            <h2>📈 Monitoring</h2>
            <p>• Grafana dashboards available for data visualization</p>
            <p>• Health checks monitor database, API, and system resources</p>
            <p>• Metrics collection for performance monitoring</p>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)

@app.get("/health", response_model=HealthResponse)
|
||||
async def get_health():
|
||||
"""Get system health status"""
|
||||
increment_counter("api_requests", labels={"endpoint": "health"})
|
||||
|
||||
health_manager = app_state["health_manager"]
|
||||
if not health_manager:
|
||||
raise HTTPException(status_code=503, detail="Health manager not initialized")
|
||||
|
||||
# Run health checks
|
||||
results = health_manager.run_all_checks()
|
||||
summary = health_manager.get_health_summary()
|
||||
|
||||
return HealthResponse(**summary)
|
||||
|
||||
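# Illustrative /health response shape (the check names and statuses below are
# hypothetical; the actual keys depend on the checks registered in lifespan()):
#
#   {
#       "overall_status": "healthy",
#       "timestamp": "2024-01-01T00:00:00",
#       "checks": {
#           "database": {"status": "healthy"},
#           "api": {"status": "healthy"},
#           "memory": {"status": "healthy"}
#       }
#   }
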
@app.get("/metrics", response_model=MetricsResponse)
|
||||
async def get_metrics():
|
||||
"""Get application metrics"""
|
||||
increment_counter("api_requests", labels={"endpoint": "metrics"})
|
||||
|
||||
metrics_collector = get_metrics_collector()
|
||||
metrics = metrics_collector.get_all_metrics()
|
||||
|
||||
return MetricsResponse(**metrics)
|
||||
|
||||
@app.get("/stations", response_model=List[StationResponse])
|
||||
async def get_stations():
|
||||
"""Get list of all monitoring stations"""
|
||||
increment_counter("api_requests", labels={"endpoint": "stations"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
stations = []
|
||||
for station_id, station_info in scraper.station_mapping.items():
|
||||
stations.append(StationResponse(
|
||||
station_id=int(station_id),
|
||||
station_code=station_info["code"],
|
||||
thai_name=station_info["thai_name"],
|
||||
english_name=station_info["english_name"],
|
||||
latitude=station_info.get("latitude"),
|
||||
longitude=station_info.get("longitude"),
|
||||
status="active"
|
||||
))
|
||||
|
||||
return stations
|
||||
|
||||
@app.post("/stations", response_model=StationResponse)
|
||||
async def create_station(station: StationCreateModel):
|
||||
"""Create a new monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "create_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
try:
|
||||
# Find next available station ID
|
||||
existing_ids = [int(sid) for sid in scraper.station_mapping.keys()]
|
||||
new_station_id = max(existing_ids) + 1 if existing_ids else 1
|
||||
|
||||
# Add to station mapping
|
||||
scraper.station_mapping[str(new_station_id)] = {
|
||||
'code': station.station_code,
|
||||
'thai_name': station.thai_name,
|
||||
'english_name': station.english_name,
|
||||
'latitude': station.latitude,
|
||||
'longitude': station.longitude,
|
||||
'geohash': station.geohash
|
||||
}
|
||||
|
||||
logger.info(f"Created new station: {station.station_code} ({station.english_name})")
|
||||
|
||||
return StationResponse(
|
||||
station_id=new_station_id,
|
||||
station_code=station.station_code,
|
||||
thai_name=station.thai_name,
|
||||
english_name=station.english_name,
|
||||
latitude=station.latitude,
|
||||
longitude=station.longitude,
|
||||
geohash=station.geohash,
|
||||
status=station.status
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating station: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
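# Example request against this endpoint (assuming the server from the __main__
# block below, reachable on localhost:8000; the station values are hypothetical):
#
#   curl -X POST http://localhost:8000/stations \
#        -H "Content-Type: application/json" \
#        -d '{"station_code": "P.99", "thai_name": "สถานีตัวอย่าง", "english_name": "Example Station"}'
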
@app.put("/stations/{station_id}", response_model=StationResponse)
|
||||
async def update_station(station_id: int, updates: StationUpdateModel):
|
||||
"""Update an existing monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "update_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
try:
|
||||
station_info = scraper.station_mapping[station_key]
|
||||
|
||||
# Update fields if provided
|
||||
if updates.thai_name is not None:
|
||||
station_info['thai_name'] = updates.thai_name
|
||||
if updates.english_name is not None:
|
||||
station_info['english_name'] = updates.english_name
|
||||
if updates.latitude is not None:
|
||||
station_info['latitude'] = updates.latitude
|
||||
if updates.longitude is not None:
|
||||
station_info['longitude'] = updates.longitude
|
||||
if updates.geohash is not None:
|
||||
station_info['geohash'] = updates.geohash
|
||||
|
||||
logger.info(f"Updated station {station_id}: {station_info['code']}")
|
||||
|
||||
return StationResponse(
|
||||
station_id=station_id,
|
||||
station_code=station_info['code'],
|
||||
thai_name=station_info['thai_name'],
|
||||
english_name=station_info['english_name'],
|
||||
latitude=station_info.get('latitude'),
|
||||
longitude=station_info.get('longitude'),
|
||||
geohash=station_info.get('geohash'),
|
||||
status=updates.status or "active"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating station {station_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/stations/{station_id}")
|
||||
async def delete_station(station_id: int):
|
||||
"""Delete a monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "delete_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
try:
|
||||
station_info = scraper.station_mapping.pop(station_key)
|
||||
logger.info(f"Deleted station {station_id}: {station_info['code']}")
|
||||
|
||||
return {"message": f"Station {station_info['code']} deleted successfully"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting station {station_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/stations/{station_id}", response_model=StationResponse)
|
||||
async def get_station(station_id: int):
|
||||
"""Get details of a specific monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "get_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
station_info = scraper.station_mapping[station_key]
|
||||
|
||||
return StationResponse(
|
||||
station_id=station_id,
|
||||
station_code=station_info['code'],
|
||||
thai_name=station_info['thai_name'],
|
||||
english_name=station_info['english_name'],
|
||||
latitude=station_info.get('latitude'),
|
||||
longitude=station_info.get('longitude'),
|
||||
geohash=station_info.get('geohash'),
|
||||
status="active"
|
||||
)
|
||||
|
||||
@app.get("/measurements/latest", response_model=List[MeasurementResponse])
|
||||
async def get_latest_measurements(limit: int = 100):
|
||||
"""Get latest measurements from all stations"""
|
||||
increment_counter("api_requests", labels={"endpoint": "measurements_latest"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper or not scraper.db_adapter:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
measurements = scraper.get_latest_data(limit=limit)
|
||||
|
||||
response = []
|
||||
for measurement in measurements:
|
||||
response.append(MeasurementResponse(
|
||||
timestamp=measurement["timestamp"],
|
||||
station_code=measurement["station_code"],
|
||||
station_name_en=measurement["station_name_en"],
|
||||
station_name_th=measurement["station_name_th"],
|
||||
water_level=measurement["water_level"],
|
||||
discharge=measurement["discharge"],
|
||||
discharge_percent=measurement.get("discharge_percent"),
|
||||
status=measurement.get("status", "active")
|
||||
))
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching latest measurements: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/measurements/station/{station_code}", response_model=List[MeasurementResponse])
|
||||
async def get_station_measurements(
|
||||
station_code: str,
|
||||
hours: int = 24,
|
||||
limit: int = 1000
|
||||
):
|
||||
"""Get measurements for a specific station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "measurements_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper or not scraper.db_adapter:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
# Get measurements for the specified time range
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(hours=hours)
|
||||
|
||||
measurements = scraper.db_adapter.get_measurements_by_timerange(
|
||||
start_time, end_time, station_codes=[station_code]
|
||||
)
|
||||
|
||||
# Limit results
|
||||
measurements = measurements[:limit]
|
||||
|
||||
response = []
|
||||
for measurement in measurements:
|
||||
response.append(MeasurementResponse(
|
||||
timestamp=measurement["timestamp"],
|
||||
station_code=measurement["station_code"],
|
||||
station_name_en=measurement["station_name_en"],
|
||||
station_name_th=measurement["station_name_th"],
|
||||
water_level=measurement["water_level"],
|
||||
discharge=measurement["discharge"],
|
||||
discharge_percent=measurement.get("discharge_percent"),
|
||||
status=measurement.get("status", "active")
|
||||
))
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching station measurements: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
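# Example query for one station's recent data (the station code is illustrative;
# hours and limit fall back to 24 and 1000 when omitted):
#
#   GET /measurements/station/P.1?hours=48&limit=500
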
@app.post("/scrape/trigger")
|
||||
async def trigger_scraping(background_tasks: BackgroundTasks):
|
||||
"""Trigger manual data scraping"""
|
||||
increment_counter("api_requests", labels={"endpoint": "scrape_trigger"})
|
||||
|
||||
if app_state["is_scraping"]:
|
||||
raise HTTPException(status_code=409, detail="Scraping already in progress")
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
def run_scraping():
|
||||
"""Background task to run scraping"""
|
||||
try:
|
||||
app_state["is_scraping"] = True
|
||||
logger.info("Manual scraping triggered via API")
|
||||
|
||||
result = scraper.run_scraping_cycle()
|
||||
|
||||
# Update stats
|
||||
app_state["scraping_stats"]["total_runs"] += 1
|
||||
app_state["scraping_stats"]["last_run"] = datetime.now()
|
||||
|
||||
if result:
|
||||
app_state["scraping_stats"]["successful_runs"] += 1
|
||||
increment_counter("manual_scraping_successful")
|
||||
else:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("manual_scraping_failed")
|
||||
|
||||
except Exception as e:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("manual_scraping_failed")
|
||||
logger.error(f"Manual scraping failed: {e}")
|
||||
finally:
|
||||
app_state["is_scraping"] = False
|
||||
|
||||
background_tasks.add_task(run_scraping)
|
||||
|
||||
return {"message": "Scraping triggered", "status": "started"}
|
||||
|
||||
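# Example manual trigger (assuming the API is reachable on localhost:8000);
# a second call while a cycle is still running returns HTTP 409:
#
#   curl -X POST http://localhost:8000/scrape/trigger
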
@app.get("/scraping/status", response_model=ScrapingStatusResponse)
|
||||
async def get_scraping_status():
|
||||
"""Get current scraping status"""
|
||||
increment_counter("api_requests", labels={"endpoint": "scraping_status"})
|
||||
|
||||
stats = app_state["scraping_stats"]
|
||||
|
||||
return ScrapingStatusResponse(
|
||||
is_running=app_state["is_scraping"],
|
||||
last_run=stats["last_run"],
|
||||
next_run=stats["next_run"],
|
||||
total_runs=stats["total_runs"],
|
||||
successful_runs=stats["successful_runs"],
|
||||
failed_runs=stats["failed_runs"]
|
||||
)
|
||||
|
||||
@app.get("/config")
|
||||
async def get_config():
|
||||
"""Get current configuration (sensitive data masked)"""
|
||||
increment_counter("api_requests", labels={"endpoint": "config"})
|
||||
|
||||
config = Config.get_all_settings()
|
||||
|
||||
# Mask sensitive information
|
||||
for key in config:
|
||||
if 'password' in key.lower() or 'secret' in key.lower():
|
||||
if config[key]:
|
||||
config[key] = '*' * 8
|
||||
|
||||
return config
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
# Setup logging
|
||||
setup_logging(
|
||||
log_level=Config.LOG_LEVEL,
|
||||
log_file=Config.LOG_FILE,
|
||||
enable_console=True,
|
||||
enable_colors=True
|
||||
)
|
||||
|
||||
# Run the API server
|
||||
uvicorn.run(
|
||||
"web_api:app",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
reload=False,
|
||||
log_config=None # Use our custom logging
|
||||
)
|
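# Equivalent invocation without the __main__ block (assuming the module is
# importable as web_api, matching the string passed to uvicorn.run above):
#
#   uvicorn web_api:app --host 0.0.0.0 --port 8000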