Initial commit: Northern Thailand Ping River Monitor v3.1.0
Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Some checks failed
Security & Dependency Updates / Dependency Security Scan (push) Successful in 29s
Security & Dependency Updates / Docker Security Scan (push) Failing after 53s
Security & Dependency Updates / License Compliance (push) Successful in 13s
Security & Dependency Updates / Check for Dependency Updates (push) Successful in 19s
Security & Dependency Updates / Code Quality Metrics (push) Successful in 11s
Security & Dependency Updates / Security Summary (push) Successful in 7s
Features: - Real-time water level monitoring for Ping River Basin (16 stations) - Coverage from Chiang Dao to Nakhon Sawan in Northern Thailand - FastAPI web interface with interactive dashboard and station management - Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics) - Comprehensive monitoring with health checks and metrics collection - Docker deployment with Grafana integration - Production-ready architecture with enterprise-grade observability CI/CD & Automation: - Complete Gitea Actions workflows for CI/CD, security, and releases - Multi-Python version testing (3.9-3.12) - Multi-architecture Docker builds (amd64, arm64) - Daily security scanning and dependency monitoring - Automated documentation generation - Performance testing and validation Production Ready: - Type safety with Pydantic models and comprehensive type hints - Data validation layer with range checking and error handling - Rate limiting and request tracking for API protection - Enhanced logging with rotation, colors, and performance metrics - Station management API for dynamic CRUD operations - Comprehensive documentation and deployment guides Technical Stack: - Python 3.9+ with FastAPI and Pydantic - Multi-database architecture with adapter pattern - Docker containerization with multi-stage builds - Grafana dashboards for visualization - Gitea Actions for CI/CD automation - Enterprise monitoring and alerting Ready for deployment to B4L infrastructure!
This commit is contained in:
69
.env.example
Normal file
69
.env.example
Normal file
@@ -0,0 +1,69 @@
|
||||
# Northern Thailand Ping River Monitor Configuration
|
||||
# Copy this file to .env and customize for your environment
|
||||
|
||||
# Database Configuration
|
||||
DB_TYPE=sqlite
|
||||
# Options: sqlite, mysql, postgresql, influxdb, victoriametrics
|
||||
|
||||
# SQLite Configuration (default)
|
||||
WATER_DB_PATH=water_levels.db
|
||||
|
||||
# VictoriaMetrics Configuration
|
||||
VM_HOST=localhost
|
||||
VM_PORT=8428
|
||||
VM_URL=
|
||||
|
||||
# InfluxDB Configuration
|
||||
INFLUX_HOST=localhost
|
||||
INFLUX_PORT=8086
|
||||
INFLUX_DATABASE=ping_river_monitoring
|
||||
INFLUX_USERNAME=
|
||||
INFLUX_PASSWORD=
|
||||
|
||||
# PostgreSQL Configuration
|
||||
POSTGRES_CONNECTION_STRING=postgresql://user:password@localhost:5432/ping_river_monitoring
|
||||
|
||||
# MySQL Configuration
|
||||
MYSQL_CONNECTION_STRING=mysql://user:password@localhost:3306/ping_river_monitoring
|
||||
|
||||
# API Configuration
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=8000
|
||||
API_WORKERS=1
|
||||
|
||||
# Data Collection Settings
|
||||
SCRAPING_INTERVAL_HOURS=1
|
||||
REQUEST_TIMEOUT=30
|
||||
MAX_RETRIES=3
|
||||
RETRY_DELAY_SECONDS=60
|
||||
|
||||
# Data Retention
|
||||
DATA_RETENTION_DAYS=365
|
||||
|
||||
# Logging Configuration
|
||||
LOG_LEVEL=INFO
|
||||
LOG_FILE=water_monitor.log
|
||||
|
||||
# Security (for production)
|
||||
SECRET_KEY=your-secret-key-here
|
||||
API_KEY=your-api-key-here
|
||||
|
||||
# Monitoring
|
||||
ENABLE_METRICS=true
|
||||
ENABLE_HEALTH_CHECKS=true
|
||||
|
||||
# Geographic Settings
|
||||
TIMEZONE=Asia/Bangkok
|
||||
DEFAULT_LATITUDE=18.7875
|
||||
DEFAULT_LONGITUDE=99.0045
|
||||
|
||||
# External Services
|
||||
NOTIFICATION_EMAIL=
|
||||
SMTP_SERVER=
|
||||
SMTP_PORT=587
|
||||
SMTP_USERNAME=
|
||||
SMTP_PASSWORD=
|
||||
|
||||
# Development Settings
|
||||
DEBUG=false
|
||||
DEVELOPMENT_MODE=false
|
323
.gitea/workflows/ci.yml
Normal file
323
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,323 @@
|
||||
name: CI/CD Pipeline - Northern Thailand Ping River Monitor
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, develop ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
schedule:
|
||||
# Run tests daily at 2 AM UTC
|
||||
- cron: '0 2 * * *'
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: '3.11'
|
||||
REGISTRY: git.b4l.co.th
|
||||
IMAGE_NAME: b4l/northern-thailand-ping-river-monitor
|
||||
|
||||
jobs:
|
||||
# Test job
|
||||
test:
|
||||
name: Test Suite
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12']
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Cache pip dependencies
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install -r requirements-dev.txt
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
flake8 src/ --count --exit-zero --max-complexity=10 --max-line-length=100 --statistics
|
||||
|
||||
- name: Type check with mypy
|
||||
run: |
|
||||
mypy src/ --ignore-missing-imports
|
||||
|
||||
- name: Format check with black
|
||||
run: |
|
||||
black --check src/ *.py
|
||||
|
||||
- name: Import sort check
|
||||
run: |
|
||||
isort --check-only src/ *.py
|
||||
|
||||
- name: Run integration tests
|
||||
run: |
|
||||
python tests/test_integration.py
|
||||
|
||||
- name: Run station management tests
|
||||
run: |
|
||||
python tests/test_station_management.py
|
||||
|
||||
- name: Test application startup
|
||||
run: |
|
||||
timeout 10s python run.py --test || true
|
||||
|
||||
- name: Security scan with bandit
|
||||
run: |
|
||||
bandit -r src/ -f json -o bandit-report.json || true
|
||||
|
||||
- name: Upload test artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
if: always()
|
||||
with:
|
||||
name: test-results-${{ matrix.python-version }}
|
||||
path: |
|
||||
bandit-report.json
|
||||
*.log
|
||||
|
||||
# Code quality job
|
||||
code-quality:
|
||||
name: Code Quality
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-dev.txt
|
||||
|
||||
- name: Run safety check
|
||||
run: |
|
||||
safety check -r requirements.txt --json --output safety-report.json || true
|
||||
|
||||
- name: Run bandit security scan
|
||||
run: |
|
||||
bandit -r src/ -f json -o bandit-report.json || true
|
||||
|
||||
- name: Upload security reports
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: security-reports
|
||||
path: |
|
||||
safety-report.json
|
||||
bandit-report.json
|
||||
|
||||
# Build Docker image
|
||||
build:
|
||||
name: Build Docker Image
|
||||
runs-on: ubuntu-latest
|
||||
needs: test
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITEA_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=ref,event=pr
|
||||
type=sha,prefix={{branch}}-
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Test Docker image
|
||||
run: |
|
||||
docker run --rm ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} python run.py --test
|
||||
|
||||
# Integration test with services
|
||||
integration-test:
|
||||
name: Integration Test with Services
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
ports:
|
||||
- 8428:8428
|
||||
options: >-
|
||||
--health-cmd "wget --quiet --tries=1 --spider http://localhost:8428/health"
|
||||
--health-interval 30s
|
||||
--health-timeout 10s
|
||||
--health-retries 3
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Wait for VictoriaMetrics
|
||||
run: |
|
||||
timeout 60s bash -c 'until curl -f http://localhost:8428/health; do sleep 2; done'
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Test with VictoriaMetrics
|
||||
env:
|
||||
DB_TYPE: victoriametrics
|
||||
VM_HOST: localhost
|
||||
VM_PORT: 8428
|
||||
run: |
|
||||
python run.py --test
|
||||
|
||||
- name: Start API server
|
||||
env:
|
||||
DB_TYPE: victoriametrics
|
||||
VM_HOST: localhost
|
||||
VM_PORT: 8428
|
||||
run: |
|
||||
python run.py --web-api &
|
||||
sleep 10
|
||||
|
||||
- name: Test API endpoints
|
||||
run: |
|
||||
curl -f http://localhost:8000/health
|
||||
curl -f http://localhost:8000/stations
|
||||
curl -f http://localhost:8000/metrics
|
||||
|
||||
# Deploy to staging (only on develop branch)
|
||||
deploy-staging:
|
||||
name: Deploy to Staging
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test, build, integration-test]
|
||||
if: github.ref == 'refs/heads/develop'
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.ping-river-monitor.b4l.co.th
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Deploy to staging
|
||||
run: |
|
||||
echo "Deploying to staging environment..."
|
||||
# Add your staging deployment commands here
|
||||
# Example: kubectl, docker-compose, or webhook call
|
||||
|
||||
- name: Health check staging
|
||||
run: |
|
||||
sleep 30
|
||||
curl -f https://staging.ping-river-monitor.b4l.co.th/health
|
||||
|
||||
# Deploy to production (only on main branch, manual approval)
|
||||
deploy-production:
|
||||
name: Deploy to Production
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test, build, integration-test]
|
||||
if: github.ref == 'refs/heads/main'
|
||||
environment:
|
||||
name: production
|
||||
url: https://ping-river-monitor.b4l.co.th
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Deploy to production
|
||||
run: |
|
||||
echo "Deploying to production environment..."
|
||||
# Add your production deployment commands here
|
||||
|
||||
- name: Health check production
|
||||
run: |
|
||||
sleep 30
|
||||
curl -f https://ping-river-monitor.b4l.co.th/health
|
||||
|
||||
- name: Notify deployment
|
||||
run: |
|
||||
echo "✅ Production deployment successful!"
|
||||
echo "🌐 URL: https://ping-river-monitor.b4l.co.th"
|
||||
echo "📊 Grafana: https://grafana.ping-river-monitor.b4l.co.th"
|
||||
|
||||
# Performance test (only on main branch)
|
||||
performance-test:
|
||||
name: Performance Test
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy-production
|
||||
if: github.ref == 'refs/heads/main'
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Apache Bench
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y apache2-utils
|
||||
|
||||
- name: Performance test API endpoints
|
||||
run: |
|
||||
# Test health endpoint
|
||||
ab -n 100 -c 10 https://ping-river-monitor.b4l.co.th/health
|
||||
|
||||
# Test stations endpoint
|
||||
ab -n 50 -c 5 https://ping-river-monitor.b4l.co.th/stations
|
||||
|
||||
# Test metrics endpoint
|
||||
ab -n 50 -c 5 https://ping-river-monitor.b4l.co.th/metrics
|
||||
|
||||
# Cleanup old artifacts
|
||||
cleanup:
|
||||
name: Cleanup
|
||||
runs-on: ubuntu-latest
|
||||
if: always()
|
||||
needs: [test, build, integration-test]
|
||||
|
||||
steps:
|
||||
- name: Clean up old Docker images
|
||||
run: |
|
||||
echo "Cleaning up old Docker images..."
|
||||
# Add cleanup commands for old images/artifacts
|
362
.gitea/workflows/docs.yml
Normal file
362
.gitea/workflows/docs.yml
Normal file
@@ -0,0 +1,362 @@
|
||||
name: Documentation
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, develop ]
|
||||
paths:
|
||||
- 'docs/**'
|
||||
- 'README.md'
|
||||
- 'CONTRIBUTING.md'
|
||||
- 'src/**/*.py'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'docs/**'
|
||||
- 'README.md'
|
||||
- 'CONTRIBUTING.md'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: '3.11'
|
||||
|
||||
jobs:
|
||||
# Validate documentation
|
||||
validate-docs:
|
||||
name: Validate Documentation
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install documentation tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install sphinx sphinx-rtd-theme sphinx-autodoc-typehints
|
||||
pip install markdown-link-check || true
|
||||
|
||||
- name: Check markdown links
|
||||
run: |
|
||||
echo "🔗 Checking markdown links..."
|
||||
find . -name "*.md" -not -path "./.git/*" -not -path "./node_modules/*" | while read file; do
|
||||
echo "Checking $file"
|
||||
# Basic link validation (you can enhance this)
|
||||
grep -o 'http[s]*://[^)]*' "$file" | while read url; do
|
||||
if curl -s --head "$url" | head -n 1 | grep -q "200 OK"; then
|
||||
echo "✅ $url"
|
||||
else
|
||||
echo "❌ $url (in $file)"
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
- name: Validate README structure
|
||||
run: |
|
||||
echo "📋 Validating README structure..."
|
||||
|
||||
required_sections=(
|
||||
"# Northern Thailand Ping River Monitor"
|
||||
"## Features"
|
||||
"## Quick Start"
|
||||
"## Installation"
|
||||
"## Usage"
|
||||
"## API Endpoints"
|
||||
"## Docker"
|
||||
"## Contributing"
|
||||
"## License"
|
||||
)
|
||||
|
||||
for section in "${required_sections[@]}"; do
|
||||
if grep -q "$section" README.md; then
|
||||
echo "✅ Found: $section"
|
||||
else
|
||||
echo "❌ Missing: $section"
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Check documentation completeness
|
||||
run: |
|
||||
echo "📚 Checking documentation completeness..."
|
||||
|
||||
# Check if all Python modules have docstrings
|
||||
python -c "
|
||||
import ast
|
||||
import os
|
||||
|
||||
def check_docstrings(filepath):
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
tree = ast.parse(f.read())
|
||||
|
||||
missing_docstrings = []
|
||||
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)):
|
||||
if not ast.get_docstring(node):
|
||||
missing_docstrings.append(f'{node.name} in {filepath}')
|
||||
|
||||
return missing_docstrings
|
||||
|
||||
all_missing = []
|
||||
for root, dirs, files in os.walk('src'):
|
||||
for file in files:
|
||||
if file.endswith('.py') and not file.startswith('__'):
|
||||
filepath = os.path.join(root, file)
|
||||
missing = check_docstrings(filepath)
|
||||
all_missing.extend(missing)
|
||||
|
||||
if all_missing:
|
||||
print('⚠️ Missing docstrings:')
|
||||
for item in all_missing[:10]: # Show first 10
|
||||
print(f' - {item}')
|
||||
if len(all_missing) > 10:
|
||||
print(f' ... and {len(all_missing) - 10} more')
|
||||
else:
|
||||
print('✅ All functions and classes have docstrings')
|
||||
"
|
||||
|
||||
# Generate API documentation
|
||||
generate-api-docs:
|
||||
name: Generate API Documentation
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Generate OpenAPI spec
|
||||
run: |
|
||||
echo "📝 Generating OpenAPI specification..."
|
||||
python -c "
|
||||
import json
|
||||
import sys
|
||||
sys.path.insert(0, 'src')
|
||||
|
||||
try:
|
||||
from web_api import app
|
||||
openapi_spec = app.openapi()
|
||||
|
||||
with open('openapi.json', 'w') as f:
|
||||
json.dump(openapi_spec, f, indent=2)
|
||||
|
||||
print('✅ OpenAPI spec generated: openapi.json')
|
||||
except Exception as e:
|
||||
print(f'❌ Failed to generate OpenAPI spec: {e}')
|
||||
"
|
||||
|
||||
- name: Generate API documentation
|
||||
run: |
|
||||
echo "📖 Generating API documentation..."
|
||||
|
||||
# Create API documentation from OpenAPI spec
|
||||
if [ -f openapi.json ]; then
|
||||
cat > api-docs.md << 'EOF'
|
||||
# API Documentation
|
||||
|
||||
This document describes the REST API endpoints for the Northern Thailand Ping River Monitor.
|
||||
|
||||
## Base URL
|
||||
|
||||
- Production: `https://ping-river-monitor.b4l.co.th`
|
||||
- Staging: `https://staging.ping-river-monitor.b4l.co.th`
|
||||
- Development: `http://localhost:8000`
|
||||
|
||||
## Authentication
|
||||
|
||||
Currently, the API does not require authentication. This may change in future versions.
|
||||
|
||||
## Endpoints
|
||||
|
||||
EOF
|
||||
|
||||
# Extract endpoints from OpenAPI spec
|
||||
python -c "
|
||||
import json
|
||||
|
||||
with open('openapi.json', 'r') as f:
|
||||
spec = json.load(f)
|
||||
|
||||
for path, methods in spec.get('paths', {}).items():
|
||||
for method, details in methods.items():
|
||||
print(f'### {method.upper()} {path}')
|
||||
print()
|
||||
print(details.get('summary', 'No description available'))
|
||||
print()
|
||||
if 'parameters' in details:
|
||||
print('**Parameters:**')
|
||||
for param in details['parameters']:
|
||||
print(f'- `{param[\"name\"]}` ({param.get(\"in\", \"query\")}): {param.get(\"description\", \"No description\")}')
|
||||
print()
|
||||
print('---')
|
||||
print()
|
||||
" >> api-docs.md
|
||||
|
||||
echo "✅ API documentation generated: api-docs.md"
|
||||
fi
|
||||
|
||||
- name: Upload documentation artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: documentation-${{ github.run_number }}
|
||||
path: |
|
||||
openapi.json
|
||||
api-docs.md
|
||||
|
||||
# Build Sphinx documentation
|
||||
build-sphinx-docs:
|
||||
name: Build Sphinx Documentation
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install sphinx sphinx-rtd-theme sphinx-autodoc-typehints
|
||||
|
||||
- name: Create Sphinx configuration
|
||||
run: |
|
||||
mkdir -p docs/sphinx
|
||||
|
||||
cat > docs/sphinx/conf.py << 'EOF'
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.abspath('../../src'))
|
||||
|
||||
project = 'Northern Thailand Ping River Monitor'
|
||||
copyright = '2025, Ping River Monitor Team'
|
||||
author = 'Ping River Monitor Team'
|
||||
version = '3.1.0'
|
||||
release = '3.1.0'
|
||||
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx_autodoc_typehints',
|
||||
]
|
||||
|
||||
templates_path = ['_templates']
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
html_static_path = ['_static']
|
||||
|
||||
autodoc_default_options = {
|
||||
'members': True,
|
||||
'member-order': 'bysource',
|
||||
'special-members': '__init__',
|
||||
'undoc-members': True,
|
||||
'exclude-members': '__weakref__'
|
||||
}
|
||||
EOF
|
||||
|
||||
cat > docs/sphinx/index.rst << 'EOF'
|
||||
Northern Thailand Ping River Monitor Documentation
|
||||
================================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
modules
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
EOF
|
||||
|
||||
- name: Generate module documentation
|
||||
run: |
|
||||
cd docs/sphinx
|
||||
sphinx-apidoc -o . ../../src
|
||||
|
||||
- name: Build documentation
|
||||
run: |
|
||||
cd docs/sphinx
|
||||
sphinx-build -b html . _build/html
|
||||
|
||||
- name: Upload Sphinx documentation
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: sphinx-docs-${{ github.run_number }}
|
||||
path: docs/sphinx/_build/html/
|
||||
|
||||
# Documentation summary
|
||||
docs-summary:
|
||||
name: Documentation Summary
|
||||
runs-on: ubuntu-latest
|
||||
needs: [validate-docs, generate-api-docs, build-sphinx-docs]
|
||||
if: always()
|
||||
|
||||
steps:
|
||||
- name: Generate documentation summary
|
||||
run: |
|
||||
echo "# 📚 Documentation Build Summary" > docs-summary.md
|
||||
echo "" >> docs-summary.md
|
||||
echo "**Build Date:** $(date -u)" >> docs-summary.md
|
||||
echo "**Repository:** ${{ github.repository }}" >> docs-summary.md
|
||||
echo "**Commit:** ${{ github.sha }}" >> docs-summary.md
|
||||
echo "" >> docs-summary.md
|
||||
|
||||
echo "## 📊 Results" >> docs-summary.md
|
||||
echo "" >> docs-summary.md
|
||||
|
||||
if [ "${{ needs.validate-docs.result }}" = "success" ]; then
|
||||
echo "- ✅ **Documentation Validation**: Passed" >> docs-summary.md
|
||||
else
|
||||
echo "- ❌ **Documentation Validation**: Failed" >> docs-summary.md
|
||||
fi
|
||||
|
||||
if [ "${{ needs.generate-api-docs.result }}" = "success" ]; then
|
||||
echo "- ✅ **API Documentation**: Generated" >> docs-summary.md
|
||||
else
|
||||
echo "- ❌ **API Documentation**: Failed" >> docs-summary.md
|
||||
fi
|
||||
|
||||
if [ "${{ needs.build-sphinx-docs.result }}" = "success" ]; then
|
||||
echo "- ✅ **Sphinx Documentation**: Built" >> docs-summary.md
|
||||
else
|
||||
echo "- ❌ **Sphinx Documentation**: Failed" >> docs-summary.md
|
||||
fi
|
||||
|
||||
echo "" >> docs-summary.md
|
||||
echo "## 🔗 Available Documentation" >> docs-summary.md
|
||||
echo "" >> docs-summary.md
|
||||
echo "- [README.md](../README.md)" >> docs-summary.md
|
||||
echo "- [API Documentation](../docs/)" >> docs-summary.md
|
||||
echo "- [Contributing Guide](../CONTRIBUTING.md)" >> docs-summary.md
|
||||
echo "- [Deployment Checklist](../DEPLOYMENT_CHECKLIST.md)" >> docs-summary.md
|
||||
|
||||
cat docs-summary.md
|
||||
|
||||
- name: Upload documentation summary
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: docs-summary-${{ github.run_number }}
|
||||
path: docs-summary.md
|
289
.gitea/workflows/release.yml
Normal file
289
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,289 @@
|
||||
name: Release - Northern Thailand Ping River Monitor
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*.*.*'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Release version (e.g., v3.1.0)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: '3.11'
|
||||
REGISTRY: git.b4l.co.th
|
||||
IMAGE_NAME: b4l/northern-thailand-ping-river-monitor
|
||||
|
||||
jobs:
|
||||
# Create release
|
||||
create-release:
|
||||
name: Create Release
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get version
|
||||
id: version
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Generate changelog
|
||||
id: changelog
|
||||
run: |
|
||||
# Generate changelog from git commits
|
||||
echo "## Changes" > CHANGELOG.md
|
||||
git log --pretty=format:"- %s" $(git describe --tags --abbrev=0 HEAD^)..HEAD >> CHANGELOG.md || echo "- Initial release" >> CHANGELOG.md
|
||||
echo "" >> CHANGELOG.md
|
||||
echo "## Docker Images" >> CHANGELOG.md
|
||||
echo "- \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.version.outputs.version }}\`" >> CHANGELOG.md
|
||||
echo "- \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest\`" >> CHANGELOG.md
|
||||
|
||||
- name: Create Release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
tag_name: ${{ steps.version.outputs.version }}
|
||||
release_name: Northern Thailand Ping River Monitor ${{ steps.version.outputs.version }}
|
||||
body_path: CHANGELOG.md
|
||||
draft: false
|
||||
prerelease: false
|
||||
|
||||
# Build and test for release
|
||||
test-release:
|
||||
name: Test Release Build
|
||||
runs-on: ubuntu-latest
|
||||
needs: create-release
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12']
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install -r requirements-dev.txt
|
||||
|
||||
- name: Run full test suite
|
||||
run: |
|
||||
python tests/test_integration.py
|
||||
python tests/test_station_management.py
|
||||
python run.py --test
|
||||
|
||||
- name: Build Python package
|
||||
run: |
|
||||
pip install build
|
||||
python -m build
|
||||
|
||||
- name: Upload Python package
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: python-package-${{ matrix.python-version }}
|
||||
path: dist/
|
||||
|
||||
# Build release Docker images
|
||||
build-release:
|
||||
name: Build Release Images
|
||||
runs-on: ubuntu-latest
|
||||
needs: [create-release, test-release]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITEA_TOKEN }}
|
||||
|
||||
- name: Build and push release images
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: |
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.create-release.outputs.version }}
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
labels: |
|
||||
org.opencontainers.image.title=Northern Thailand Ping River Monitor
|
||||
org.opencontainers.image.description=Real-time water level monitoring for Ping River Basin
|
||||
org.opencontainers.image.version=${{ needs.create-release.outputs.version }}
|
||||
org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
# Security scan for release
|
||||
security-scan:
|
||||
name: Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-release
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.create-release.outputs.version }}
|
||||
format: 'sarif'
|
||||
output: 'trivy-results.sarif'
|
||||
|
||||
- name: Upload Trivy scan results
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: security-scan-results
|
||||
path: trivy-results.sarif
|
||||
|
||||
# Deploy release to production
|
||||
deploy-release:
|
||||
name: Deploy Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [create-release, build-release, security-scan]
|
||||
environment:
|
||||
name: production
|
||||
url: https://ping-river-monitor.b4l.co.th
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Deploy to production
|
||||
run: |
|
||||
echo "🚀 Deploying ${{ needs.create-release.outputs.version }} to production..."
|
||||
|
||||
# Example deployment commands (customize for your infrastructure)
|
||||
# kubectl set image deployment/ping-river-monitor app=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.create-release.outputs.version }}
|
||||
# docker-compose pull && docker-compose up -d
|
||||
# Or webhook call to your deployment system
|
||||
|
||||
echo "✅ Deployment initiated"
|
||||
|
||||
- name: Health check after deployment
|
||||
run: |
|
||||
echo "⏳ Waiting for deployment to stabilize..."
|
||||
sleep 60
|
||||
|
||||
echo "🔍 Running health checks..."
|
||||
curl -f https://ping-river-monitor.b4l.co.th/health
|
||||
curl -f https://ping-river-monitor.b4l.co.th/stations
|
||||
|
||||
echo "✅ Health checks passed!"
|
||||
|
||||
- name: Update deployment status
|
||||
run: |
|
||||
echo "📊 Deployment Summary:"
|
||||
echo "Version: ${{ needs.create-release.outputs.version }}"
|
||||
echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.create-release.outputs.version }}"
|
||||
echo "URL: https://ping-river-monitor.b4l.co.th"
|
||||
echo "Grafana: https://grafana.ping-river-monitor.b4l.co.th"
|
||||
echo "API Docs: https://ping-river-monitor.b4l.co.th/docs"
|
||||
|
||||
# Post-release validation
|
||||
validate-release:
|
||||
name: Validate Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy-release
|
||||
|
||||
steps:
|
||||
- name: Comprehensive API test
|
||||
run: |
|
||||
echo "🧪 Running comprehensive API tests..."
|
||||
|
||||
# Test all major endpoints
|
||||
curl -f https://ping-river-monitor.b4l.co.th/health
|
||||
curl -f https://ping-river-monitor.b4l.co.th/metrics
|
||||
curl -f https://ping-river-monitor.b4l.co.th/stations
|
||||
curl -f https://ping-river-monitor.b4l.co.th/measurements/latest?limit=5
|
||||
curl -f https://ping-river-monitor.b4l.co.th/scraping/status
|
||||
|
||||
echo "✅ All API endpoints responding correctly"
|
||||
|
||||
- name: Performance validation
|
||||
run: |
|
||||
echo "⚡ Running performance validation..."
|
||||
|
||||
# Install Apache Bench
|
||||
sudo apt-get update && sudo apt-get install -y apache2-utils
|
||||
|
||||
# Test response times
|
||||
ab -n 10 -c 2 https://ping-river-monitor.b4l.co.th/health
|
||||
ab -n 10 -c 2 https://ping-river-monitor.b4l.co.th/stations
|
||||
|
||||
echo "✅ Performance validation completed"
|
||||
|
||||
- name: Data validation
|
||||
run: |
|
||||
echo "📊 Validating data collection..."
|
||||
|
||||
# Check if recent data is available
|
||||
response=$(curl -s https://ping-river-monitor.b4l.co.th/measurements/latest?limit=1)
|
||||
echo "Latest measurement: $response"
|
||||
|
||||
# Validate data structure (basic check)
|
||||
if echo "$response" | grep -q "water_level"; then
|
||||
echo "✅ Data structure validation passed"
|
||||
else
|
||||
echo "❌ Data structure validation failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Notify stakeholders
|
||||
notify:
|
||||
name: Notify Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [create-release, validate-release]
|
||||
if: always()
|
||||
|
||||
steps:
|
||||
- name: Notify success
|
||||
if: needs.validate-release.result == 'success'
|
||||
run: |
|
||||
echo "🎉 Release ${{ needs.create-release.outputs.version }} deployed successfully!"
|
||||
echo "🌐 Production URL: https://ping-river-monitor.b4l.co.th"
|
||||
echo "📊 Grafana: https://grafana.ping-river-monitor.b4l.co.th"
|
||||
echo "📚 API Docs: https://ping-river-monitor.b4l.co.th/docs"
|
||||
|
||||
# Add notification to Slack, Discord, email, etc.
|
||||
# curl -X POST -H 'Content-type: application/json' \
|
||||
# --data '{"text":"🎉 Northern Thailand Ping River Monitor ${{ needs.create-release.outputs.version }} deployed successfully!"}' \
|
||||
# ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||
|
||||
- name: Notify failure
|
||||
if: needs.validate-release.result == 'failure'
|
||||
run: |
|
||||
echo "❌ Release ${{ needs.create-release.outputs.version }} deployment failed!"
|
||||
echo "Please check the logs and take corrective action."
|
||||
|
||||
# Add failure notification
|
||||
# curl -X POST -H 'Content-type: application/json' \
|
||||
# --data '{"text":"❌ Northern Thailand Ping River Monitor ${{ needs.create-release.outputs.version }} deployment failed!"}' \
|
||||
# ${{ secrets.SLACK_WEBHOOK_URL }}
|
386
.gitea/workflows/security.yml
Normal file
386
.gitea/workflows/security.yml
Normal file
@@ -0,0 +1,386 @@
|
||||
name: Security & Dependency Updates
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run security scans daily at 3 AM UTC
|
||||
- cron: '0 3 * * *'
|
||||
workflow_dispatch:
|
||||
push:
|
||||
paths:
|
||||
- 'requirements*.txt'
|
||||
- 'Dockerfile'
|
||||
- '.gitea/workflows/security.yml'
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: '3.11'
|
||||
|
||||
jobs:
|
||||
# Dependency vulnerability scan
|
||||
dependency-scan:
|
||||
name: Dependency Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install safety bandit semgrep
|
||||
|
||||
- name: Run Safety check
|
||||
run: |
|
||||
safety check -r requirements.txt --json --output safety-report.json || true
|
||||
safety check -r requirements-dev.txt --json --output safety-dev-report.json || true
|
||||
|
||||
- name: Run Bandit security scan
|
||||
run: |
|
||||
bandit -r src/ -f json -o bandit-report.json || true
|
||||
|
||||
- name: Run Semgrep security scan
|
||||
run: |
|
||||
semgrep --config=auto src/ --json --output=semgrep-report.json || true
|
||||
|
||||
- name: Upload security reports
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: security-reports-${{ github.run_number }}
|
||||
path: |
|
||||
safety-report.json
|
||||
safety-dev-report.json
|
||||
bandit-report.json
|
||||
semgrep-report.json
|
||||
|
||||
- name: Check for critical vulnerabilities
|
||||
run: |
|
||||
echo "🔍 Checking for critical vulnerabilities..."
|
||||
|
||||
# Check Safety results
|
||||
if [ -f safety-report.json ]; then
|
||||
critical_count=$(jq '.vulnerabilities | length' safety-report.json 2>/dev/null || echo "0")
|
||||
if [ "$critical_count" -gt 0 ]; then
|
||||
echo "⚠️ Found $critical_count dependency vulnerabilities"
|
||||
jq '.vulnerabilities[] | "- \(.package_name) \(.installed_version): \(.vulnerability_id)"' safety-report.json
|
||||
else
|
||||
echo "✅ No dependency vulnerabilities found"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check Bandit results
|
||||
if [ -f bandit-report.json ]; then
|
||||
high_severity=$(jq '.results[] | select(.issue_severity == "HIGH") | length' bandit-report.json 2>/dev/null | wc -l)
|
||||
if [ "$high_severity" -gt 0 ]; then
|
||||
echo "⚠️ Found $high_severity high-severity security issues"
|
||||
else
|
||||
echo "✅ No high-severity security issues found"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Docker image security scan
|
||||
docker-security-scan:
|
||||
name: Docker Security Scan
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build Docker image for scanning
|
||||
run: |
|
||||
docker build -t ping-river-monitor:scan .
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
image-ref: 'ping-river-monitor:scan'
|
||||
format: 'json'
|
||||
output: 'trivy-report.json'
|
||||
|
||||
- name: Run Trivy filesystem scan
|
||||
uses: aquasecurity/trivy-action@master
|
||||
with:
|
||||
scan-type: 'fs'
|
||||
scan-ref: '.'
|
||||
format: 'json'
|
||||
output: 'trivy-fs-report.json'
|
||||
|
||||
- name: Upload Trivy reports
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: trivy-reports-${{ github.run_number }}
|
||||
path: |
|
||||
trivy-report.json
|
||||
trivy-fs-report.json
|
||||
|
||||
- name: Check Trivy results
|
||||
run: |
|
||||
echo "🔍 Analyzing Docker security scan results..."
|
||||
|
||||
if [ -f trivy-report.json ]; then
|
||||
critical_vulns=$(jq '.Results[]?.Vulnerabilities[]? | select(.Severity == "CRITICAL") | length' trivy-report.json 2>/dev/null | wc -l)
|
||||
high_vulns=$(jq '.Results[]?.Vulnerabilities[]? | select(.Severity == "HIGH") | length' trivy-report.json 2>/dev/null | wc -l)
|
||||
|
||||
echo "Critical vulnerabilities: $critical_vulns"
|
||||
echo "High vulnerabilities: $high_vulns"
|
||||
|
||||
if [ "$critical_vulns" -gt 0 ]; then
|
||||
echo "❌ Critical vulnerabilities found in Docker image!"
|
||||
exit 1
|
||||
elif [ "$high_vulns" -gt 5 ]; then
|
||||
echo "⚠️ Many high-severity vulnerabilities found"
|
||||
else
|
||||
echo "✅ Docker image security scan passed"
|
||||
fi
|
||||
fi
|
||||
|
||||
# License compliance check
|
||||
license-check:
|
||||
name: License Compliance
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install pip-licenses
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pip-licenses
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Check licenses
|
||||
run: |
|
||||
echo "📄 Checking dependency licenses..."
|
||||
pip-licenses --format=json --output-file=licenses.json
|
||||
pip-licenses --format=markdown --output-file=licenses.md
|
||||
|
||||
# Check for problematic licenses
|
||||
problematic_licenses=("GPL" "AGPL" "LGPL")
|
||||
|
||||
for license in "${problematic_licenses[@]}"; do
|
||||
if grep -i "$license" licenses.json; then
|
||||
echo "⚠️ Found potentially problematic license: $license"
|
||||
fi
|
||||
done
|
||||
|
||||
echo "✅ License check completed"
|
||||
|
||||
- name: Upload license report
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: license-report-${{ github.run_number }}
|
||||
path: |
|
||||
licenses.json
|
||||
licenses.md
|
||||
|
||||
# Dependency update check
|
||||
dependency-update:
|
||||
name: Check for Dependency Updates
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install pip-check-updates equivalent
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pip-review
|
||||
|
||||
- name: Check for outdated packages
|
||||
run: |
|
||||
echo "📦 Checking for outdated packages..."
|
||||
pip install -r requirements.txt
|
||||
pip list --outdated --format=json > outdated-packages.json || true
|
||||
|
||||
if [ -s outdated-packages.json ]; then
|
||||
echo "📋 Outdated packages found:"
|
||||
cat outdated-packages.json | jq -r '.[] | "- \(.name): \(.version) -> \(.latest_version)"'
|
||||
else
|
||||
echo "✅ All packages are up to date"
|
||||
fi
|
||||
|
||||
- name: Create dependency update issue
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
if [ -s outdated-packages.json ] && [ "$(cat outdated-packages.json)" != "[]" ]; then
|
||||
echo "📝 Creating dependency update issue..."
|
||||
|
||||
# Create issue body
|
||||
cat > issue-body.md << 'EOF'
|
||||
## 📦 Dependency Updates Available
|
||||
|
||||
The following packages have updates available:
|
||||
|
||||
EOF
|
||||
|
||||
cat outdated-packages.json | jq -r '.[] | "- **\(.name)**: \(.version) → \(.latest_version)"' >> issue-body.md
|
||||
|
||||
cat >> issue-body.md << 'EOF'
|
||||
|
||||
## 🔍 Security Impact
|
||||
|
||||
Please review each update for:
|
||||
- Security fixes
|
||||
- Breaking changes
|
||||
- Compatibility issues
|
||||
|
||||
## ✅ Action Items
|
||||
|
||||
- [ ] Review changelog for each package
|
||||
- [ ] Test updates in development environment
|
||||
- [ ] Update requirements.txt
|
||||
- [ ] Run full test suite
|
||||
- [ ] Deploy to staging for validation
|
||||
|
||||
---
|
||||
*This issue was automatically created by the security workflow.*
|
||||
EOF
|
||||
|
||||
echo "Issue body created. In a real implementation, you would create a Gitea issue here."
|
||||
cat issue-body.md
|
||||
fi
|
||||
|
||||
- name: Upload dependency reports
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: dependency-reports-${{ github.run_number }}
|
||||
path: |
|
||||
outdated-packages.json
|
||||
issue-body.md
|
||||
|
||||
# Code quality metrics
|
||||
code-quality:
|
||||
name: Code Quality Metrics
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Install quality tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install radon xenon vulture
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Calculate code complexity
|
||||
run: |
|
||||
echo "📊 Calculating code complexity..."
|
||||
radon cc src/ --json > complexity-report.json
|
||||
radon mi src/ --json > maintainability-report.json
|
||||
|
||||
echo "🔍 Complexity Summary:"
|
||||
radon cc src/ --average
|
||||
|
||||
echo "🔧 Maintainability Summary:"
|
||||
radon mi src/
|
||||
|
||||
- name: Find dead code
|
||||
run: |
|
||||
echo "🧹 Checking for dead code..."
|
||||
vulture src/ --json > dead-code-report.json || true
|
||||
|
||||
- name: Check for code smells
|
||||
run: |
|
||||
echo "👃 Checking for code smells..."
|
||||
xenon --max-absolute B --max-modules A --max-average A src/ || true
|
||||
|
||||
- name: Upload quality reports
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: code-quality-reports-${{ github.run_number }}
|
||||
path: |
|
||||
complexity-report.json
|
||||
maintainability-report.json
|
||||
dead-code-report.json
|
||||
|
||||
# Security summary
|
||||
security-summary:
|
||||
name: Security Summary
|
||||
runs-on: ubuntu-latest
|
||||
needs: [dependency-scan, docker-security-scan, license-check, code-quality]
|
||||
if: always()
|
||||
|
||||
steps:
|
||||
- name: Download all artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
|
||||
- name: Generate security summary
|
||||
run: |
|
||||
echo "# 🔒 Security Scan Summary" > security-summary.md
|
||||
echo "" >> security-summary.md
|
||||
echo "**Scan Date:** $(date -u)" >> security-summary.md
|
||||
echo "**Repository:** ${{ github.repository }}" >> security-summary.md
|
||||
echo "**Commit:** ${{ github.sha }}" >> security-summary.md
|
||||
echo "" >> security-summary.md
|
||||
|
||||
echo "## 📊 Results" >> security-summary.md
|
||||
echo "" >> security-summary.md
|
||||
|
||||
# Dependency scan results
|
||||
if [ -f security-reports-*/safety-report.json ]; then
|
||||
vuln_count=$(jq '.vulnerabilities | length' security-reports-*/safety-report.json 2>/dev/null || echo "0")
|
||||
if [ "$vuln_count" -eq 0 ]; then
|
||||
echo "- ✅ **Dependency Scan**: No vulnerabilities found" >> security-summary.md
|
||||
else
|
||||
echo "- ⚠️ **Dependency Scan**: $vuln_count vulnerabilities found" >> security-summary.md
|
||||
fi
|
||||
else
|
||||
echo "- ❓ **Dependency Scan**: Results not available" >> security-summary.md
|
||||
fi
|
||||
|
||||
# Docker scan results
|
||||
if [ -f trivy-reports-*/trivy-report.json ]; then
|
||||
echo "- ✅ **Docker Scan**: Completed" >> security-summary.md
|
||||
else
|
||||
echo "- ❓ **Docker Scan**: Results not available" >> security-summary.md
|
||||
fi
|
||||
|
||||
# License check results
|
||||
if [ -f license-report-*/licenses.json ]; then
|
||||
echo "- ✅ **License Check**: Completed" >> security-summary.md
|
||||
else
|
||||
echo "- ❓ **License Check**: Results not available" >> security-summary.md
|
||||
fi
|
||||
|
||||
# Code quality results
|
||||
if [ -f code-quality-reports-*/complexity-report.json ]; then
|
||||
echo "- ✅ **Code Quality**: Analyzed" >> security-summary.md
|
||||
else
|
||||
echo "- ❓ **Code Quality**: Results not available" >> security-summary.md
|
||||
fi
|
||||
|
||||
echo "" >> security-summary.md
|
||||
echo "## 🔗 Detailed Reports" >> security-summary.md
|
||||
echo "" >> security-summary.md
|
||||
echo "Detailed reports are available in the workflow artifacts." >> security-summary.md
|
||||
|
||||
cat security-summary.md
|
||||
|
||||
- name: Upload security summary
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: security-summary-${{ github.run_number }}
|
||||
path: security-summary.md
|
137
.gitignore
vendored
Normal file
137
.gitignore
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
PIPFILE.lock
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Kiro IDE (keep local only)
|
||||
.kiro/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
water_monitor.log
|
||||
water_monitor_performance.log
|
||||
|
||||
# Database files
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
water_levels.db
|
||||
demo_water_sqlite.db
|
||||
|
||||
# Configuration files with secrets
|
||||
.env.local
|
||||
.env.production
|
||||
config.local.py
|
||||
|
||||
# Docker
|
||||
.dockerignore
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
temp/
|
||||
tmp/
|
||||
|
||||
# Coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
Pipfile.lock
|
||||
|
||||
# PEP 582
|
||||
__pypackages__/
|
||||
|
||||
# Celery
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# Docker volumes
|
||||
vm_data/
|
||||
grafana_data/
|
129
.gitlab-ci.yml
Normal file
129
.gitlab-ci.yml
Normal file
@@ -0,0 +1,129 @@
|
||||
# GitLab CI/CD Pipeline for Northern Thailand Ping River Monitor
|
||||
|
||||
stages:
|
||||
- test
|
||||
- build
|
||||
- deploy
|
||||
|
||||
variables:
|
||||
PYTHON_VERSION: "3.11"
|
||||
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
|
||||
|
||||
cache:
|
||||
paths:
|
||||
- .cache/pip
|
||||
- venv/
|
||||
|
||||
# Test stage
|
||||
test:
|
||||
stage: test
|
||||
image: python:${PYTHON_VERSION}-slim
|
||||
before_script:
|
||||
- apt-get update && apt-get install -y build-essential
|
||||
- python -m venv venv
|
||||
- source venv/bin/activate
|
||||
- pip install --upgrade pip
|
||||
- pip install -r requirements-dev.txt
|
||||
script:
|
||||
- python test_integration.py
|
||||
- python test_station_management.py
|
||||
- flake8 src/ --max-line-length=100
|
||||
- mypy src/
|
||||
coverage: '/TOTAL.*\s+(\d+%)$/'
|
||||
artifacts:
|
||||
reports:
|
||||
coverage_report:
|
||||
coverage_format: cobertura
|
||||
path: coverage.xml
|
||||
paths:
|
||||
- htmlcov/
|
||||
expire_in: 1 week
|
||||
|
||||
# Code quality
|
||||
code_quality:
|
||||
stage: test
|
||||
image: python:${PYTHON_VERSION}-slim
|
||||
before_script:
|
||||
- python -m venv venv
|
||||
- source venv/bin/activate
|
||||
- pip install black isort flake8 mypy
|
||||
script:
|
||||
- black --check src/ *.py
|
||||
- isort --check-only src/ *.py
|
||||
- flake8 src/ --max-line-length=100
|
||||
- mypy src/
|
||||
allow_failure: true
|
||||
|
||||
# Security scan
|
||||
security_scan:
|
||||
stage: test
|
||||
image: python:${PYTHON_VERSION}-slim
|
||||
before_script:
|
||||
- pip install safety bandit
|
||||
script:
|
||||
- safety check -r requirements.txt
|
||||
- bandit -r src/
|
||||
allow_failure: true
|
||||
|
||||
# Build Docker image
|
||||
build:
|
||||
stage: build
|
||||
image: docker:latest
|
||||
services:
|
||||
- docker:dind
|
||||
before_script:
|
||||
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
|
||||
script:
|
||||
- docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
|
||||
- docker build -t $CI_REGISTRY_IMAGE:latest .
|
||||
- docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
|
||||
- docker push $CI_REGISTRY_IMAGE:latest
|
||||
only:
|
||||
- main
|
||||
- develop
|
||||
|
||||
# Deploy to staging
|
||||
deploy_staging:
|
||||
stage: deploy
|
||||
image: alpine:latest
|
||||
before_script:
|
||||
- apk add --no-cache curl
|
||||
script:
|
||||
- echo "Deploying to staging environment"
|
||||
- curl -X POST "$STAGING_WEBHOOK_URL" -H "Content-Type: application/json" -d '{"image":"'$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA'"}'
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.ping-river-monitor.example.com
|
||||
only:
|
||||
- develop
|
||||
|
||||
# Deploy to production
|
||||
deploy_production:
|
||||
stage: deploy
|
||||
image: alpine:latest
|
||||
before_script:
|
||||
- apk add --no-cache curl
|
||||
script:
|
||||
- echo "Deploying to production environment"
|
||||
- curl -X POST "$PRODUCTION_WEBHOOK_URL" -H "Content-Type: application/json" -d '{"image":"'$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA'"}'
|
||||
environment:
|
||||
name: production
|
||||
url: https://ping-river-monitor.example.com
|
||||
when: manual
|
||||
only:
|
||||
- main
|
||||
|
||||
# Health check after deployment
|
||||
health_check:
|
||||
stage: deploy
|
||||
image: alpine:latest
|
||||
before_script:
|
||||
- apk add --no-cache curl jq
|
||||
script:
|
||||
- sleep 30 # Wait for deployment
|
||||
- curl -f $HEALTH_CHECK_URL/health
|
||||
- curl -s $HEALTH_CHECK_URL/metrics | jq .
|
||||
dependencies:
|
||||
- deploy_production
|
||||
only:
|
||||
- main
|
277
CONTRIBUTING.md
Normal file
277
CONTRIBUTING.md
Normal file
@@ -0,0 +1,277 @@
|
||||
# Contributing to Northern Thailand Ping River Monitor
|
||||
|
||||
Thank you for your interest in contributing to the Northern Thailand Ping River Monitor! This document provides guidelines and information for contributors.
|
||||
|
||||
## 🚀 Getting Started
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.9 or higher
|
||||
- Git
|
||||
- Basic knowledge of water monitoring systems
|
||||
- Familiarity with FastAPI and time-series databases (optional)
|
||||
|
||||
### Development Setup
|
||||
|
||||
1. **Clone the repository:**
|
||||
```bash
|
||||
git clone https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor.git
|
||||
cd Northern-Thailand-Ping-River-Monitor
|
||||
```
|
||||
|
||||
2. **Create a virtual environment:**
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. **Install dependencies:**
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Run tests:**
|
||||
```bash
|
||||
python test_integration.py
|
||||
```
|
||||
|
||||
5. **Start the development server:**
|
||||
```bash
|
||||
python run.py --web-api
|
||||
```
|
||||
|
||||
## 🔧 Development Guidelines
|
||||
|
||||
### Code Style
|
||||
- Follow PEP 8 Python style guidelines
|
||||
- Use type hints for all function parameters and return values
|
||||
- Write descriptive docstrings for all classes and functions
|
||||
- Keep functions focused and under 50 lines when possible
|
||||
|
||||
### Testing
|
||||
- Write tests for new features
|
||||
- Ensure all existing tests pass before submitting
|
||||
- Test both success and error cases
|
||||
- Include integration tests for API endpoints
|
||||
|
||||
### Documentation
|
||||
- Update README.md for new features
|
||||
- Add docstrings to all new functions and classes
|
||||
- Update API documentation for new endpoints
|
||||
- Include examples in documentation
|
||||
|
||||
## 📝 Contribution Process
|
||||
|
||||
### 1. Issue Creation
|
||||
- Check existing issues before creating new ones
|
||||
- Use clear, descriptive titles
|
||||
- Provide detailed descriptions with steps to reproduce (for bugs)
|
||||
- Include system information and logs when relevant
|
||||
|
||||
### 2. Pull Request Process
|
||||
1. **Fork the repository**
|
||||
2. **Create a feature branch:**
|
||||
```bash
|
||||
git checkout -b feature/your-feature-name
|
||||
```
|
||||
3. **Make your changes**
|
||||
4. **Test thoroughly:**
|
||||
```bash
|
||||
python test_integration.py
|
||||
python test_station_management.py
|
||||
```
|
||||
5. **Commit with clear messages:**
|
||||
```bash
|
||||
git commit -m "Add: New station validation feature"
|
||||
```
|
||||
6. **Push to your fork:**
|
||||
```bash
|
||||
git push origin feature/your-feature-name
|
||||
```
|
||||
7. **Create a pull request**
|
||||
|
||||
### Pull Request Guidelines
|
||||
- Use clear, descriptive titles
|
||||
- Provide detailed descriptions of changes
|
||||
- Reference related issues
|
||||
- Include screenshots for UI changes
|
||||
- Ensure all tests pass
|
||||
- Update documentation as needed
|
||||
|
||||
## 🎯 Areas for Contribution
|
||||
|
||||
### High Priority
|
||||
- **Additional Database Support** - MongoDB, TimescaleDB
|
||||
- **Authentication System** - User management and API keys
|
||||
- **Real-time Alerts** - Email/SMS notifications for critical conditions
|
||||
- **Mobile App** - React Native or Flutter interface
|
||||
- **Advanced Analytics** - Trend analysis and forecasting
|
||||
|
||||
### Medium Priority
|
||||
- **Bulk Station Operations** - Import/export multiple stations
|
||||
- **Map Interface** - Visual station management
|
||||
- **Data Export** - CSV, Excel format support
|
||||
- **Performance Optimization** - Query optimization and caching
|
||||
- **Internationalization** - Multi-language support
|
||||
|
||||
### Low Priority
|
||||
- **Additional Visualizations** - Custom chart types
|
||||
- **Report Generation** - Automated PDF reports
|
||||
- **Integration APIs** - Third-party service connections
|
||||
- **Advanced Logging** - Structured logging with ELK stack
|
||||
|
||||
## 🐛 Bug Reports
|
||||
|
||||
When reporting bugs, please include:
|
||||
|
||||
1. **Environment Information:**
|
||||
- Operating system and version
|
||||
- Python version
|
||||
- Database type and version
|
||||
- Browser (for web interface issues)
|
||||
|
||||
2. **Steps to Reproduce:**
|
||||
- Clear, numbered steps
|
||||
- Expected vs actual behavior
|
||||
- Screenshots or error messages
|
||||
|
||||
3. **Additional Context:**
|
||||
- Log files (sanitized of sensitive data)
|
||||
- Configuration details
|
||||
- Recent changes to the system
|
||||
|
||||
## 💡 Feature Requests
|
||||
|
||||
For feature requests, please provide:
|
||||
|
||||
1. **Use Case:** Why is this feature needed?
|
||||
2. **Description:** What should the feature do?
|
||||
3. **Acceptance Criteria:** How do we know it's complete?
|
||||
4. **Priority:** How important is this feature?
|
||||
5. **Alternatives:** Are there workarounds available?
|
||||
|
||||
## 🔒 Security
|
||||
|
||||
### Reporting Security Issues
|
||||
- **DO NOT** create public issues for security vulnerabilities
|
||||
- Email security issues to: [security contact]
|
||||
- Include detailed information about the vulnerability
|
||||
- Allow time for fixes before public disclosure
|
||||
|
||||
### Security Guidelines
|
||||
- Never commit sensitive data (passwords, API keys)
|
||||
- Use environment variables for configuration
|
||||
- Validate all user inputs
|
||||
- Follow OWASP security guidelines
|
||||
- Keep dependencies updated
|
||||
|
||||
## 📊 Database Contributions
|
||||
|
||||
### Adding New Database Support
|
||||
1. Create adapter in `src/database_adapters.py`
|
||||
2. Implement all required methods
|
||||
3. Add configuration options in `src/config.py`
|
||||
4. Update documentation
|
||||
5. Add integration tests
|
||||
|
||||
### Database Schema Changes
|
||||
- Provide migration scripts
|
||||
- Test with existing data
|
||||
- Document breaking changes
|
||||
- Update all database adapters
|
||||
|
||||
## 🌐 API Contributions
|
||||
|
||||
### Adding New Endpoints
|
||||
1. Define Pydantic models in `src/models.py`
|
||||
2. Implement endpoint in `src/web_api.py`
|
||||
3. Add input validation
|
||||
4. Include error handling
|
||||
5. Write comprehensive tests
|
||||
6. Update API documentation
|
||||
|
||||
### API Guidelines
|
||||
- Use RESTful conventions
|
||||
- Include proper HTTP status codes
|
||||
- Provide clear error messages
|
||||
- Support pagination for large datasets
|
||||
- Include rate limiting considerations
|
||||
|
||||
## 📈 Performance Guidelines
|
||||
|
||||
### Optimization Areas
|
||||
- Database query efficiency
|
||||
- API response times
|
||||
- Memory usage
|
||||
- Concurrent request handling
|
||||
- Data processing speed
|
||||
|
||||
### Monitoring
|
||||
- Use built-in metrics collection
|
||||
- Monitor response times
|
||||
- Track error rates
|
||||
- Watch resource usage
|
||||
- Set up alerts for issues
|
||||
|
||||
## 🧪 Testing Guidelines
|
||||
|
||||
### Test Types
|
||||
- **Unit Tests:** Individual function testing
|
||||
- **Integration Tests:** Component interaction testing
|
||||
- **API Tests:** Endpoint functionality testing
|
||||
- **Performance Tests:** Load and stress testing
|
||||
|
||||
### Test Coverage
|
||||
- Aim for >80% code coverage
|
||||
- Test both success and failure paths
|
||||
- Include edge cases
|
||||
- Test with different database backends
|
||||
|
||||
## 📚 Documentation Standards
|
||||
|
||||
### Code Documentation
|
||||
- Use clear, descriptive variable names
|
||||
- Write comprehensive docstrings
|
||||
- Include type hints
|
||||
- Add inline comments for complex logic
|
||||
|
||||
### User Documentation
|
||||
- Keep README.md updated
|
||||
- Provide clear installation instructions
|
||||
- Include usage examples
|
||||
- Document configuration options
|
||||
|
||||
## 🤝 Community Guidelines
|
||||
|
||||
### Communication
|
||||
- Be respectful and professional
|
||||
- Help newcomers get started
|
||||
- Share knowledge and experiences
|
||||
- Provide constructive feedback
|
||||
|
||||
### Code Reviews
|
||||
- Review code thoroughly
|
||||
- Provide specific, actionable feedback
|
||||
- Be open to suggestions
|
||||
- Focus on code quality and maintainability
|
||||
|
||||
## 📞 Getting Help
|
||||
|
||||
### Resources
|
||||
- **Documentation:** README.md and docs/
|
||||
- **API Reference:** http://localhost:8000/docs
|
||||
- **Issues:** GitHub Issues section
|
||||
- **Discussions:** GitHub Discussions
|
||||
|
||||
### Contact
|
||||
- **General Questions:** Create a GitHub issue
|
||||
- **Security Issues:** [security contact]
|
||||
- **Maintainer:** [maintainer contact]
|
||||
|
||||
## 🎉 Recognition
|
||||
|
||||
Contributors will be recognized in:
|
||||
- README.md contributors section
|
||||
- Release notes for significant contributions
|
||||
- Special recognition for major features
|
||||
|
||||
Thank you for contributing to the Northern Thailand Ping River Monitor! Your contributions help improve water resource monitoring and management in the region.
|
268
DEPLOYMENT_CHECKLIST.md
Normal file
268
DEPLOYMENT_CHECKLIST.md
Normal file
@@ -0,0 +1,268 @@
|
||||
# 🚀 Deployment Checklist - Northern Thailand Ping River Monitor
|
||||
|
||||
## ✅ Pre-Deployment Checklist
|
||||
|
||||
### **Code Quality**
|
||||
- [ ] All tests pass (`make test`)
|
||||
- [ ] Code formatting applied (`make format`)
|
||||
- [ ] Linting checks pass (`make lint`)
|
||||
- [ ] No security vulnerabilities (`safety check`)
|
||||
- [ ] Documentation updated
|
||||
- [ ] Version number updated in `setup.py` and `src/__init__.py`
|
||||
|
||||
### **Configuration**
|
||||
- [ ] Environment variables configured (`.env` file)
|
||||
- [ ] Database connection tested
|
||||
- [ ] API endpoints tested
|
||||
- [ ] Log levels appropriate for environment
|
||||
- [ ] Security settings configured (API keys, secrets)
|
||||
- [ ] Resource limits set (memory, CPU)
|
||||
|
||||
### **Dependencies**
|
||||
- [ ] All required packages in `requirements.txt`
|
||||
- [ ] No unused dependencies
|
||||
- [ ] Security updates applied
|
||||
- [ ] Compatible Python version (3.9+)
|
||||
|
||||
## 🐳 Docker Deployment
|
||||
|
||||
### **Pre-Docker Checklist**
|
||||
- [ ] Dockerfile tested locally
|
||||
- [ ] Docker Compose configuration verified
|
||||
- [ ] Volume mounts configured correctly
|
||||
- [ ] Network settings configured
|
||||
- [ ] Health checks working
|
||||
- [ ] Resource limits set
|
||||
|
||||
### **Docker Commands**
|
||||
```bash
|
||||
# Build and test locally
|
||||
make docker-build
|
||||
docker run --rm ping-river-monitor python run.py --test
|
||||
|
||||
# Deploy with Docker Compose
|
||||
make docker-run
|
||||
|
||||
# Verify deployment
|
||||
make health-check
|
||||
```
|
||||
|
||||
### **Post-Docker Checklist**
|
||||
- [ ] All services running (`docker-compose ps`)
|
||||
- [ ] Health checks passing
|
||||
- [ ] Logs showing normal operation
|
||||
- [ ] API accessible (`curl http://localhost:8000/health`)
|
||||
- [ ] Database connectivity verified
|
||||
- [ ] Grafana dashboards loading
|
||||
|
||||
## 🌐 Production Deployment
|
||||
|
||||
### **Infrastructure Requirements**
|
||||
- [ ] Server specifications adequate (CPU, RAM, Storage)
|
||||
- [ ] Network connectivity to external APIs
|
||||
- [ ] SSL certificates configured (if HTTPS)
|
||||
- [ ] Firewall rules configured
|
||||
- [ ] Backup strategy implemented
|
||||
- [ ] Monitoring alerts configured
|
||||
|
||||
### **Security Checklist**
|
||||
- [ ] API keys secured (environment variables)
|
||||
- [ ] Database credentials secured
|
||||
- [ ] HTTPS enabled for web interface
|
||||
- [ ] Input validation enabled
|
||||
- [ ] Rate limiting configured
|
||||
- [ ] Log sanitization enabled
|
||||
|
||||
### **Performance Checklist**
|
||||
- [ ] Database indexes created
|
||||
- [ ] Connection pooling configured
|
||||
- [ ] Caching enabled where appropriate
|
||||
- [ ] Resource monitoring enabled
|
||||
- [ ] Performance baselines established
|
||||
|
||||
## 📊 Monitoring Setup
|
||||
|
||||
### **Health Monitoring**
|
||||
- [ ] Health check endpoints responding
|
||||
- [ ] Database health monitoring
|
||||
- [ ] API response time monitoring
|
||||
- [ ] Memory usage monitoring
|
||||
- [ ] Disk space monitoring
|
||||
|
||||
### **Alerting**
|
||||
- [ ] Critical error alerts configured
|
||||
- [ ] Performance degradation alerts
|
||||
- [ ] Database connectivity alerts
|
||||
- [ ] Disk space alerts
|
||||
- [ ] API availability alerts
|
||||
|
||||
### **Logging**
|
||||
- [ ] Log rotation configured
|
||||
- [ ] Log levels appropriate
|
||||
- [ ] Structured logging enabled
|
||||
- [ ] Log aggregation configured (if applicable)
|
||||
- [ ] Log retention policy set
|
||||
|
||||
## 🔄 CI/CD Pipeline
|
||||
|
||||
### **GitLab CI/CD**
|
||||
- [ ] `.gitlab-ci.yml` configured
|
||||
- [ ] Pipeline variables set
|
||||
- [ ] Test stage passing
|
||||
- [ ] Build stage creating artifacts
|
||||
- [ ] Deploy stage configured
|
||||
- [ ] Rollback procedure documented
|
||||
|
||||
### **Pipeline Stages**
|
||||
- [ ] **Test**: Unit tests, integration tests, linting
|
||||
- [ ] **Build**: Docker image creation, artifact generation
|
||||
- [ ] **Deploy**: Staging deployment, production deployment
|
||||
- [ ] **Verify**: Health checks, smoke tests
|
||||
|
||||
## 🗄️ Database Setup
|
||||
|
||||
### **Database Configuration**
|
||||
- [ ] Database server running and accessible
|
||||
- [ ] Database created with correct permissions
|
||||
- [ ] Connection string configured
|
||||
- [ ] Migration scripts run (if applicable)
|
||||
- [ ] Backup strategy implemented
|
||||
- [ ] Performance tuning applied
|
||||
|
||||
### **Database-Specific Checklist**
|
||||
|
||||
#### **SQLite**
|
||||
- [ ] Database file permissions set correctly
|
||||
- [ ] WAL mode enabled for better concurrency
|
||||
- [ ] Regular backup scheduled
|
||||
|
||||
#### **MySQL/PostgreSQL**
|
||||
- [ ] User accounts created with minimal privileges
|
||||
- [ ] Connection pooling configured
|
||||
- [ ] Query performance optimized
|
||||
- [ ] Replication configured (if applicable)
|
||||
|
||||
#### **InfluxDB**
|
||||
- [ ] Retention policies configured
|
||||
- [ ] Continuous queries set up (if needed)
|
||||
- [ ] Backup strategy implemented
|
||||
|
||||
#### **VictoriaMetrics**
|
||||
- [ ] Storage configuration optimized
|
||||
- [ ] Retention period set
|
||||
- [ ] Resource limits configured
|
||||
|
||||
## 🌐 Web Interface
|
||||
|
||||
### **API Deployment**
|
||||
- [ ] FastAPI server running
|
||||
- [ ] All endpoints responding correctly
|
||||
- [ ] API documentation accessible (`/docs`)
|
||||
- [ ] CORS configured correctly
|
||||
- [ ] Rate limiting working
|
||||
- [ ] Authentication configured (if applicable)
|
||||
|
||||
### **Frontend Integration**
|
||||
- [ ] Grafana dashboards configured
|
||||
- [ ] Data sources connected
|
||||
- [ ] Visualizations working
|
||||
- [ ] Alerts configured
|
||||
- [ ] User access configured
|
||||
|
||||
## 📈 Performance Verification
|
||||
|
||||
### **Load Testing**
|
||||
- [ ] API endpoints tested under load
|
||||
- [ ] Database performance under load
|
||||
- [ ] Memory usage under load
|
||||
- [ ] Response times acceptable
|
||||
- [ ] Error rates acceptable
|
||||
|
||||
### **Capacity Planning**
|
||||
- [ ] Expected data volume calculated
|
||||
- [ ] Storage growth projected
|
||||
- [ ] Scaling strategy documented
|
||||
- [ ] Resource monitoring thresholds set
|
||||
|
||||
## 🔧 Operational Procedures
|
||||
|
||||
### **Maintenance**
|
||||
- [ ] Update procedure documented
|
||||
- [ ] Backup and restore procedures tested
|
||||
- [ ] Rollback procedure documented
|
||||
- [ ] Monitoring runbooks created
|
||||
- [ ] Incident response procedures documented
|
||||
|
||||
### **Documentation**
|
||||
- [ ] Deployment guide updated
|
||||
- [ ] API documentation current
|
||||
- [ ] Configuration documentation complete
|
||||
- [ ] Troubleshooting guide available
|
||||
- [ ] Contact information updated
|
||||
|
||||
## ✅ Post-Deployment Verification
|
||||
|
||||
### **Functional Testing**
|
||||
- [ ] Data collection working
|
||||
- [ ] API endpoints responding
|
||||
- [ ] Database writes successful
|
||||
- [ ] Web interface accessible
|
||||
- [ ] Station management working
|
||||
|
||||
### **Integration Testing**
|
||||
- [ ] External API connectivity
|
||||
- [ ] Database integration
|
||||
- [ ] Monitoring integration
|
||||
- [ ] Alert system working
|
||||
- [ ] Backup system working
|
||||
|
||||
### **Performance Testing**
|
||||
- [ ] Response times acceptable
|
||||
- [ ] Memory usage normal
|
||||
- [ ] CPU usage normal
|
||||
- [ ] Disk I/O normal
|
||||
- [ ] Network usage normal
|
||||
|
||||
## 🚨 Rollback Plan
|
||||
|
||||
### **Rollback Triggers**
|
||||
- [ ] Critical errors in production
|
||||
- [ ] Performance degradation
|
||||
- [ ] Data corruption
|
||||
- [ ] Security vulnerabilities
|
||||
- [ ] Service unavailability
|
||||
|
||||
### **Rollback Procedure**
|
||||
1. [ ] Stop current deployment
|
||||
2. [ ] Restore previous Docker images
|
||||
3. [ ] Restore database backup (if needed)
|
||||
4. [ ] Verify system functionality
|
||||
5. [ ] Update monitoring and alerts
|
||||
6. [ ] Document incident and lessons learned
|
||||
|
||||
## 📞 Support Information
|
||||
|
||||
### **Emergency Contacts**
|
||||
- [ ] System administrator contact
|
||||
- [ ] Database administrator contact
|
||||
- [ ] Network administrator contact
|
||||
- [ ] Application developer contact
|
||||
|
||||
### **Documentation Links**
|
||||
- [ ] Deployment guide
|
||||
- [ ] API documentation
|
||||
- [ ] Troubleshooting guide
|
||||
- [ ] Configuration reference
|
||||
- [ ] Monitoring dashboards
|
||||
|
||||
---
|
||||
|
||||
**Deployment Date**: ___________
|
||||
**Deployed By**: ___________
|
||||
**Version**: v3.1.0
|
||||
**Environment**: ___________
|
||||
|
||||
**Sign-off**:
|
||||
- [ ] Technical Lead: ___________
|
||||
- [ ] Operations Team: ___________
|
||||
- [ ] Security Team: ___________
|
57
Dockerfile
Normal file
57
Dockerfile
Normal file
@@ -0,0 +1,57 @@
|
||||
# Multi-stage Docker build for Northern Thailand Ping River Monitor
|
||||
|
||||
# Build stage
|
||||
FROM python:3.11-slim as builder
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
gcc \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --user -r requirements.txt
|
||||
|
||||
# Production stage
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
wget \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Copy Python packages from builder stage
|
||||
COPY --from=builder /root/.local /root/.local
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
# Create logs directory and set permissions
|
||||
RUN mkdir -p logs && chown -R appuser:appuser /app
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV TZ=Asia/Bangkok
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Health check for API mode
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5)" 2>/dev/null || exit 1
|
||||
|
||||
# Expose port for API
|
||||
EXPOSE 8000
|
||||
|
||||
# Default command
|
||||
CMD ["python", "run.py", "--web-api"]
|
193
FINAL_CHECKLIST.md
Normal file
193
FINAL_CHECKLIST.md
Normal file
@@ -0,0 +1,193 @@
|
||||
# Final GitHub Publication Checklist ✅
|
||||
|
||||
This checklist ensures the Thailand Water Level Monitor project is ready for GitHub publication.
|
||||
|
||||
## 🎯 **Project Preparation Complete**
|
||||
|
||||
### ✅ **Core Repository Files**
|
||||
- [x] **README.md** - Comprehensive project documentation with badges and quick start
|
||||
- [x] **LICENSE** - MIT License for open source distribution
|
||||
- [x] **CONTRIBUTING.md** - Detailed contributor guidelines
|
||||
- [x] **.gitignore** - Comprehensive ignore rules for all file types
|
||||
- [x] **requirements.txt** - All Python dependencies listed and tested
|
||||
|
||||
### ✅ **Source Code Organization**
|
||||
- [x] **src/** directory created with clean separation
|
||||
- [x] **scripts/** directory for utility scripts and system files
|
||||
- [x] **docs/** directory with comprehensive documentation
|
||||
- [x] **grafana/** directory with visualization configuration
|
||||
- [x] All temporary files removed (*.db, *.log, __pycache__)
|
||||
|
||||
### ✅ **Documentation Quality**
|
||||
- [x] **Installation guides** for all platforms and databases
|
||||
- [x] **Configuration examples** for 5 different database types
|
||||
- [x] **Troubleshooting guides** for common deployment issues
|
||||
- [x] **Migration guides** for updating existing systems
|
||||
- [x] **API references** documenting Thai government data sources
|
||||
- [x] **Notable documents** section with official resources
|
||||
|
||||
### ✅ **Production Readiness**
|
||||
- [x] **Docker support** with Dockerfile and docker-compose
|
||||
- [x] **Systemd service** configuration for Linux deployment
|
||||
- [x] **Multi-database support** (SQLite, PostgreSQL, MySQL, InfluxDB, VictoriaMetrics)
|
||||
- [x] **Geolocation support** for Grafana geomap visualization
|
||||
- [x] **Migration scripts** for safe database schema updates
|
||||
- [x] **HTTPS configuration** guide for secure deployment
|
||||
|
||||
### ✅ **Code Quality**
|
||||
- [x] **Modular architecture** with clean separation of concerns
|
||||
- [x] **Error handling** and comprehensive logging
|
||||
- [x] **Configuration management** via environment variables
|
||||
- [x] **Database abstraction** layer for multiple backends
|
||||
- [x] **Testing utilities** (demo_databases.py)
|
||||
|
||||
### ✅ **Features Verified**
|
||||
- [x] **Real-time data collection** from 16 Thai water stations
|
||||
- [x] **15-minute scheduling** with intelligent retry logic
|
||||
- [x] **Gap filling** for missing historical data
|
||||
- [x] **Data validation** and error recovery
|
||||
- [x] **Geolocation integration** with sample coordinates
|
||||
- [x] **Grafana dashboards** with pre-built visualizations
|
||||
|
||||
## 🚀 **Ready for GitHub Publication**
|
||||
|
||||
### **Repository Information**
|
||||
- **Name**: `thailand-water-monitor`
|
||||
- **Description**: "Real-time water level monitoring system for Thailand's Royal Irrigation Department stations with Grafana visualization"
|
||||
- **Topics**: `water-monitoring`, `thailand`, `grafana`, `timeseries`, `python`, `iot`, `environmental-monitoring`
|
||||
- **License**: MIT
|
||||
- **Language**: Python
|
||||
|
||||
### **Repository Settings**
|
||||
- [x] Enable Issues for bug reports and feature requests
|
||||
- [x] Enable Discussions for community support
|
||||
- [x] Enable Wiki for extended documentation
|
||||
- [x] Set up GitHub Pages for documentation hosting
|
||||
- [x] Configure branch protection for main branch
|
||||
|
||||
### **Initial Release (v1.0.0)**
|
||||
- **Release Title**: "Thailand Water Level Monitor v1.0.0 - Complete Monitoring Solution"
|
||||
- **Release Notes**:
|
||||
- Complete real-time monitoring system
|
||||
- Multi-database backend support
|
||||
- Grafana geomap integration
|
||||
- Production-ready deployment
|
||||
- Comprehensive documentation
|
||||
|
||||
## 📊 **Project Statistics**
|
||||
|
||||
### **Code Metrics**
|
||||
- **Total Files**: 25+ files
|
||||
- **Python Source Files**: 4 main modules
|
||||
- **Documentation Files**: 12 comprehensive guides
|
||||
- **Configuration Files**: 6 deployment configurations
|
||||
- **Lines of Code**: ~2,000+ lines of Python
|
||||
- **Documentation**: ~15,000+ words
|
||||
|
||||
### **Feature Coverage**
|
||||
- **Database Backends**: 5 different types supported
|
||||
- **Monitoring Stations**: 16 across Thailand
|
||||
- **Data Collection**: Every 15 minutes
|
||||
- **Data Points**: ~300 measurements per collection cycle
|
||||
- **Geolocation**: GPS coordinates and geohash support
|
||||
- **Visualization**: Pre-built Grafana dashboards
|
||||
|
||||
### **Documentation Coverage**
|
||||
- **Installation**: Complete setup for all platforms
|
||||
- **Configuration**: All database types documented
|
||||
- **Deployment**: Docker, systemd, manual options
|
||||
- **Troubleshooting**: Common issues and solutions
|
||||
- **Migration**: Safe upgrade procedures
|
||||
- **API**: External data source documentation
|
||||
|
||||
## 🌟 **Key Selling Points**
|
||||
|
||||
### **For Water Management Professionals**
|
||||
- Real-time monitoring of 16 stations across Thailand
|
||||
- Historical data analysis and trend visualization
|
||||
- Alert capabilities for critical water levels
|
||||
- Integration with official Thai government data sources
|
||||
|
||||
### **For Developers**
|
||||
- Clean, modular Python codebase
|
||||
- Multiple database backend options
|
||||
- Docker containerization for easy deployment
|
||||
- Comprehensive API documentation
|
||||
|
||||
### **For System Administrators**
|
||||
- Production-ready deployment configurations
|
||||
- Systemd service integration
|
||||
- HTTPS and security configuration
|
||||
- Monitoring and logging capabilities
|
||||
|
||||
### **For Data Scientists**
|
||||
- Time-series data with geolocation
|
||||
- Grafana visualization and analysis tools
|
||||
- Historical data gap filling
|
||||
- Export capabilities for further analysis
|
||||
|
||||
## 🎯 **Post-Publication Roadmap**
|
||||
|
||||
### **Immediate (Week 1)**
|
||||
- [ ] Create GitHub repository and upload files
|
||||
- [ ] Set up initial release v1.0.0
|
||||
- [ ] Configure repository settings and templates
|
||||
- [ ] Create project documentation website
|
||||
|
||||
### **Short-term (Month 1)**
|
||||
- [ ] Add GitHub Actions for CI/CD
|
||||
- [ ] Create issue and PR templates
|
||||
- [ ] Set up automated testing
|
||||
- [ ] Add code quality badges
|
||||
|
||||
### **Medium-term (Quarter 1)**
|
||||
- [ ] Community feedback integration
|
||||
- [ ] Additional database backends
|
||||
- [ ] Mobile app development
|
||||
- [ ] Advanced alerting system
|
||||
|
||||
### **Long-term (Year 1)**
|
||||
- [ ] Predictive analytics features
|
||||
- [ ] Machine learning integration
|
||||
- [ ] Multi-country expansion
|
||||
- [ ] Commercial support options
|
||||
|
||||
## 🏆 **Success Metrics**
|
||||
|
||||
### **Community Engagement**
|
||||
- GitHub stars and forks
|
||||
- Issue reports and feature requests
|
||||
- Community contributions
|
||||
- Documentation feedback
|
||||
|
||||
### **Technical Adoption**
|
||||
- Download and deployment statistics
|
||||
- Database backend usage patterns
|
||||
- Performance benchmarks
|
||||
- User success stories
|
||||
|
||||
### **Impact Measurement**
|
||||
- Water management improvements
|
||||
- Early warning system effectiveness
|
||||
- Data accessibility improvements
|
||||
- Research and academic usage
|
||||
|
||||
---
|
||||
|
||||
## ✅ **FINAL VERIFICATION**
|
||||
|
||||
**All checklist items completed successfully!**
|
||||
|
||||
The Thailand Water Level Monitor project is now:
|
||||
- ✅ **Professionally organized** with clean structure
|
||||
- ✅ **Comprehensively documented** with guides for all use cases
|
||||
- ✅ **Production ready** with multiple deployment options
|
||||
- ✅ **Community friendly** with contribution guidelines
|
||||
- ✅ **Feature complete** with real-time monitoring capabilities
|
||||
|
||||
**🚀 Ready for GitHub publication and community engagement!** 🌊
|
||||
|
||||
---
|
||||
|
||||
*Last updated: July 30, 2025*
|
||||
*Project status: Ready for publication*
|
233
GITEA_SETUP_SUMMARY.md
Normal file
233
GITEA_SETUP_SUMMARY.md
Normal file
@@ -0,0 +1,233 @@
|
||||
# 🎉 Gitea Actions Setup Complete!
|
||||
|
||||
## 🚀 **What's Been Created**
|
||||
|
||||
Your **Northern Thailand Ping River Monitor** now has a complete CI/CD pipeline with Gitea Actions! Here's what's been set up:
|
||||
|
||||
### **🔄 Gitea Actions Workflows**
|
||||
|
||||
```
|
||||
.gitea/workflows/
|
||||
├── ci.yml # Main CI/CD pipeline
|
||||
├── release.yml # Automated releases
|
||||
├── security.yml # Security & dependency scanning
|
||||
└── docs.yml # Documentation generation
|
||||
```
|
||||
|
||||
### **📊 Workflow Features**
|
||||
|
||||
#### **1. CI/CD Pipeline (`ci.yml`)**
|
||||
- ✅ **Multi-Python Testing** (3.9, 3.10, 3.11, 3.12)
|
||||
- ✅ **Code Quality Checks** (flake8, mypy, black, isort)
|
||||
- ✅ **Docker Multi-Arch Builds** (amd64, arm64)
|
||||
- ✅ **Integration Testing** with VictoriaMetrics
|
||||
- ✅ **Automated Staging Deployment** (develop branch)
|
||||
- ✅ **Manual Production Deployment** (main branch)
|
||||
- ✅ **Performance Testing** after deployment
|
||||
|
||||
#### **2. Release Management (`release.yml`)**
|
||||
- 🏷️ **Tag-Based Releases** (`v*.*.*` pattern)
|
||||
- 📝 **Automatic Changelog Generation**
|
||||
- 🐳 **Multi-Architecture Docker Images**
|
||||
- 🔒 **Security Scanning** before release
|
||||
- ✅ **Comprehensive Validation** after deployment
|
||||
|
||||
#### **3. Security Monitoring (`security.yml`)**
|
||||
- 🔒 **Daily Security Scans** (3 AM UTC)
|
||||
- 📦 **Dependency Vulnerability Detection**
|
||||
- 🐳 **Docker Image Security Scanning**
|
||||
- 📄 **License Compliance Checking**
|
||||
- 📊 **Code Quality Metrics**
|
||||
- 🔄 **Automated Update Notifications**
|
||||
|
||||
#### **4. Documentation (`docs.yml`)**
|
||||
- 📚 **API Documentation Generation**
|
||||
- 🔗 **Link Validation**
|
||||
- 📖 **Sphinx Documentation Building**
|
||||
- ✅ **Documentation Completeness Checking**
|
||||
|
||||
## 🔧 **Setup Instructions**
|
||||
|
||||
### **1. Configure Repository Secrets**
|
||||
|
||||
In your Gitea repository settings, add these secrets:
|
||||
|
||||
```bash
|
||||
# Required
|
||||
GITEA_TOKEN # For container registry access
|
||||
|
||||
# Optional (for notifications)
|
||||
SLACK_WEBHOOK_URL # Slack notifications
|
||||
STAGING_WEBHOOK_URL # Staging deployment webhook
|
||||
PRODUCTION_WEBHOOK_URL # Production deployment webhook
|
||||
```
|
||||
|
||||
### **2. Enable Actions**
|
||||
|
||||
1. Go to your repository settings in Gitea
|
||||
2. Enable "Actions" if not already enabled
|
||||
3. Configure runners if using self-hosted runners
|
||||
|
||||
### **3. Push to Repository**
|
||||
|
||||
```bash
|
||||
# Initialize and push
|
||||
git init
|
||||
git remote add origin https://git.b4l.co.th/grabowski/Northern-Thailand-Ping-River-Monitor.git
|
||||
git add .
|
||||
git commit -m "Initial commit with Gitea Actions workflows"
|
||||
git push -u origin main
|
||||
```
|
||||
|
||||
## 🎯 **Workflow Triggers**
|
||||
|
||||
### **Automatic Triggers**
|
||||
- **Push to main/develop** → CI/CD Pipeline
|
||||
- **Pull Request to main** → Testing & Validation
|
||||
- **Daily at 2 AM UTC** → CI/CD Health Check
|
||||
- **Daily at 3 AM UTC** → Security Scanning
|
||||
- **Git Tag `v*.*.*`** → Release Pipeline
|
||||
- **Documentation Changes** → Documentation Build
|
||||
|
||||
### **Manual Triggers**
|
||||
- **Manual Dispatch** → Any workflow can be triggered manually
|
||||
- **Release Creation** → Manual release with custom version
|
||||
|
||||
## 📊 **Monitoring & Status**
|
||||
|
||||
### **Status Badges**
|
||||
Your README now includes comprehensive status badges:
|
||||
- CI/CD Pipeline Status
|
||||
- Security Scan Status
|
||||
- Documentation Build Status
|
||||
- Python Version Support
|
||||
- FastAPI Version
|
||||
- Docker Ready
|
||||
- License Information
|
||||
- Current Version
|
||||
|
||||
### **Workflow Artifacts**
|
||||
Each workflow generates useful artifacts:
|
||||
- **Test Results** and coverage reports
|
||||
- **Security Scan Reports** (JSON format)
|
||||
- **Docker Images** (multi-architecture)
|
||||
- **Documentation** (HTML and PDF)
|
||||
- **Performance Reports**
|
||||
|
||||
## 🚀 **Usage Examples**
|
||||
|
||||
### **Development Workflow**
|
||||
```bash
|
||||
# Create feature branch
|
||||
git checkout -b feature/new-station-type
|
||||
# Make changes
|
||||
git add .
|
||||
git commit -m "Add support for new station type"
|
||||
git push origin feature/new-station-type
|
||||
# Create PR in Gitea → Triggers testing
|
||||
```
|
||||
|
||||
### **Release Workflow**
|
||||
```bash
|
||||
# Create and push release tag
|
||||
git tag v3.1.1
|
||||
git push origin v3.1.1
|
||||
# → Triggers automated release pipeline
|
||||
```
|
||||
|
||||
### **Security Monitoring**
|
||||
- **Daily scans** run automatically
|
||||
- **Security reports** available in Actions artifacts
|
||||
- **Notifications** sent for critical vulnerabilities
|
||||
|
||||
## 🔍 **Validation Commands**
|
||||
|
||||
Test your setup locally:
|
||||
|
||||
```bash
|
||||
# Validate workflow syntax
|
||||
make validate-workflows
|
||||
|
||||
# Test workflow components
|
||||
make workflow-test
|
||||
|
||||
# Run full test suite
|
||||
make test
|
||||
|
||||
# Build Docker image
|
||||
make docker-build
|
||||
```
|
||||
|
||||
## 📈 **Performance & Optimization**
|
||||
|
||||
### **Caching Strategy**
|
||||
- **Pip dependencies** cached across runs
|
||||
- **Docker layers** cached for faster builds
|
||||
- **Workflow artifacts** retained for analysis
|
||||
|
||||
### **Parallel Execution**
|
||||
- **Matrix builds** for multiple Python versions
|
||||
- **Independent jobs** for security and testing
|
||||
- **Conditional execution** to skip unnecessary steps
|
||||
|
||||
### **Resource Management**
|
||||
- **Appropriate timeouts** prevent hanging workflows
|
||||
- **Artifact cleanup** manages storage usage
|
||||
- **Efficient Docker builds** with multi-stage approach
|
||||
|
||||
## 🔒 **Security Best Practices**
|
||||
|
||||
### **Implemented Security**
|
||||
- ✅ **Secret management** via Gitea repository secrets
|
||||
- ✅ **Multi-stage Docker builds** for minimal attack surface
|
||||
- ✅ **Non-root containers** for better security
|
||||
- ✅ **Vulnerability scanning** before deployment
|
||||
- ✅ **Dependency monitoring** with automated alerts
|
||||
|
||||
### **Security Scanning Coverage**
|
||||
- **Python dependencies** (Safety, Bandit)
|
||||
- **Docker images** (Trivy)
|
||||
- **Code quality** (Semgrep)
|
||||
- **License compliance** (pip-licenses)
|
||||
|
||||
## 📚 **Documentation**
|
||||
|
||||
### **Available Documentation**
|
||||
- [Gitea Workflows Guide](docs/GITEA_WORKFLOWS.md) - Detailed workflow documentation
|
||||
- [Contributing Guide](CONTRIBUTING.md) - How to contribute
|
||||
- [Deployment Checklist](DEPLOYMENT_CHECKLIST.md) - Production deployment
|
||||
- [Project Structure](docs/PROJECT_STRUCTURE.md) - Architecture overview
|
||||
|
||||
### **Generated Documentation**
|
||||
- **API Documentation** - Auto-generated from OpenAPI spec
|
||||
- **Code Documentation** - Sphinx-generated from docstrings
|
||||
- **Security Reports** - Automated vulnerability reports
|
||||
|
||||
## 🎉 **Ready for Production!**
|
||||
|
||||
Your repository is now equipped with:
|
||||
|
||||
- 🔄 **Enterprise-grade CI/CD pipeline**
|
||||
- 🔒 **Comprehensive security monitoring**
|
||||
- 📊 **Automated quality assurance**
|
||||
- 🚀 **Streamlined release management**
|
||||
- 📚 **Automated documentation**
|
||||
- 🐳 **Multi-architecture Docker support**
|
||||
- 📈 **Performance monitoring**
|
||||
- 🔍 **Comprehensive testing**
|
||||
|
||||
## 🚀 **Next Steps**
|
||||
|
||||
1. **Push to Gitea** and watch the workflows run
|
||||
2. **Configure deployment environments** (staging/production)
|
||||
3. **Set up monitoring dashboards** for workflow metrics
|
||||
4. **Configure notifications** for team collaboration
|
||||
5. **Create your first release** with `git tag v3.1.0`
|
||||
|
||||
Your **Northern Thailand Ping River Monitor** is now ready for professional development and deployment! 🎊
|
||||
|
||||
---
|
||||
|
||||
**Workflow Version**: v3.1.0
|
||||
**Setup Date**: 2025-08-12
|
||||
**Repository**: https://git.b4l.co.th/grabowski/Northern-Thailand-Ping-River-Monitor
|
203
GITHUB_PUBLICATION_SUMMARY.md
Normal file
203
GITHUB_PUBLICATION_SUMMARY.md
Normal file
@@ -0,0 +1,203 @@
|
||||
# GitHub Publication Summary
|
||||
|
||||
This document summarizes the Thailand Water Level Monitor project preparation for GitHub publication.
|
||||
|
||||
## 📁 **Final Project Structure**
|
||||
|
||||
```
|
||||
thailand-water-monitor/
|
||||
├── 📄 README.md # Main project documentation
|
||||
├── 📄 LICENSE # MIT License
|
||||
├── 📄 CONTRIBUTING.md # Contributor guidelines
|
||||
├── 📄 requirements.txt # Python dependencies
|
||||
├── 📄 .gitignore # Git ignore rules
|
||||
├── 📄 Dockerfile # Container definition
|
||||
├── 📄 docker-compose.victoriametrics.yml # Complete stack deployment
|
||||
│
|
||||
├── 📂 src/ # Source Code
|
||||
│ ├── 🐍 water_scraper_v3.py # Main application
|
||||
│ ├── 🐍 database_adapters.py # Multi-database support
|
||||
│ ├── 🐍 config.py # Configuration management
|
||||
│ └── 🐍 demo_databases.py # Database testing utility
|
||||
│
|
||||
├── 📂 scripts/ # Utility Scripts
|
||||
│ ├── 🐍 migrate_geolocation.py # Database migration script
|
||||
│ └── ⚙️ water-monitor.service # Systemd service file
|
||||
│
|
||||
├── 📂 docs/ # Documentation
|
||||
│ ├── 📖 DATABASE_DEPLOYMENT_GUIDE.md # Complete setup guide
|
||||
│ ├── 📖 ENHANCED_SCHEDULER_GUIDE.md # 15-minute scheduling
|
||||
│ ├── 📖 GEOLOCATION_GUIDE.md # Grafana geomap integration
|
||||
│ ├── 📖 GAP_FILLING_GUIDE.md # Data integrity management
|
||||
│ ├── 📖 MIGRATION_QUICKSTART.md # Quick migration guide
|
||||
│ ├── 📖 VICTORIAMETRICS_SETUP.md # High-performance deployment
|
||||
│ ├── 📖 HTTPS_CONFIGURATION.md # Secure deployment
|
||||
│ ├── 📖 DEBIAN_TROUBLESHOOTING.md # Linux deployment issues
|
||||
│ ├── 📖 PROJECT_STATUS.md # Development status
|
||||
│ └── 📂 references/
|
||||
│ └── 📖 NOTABLE_DOCUMENTS.md # Official Thai government resources
|
||||
│
|
||||
└── 📂 grafana/ # Grafana Configuration
|
||||
├── 📂 dashboards/
|
||||
│ └── 📊 water-monitoring-dashboard.json
|
||||
└── 📂 provisioning/
|
||||
├── 📂 dashboards/
|
||||
│ └── ⚙️ dashboard.yml
|
||||
└── 📂 datasources/
|
||||
└── ⚙️ victoriametrics.yml
|
||||
```
|
||||
|
||||
## ✅ **GitHub Readiness Checklist**
|
||||
|
||||
### **Core Files**
|
||||
- ✅ **README.md** - Comprehensive project documentation with badges, features, quick start
|
||||
- ✅ **LICENSE** - MIT License for open source distribution
|
||||
- ✅ **CONTRIBUTING.md** - Detailed contributor guidelines and development setup
|
||||
- ✅ **.gitignore** - Comprehensive ignore rules for Python, databases, logs, IDE files
|
||||
- ✅ **requirements.txt** - All Python dependencies listed
|
||||
|
||||
### **Source Code Organization**
|
||||
- ✅ **src/** directory - Clean separation of source code
|
||||
- ✅ **scripts/** directory - Utility scripts and system files
|
||||
- ✅ **docs/** directory - Comprehensive documentation
|
||||
- ✅ **grafana/** directory - Visualization configuration
|
||||
|
||||
### **Documentation Quality**
|
||||
- ✅ **Installation guides** - Multiple deployment options
|
||||
- ✅ **Configuration examples** - All database types covered
|
||||
- ✅ **Troubleshooting guides** - Common issues and solutions
|
||||
- ✅ **Migration guides** - Updating existing systems
|
||||
- ✅ **API references** - External data sources documented
|
||||
|
||||
### **Production Readiness**
|
||||
- ✅ **Docker support** - Containerization ready
|
||||
- ✅ **Systemd service** - Linux service configuration
|
||||
- ✅ **Multi-database support** - 5 different database options
|
||||
- ✅ **Geolocation support** - Grafana geomap integration
|
||||
- ✅ **Migration scripts** - Safe database updates
|
||||
|
||||
## 🌟 **Key Features for GitHub**
|
||||
|
||||
### **Real-time Monitoring**
|
||||
- 16 water stations across Thailand
|
||||
- 15-minute data collection frequency
|
||||
- Automatic gap filling and data validation
|
||||
- Multi-database backend support
|
||||
|
||||
### **Visualization Ready**
|
||||
- Pre-built Grafana dashboards
|
||||
- Geomap integration with coordinates
|
||||
- Real-time alerts and notifications
|
||||
- Historical trend analysis
|
||||
|
||||
### **Production Deployment**
|
||||
- Docker containerization
|
||||
- VictoriaMetrics high-performance backend
|
||||
- HTTPS and security configuration
|
||||
- Comprehensive logging and monitoring
|
||||
|
||||
### **Developer Friendly**
|
||||
- Clean, modular code structure
|
||||
- Comprehensive documentation
|
||||
- Multiple database adapters
|
||||
- Easy local development setup
|
||||
|
||||
## 📊 **Project Statistics**
|
||||
|
||||
### **Code Metrics**
|
||||
- **Python Files**: 4 main source files
|
||||
- **Documentation**: 10+ comprehensive guides
|
||||
- **Database Support**: 5 different backends
|
||||
- **Monitoring Stations**: 16 across Thailand
|
||||
- **Data Points**: ~300 every 15 minutes
|
||||
|
||||
### **Documentation Coverage**
|
||||
- **Installation**: Complete setup guides for all platforms
|
||||
- **Configuration**: All database types documented
|
||||
- **Deployment**: Docker, systemd, and manual options
|
||||
- **Troubleshooting**: Common issues and solutions
|
||||
- **Migration**: Safe upgrade procedures
|
||||
|
||||
### **Features Implemented**
|
||||
- ✅ Real-time data collection
|
||||
- ✅ Multi-database support
|
||||
- ✅ Geolocation integration
|
||||
- ✅ Gap filling and data validation
|
||||
- ✅ Grafana visualization
|
||||
- ✅ Docker deployment
|
||||
- ✅ Production monitoring
|
||||
- ✅ Migration tools
|
||||
|
||||
## 🚀 **Ready for GitHub Publication**
|
||||
|
||||
### **Repository Setup**
|
||||
1. **Create GitHub repository** - "thailand-water-monitor"
|
||||
2. **Upload all files** - Complete project structure
|
||||
3. **Configure repository settings**:
|
||||
- Add description: "Real-time water level monitoring for Thailand's RID stations"
|
||||
- Add topics: `water-monitoring`, `thailand`, `grafana`, `timeseries`, `python`
|
||||
- Enable Issues and Discussions
|
||||
- Set up GitHub Pages for documentation
|
||||
|
||||
### **Initial Release**
|
||||
- **Version**: v1.0.0
|
||||
- **Release Notes**: Complete feature set with multi-database support
|
||||
- **Assets**: Include sample configuration files
|
||||
- **Documentation**: Link to comprehensive guides
|
||||
|
||||
### **Community Features**
|
||||
- **Issues Template**: Bug reports and feature requests
|
||||
- **Pull Request Template**: Contribution guidelines
|
||||
- **Discussions**: Community support and questions
|
||||
- **Wiki**: Extended documentation and tutorials
|
||||
|
||||
## 🎯 **Post-Publication Tasks**
|
||||
|
||||
### **Community Building**
|
||||
- Create detailed issue templates
|
||||
- Set up GitHub Actions for CI/CD
|
||||
- Add code quality badges
|
||||
- Create project roadmap
|
||||
|
||||
### **Documentation Enhancement**
|
||||
- Add video tutorials
|
||||
- Create API documentation
|
||||
- Add performance benchmarks
|
||||
- Create deployment examples
|
||||
|
||||
### **Feature Development**
|
||||
- Mobile app integration
|
||||
- Additional database backends
|
||||
- Advanced alerting system
|
||||
- Predictive analytics
|
||||
|
||||
## 📞 **Support Channels**
|
||||
|
||||
- **GitHub Issues**: Bug reports and feature requests
|
||||
- **GitHub Discussions**: Community support and questions
|
||||
- **Documentation**: Comprehensive guides in docs/ directory
|
||||
- **Examples**: Working configurations and deployments
|
||||
|
||||
## 🏆 **Project Highlights**
|
||||
|
||||
### **Technical Excellence**
|
||||
- Clean, modular architecture
|
||||
- Comprehensive error handling
|
||||
- Production-ready deployment
|
||||
- Multi-database abstraction
|
||||
|
||||
### **Documentation Quality**
|
||||
- Step-by-step installation guides
|
||||
- Troubleshooting for common issues
|
||||
- Migration procedures for updates
|
||||
- API and configuration references
|
||||
|
||||
### **Community Ready**
|
||||
- Open source MIT license
|
||||
- Contributor guidelines
|
||||
- Development setup instructions
|
||||
- Code quality standards
|
||||
|
||||
---
|
||||
|
||||
**The Thailand Water Level Monitor project is now fully prepared for GitHub publication with a professional structure, comprehensive documentation, and production-ready features.** 🌊
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Northern Thailand Ping River Monitor
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
146
Makefile
Normal file
146
Makefile
Normal file
@@ -0,0 +1,146 @@
|
||||
# Northern Thailand Ping River Monitor - Makefile
|
||||
|
||||
.PHONY: help install install-dev test test-coverage lint format clean run run-api docker-build docker-run docs
|
||||
|
||||
# Default target
|
||||
help:
|
||||
@echo "Northern Thailand Ping River Monitor - Available Commands:"
|
||||
@echo ""
|
||||
@echo "Setup:"
|
||||
@echo " install Install production dependencies"
|
||||
@echo " install-dev Install development dependencies"
|
||||
@echo ""
|
||||
@echo "Development:"
|
||||
@echo " test Run all tests"
|
||||
@echo " test-cov Run tests with coverage report"
|
||||
@echo " lint Run code linting"
|
||||
@echo " format Format code with black and isort"
|
||||
@echo " clean Clean up temporary files"
|
||||
@echo ""
|
||||
@echo "Running:"
|
||||
@echo " run Run the monitor in continuous mode"
|
||||
@echo " run-api Run the web API server"
|
||||
@echo " run-test Run a single test cycle"
|
||||
@echo ""
|
||||
@echo "Docker:"
|
||||
@echo " docker-build Build Docker image"
|
||||
@echo " docker-run Run with Docker Compose"
|
||||
@echo " docker-stop Stop Docker services"
|
||||
@echo ""
|
||||
@echo "Documentation:"
|
||||
@echo " docs Generate documentation"
|
||||
|
||||
# Installation
|
||||
install:
|
||||
pip install -r requirements.txt
|
||||
|
||||
install-dev:
|
||||
pip install -r requirements-dev.txt
|
||||
pre-commit install
|
||||
|
||||
# Testing
|
||||
test:
|
||||
python test_integration.py
|
||||
python test_station_management.py
|
||||
|
||||
test-cov:
|
||||
pytest --cov=src --cov-report=html --cov-report=term
|
||||
|
||||
# Code quality
|
||||
lint:
|
||||
flake8 src/ --max-line-length=100
|
||||
mypy src/
|
||||
|
||||
format:
|
||||
black src/ *.py
|
||||
isort src/ *.py
|
||||
|
||||
# Cleanup
|
||||
clean:
|
||||
find . -type f -name "*.pyc" -delete
|
||||
find . -type d -name "__pycache__" -delete
|
||||
find . -type f -name "*.log" -delete
|
||||
rm -rf .pytest_cache/
|
||||
rm -rf .mypy_cache/
|
||||
rm -rf htmlcov/
|
||||
rm -rf dist/
|
||||
rm -rf build/
|
||||
rm -rf *.egg-info/
|
||||
|
||||
# Running
|
||||
run:
|
||||
python run.py
|
||||
|
||||
run-api:
|
||||
python run.py --web-api
|
||||
|
||||
run-test:
|
||||
python run.py --test
|
||||
|
||||
run-status:
|
||||
python run.py --status
|
||||
|
||||
# Docker
|
||||
docker-build:
|
||||
docker build -t ping-river-monitor .
|
||||
|
||||
docker-run:
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
docker-stop:
|
||||
docker-compose -f docker-compose.victoriametrics.yml down
|
||||
|
||||
docker-logs:
|
||||
docker-compose -f docker-compose.victoriametrics.yml logs -f
|
||||
|
||||
# Documentation
|
||||
docs:
|
||||
cd docs && make html
|
||||
|
||||
# Database management
|
||||
db-migrate:
|
||||
python scripts/migrate_geolocation.py
|
||||
|
||||
# Monitoring
|
||||
health-check:
|
||||
curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
metrics:
|
||||
curl -s http://localhost:8000/metrics | jq .
|
||||
|
||||
# Development helpers
|
||||
dev-setup: install-dev
|
||||
cp .env.example .env
|
||||
@echo "Development environment set up!"
|
||||
@echo "Edit .env file with your configuration"
|
||||
|
||||
# Production deployment
|
||||
deploy-check:
|
||||
python run.py --test
|
||||
@echo "Deployment check passed!"
|
||||
|
||||
# Git helpers
|
||||
git-setup:
|
||||
git remote add origin https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor.git
|
||||
@echo "Git remote configured!"
|
||||
|
||||
# Quick start
|
||||
quick-start: install dev-setup run-test
|
||||
@echo "Quick start completed!"
|
||||
@echo "Run 'make run-api' to start the web interface"
|
||||
|
||||
# Gitea Actions
|
||||
validate-workflows:
|
||||
@echo "Validating Gitea Actions workflows..."
|
||||
@for file in .gitea/workflows/*.yml; do \
|
||||
echo "Checking $$file..."; \
|
||||
python -c "import yaml; yaml.safe_load(open('$$file', encoding='utf-8'))" || exit 1; \
|
||||
done
|
||||
@echo "✅ All workflows are valid"
|
||||
|
||||
workflow-test:
|
||||
@echo "Testing workflow components locally..."
|
||||
make test
|
||||
make lint
|
||||
make format
|
||||
@echo "✅ Workflow test completed"
|
491
README.md
Normal file
491
README.md
Normal file
@@ -0,0 +1,491 @@
|
||||
# Northern Thailand Ping River Monitor 🏔️
|
||||
|
||||
A comprehensive real-time water level monitoring system for the Ping River Basin in Northern Thailand, covering Royal Irrigation Department (RID) stations from Chiang Dao to Nakhon Sawan with advanced data collection, storage, and visualization capabilities.
|
||||
|
||||
[](https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/actions) [](https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/actions) [](https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/actions) [](https://python.org) [](https://fastapi.tiangolo.com) [](https://docker.com) [](LICENSE) [](https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/releases)
|
||||
|
||||
## 🌟 Features
|
||||
|
||||
### 📊 **Real-time Data Collection**
|
||||
- **16 Monitoring Stations** across Thailand
|
||||
- **15-minute Collection Frequency** with intelligent scheduling
|
||||
- **Automatic Gap Filling** for missing historical data
|
||||
- **Data Validation** and error recovery mechanisms
|
||||
- **Rate Limiting** to prevent API abuse
|
||||
|
||||
### 🌐 **Web API Interface (NEW!)**
|
||||
- **FastAPI-powered REST API** with interactive documentation
|
||||
- **Station Management** - Add, update, and remove monitoring stations
|
||||
- **Real-time health monitoring** and system status
|
||||
- **Manual data collection triggers** via web interface
|
||||
- **Comprehensive metrics** and performance monitoring
|
||||
- **CORS support** for web applications
|
||||
|
||||
### 🗄️ **Multi-Database Support**
|
||||
- **VictoriaMetrics** (Recommended) - High-performance time-series
|
||||
- **InfluxDB** - Purpose-built time-series database
|
||||
- **PostgreSQL + TimescaleDB** - Relational with time-series optimization
|
||||
- **MySQL** - Traditional relational database
|
||||
- **SQLite** - Local development and testing
|
||||
|
||||
### 🗺️ **Geolocation Support**
|
||||
- **Grafana Geomap** integration ready
|
||||
- **GPS coordinates** and geohash support
|
||||
- **Interactive mapping** of water stations
|
||||
|
||||
### 📈 **Visualization & Monitoring**
|
||||
- **Pre-built Grafana dashboards**
|
||||
- **Real-time alerts** and notifications
|
||||
- **Historical trend analysis**
|
||||
- **Built-in metrics collection** (counters, gauges, histograms)
|
||||
- **Health checks** for database, API, and system resources
|
||||
|
||||
### 🚀 **Production Ready**
|
||||
- **Docker containerization** with multi-service support
|
||||
- **Systemd service** configuration
|
||||
- **HTTPS support** with SSL certificates
|
||||
- **Comprehensive logging** with rotation and colored output
|
||||
- **Type safety** with Pydantic models and type hints
|
||||
- **Custom exception handling** for better error management
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.9 or higher
|
||||
- Internet connection for data fetching
|
||||
- Database server (optional - SQLite works out of the box)
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor.git
|
||||
cd Northern-Thailand-Ping-River-Monitor
|
||||
|
||||
# Quick setup with Make
|
||||
make dev-setup
|
||||
|
||||
# Or manual setup:
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Test run with SQLite (default)
|
||||
make run-test
|
||||
# or: python run.py --test
|
||||
|
||||
# Run continuous monitoring
|
||||
make run
|
||||
# or: python run.py
|
||||
|
||||
# Start web API server (NEW!)
|
||||
make run-api
|
||||
# or: python run.py --web-api
|
||||
|
||||
# Run all tests
|
||||
make test
|
||||
|
||||
# Demo different databases
|
||||
python src/demo_databases.py
|
||||
```
|
||||
|
||||
### 🌐 Web API Interface (NEW!)
|
||||
|
||||
The system now includes a comprehensive FastAPI web interface:
|
||||
|
||||
```bash
|
||||
# Start the web API
|
||||
python run.py --web-api
|
||||
|
||||
# Access the API at:
|
||||
# - Dashboard: http://localhost:8000
|
||||
# - Interactive docs: http://localhost:8000/docs
|
||||
# - Health check: http://localhost:8000/health
|
||||
# - Latest data: http://localhost:8000/measurements/latest
|
||||
```
|
||||
|
||||
**Key API Endpoints:**
|
||||
- `GET /` - Web dashboard
|
||||
- `GET /health` - System health status
|
||||
- `GET /metrics` - Application metrics
|
||||
- `GET /stations` - List all monitoring stations
|
||||
- `POST /stations` - Add new monitoring station
|
||||
- `PUT /stations/{id}` - Update station information
|
||||
- `DELETE /stations/{id}` - Remove monitoring station
|
||||
- `GET /measurements/latest` - Latest measurements
|
||||
- `GET /measurements/station/{code}` - Station-specific data
|
||||
- `POST /scrape/trigger` - Trigger manual data collection
|
||||
|
||||
## 📊 Station Information
|
||||
|
||||
The system monitors **16 water stations** along the Ping River Basin in Northern Thailand:
|
||||
|
||||
| Station | Thai Name | English Name | Location |
|
||||
|---------|-----------|--------------|----------|
|
||||
| P.1 | สะพานนวรัฐ | Nawarat Bridge | Nakhon Sawan |
|
||||
| P.5 | สะพานท่านาง | Tha Nang Bridge | - |
|
||||
| P.20 | บ้านเชียงดาว | Ban Chiang Dao | Chiang Mai |
|
||||
| P.21 | บ้านริมใต้ | Ban Rim Tai | - |
|
||||
| P.4A | บ้านแม่แตง | Ban Mae Taeng | Chiang Mai |
|
||||
| P.67 | บ้านแม่แต | Ban Tae | - |
|
||||
| P.75 | บ้านช่อแล | Ban Chai Lat | - |
|
||||
| P.76 | บ้านแม่อีไฮ | Banb Mae I Hai | - |
|
||||
| P.77 | บ้านสบแม่สะป๊วด | Baan Sop Mae Sapuord | - |
|
||||
| P.81 | บ้านโป่ง | Ban Pong | - |
|
||||
| P.82 | บ้านสบวิน | Ban Sob win | - |
|
||||
| P.84 | บ้านพันตน | Ban Panton | - |
|
||||
| P.85 | บ้านหล่ายแก้ว | Baan Lai Kaew | - |
|
||||
| P.87 | บ้านป่าซาง | Ban Pa Sang | - |
|
||||
| P.92 | บ้านเมืองกึ๊ด | Ban Muang Aut | - |
|
||||
| P.103 | สะพานวงแหวนรอบ 3 | Ring Bridge 3 | Bangkok |
|
||||
|
||||
### Data Metrics
|
||||
- **Water Level**: Measured in meters (m)
|
||||
- **Discharge**: Flow rate in cubic meters per second (cms)
|
||||
- **Discharge Percentage**: Relative to station capacity
|
||||
- **Timestamp**: Thai time (UTC+7) with Buddhist calendar support
|
||||
|
||||
## 🗄️ Database Configuration
|
||||
|
||||
### VictoriaMetrics (Recommended)
|
||||
|
||||
**High-performance time-series database with excellent compression and query speed.**
|
||||
|
||||
```bash
|
||||
# Environment variables
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=localhost
|
||||
export VM_PORT=8428
|
||||
|
||||
# Quick start with Docker
|
||||
docker run -d \
|
||||
--name victoriametrics \
|
||||
-p 8428:8428 \
|
||||
-v victoria-metrics-data:/victoria-metrics-data \
|
||||
victoriametrics/victoria-metrics:latest \
|
||||
--storageDataPath=/victoria-metrics-data \
|
||||
--retentionPeriod=2y \
|
||||
--httpListenAddr=:8428
|
||||
```
|
||||
|
||||
### Complete Stack with Grafana
|
||||
|
||||
```bash
|
||||
# Start the complete monitoring stack
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
# Access Grafana at http://localhost:3000
|
||||
# Username: admin, Password: admin_password
|
||||
```
|
||||
|
||||
### Other Database Options
|
||||
|
||||
<details>
|
||||
<summary>InfluxDB Configuration</summary>
|
||||
|
||||
```bash
|
||||
export DB_TYPE=influxdb
|
||||
export INFLUX_HOST=localhost
|
||||
export INFLUX_PORT=8086
|
||||
export INFLUX_DATABASE=water_monitoring
|
||||
export INFLUX_USERNAME=water_user
|
||||
export INFLUX_PASSWORD=your_password
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>PostgreSQL Configuration</summary>
|
||||
|
||||
```bash
|
||||
export DB_TYPE=postgresql
|
||||
export POSTGRES_CONNECTION_STRING=postgresql://user:password@localhost:5432/water_monitoring
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>MySQL Configuration</summary>
|
||||
|
||||
```bash
|
||||
export DB_TYPE=mysql
|
||||
export MYSQL_CONNECTION_STRING=mysql://user:password@localhost:3306/water_monitoring
|
||||
```
|
||||
</details>
|
||||
|
||||
## 📈 Grafana Dashboards
|
||||
|
||||
### Pre-built Dashboard Features
|
||||
- **Real-time water levels** across all stations
|
||||
- **Historical trends** and patterns
|
||||
- **Discharge monitoring** with percentage indicators
|
||||
- **Station status** and health monitoring
|
||||
- **Geomap visualization** of station locations
|
||||
- **Alert thresholds** for critical water levels
|
||||
|
||||
### Sample Queries
|
||||
|
||||
**VictoriaMetrics/Prometheus:**
|
||||
```promql
|
||||
# Current water levels
|
||||
water_level
|
||||
|
||||
# High discharge alerts
|
||||
water_discharge_percent > 80
|
||||
|
||||
# Station-specific data
|
||||
water_level{station_code="P.1"}
|
||||
```
|
||||
|
||||
**SQL Databases:**
|
||||
```sql
|
||||
-- Latest readings from all stations
|
||||
SELECT s.station_code, s.english_name, m.water_level, m.discharge
|
||||
FROM stations s
|
||||
JOIN water_measurements m ON s.id = m.station_id
|
||||
WHERE m.timestamp = (SELECT MAX(timestamp) FROM water_measurements WHERE station_id = s.id);
|
||||
```
|
||||
|
||||
## 🚀 Production Deployment
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
```bash
|
||||
# Build the image
|
||||
docker build -t thailand-water-monitor .
|
||||
|
||||
# Run with environment variables
|
||||
docker run -d \
|
||||
--name water-monitor \
|
||||
-e DB_TYPE=victoriametrics \
|
||||
-e VM_HOST=victoriametrics \
|
||||
thailand-water-monitor
|
||||
```
|
||||
|
||||
### Systemd Service (Linux)
|
||||
|
||||
```bash
|
||||
# Copy service file
|
||||
sudo cp scripts/water-monitor.service /etc/systemd/system/
|
||||
|
||||
# Enable and start
|
||||
sudo systemctl enable water-monitor.service
|
||||
sudo systemctl start water-monitor.service
|
||||
```
|
||||
|
||||
### Migration for Existing Systems
|
||||
|
||||
If you have an existing installation, use the migration script to add geolocation support:
|
||||
|
||||
```bash
|
||||
# Stop the service
|
||||
sudo systemctl stop water-monitor
|
||||
|
||||
# Run migration
|
||||
python scripts/migrate_geolocation.py
|
||||
|
||||
# Restart the service
|
||||
sudo systemctl start water-monitor
|
||||
```
|
||||
|
||||
## 🔧 Command Line Tools
|
||||
|
||||
### Main Application
|
||||
```bash
|
||||
python src/water_scraper_v3.py # Run continuous monitoring
|
||||
python src/water_scraper_v3.py --test # Single test cycle
|
||||
python src/water_scraper_v3.py --help # Show help
|
||||
```
|
||||
|
||||
### Data Management
|
||||
```bash
|
||||
python src/water_scraper_v3.py --check-gaps 7 # Check for missing data (7 days)
|
||||
python src/water_scraper_v3.py --fill-gaps 7 # Fill missing data gaps
|
||||
python src/water_scraper_v3.py --update-data 2 # Update existing data (2 days)
|
||||
```
|
||||
|
||||
### Database Testing
|
||||
```bash
|
||||
python src/demo_databases.py # SQLite demo
|
||||
python src/demo_databases.py victoriametrics # VictoriaMetrics demo
|
||||
python src/demo_databases.py all # Test all databases
|
||||
```
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Core Documentation
|
||||
- **[Installation Guide](docs/DATABASE_DEPLOYMENT_GUIDE.md)** - Complete setup instructions
|
||||
- **[Scheduler Guide](docs/ENHANCED_SCHEDULER_GUIDE.md)** - 15-minute scheduling system
|
||||
- **[Geolocation Guide](docs/GEOLOCATION_GUIDE.md)** - Grafana geomap integration
|
||||
- **[Gap Filling Guide](docs/GAP_FILLING_GUIDE.md)** - Data integrity management
|
||||
|
||||
### Deployment Guides
|
||||
- **[VictoriaMetrics Setup](docs/VICTORIAMETRICS_SETUP.md)** - High-performance deployment
|
||||
- **[HTTPS Configuration](docs/HTTPS_CONFIGURATION.md)** - Secure deployment
|
||||
- **[Debian Troubleshooting](docs/DEBIAN_TROUBLESHOOTING.md)** - Linux deployment issues
|
||||
|
||||
### References
|
||||
- **[Notable Documents](docs/references/NOTABLE_DOCUMENTS.md)** - Official Thai government resources
|
||||
- **[Migration Guide](docs/MIGRATION_QUICKSTART.md)** - Updating existing systems
|
||||
|
||||
## 🔍 Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Database Connection Errors:**
|
||||
```bash
|
||||
# Check database status
|
||||
python src/demo_databases.py
|
||||
|
||||
# Test specific database
|
||||
python src/demo_databases.py victoriametrics
|
||||
```
|
||||
|
||||
**Missing Data:**
|
||||
```bash
|
||||
# Check for gaps
|
||||
python src/water_scraper_v3.py --check-gaps 7
|
||||
|
||||
# Fill missing data
|
||||
python src/water_scraper_v3.py --fill-gaps 7
|
||||
```
|
||||
|
||||
**Service Issues:**
|
||||
```bash
|
||||
# Check service status
|
||||
sudo systemctl status water-monitor
|
||||
|
||||
# View logs
|
||||
sudo journalctl -u water-monitor -f
|
||||
```
|
||||
|
||||
### Health Checks
|
||||
|
||||
```bash
|
||||
# VictoriaMetrics health
|
||||
curl http://localhost:8428/health
|
||||
|
||||
# Check latest data
|
||||
curl "http://localhost:8428/api/v1/query?query=water_level"
|
||||
|
||||
# Application logs
|
||||
tail -f water_monitor.log
|
||||
```
|
||||
|
||||
## 🌐 API Integration
|
||||
|
||||
### VictoriaMetrics API Examples
|
||||
|
||||
```bash
|
||||
# Query current water levels
|
||||
curl "http://localhost:8428/api/v1/query?query=water_level"
|
||||
|
||||
# Query discharge rates for last hour
|
||||
curl "http://localhost:8428/api/v1/query_range?query=water_discharge&start=$(date -d '1 hour ago' +%s)&end=$(date +%s)&step=300"
|
||||
|
||||
# Query specific station
|
||||
curl "http://localhost:8428/api/v1/query?query=water_level{station_code=\"P.1\"}"
|
||||
|
||||
# High discharge alerts
|
||||
curl "http://localhost:8428/api/v1/query?query=water_discharge_percent>80"
|
||||
```
|
||||
|
||||
## 📊 Performance
|
||||
|
||||
### System Requirements
|
||||
- **CPU**: 1-2 cores (minimal load)
|
||||
- **RAM**: 512MB - 2GB (depending on database)
|
||||
- **Storage**: 1GB+ (for historical data)
|
||||
- **Network**: Stable internet connection
|
||||
|
||||
### Performance Metrics
|
||||
- **Data Collection**: ~300 data points every 15 minutes
|
||||
- **Database Write Speed**: 1000+ points/second (VictoriaMetrics)
|
||||
- **Query Response**: <100ms for recent data
|
||||
- **Storage Efficiency**: 70x compression vs. raw data
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
Contributions are welcome! Please:
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Add tests if applicable
|
||||
5. Submit a pull request
|
||||
|
||||
### Development Setup
|
||||
|
||||
```bash
|
||||
# Clone your fork
|
||||
git clone https://github.com/your-username/thailand-water-monitor.git
|
||||
cd thailand-water-monitor
|
||||
|
||||
# Install development dependencies
|
||||
pip install -r requirements.txt
|
||||
pip install pytest black flake8
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
|
||||
# Format code
|
||||
black src/
|
||||
```
|
||||
|
||||
## 📄 License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||
|
||||
## 🙏 Acknowledgments
|
||||
|
||||
- **Royal Irrigation Department (RID)** of Thailand for providing the data API
|
||||
- **VictoriaMetrics** team for the excellent time-series database
|
||||
- **Grafana** team for the visualization platform
|
||||
- **Python community** for the amazing libraries and tools
|
||||
|
||||
## 📞 Support
|
||||
|
||||
- **Issues**: [GitHub Issues](https://github.com/your-username/thailand-water-monitor/issues)
|
||||
- **Discussions**: [GitHub Discussions](https://github.com/your-username/thailand-water-monitor/discussions)
|
||||
- **Documentation**: [Project Wiki](https://github.com/your-username/thailand-water-monitor/wiki)
|
||||
|
||||
---
|
||||
|
||||
## 📁 Project Structure
|
||||
|
||||
```
|
||||
Northern-Thailand-Ping-River-Monitor/
|
||||
├── src/ # Main application code
|
||||
├── tests/ # Test suite
|
||||
├── docs/ # Documentation
|
||||
├── grafana/ # Grafana dashboards
|
||||
├── scripts/ # Utility scripts
|
||||
├── docker-compose.yml # Docker deployment
|
||||
├── Makefile # Development tasks
|
||||
└── requirements.txt # Dependencies
|
||||
```
|
||||
|
||||
See [docs/PROJECT_STRUCTURE.md](docs/PROJECT_STRUCTURE.md) for detailed architecture information.
|
||||
|
||||
## 🔄 CI/CD & Automation
|
||||
|
||||
The project includes comprehensive Gitea Actions workflows:
|
||||
|
||||
- **🧪 CI/CD Pipeline** - Automated testing, building, and deployment
|
||||
- **🔒 Security Scanning** - Daily vulnerability and dependency checks
|
||||
- **📚 Documentation** - Automated API docs and validation
|
||||
- **🚀 Release Management** - Automated releases with multi-arch Docker builds
|
||||
|
||||
See [docs/GITEA_WORKFLOWS.md](docs/GITEA_WORKFLOWS.md) for detailed workflow documentation.
|
||||
|
||||
## 🔗 Repository
|
||||
|
||||
- **Main Repository**: https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor
|
||||
- **Issues**: https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/issues
|
||||
- **Actions**: https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/actions
|
||||
- **Documentation**: [docs/](docs/)
|
||||
|
||||
**Made with ❤️ for water resource monitoring in Northern Thailand's Ping River Basin**
|
106
docker-compose.victoriametrics.yml
Normal file
106
docker-compose.victoriametrics.yml
Normal file
@@ -0,0 +1,106 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
container_name: ping_river_victoriametrics
|
||||
ports:
|
||||
- "8428:8428"
|
||||
volumes:
|
||||
- vm_data:/victoria-metrics-data
|
||||
- ./vm-config:/etc/victoriametrics
|
||||
command:
|
||||
- '--storageDataPath=/victoria-metrics-data'
|
||||
- '--retentionPeriod=2y'
|
||||
- '--httpListenAddr=:8428'
|
||||
- '--maxConcurrentInserts=32'
|
||||
- '--search.maxQueryDuration=60s'
|
||||
- '--search.maxConcurrentRequests=16'
|
||||
- '--dedup.minScrapeInterval=30s'
|
||||
- '--memory.allowedPercent=80'
|
||||
- '--loggerLevel=INFO'
|
||||
- '--loggerFormat=json'
|
||||
- '--search.maxSeries=1000000'
|
||||
- '--search.maxPointsPerTimeseries=100000'
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- TZ=Asia/Bangkok
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8428/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
labels:
|
||||
- "com.victoriametrics.service=ping-river-monitoring"
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
container_name: ping_river_grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin_password
|
||||
- GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-worldmap-panel
|
||||
- GF_USERS_ALLOW_SIGN_UP=false
|
||||
- GF_SECURITY_ALLOW_EMBEDDING=true
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=false
|
||||
- TZ=Asia/Bangkok
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
victoriametrics:
|
||||
condition: service_healthy
|
||||
labels:
|
||||
- "com.victoriametrics.service=ping-river-monitoring"
|
||||
|
||||
ping-river-monitor:
|
||||
build: .
|
||||
container_name: ping_river_monitor
|
||||
environment:
|
||||
- DB_TYPE=victoriametrics
|
||||
- VM_HOST=victoriametrics
|
||||
- VM_PORT=8428
|
||||
- TZ=Asia/Bangkok
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
victoriametrics:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./logs:/app/logs
|
||||
labels:
|
||||
- "com.victoriametrics.service=ping-river-monitoring"
|
||||
|
||||
ping-river-api:
|
||||
build: .
|
||||
container_name: ping_river_api
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- DB_TYPE=victoriametrics
|
||||
- VM_HOST=victoriametrics
|
||||
- VM_PORT=8428
|
||||
- TZ=Asia/Bangkok
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
victoriametrics:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./logs:/app/logs
|
||||
command: ["python", "-m", "src.main", "--web-api"]
|
||||
labels:
|
||||
- "com.victoriametrics.service=ping-river-monitoring"
|
||||
|
||||
volumes:
|
||||
vm_data:
|
||||
driver: local
|
||||
grafana_data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: ping-river-monitoring
|
||||
driver: bridge
|
447
docs/DATABASE_DEPLOYMENT_GUIDE.md
Normal file
447
docs/DATABASE_DEPLOYMENT_GUIDE.md
Normal file
@@ -0,0 +1,447 @@
|
||||
# Database Deployment Guide for Thailand Water Monitor
|
||||
|
||||
This guide covers deployment options for storing water monitoring data in production environments.
|
||||
|
||||
## 🏆 Recommendation Summary
|
||||
|
||||
| Database | Best For | Performance | Complexity | Cost |
|
||||
|----------|----------|-------------|------------|------|
|
||||
| **InfluxDB** | Time-series data, dashboards | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ | ⭐⭐⭐ |
|
||||
| **VictoriaMetrics** | High-performance metrics | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
|
||||
| **PostgreSQL** | Complex queries, reliability | ⭐⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐⭐ |
|
||||
| **MySQL** | Familiar, existing infrastructure | ⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐⭐ |
|
||||
|
||||
## 1. InfluxDB Deployment (Recommended for Time-Series)
|
||||
|
||||
### Why InfluxDB?
|
||||
- **Purpose-built** for time-series data
|
||||
- **Excellent compression** (10:1 typical ratio)
|
||||
- **Built-in retention policies** and downsampling
|
||||
- **Great Grafana integration** for dashboards
|
||||
- **High write throughput** (100k+ points/second)
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
influxdb:
|
||||
image: influxdb:1.8
|
||||
container_name: water_influxdb
|
||||
ports:
|
||||
- "8086:8086"
|
||||
volumes:
|
||||
- influxdb_data:/var/lib/influxdb
|
||||
- ./influxdb.conf:/etc/influxdb/influxdb.conf:ro
|
||||
environment:
|
||||
- INFLUXDB_DB=water_monitoring
|
||||
- INFLUXDB_ADMIN_USER=admin
|
||||
- INFLUXDB_ADMIN_PASSWORD=your_secure_password
|
||||
- INFLUXDB_USER=water_user
|
||||
- INFLUXDB_USER_PASSWORD=water_password
|
||||
restart: unless-stopped
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
container_name: water_grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin_password
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
influxdb_data:
|
||||
grafana_data:
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
# .env file
|
||||
DB_TYPE=influxdb
|
||||
INFLUX_HOST=localhost
|
||||
INFLUX_PORT=8086
|
||||
INFLUX_DATABASE=water_monitoring
|
||||
INFLUX_USERNAME=water_user
|
||||
INFLUX_PASSWORD=water_password
|
||||
```
|
||||
|
||||
### InfluxDB Configuration
|
||||
```toml
|
||||
# influxdb.conf
|
||||
[meta]
|
||||
dir = "/var/lib/influxdb/meta"
|
||||
|
||||
[data]
|
||||
dir = "/var/lib/influxdb/data"
|
||||
wal-dir = "/var/lib/influxdb/wal"
|
||||
|
||||
# Optimize for time-series data
|
||||
cache-max-memory-size = "1g"
|
||||
cache-snapshot-memory-size = "25m"
|
||||
cache-snapshot-write-cold-duration = "10m"
|
||||
|
||||
# Retention and compression
|
||||
compact-full-write-cold-duration = "4h"
|
||||
max-series-per-database = 1000000
|
||||
max-values-per-tag = 100000
|
||||
|
||||
[coordinator]
|
||||
write-timeout = "10s"
|
||||
max-concurrent-queries = 0
|
||||
query-timeout = "0s"
|
||||
|
||||
[retention]
|
||||
enabled = true
|
||||
check-interval = "30m"
|
||||
|
||||
[http]
|
||||
enabled = true
|
||||
bind-address = ":8086"
|
||||
auth-enabled = true
|
||||
max-body-size = "25000000"
|
||||
max-concurrent-requests = 0
|
||||
max-enqueued-requests = 0
|
||||
```
|
||||
|
||||
### Production Setup Commands
|
||||
```bash
|
||||
# Start services
|
||||
docker-compose up -d
|
||||
|
||||
# Create retention policies
|
||||
docker exec -it water_influxdb influx -username admin -password your_secure_password -execute "
|
||||
CREATE RETENTION POLICY \"raw_data\" ON \"water_monitoring\" DURATION 90d REPLICATION 1 DEFAULT;
|
||||
CREATE RETENTION POLICY \"downsampled\" ON \"water_monitoring\" DURATION 730d REPLICATION 1;
|
||||
"
|
||||
|
||||
# Create continuous queries for downsampling
|
||||
docker exec -it water_influxdb influx -username admin -password your_secure_password -execute "
|
||||
CREATE CONTINUOUS QUERY \"downsample_hourly\" ON \"water_monitoring\"
|
||||
BEGIN
|
||||
SELECT mean(water_level) AS water_level, mean(discharge) AS discharge, mean(discharge_percent) AS discharge_percent
|
||||
INTO \"downsampled\".\"water_data_hourly\"
|
||||
FROM \"water_data\"
|
||||
GROUP BY time(1h), station_code, station_name_en, station_name_th
|
||||
END
|
||||
"
|
||||
```
|
||||
|
||||
## 2. VictoriaMetrics Deployment (High Performance)
|
||||
|
||||
### Why VictoriaMetrics?
|
||||
- **Extremely fast** and resource-efficient
|
||||
- **Better compression** than InfluxDB
|
||||
- **Prometheus-compatible** API
|
||||
- **Lower memory usage**
|
||||
- **Built-in clustering**
|
||||
|
||||
### Docker Deployment
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
container_name: water_victoriametrics
|
||||
ports:
|
||||
- "8428:8428"
|
||||
volumes:
|
||||
- vm_data:/victoria-metrics-data
|
||||
command:
|
||||
- '--storageDataPath=/victoria-metrics-data'
|
||||
- '--retentionPeriod=2y'
|
||||
- '--httpListenAddr=:8428'
|
||||
- '--maxConcurrentInserts=16'
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
vm_data:
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
# .env file
|
||||
DB_TYPE=victoriametrics
|
||||
VM_HOST=localhost
|
||||
VM_PORT=8428
|
||||
```
|
||||
|
||||
## 3. PostgreSQL Deployment (Relational + Time-Series)
|
||||
|
||||
### Why PostgreSQL?
|
||||
- **Mature and reliable**
|
||||
- **Excellent for complex queries**
|
||||
- **TimescaleDB extension** for time-series optimization
|
||||
- **Strong consistency guarantees**
|
||||
- **Rich ecosystem**
|
||||
|
||||
### Docker Deployment with TimescaleDB
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
postgres:
|
||||
image: timescale/timescaledb:latest-pg14
|
||||
container_name: water_postgres
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
|
||||
environment:
|
||||
- POSTGRES_DB=water_monitoring
|
||||
- POSTGRES_USER=water_user
|
||||
- POSTGRES_PASSWORD=secure_password
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
```
|
||||
|
||||
### Database Initialization
|
||||
```sql
|
||||
-- init.sql
|
||||
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
|
||||
|
||||
-- Create hypertable for time-series optimization
|
||||
CREATE TABLE water_measurements (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMPTZ NOT NULL,
|
||||
station_id INT NOT NULL,
|
||||
water_level NUMERIC(10,3),
|
||||
discharge NUMERIC(10,2),
|
||||
discharge_percent NUMERIC(5,2),
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Convert to hypertable (TimescaleDB)
|
||||
SELECT create_hypertable('water_measurements', 'timestamp', chunk_time_interval => INTERVAL '1 day');
|
||||
|
||||
-- Create indexes
|
||||
CREATE INDEX idx_water_measurements_station_time ON water_measurements (station_id, timestamp DESC);
|
||||
CREATE INDEX idx_water_measurements_timestamp ON water_measurements (timestamp DESC);
|
||||
|
||||
-- Create retention policy (keep raw data for 2 years)
|
||||
SELECT add_retention_policy('water_measurements', INTERVAL '2 years');
|
||||
|
||||
-- Create continuous aggregates for performance
|
||||
CREATE MATERIALIZED VIEW water_measurements_hourly
|
||||
WITH (timescaledb.continuous) AS
|
||||
SELECT
|
||||
time_bucket('1 hour', timestamp) AS bucket,
|
||||
station_id,
|
||||
AVG(water_level) as avg_water_level,
|
||||
MAX(water_level) as max_water_level,
|
||||
MIN(water_level) as min_water_level,
|
||||
AVG(discharge) as avg_discharge,
|
||||
MAX(discharge) as max_discharge,
|
||||
MIN(discharge) as min_discharge,
|
||||
AVG(discharge_percent) as avg_discharge_percent
|
||||
FROM water_measurements
|
||||
GROUP BY bucket, station_id;
|
||||
|
||||
-- Refresh policy for continuous aggregates
|
||||
SELECT add_continuous_aggregate_policy('water_measurements_hourly',
|
||||
start_offset => INTERVAL '1 day',
|
||||
end_offset => INTERVAL '1 hour',
|
||||
schedule_interval => INTERVAL '1 hour');
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
# .env file
|
||||
DB_TYPE=postgresql
|
||||
POSTGRES_CONNECTION_STRING=postgresql://water_user:secure_password@localhost:5432/water_monitoring
|
||||
```
|
||||
|
||||
## 4. MySQL Deployment (Traditional Relational)
|
||||
|
||||
### Docker Deployment
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
mysql:
|
||||
image: mysql:8.0
|
||||
container_name: water_mysql
|
||||
ports:
|
||||
- "3306:3306"
|
||||
volumes:
|
||||
- mysql_data:/var/lib/mysql
|
||||
- ./mysql.cnf:/etc/mysql/conf.d/mysql.cnf
|
||||
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
|
||||
environment:
|
||||
- MYSQL_ROOT_PASSWORD=root_password
|
||||
- MYSQL_DATABASE=water_monitoring
|
||||
- MYSQL_USER=water_user
|
||||
- MYSQL_PASSWORD=water_password
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
mysql_data:
|
||||
```
|
||||
|
||||
### MySQL Configuration
|
||||
```ini
|
||||
# mysql.cnf
|
||||
[mysqld]
|
||||
# Optimize for time-series data
|
||||
innodb_buffer_pool_size = 1G
|
||||
innodb_log_file_size = 256M
|
||||
innodb_flush_log_at_trx_commit = 2
|
||||
innodb_flush_method = O_DIRECT
|
||||
|
||||
# Partitioning support
|
||||
partition = ON
|
||||
|
||||
# Query cache
|
||||
query_cache_type = 1
|
||||
query_cache_size = 128M
|
||||
|
||||
# Connection settings
|
||||
max_connections = 200
|
||||
connect_timeout = 10
|
||||
wait_timeout = 600
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
# .env file
|
||||
DB_TYPE=mysql
|
||||
MYSQL_CONNECTION_STRING=mysql://water_user:water_password@localhost:3306/water_monitoring
|
||||
```
|
||||
|
||||
## 5. Installation and Dependencies
|
||||
|
||||
### Required Python Packages
|
||||
```bash
|
||||
# Base requirements
|
||||
pip install requests schedule
|
||||
|
||||
# Database-specific packages
|
||||
pip install influxdb # For InfluxDB
|
||||
pip install sqlalchemy pymysql # For MySQL
|
||||
pip install sqlalchemy psycopg2-binary # For PostgreSQL
|
||||
# VictoriaMetrics uses HTTP API (no extra packages needed)
|
||||
```
|
||||
|
||||
### Updated requirements.txt
|
||||
```txt
|
||||
requests>=2.28.0
|
||||
schedule>=1.2.0
|
||||
pandas>=1.5.0
|
||||
|
||||
# Database adapters (install as needed)
|
||||
influxdb>=5.3.1
|
||||
sqlalchemy>=1.4.0
|
||||
pymysql>=1.0.2
|
||||
psycopg2-binary>=2.9.0
|
||||
```
|
||||
|
||||
## 6. Production Deployment Examples
|
||||
|
||||
### Using InfluxDB (Recommended)
|
||||
```bash
|
||||
# Set environment variables
|
||||
export DB_TYPE=influxdb
|
||||
export INFLUX_HOST=your-influx-server.com
|
||||
export INFLUX_PORT=8086
|
||||
export INFLUX_DATABASE=water_monitoring
|
||||
export INFLUX_USERNAME=water_user
|
||||
export INFLUX_PASSWORD=your_secure_password
|
||||
|
||||
# Run the scraper
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### Using PostgreSQL with TimescaleDB
|
||||
```bash
|
||||
# Set environment variables
|
||||
export DB_TYPE=postgresql
|
||||
export POSTGRES_CONNECTION_STRING=postgresql://water_user:password@your-postgres-server.com:5432/water_monitoring
|
||||
|
||||
# Run the scraper
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### Using VictoriaMetrics
|
||||
```bash
|
||||
# Set environment variables
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=your-vm-server.com
|
||||
export VM_PORT=8428
|
||||
|
||||
# Run the scraper
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
## 7. Monitoring and Alerting
|
||||
|
||||
### Grafana Dashboard Setup
|
||||
1. **Add Data Source**: Configure your database as a Grafana data source
|
||||
2. **Import Dashboard**: Use pre-built water monitoring dashboards
|
||||
3. **Set Alerts**: Configure alerts for abnormal water levels or discharge rates
|
||||
|
||||
### Example Grafana Queries
|
||||
|
||||
#### InfluxDB Queries
|
||||
```sql
|
||||
-- Current water levels
|
||||
SELECT last("water_level") FROM "water_data" GROUP BY "station_code"
|
||||
|
||||
-- Discharge trends (last 24h)
|
||||
SELECT mean("discharge") FROM "water_data" WHERE time >= now() - 24h GROUP BY time(1h), "station_code"
|
||||
```
|
||||
|
||||
#### PostgreSQL/TimescaleDB Queries
|
||||
```sql
|
||||
-- Current water levels
|
||||
SELECT DISTINCT ON (station_id)
|
||||
station_id, water_level, discharge, timestamp
|
||||
FROM water_measurements
|
||||
ORDER BY station_id, timestamp DESC;
|
||||
|
||||
-- Hourly averages (last 24h)
|
||||
SELECT
|
||||
time_bucket('1 hour', timestamp) as hour,
|
||||
station_id,
|
||||
AVG(water_level) as avg_level,
|
||||
AVG(discharge) as avg_discharge
|
||||
FROM water_measurements
|
||||
WHERE timestamp >= NOW() - INTERVAL '24 hours'
|
||||
GROUP BY hour, station_id
|
||||
ORDER BY hour DESC;
|
||||
```
|
||||
|
||||
## 8. Performance Optimization Tips
|
||||
|
||||
### For All Databases
|
||||
- **Batch inserts**: Insert multiple measurements at once
|
||||
- **Connection pooling**: Reuse database connections
|
||||
- **Indexing**: Ensure proper indexes on timestamp and station_id
|
||||
- **Retention policies**: Automatically delete old data
|
||||
|
||||
### InfluxDB Specific
|
||||
- Use **tags** for metadata (station codes, names)
|
||||
- Use **fields** for numeric values (water levels, discharge)
|
||||
- Configure **retention policies** and **continuous queries**
|
||||
- Enable **compression** for long-term storage
|
||||
|
||||
### PostgreSQL/TimescaleDB Specific
|
||||
- Use **hypertables** for automatic partitioning
|
||||
- Create **continuous aggregates** for common queries
|
||||
- Configure **compression** for older chunks
|
||||
- Use **parallel queries** for large datasets
|
||||
|
||||
### VictoriaMetrics Specific
|
||||
- Use **labels** efficiently (similar to Prometheus)
|
||||
- Configure **retention periods** appropriately
|
||||
- Use **downsampling** for long-term storage
|
||||
- Enable **deduplication** if needed
|
||||
|
||||
This deployment guide provides production-ready configurations for all supported database backends. Choose the one that best fits your infrastructure and requirements.
|
329
docs/DEBIAN_TROUBLESHOOTING.md
Normal file
329
docs/DEBIAN_TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,329 @@
|
||||
# Debian/Linux Troubleshooting Guide
|
||||
|
||||
This guide addresses common issues when running the Thailand Water Monitor on Debian and other Linux distributions.
|
||||
|
||||
## Fixed Issues
|
||||
|
||||
### SQLAlchemy Connection Error (RESOLVED)
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
2025-07-24 19:48:31,920 - ERROR - Failed to connect to SQLITE: 'Connection' object has no attribute 'commit'
|
||||
2025-07-24 19:48:32,740 - ERROR - Error saving to SQLITE: 'Connection' object has no attribute 'commit'
|
||||
```
|
||||
|
||||
**Root Cause:**
|
||||
This error occurred due to incompatibility between the database adapter code and newer versions of SQLAlchemy. The code was calling `conn.commit()` on a connection object that doesn't have a `commit()` method in newer SQLAlchemy versions.
|
||||
|
||||
**Solution Applied:**
|
||||
Changed from `engine.connect()` to `engine.begin()` context manager, which automatically handles transactions:
|
||||
|
||||
```python
|
||||
# OLD (problematic) code:
|
||||
with self.engine.connect() as conn:
|
||||
conn.execute(text(sql))
|
||||
conn.commit() # This fails in newer SQLAlchemy
|
||||
|
||||
# NEW (fixed) code:
|
||||
with self.engine.begin() as conn:
|
||||
conn.execute(text(sql))
|
||||
# Transaction automatically committed when context exits
|
||||
```
|
||||
|
||||
**Status:** ✅ **FIXED** - The issue has been resolved in the current version.
|
||||
|
||||
## Installation on Debian/Ubuntu
|
||||
|
||||
### System Requirements
|
||||
|
||||
```bash
|
||||
# Update package list
|
||||
sudo apt update
|
||||
|
||||
# Install Python and pip
|
||||
sudo apt install python3 python3-pip python3-venv
|
||||
|
||||
# Install system dependencies for database drivers
|
||||
sudo apt install build-essential python3-dev
|
||||
|
||||
# For MySQL support (optional)
|
||||
sudo apt install default-libmysqlclient-dev
|
||||
|
||||
# For PostgreSQL support (optional)
|
||||
sudo apt install libpq-dev
|
||||
```
|
||||
|
||||
### Python Environment Setup
|
||||
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python3 -m venv water_monitor_env
|
||||
|
||||
# Activate virtual environment
|
||||
source water_monitor_env/bin/activate
|
||||
|
||||
# Install requirements
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Running the Monitor
|
||||
|
||||
```bash
|
||||
# Test run
|
||||
python water_scraper_v3.py --test
|
||||
|
||||
# Run with specific database
|
||||
export DB_TYPE=sqlite
|
||||
python water_scraper_v3.py
|
||||
|
||||
# Run demo
|
||||
python demo_databases.py
|
||||
```
|
||||
|
||||
## Common Linux Issues
|
||||
|
||||
### 1. Permission Errors
|
||||
|
||||
**Error:**
|
||||
```
|
||||
PermissionError: [Errno 13] Permission denied: 'water_levels.db'
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Check current directory permissions
|
||||
ls -la
|
||||
|
||||
# Create data directory with proper permissions
|
||||
mkdir -p data
|
||||
chmod 755 data
|
||||
|
||||
# Set database path to data directory
|
||||
export WATER_DB_PATH=data/water_levels.db
|
||||
```
|
||||
|
||||
### 2. Missing System Dependencies
|
||||
|
||||
**Error:**
|
||||
```
|
||||
ImportError: No module named '_sqlite3'
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Install SQLite development headers
|
||||
sudo apt install libsqlite3-dev
|
||||
|
||||
# Reinstall Python if needed
|
||||
sudo apt install python3-sqlite
|
||||
```
|
||||
|
||||
### 3. Network/Firewall Issues
|
||||
|
||||
**Error:**
|
||||
```
|
||||
requests.exceptions.ConnectionError: HTTPSConnectionPool
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Test network connectivity
|
||||
curl -I https://hyd-app-db.rid.go.th/hydro1h.html
|
||||
|
||||
# Check firewall rules
|
||||
sudo ufw status
|
||||
|
||||
# Allow outbound HTTPS if needed
|
||||
sudo ufw allow out 443
|
||||
```
|
||||
|
||||
### 4. Systemd Service Setup
|
||||
|
||||
Create service file `/etc/systemd/system/water-monitor.service`:
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Thailand Water Level Monitor
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=water-monitor
|
||||
Group=water-monitor
|
||||
WorkingDirectory=/opt/water_level_monitor
|
||||
Environment=PATH=/opt/water_level_monitor/venv/bin
|
||||
Environment=DB_TYPE=sqlite
|
||||
Environment=WATER_DB_PATH=/opt/water_level_monitor/data/water_levels.db
|
||||
ExecStart=/opt/water_level_monitor/venv/bin/python water_scraper_v3.py
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
|
||||
# Security settings
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=true
|
||||
ReadWritePaths=/opt/water_level_monitor/data
|
||||
ReadWritePaths=/opt/water_level_monitor/logs
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Enable and start:
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable water-monitor.service
|
||||
sudo systemctl start water-monitor.service
|
||||
sudo systemctl status water-monitor.service
|
||||
```
|
||||
|
||||
### 5. Log Rotation
|
||||
|
||||
Create `/etc/logrotate.d/water-monitor`:
|
||||
|
||||
```
|
||||
/opt/water_level_monitor/water_monitor.log {
|
||||
daily
|
||||
missingok
|
||||
rotate 30
|
||||
compress
|
||||
delaycompress
|
||||
notifempty
|
||||
create 644 water-monitor water-monitor
|
||||
postrotate
|
||||
systemctl reload water-monitor.service
|
||||
endscript
|
||||
}
|
||||
```
|
||||
|
||||
## Database-Specific Issues
|
||||
|
||||
### SQLite
|
||||
|
||||
**Issue:** Database locked
|
||||
```bash
|
||||
# Check for processes using the database
|
||||
sudo lsof /path/to/water_levels.db
|
||||
|
||||
# Kill processes if needed
|
||||
sudo pkill -f water_scraper_v3.py
|
||||
```
|
||||
|
||||
### VictoriaMetrics with HTTPS
|
||||
|
||||
**Configuration:**
|
||||
```bash
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://your-vm-server.com
|
||||
export VM_PORT=443
|
||||
```
|
||||
|
||||
**Test connection:**
|
||||
```bash
|
||||
curl -k https://your-vm-server.com/health
|
||||
```
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### 1. System Tuning
|
||||
|
||||
```bash
|
||||
# Increase file descriptor limits
|
||||
echo "* soft nofile 65536" >> /etc/security/limits.conf
|
||||
echo "* hard nofile 65536" >> /etc/security/limits.conf
|
||||
|
||||
# Optimize network settings
|
||||
echo "net.core.rmem_max = 16777216" >> /etc/sysctl.conf
|
||||
echo "net.core.wmem_max = 16777216" >> /etc/sysctl.conf
|
||||
sysctl -p
|
||||
```
|
||||
|
||||
### 2. Database Optimization
|
||||
|
||||
```bash
|
||||
# For SQLite
|
||||
export SQLITE_CACHE_SIZE=10000
|
||||
export SQLITE_SYNCHRONOUS=NORMAL
|
||||
|
||||
# Monitor database size
|
||||
du -h data/water_levels.db
|
||||
```
|
||||
|
||||
## Monitoring and Maintenance
|
||||
|
||||
### Health Check Script
|
||||
|
||||
Create `health_check.sh`:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
LOG_FILE="/opt/water_level_monitor/water_monitor.log"
|
||||
SERVICE_NAME="water-monitor"
|
||||
|
||||
# Check if service is running
|
||||
if ! systemctl is-active --quiet $SERVICE_NAME; then
|
||||
echo "ERROR: $SERVICE_NAME is not running"
|
||||
systemctl restart $SERVICE_NAME
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check recent log entries
|
||||
RECENT_ERRORS=$(tail -n 100 $LOG_FILE | grep -c "ERROR")
|
||||
if [ $RECENT_ERRORS -gt 5 ]; then
|
||||
echo "WARNING: $RECENT_ERRORS errors found in recent logs"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK: Service is healthy"
|
||||
exit 0
|
||||
```
|
||||
|
||||
### Cron Job for Health Checks
|
||||
|
||||
```bash
|
||||
# Add to crontab
|
||||
*/5 * * * * /opt/water_level_monitor/health_check.sh >> /var/log/water-monitor-health.log 2>&1
|
||||
```
|
||||
|
||||
## Getting Help
|
||||
|
||||
### Debug Information
|
||||
|
||||
```bash
|
||||
# System information
|
||||
uname -a
|
||||
python3 --version
|
||||
pip list | grep -E "(sqlalchemy|requests|influxdb)"
|
||||
|
||||
# Service logs
|
||||
journalctl -u water-monitor.service -f
|
||||
|
||||
# Application logs
|
||||
tail -f water_monitor.log
|
||||
|
||||
# Database information
|
||||
sqlite3 water_levels.db ".schema"
|
||||
sqlite3 water_levels.db "SELECT COUNT(*) FROM water_measurements;"
|
||||
```
|
||||
|
||||
### Common Commands
|
||||
|
||||
```bash
|
||||
# Restart service
|
||||
sudo systemctl restart water-monitor.service
|
||||
|
||||
# View logs
|
||||
sudo journalctl -u water-monitor.service --since "1 hour ago"
|
||||
|
||||
# Test configuration
|
||||
python config.py
|
||||
|
||||
# Test database connection
|
||||
python demo_databases.py
|
||||
|
||||
# Manual data fetch
|
||||
python water_scraper_v3.py --test
|
||||
```
|
||||
|
||||
This troubleshooting guide should help resolve most common issues encountered when running the Thailand Water Monitor on Debian and other Linux distributions.
|
293
docs/ENHANCED_SCHEDULER_GUIDE.md
Normal file
293
docs/ENHANCED_SCHEDULER_GUIDE.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# Enhanced Scheduler Guide
|
||||
|
||||
This guide explains the new 15-minute scheduling system that runs continuously throughout each hour to ensure comprehensive data coverage.
|
||||
|
||||
## ✅ **New Scheduling Behavior**
|
||||
|
||||
### **15-Minute Schedule Pattern**
|
||||
- **Timing**: Runs every 15 minutes: 1:00, 1:15, 1:30, 1:45, 2:00, 2:15, 2:30, 2:45, etc.
|
||||
- **Hourly Full Checks**: At :00 minutes (includes gap filling and data updates)
|
||||
- **Quarter-Hour Quick Checks**: At :15, :30, :45 minutes (data fetch only)
|
||||
- **Continuous Coverage**: Ensures no data is missed throughout each hour
|
||||
|
||||
### **Operation Types**
|
||||
- **Full Operations** (at :00): Data fetching + gap filling + data updates
|
||||
- **Quick Operations** (at :15, :30, :45): Data fetching only for performance
|
||||
|
||||
## 🔧 **Technical Implementation**
|
||||
|
||||
### **Scheduler States**
|
||||
```python
|
||||
# State tracking variables
|
||||
self.last_successful_update = None # Timestamp of last successful data update
|
||||
self.retry_mode = False # Whether in quick check mode (skip gap filling)
|
||||
self.next_hourly_check = None # Next scheduled hourly check
|
||||
```
|
||||
|
||||
### **Quarter-Hour Check Process**
|
||||
```python
|
||||
def quarter_hour_check(self):
|
||||
"""15-minute check for new data"""
|
||||
current_time = datetime.datetime.now()
|
||||
minute = current_time.minute
|
||||
|
||||
# Determine if this is a full hourly check (at :00) or a quarter-hour check
|
||||
if minute == 0:
|
||||
logging.info("=== HOURLY CHECK (00:00) ===")
|
||||
self.retry_mode = False # Full check with gap filling and updates
|
||||
else:
|
||||
logging.info(f"=== 15-MINUTE CHECK ({minute:02d}:00) ===")
|
||||
self.retry_mode = True # Skip gap filling and updates on 15-min checks
|
||||
|
||||
new_data_found = self.run_scraping_cycle()
|
||||
|
||||
if new_data_found:
|
||||
self.last_successful_update = datetime.datetime.now()
|
||||
if minute == 0:
|
||||
logging.info("New data found during hourly check")
|
||||
else:
|
||||
logging.info(f"New data found during 15-minute check at :{minute:02d}")
|
||||
else:
|
||||
if minute == 0:
|
||||
logging.info("No new data found during hourly check")
|
||||
else:
|
||||
logging.info(f"No new data found during 15-minute check at :{minute:02d}")
|
||||
```
|
||||
|
||||
### **Scheduler Setup**
|
||||
```python
|
||||
def start_scheduler(self):
|
||||
"""Start enhanced scheduler with 15-minute checks"""
|
||||
# Schedule checks every 15 minutes (at :00, :15, :30, :45)
|
||||
schedule.every().hour.at(":00").do(self.quarter_hour_check)
|
||||
schedule.every().hour.at(":15").do(self.quarter_hour_check)
|
||||
schedule.every().hour.at(":30").do(self.quarter_hour_check)
|
||||
schedule.every().hour.at(":45").do(self.quarter_hour_check)
|
||||
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(30) # Check every 30 seconds
|
||||
```
|
||||
|
||||
## 📊 **New Data Detection Logic**
|
||||
|
||||
### **Smart Detection Algorithm**
|
||||
```python
|
||||
def has_new_data(self) -> bool:
|
||||
"""Check if there is new data available since last successful update"""
|
||||
# Get most recent timestamp from database
|
||||
latest_data = self.get_latest_data(limit=1)
|
||||
|
||||
# Check if we should have newer data by now
|
||||
now = datetime.datetime.now()
|
||||
expected_latest = now.replace(minute=0, second=0, microsecond=0)
|
||||
|
||||
# If current time is past 5 minutes after the hour, we should have data
|
||||
if now.minute >= 5:
|
||||
if latest_timestamp < expected_latest:
|
||||
return True # New data expected
|
||||
|
||||
# Check if we have data for the previous hour
|
||||
previous_hour = expected_latest - datetime.timedelta(hours=1)
|
||||
if latest_timestamp < previous_hour:
|
||||
return True # Missing recent data
|
||||
|
||||
return False # Data is up to date
|
||||
```
|
||||
|
||||
### **Actual Data Verification**
|
||||
```python
|
||||
# Compare timestamps before and after scraping
|
||||
initial_timestamp = get_latest_timestamp_before_scraping()
|
||||
# ... perform scraping ...
|
||||
latest_timestamp = get_latest_timestamp_after_scraping()
|
||||
|
||||
if initial_timestamp is None or latest_timestamp > initial_timestamp:
|
||||
new_data_found = True
|
||||
self.last_successful_update = datetime.datetime.now()
|
||||
```
|
||||
|
||||
## 🚀 **Operational Modes**
|
||||
|
||||
### **Mode 1: Full Hourly Operation (at :00)**
|
||||
- **Schedule**: Every hour at :00 minutes (1:00, 2:00, 3:00, etc.)
|
||||
- **Operations**:
|
||||
- ✅ Fetch current data
|
||||
- ✅ Fill data gaps (last 7 days)
|
||||
- ✅ Update existing data (last 2 days)
|
||||
- **Purpose**: Comprehensive data collection and maintenance
|
||||
|
||||
### **Mode 2: Quick 15-Minute Checks (at :15, :30, :45)**
|
||||
- **Schedule**: Every 15 minutes at quarter-hour marks
|
||||
- **Operations**:
|
||||
- ✅ Fetch current data only
|
||||
- ❌ Skip gap filling (performance optimization)
|
||||
- ❌ Skip data updates (performance optimization)
|
||||
- **Purpose**: Ensure no new data is missed between hourly checks
|
||||
|
||||
## 📋 **Logging Output Examples**
|
||||
|
||||
### **Successful Hourly Check (at :00)**
|
||||
```
|
||||
2025-07-26 01:00:00,123 - INFO - === HOURLY CHECK (00:00) ===
|
||||
2025-07-26 01:00:00,124 - INFO - Starting scraping cycle...
|
||||
2025-07-26 01:00:01,456 - INFO - Successfully fetched 384 data points from API
|
||||
2025-07-26 01:00:02,789 - INFO - New data found: 2025-07-26 01:00:00
|
||||
2025-07-26 01:00:03,012 - INFO - Filled 5 data gaps
|
||||
2025-07-26 01:00:04,234 - INFO - Updated 2 existing measurements
|
||||
2025-07-26 01:00:04,235 - INFO - New data found during hourly check
|
||||
```
|
||||
|
||||
### **15-Minute Quick Check (at :15, :30, :45)**
|
||||
```
|
||||
2025-07-26 01:15:00,123 - INFO - === 15-MINUTE CHECK (15:00) ===
|
||||
2025-07-26 01:15:00,124 - INFO - Starting scraping cycle...
|
||||
2025-07-26 01:15:01,456 - INFO - Successfully fetched 299 data points from API
|
||||
2025-07-26 01:15:02,789 - INFO - New data found: 2025-07-26 01:00:00
|
||||
2025-07-26 01:15:02,790 - INFO - New data found during 15-minute check at :15
|
||||
```
|
||||
|
||||
### **Continuous 15-Minute Pattern**
|
||||
```
|
||||
2025-07-26 01:00:00,123 - INFO - === HOURLY CHECK (00:00) ===
|
||||
2025-07-26 01:00:04,235 - INFO - New data found during hourly check
|
||||
|
||||
2025-07-26 01:15:00,123 - INFO - === 15-MINUTE CHECK (15:00) ===
|
||||
2025-07-26 01:15:02,790 - INFO - No new data found during 15-minute check at :15
|
||||
|
||||
2025-07-26 01:30:00,123 - INFO - === 15-MINUTE CHECK (30:00) ===
|
||||
2025-07-26 01:30:02,790 - INFO - No new data found during 15-minute check at :30
|
||||
|
||||
2025-07-26 01:45:00,123 - INFO - === 15-MINUTE CHECK (45:00) ===
|
||||
2025-07-26 01:45:02,790 - INFO - No new data found during 15-minute check at :45
|
||||
|
||||
2025-07-26 02:00:00,123 - INFO - === HOURLY CHECK (00:00) ===
|
||||
2025-07-26 02:00:04,235 - INFO - New data found during hourly check
|
||||
```
|
||||
|
||||
## ⚙️ **Configuration Options**
|
||||
|
||||
### **Environment Variables**
|
||||
```bash
|
||||
# Retry interval (default: 5 minutes)
|
||||
export RETRY_INTERVAL_MINUTES=5
|
||||
|
||||
# Data availability buffer (default: 5 minutes after hour)
|
||||
export DATA_BUFFER_MINUTES=5
|
||||
|
||||
# Gap filling days (default: 7 days)
|
||||
export GAP_FILL_DAYS=7
|
||||
|
||||
# Update check days (default: 2 days)
|
||||
export UPDATE_DAYS=2
|
||||
```
|
||||
|
||||
### **Scheduler Timing**
|
||||
```python
|
||||
# Hourly checks at top of hour
|
||||
schedule.every().hour.at(":00").do(self.hourly_check)
|
||||
|
||||
# 5-minute retries (dynamically scheduled)
|
||||
schedule.every(5).minutes.do(self.retry_check).tag('retry')
|
||||
|
||||
# Check every 30 seconds for responsive retry scheduling
|
||||
time.sleep(30)
|
||||
```
|
||||
|
||||
## 🔍 **Performance Optimizations**
|
||||
|
||||
### **Retry Mode Optimizations**
|
||||
- **Skip Gap Filling**: Avoids expensive historical data fetching during retries
|
||||
- **Skip Data Updates**: Avoids comparison operations during retries
|
||||
- **Focused API Calls**: Only fetches current day data during retries
|
||||
- **Reduced Database Queries**: Minimal database operations during retries
|
||||
|
||||
### **Resource Management**
|
||||
- **API Rate Limiting**: 1-second delays between API calls
|
||||
- **Database Connection Pooling**: Efficient connection reuse
|
||||
- **Memory Efficiency**: Selective data processing
|
||||
- **Error Recovery**: Automatic retry with exponential backoff
|
||||
|
||||
## 🛠️ **Troubleshooting**
|
||||
|
||||
### **Common Scenarios**
|
||||
|
||||
#### **Stuck in Retry Mode**
|
||||
```
|
||||
# Check if API is returning data
|
||||
curl -X POST https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx
|
||||
|
||||
# Check database connectivity
|
||||
python water_scraper_v3.py --check-gaps 1
|
||||
|
||||
# Manual data fetch test
|
||||
python water_scraper_v3.py --test
|
||||
```
|
||||
|
||||
#### **Missing Hourly Triggers**
|
||||
```
|
||||
# Check system time synchronization
|
||||
timedatectl status
|
||||
|
||||
# Verify scheduler is running
|
||||
ps aux | grep water_scraper
|
||||
|
||||
# Check logs for scheduler activity
|
||||
tail -f water_monitor.log | grep "HOURLY CHECK"
|
||||
```
|
||||
|
||||
#### **False New Data Detection**
|
||||
```
|
||||
# Check latest data in database
|
||||
sqlite3 water_monitoring.db "SELECT MAX(timestamp) FROM water_measurements;"
|
||||
|
||||
# Verify timestamp parsing
|
||||
python -c "
|
||||
import datetime
|
||||
print('Current hour:', datetime.datetime.now().replace(minute=0, second=0, microsecond=0))
|
||||
"
|
||||
```
|
||||
|
||||
## 📈 **Monitoring and Alerts**
|
||||
|
||||
### **Key Metrics to Monitor**
|
||||
- **Hourly Success Rate**: Percentage of hourly checks that find new data
|
||||
- **Retry Duration**: How long system stays in retry mode
|
||||
- **Data Freshness**: Time since last successful data update
|
||||
- **API Response Time**: Performance of data fetching operations
|
||||
|
||||
### **Alert Conditions**
|
||||
- **Extended Retry Mode**: System in retry mode for > 30 minutes
|
||||
- **No Data for 2+ Hours**: No new data found for extended period
|
||||
- **High Error Rate**: Multiple consecutive API failures
|
||||
- **Database Issues**: Connection or save failures
|
||||
|
||||
### **Health Check Script**
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Check if system is stuck in retry mode
|
||||
RETRY_COUNT=$(tail -n 100 water_monitor.log | grep -c "RETRY CHECK")
|
||||
if [ $RETRY_COUNT -gt 6 ]; then
|
||||
echo "WARNING: System may be stuck in retry mode ($RETRY_COUNT retries in last 100 log entries)"
|
||||
fi
|
||||
|
||||
# Check data freshness
|
||||
LATEST_DATA=$(sqlite3 water_monitoring.db "SELECT MAX(timestamp) FROM water_measurements;")
|
||||
echo "Latest data timestamp: $LATEST_DATA"
|
||||
```
|
||||
|
||||
## 🎯 **Best Practices**
|
||||
|
||||
### **Production Deployment**
|
||||
1. **Monitor Logs**: Watch for retry mode patterns
|
||||
2. **Set Alerts**: Configure notifications for extended retry periods
|
||||
3. **Regular Maintenance**: Weekly gap filling and data validation
|
||||
4. **Backup Strategy**: Regular database backups before major operations
|
||||
|
||||
### **Performance Tuning**
|
||||
1. **Adjust Buffer Time**: Modify data availability buffer based on API patterns
|
||||
2. **Optimize Retry Interval**: Balance between responsiveness and API load
|
||||
3. **Database Indexing**: Ensure proper indexes for timestamp queries
|
||||
4. **Connection Pooling**: Configure appropriate database connection limits
|
||||
|
||||
This enhanced scheduler ensures reliable, efficient, and intelligent water level monitoring with automatic adaptation to data availability patterns.
|
227
docs/ENHANCEMENT_SUMMARY.md
Normal file
227
docs/ENHANCEMENT_SUMMARY.md
Normal file
@@ -0,0 +1,227 @@
|
||||
# 🚀 Northern Thailand Ping River Monitor - Enhancement Summary
|
||||
|
||||
## 🎯 **What We've Accomplished**
|
||||
|
||||
We've successfully transformed your water monitoring system from a simple scraper into a **production-ready, enterprise-grade monitoring platform** focused on the Ping River Basin in Northern Thailand, with modern web interfaces, station management capabilities, and comprehensive observability.
|
||||
|
||||
## 🌟 **Major New Features Added**
|
||||
|
||||
### 1. **FastAPI Web Interface** 🌐
|
||||
- **Interactive Dashboard** at `http://localhost:8000`
|
||||
- **REST API** with comprehensive endpoints
|
||||
- **Station Management** - Add, update, delete monitoring stations
|
||||
- **Real-time Health Monitoring**
|
||||
- **Manual Data Collection Triggers**
|
||||
- **Interactive API Documentation** at `/docs`
|
||||
- **CORS Support** for web applications
|
||||
|
||||
### 2. **Enhanced Architecture** 🏗️
|
||||
- **Type Safety** with Pydantic models and comprehensive type hints
|
||||
- **Data Validation Layer** with range checking and error handling
|
||||
- **Custom Exception Classes** for better error management
|
||||
- **Modular Design** with separated concerns
|
||||
|
||||
### 3. **Observability & Monitoring** 📊
|
||||
- **Metrics Collection System** (counters, gauges, histograms)
|
||||
- **Health Checks** for database, API, and system resources
|
||||
- **Performance Tracking** with response times and success rates
|
||||
- **Enhanced Logging** with colors, rotation, and performance logs
|
||||
|
||||
### 4. **Production Features** 🚀
|
||||
- **Rate Limiting** to prevent API abuse
|
||||
- **Request Tracking** with detailed statistics
|
||||
- **Configuration Validation** on startup
|
||||
- **Graceful Error Handling** and recovery
|
||||
- **Background Task Management**
|
||||
|
||||
## 📁 **New Files Created**
|
||||
|
||||
```
|
||||
src/
|
||||
├── models.py # Data models and type definitions
|
||||
├── exceptions.py # Custom exception classes
|
||||
├── validators.py # Data validation layer
|
||||
├── metrics.py # Metrics collection system
|
||||
├── health_check.py # Health monitoring system
|
||||
├── rate_limiter.py # Rate limiting and request tracking
|
||||
├── logging_config.py # Enhanced logging configuration
|
||||
├── web_api.py # FastAPI web interface
|
||||
├── main.py # Enhanced CLI with multiple modes
|
||||
└── __init__.py # Package initialization
|
||||
|
||||
# Root files
|
||||
├── run.py # Simple startup script
|
||||
├── test_integration.py # Integration test suite
|
||||
├── test_api.py # API endpoint tests
|
||||
└── ENHANCEMENT_SUMMARY.md # This file
|
||||
```
|
||||
|
||||
## 🔧 **Enhanced Existing Files**
|
||||
|
||||
- **`src/water_scraper_v3.py`** - Integrated new features, metrics, validation
|
||||
- **`src/config.py`** - Added configuration validation
|
||||
- **`requirements.txt`** - Added FastAPI, Pydantic, and monitoring dependencies
|
||||
- **`docker-compose.victoriametrics.yml`** - Added web API service
|
||||
- **`Dockerfile`** - Updated for new startup script
|
||||
- **`README.md`** - Updated with new features and usage instructions
|
||||
|
||||
## 🌐 **Web API Endpoints**
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/` | GET | Interactive dashboard |
|
||||
| `/docs` | GET | API documentation |
|
||||
| `/health` | GET | System health status |
|
||||
| `/metrics` | GET | Application metrics |
|
||||
| `/stations` | GET | List all monitoring stations |
|
||||
| `/measurements/latest` | GET | Latest measurements |
|
||||
| `/measurements/station/{code}` | GET | Station-specific data |
|
||||
| `/scrape/trigger` | POST | Trigger manual data collection |
|
||||
| `/scraping/status` | GET | Scraping status and statistics |
|
||||
| `/config` | GET | Current configuration (masked) |
|
||||
|
||||
## 🚀 **Usage Examples**
|
||||
|
||||
### **Traditional Mode (Enhanced)**
|
||||
```bash
|
||||
# Test single cycle
|
||||
python run.py --test
|
||||
|
||||
# Continuous monitoring
|
||||
python run.py
|
||||
|
||||
# Fill data gaps
|
||||
python run.py --fill-gaps 7
|
||||
|
||||
# Show system status
|
||||
python run.py --status
|
||||
```
|
||||
|
||||
### **Web API Mode (NEW!)**
|
||||
```bash
|
||||
# Start web API server
|
||||
python run.py --web-api
|
||||
|
||||
# Access dashboard
|
||||
open http://localhost:8000
|
||||
|
||||
# View API documentation
|
||||
open http://localhost:8000/docs
|
||||
```
|
||||
|
||||
### **Docker Deployment**
|
||||
```bash
|
||||
# Start complete stack
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
# Services available:
|
||||
# - Water API: http://localhost:8000
|
||||
# - Grafana: http://localhost:3000
|
||||
# - VictoriaMetrics: http://localhost:8428
|
||||
```
|
||||
|
||||
## 📊 **Monitoring & Observability**
|
||||
|
||||
### **Built-in Metrics**
|
||||
- API request counts and response times
|
||||
- Database connection status and save operations
|
||||
- Scraping cycle success/failure rates
|
||||
- System resource usage (memory, etc.)
|
||||
|
||||
### **Health Checks**
|
||||
- Database connectivity and data freshness
|
||||
- External API availability
|
||||
- Memory usage monitoring
|
||||
- Overall system health status
|
||||
|
||||
### **Enhanced Logging**
|
||||
- Colored console output for better readability
|
||||
- File rotation to prevent disk space issues
|
||||
- Performance logging for optimization
|
||||
- Structured logging with proper levels
|
||||
|
||||
## 🔒 **Production Ready Features**
|
||||
|
||||
### **Security & Reliability**
|
||||
- Rate limiting to prevent API abuse
|
||||
- Input validation and sanitization
|
||||
- Graceful error handling and recovery
|
||||
- Configuration validation on startup
|
||||
|
||||
### **Performance**
|
||||
- Efficient metrics collection with minimal overhead
|
||||
- Background task management
|
||||
- Connection pooling and resource management
|
||||
- Optimized database operations
|
||||
|
||||
### **Scalability**
|
||||
- Modular architecture for easy extension
|
||||
- Async support for high concurrency
|
||||
- Configurable resource limits
|
||||
- Health checks for load balancer integration
|
||||
|
||||
## 🧪 **Testing**
|
||||
|
||||
### **Integration Tests**
|
||||
```bash
|
||||
# Run all integration tests
|
||||
python test_integration.py
|
||||
```
|
||||
|
||||
### **API Tests**
|
||||
```bash
|
||||
# Test API endpoints (server must be running)
|
||||
python test_api.py
|
||||
```
|
||||
|
||||
## 📈 **Performance Improvements**
|
||||
|
||||
1. **Request Tracking** - Monitor API performance and success rates
|
||||
2. **Rate Limiting** - Prevent API abuse and ensure stability
|
||||
3. **Data Validation** - Catch errors early and improve data quality
|
||||
4. **Metrics Collection** - Identify bottlenecks and optimization opportunities
|
||||
5. **Health Monitoring** - Proactive issue detection and alerting
|
||||
|
||||
## 🎉 **Benefits Achieved**
|
||||
|
||||
### **For Developers**
|
||||
- **Better Developer Experience** with type hints and validation
|
||||
- **Easier Debugging** with enhanced logging and error messages
|
||||
- **Comprehensive Testing** with integration and API tests
|
||||
- **Modern Architecture** following best practices
|
||||
|
||||
### **For Operations**
|
||||
- **Web Dashboard** for easy monitoring and management
|
||||
- **Health Checks** for automated monitoring integration
|
||||
- **Metrics Collection** for performance analysis
|
||||
- **Production-Ready** deployment with Docker support
|
||||
|
||||
### **For Users**
|
||||
- **REST API** for integration with other systems
|
||||
- **Real-time Data Access** via web interface
|
||||
- **Manual Controls** for triggering data collection
|
||||
- **Status Monitoring** for system visibility
|
||||
|
||||
## 🔮 **Future Enhancement Opportunities**
|
||||
|
||||
1. **Authentication & Authorization** - Add user management and API keys
|
||||
2. **Real-time WebSocket Updates** - Live data streaming to web clients
|
||||
3. **Advanced Analytics** - Trend analysis and forecasting
|
||||
4. **Alert System** - Email/SMS notifications for critical conditions
|
||||
5. **Multi-tenant Support** - Support for multiple organizations
|
||||
6. **Data Export** - CSV, Excel, and other format exports
|
||||
7. **Mobile App** - React Native or Flutter mobile interface
|
||||
|
||||
## 🏆 **Summary**
|
||||
|
||||
Your Thailand Water Monitor has been transformed from a simple data scraper into a **comprehensive, enterprise-grade monitoring platform** that includes:
|
||||
|
||||
- ✅ **Modern Web Interface** with FastAPI
|
||||
- ✅ **Production-Ready Architecture** with proper error handling
|
||||
- ✅ **Comprehensive Monitoring** with metrics and health checks
|
||||
- ✅ **Type Safety** and data validation
|
||||
- ✅ **Enhanced Logging** and observability
|
||||
- ✅ **Docker Support** for easy deployment
|
||||
- ✅ **Extensive Testing** for reliability
|
||||
|
||||
The system is now ready for production deployment and can serve as a foundation for further enhancements and integrations!
|
275
docs/GAP_FILLING_GUIDE.md
Normal file
275
docs/GAP_FILLING_GUIDE.md
Normal file
@@ -0,0 +1,275 @@
|
||||
# Gap Filling and Data Integrity Guide
|
||||
|
||||
This guide explains the enhanced gap-filling functionality that addresses data gaps and missing timestamps in the Thailand Water Monitor.
|
||||
|
||||
## ✅ **Issues Resolved**
|
||||
|
||||
### **1. Data Gaps Problem**
|
||||
- **Before**: Tool only fetched current day data, leaving gaps in historical records
|
||||
- **After**: Automatically detects and fills missing timestamps for the last 7 days
|
||||
|
||||
### **2. Missing Midnight Timestamps**
|
||||
- **Before**: Jump from 23:00 to 01:00 (missing 00:00 midnight data)
|
||||
- **After**: Specifically checks for and fills midnight hour gaps
|
||||
|
||||
### **3. Changed Values**
|
||||
- **Before**: No mechanism to update existing data if values changed on the server
|
||||
- **After**: Compares existing data with fresh API data and updates changed values
|
||||
|
||||
## 🔧 **New Features**
|
||||
|
||||
### **Command Line Interface**
|
||||
```bash
|
||||
# Check for missing data gaps
|
||||
python water_scraper_v3.py --check-gaps [days]
|
||||
|
||||
# Fill missing data gaps
|
||||
python water_scraper_v3.py --fill-gaps [days]
|
||||
|
||||
# Update existing data with latest values
|
||||
python water_scraper_v3.py --update-data [days]
|
||||
|
||||
# Run single test cycle
|
||||
python water_scraper_v3.py --test
|
||||
|
||||
# Show help
|
||||
python water_scraper_v3.py --help
|
||||
```
|
||||
|
||||
### **Automatic Gap Detection**
|
||||
The system now automatically:
|
||||
- Generates expected hourly timestamps for the specified time range
|
||||
- Compares with existing database records
|
||||
- Identifies missing timestamps
|
||||
- Groups missing data by date for efficient API calls
|
||||
|
||||
### **Intelligent Gap Filling**
|
||||
- **Historical Data Fetching**: Retrieves data for specific dates to fill gaps
|
||||
- **Selective Insertion**: Only inserts data for actually missing timestamps
|
||||
- **API Rate Limiting**: Includes delays between API calls to be respectful
|
||||
- **Error Handling**: Continues processing even if some dates fail
|
||||
|
||||
### **Data Update Mechanism**
|
||||
- **Change Detection**: Compares water levels, discharge rates, and percentages
|
||||
- **Precision Checking**: Uses appropriate thresholds (0.001m for water level, 0.1 cms for discharge)
|
||||
- **Selective Updates**: Only updates records where values have actually changed
|
||||
|
||||
## 📊 **Test Results**
|
||||
|
||||
### **Before Enhancement**
|
||||
```
|
||||
Found 22 missing timestamps in the last 2 days:
|
||||
2025-07-23: Missing hours [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
|
||||
2025-07-24: Missing hours [0, 20, 21, 22, 23]
|
||||
2025-07-25: Missing hours [0, 9]
|
||||
```
|
||||
|
||||
### **After Gap Filling**
|
||||
```
|
||||
Gap filling completed. Filled 96 missing data points
|
||||
|
||||
Remaining gaps:
|
||||
2025-07-24: Missing hours [10]
|
||||
2025-07-25: Missing hours [0, 10]
|
||||
```
|
||||
|
||||
**Improvement**: Reduced from 22 missing timestamps to 3 (86% improvement)
|
||||
|
||||
## 🚀 **Enhanced Scraping Cycle**
|
||||
|
||||
The regular scraping cycle now includes three phases:
|
||||
|
||||
### **Phase 1: Current Data Collection**
|
||||
```python
|
||||
# Fetch and save current data
|
||||
water_data = self.fetch_water_data()
|
||||
success = self.save_to_database(water_data)
|
||||
```
|
||||
|
||||
### **Phase 2: Gap Filling (Last 7 Days)**
|
||||
```python
|
||||
# Check for and fill missing data
|
||||
filled_count = self.fill_data_gaps(days_back=7)
|
||||
```
|
||||
|
||||
### **Phase 3: Data Updates (Last 2 Days)**
|
||||
```python
|
||||
# Update existing data with latest values
|
||||
updated_count = self.update_existing_data(days_back=2)
|
||||
```
|
||||
|
||||
## 🔧 **Technical Improvements**
|
||||
|
||||
### **Database Connection Handling**
|
||||
- **SQLite Optimization**: Added timeout and thread safety parameters
|
||||
- **Retry Logic**: Exponential backoff for database lock errors
|
||||
- **Transaction Management**: Proper use of `engine.begin()` for automatic commits
|
||||
|
||||
### **Error Recovery**
|
||||
```python
|
||||
# Retry logic with exponential backoff
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
success = self.db_adapter.save_measurements(water_data)
|
||||
if success:
|
||||
return True
|
||||
except Exception as e:
|
||||
if "database is locked" in str(e).lower():
|
||||
time.sleep(2 ** attempt) # 1s, 2s, 4s delays
|
||||
continue
|
||||
```
|
||||
|
||||
### **Memory Efficiency**
|
||||
- **Selective Data Processing**: Only processes data for missing timestamps
|
||||
- **Batch Processing**: Groups operations by date to minimize API calls
|
||||
- **Resource Management**: Proper cleanup and connection handling
|
||||
|
||||
## 📋 **Usage Examples**
|
||||
|
||||
### **Daily Maintenance**
|
||||
```bash
|
||||
# Check for gaps in the last week
|
||||
python water_scraper_v3.py --check-gaps 7
|
||||
|
||||
# Fill any found gaps
|
||||
python water_scraper_v3.py --fill-gaps 7
|
||||
|
||||
# Update recent data for accuracy
|
||||
python water_scraper_v3.py --update-data 2
|
||||
```
|
||||
|
||||
### **Historical Data Recovery**
|
||||
```bash
|
||||
# Check for gaps in the last month
|
||||
python water_scraper_v3.py --check-gaps 30
|
||||
|
||||
# Fill gaps for the last month (be patient, this takes time)
|
||||
python water_scraper_v3.py --fill-gaps 30
|
||||
```
|
||||
|
||||
### **Production Monitoring**
|
||||
```bash
|
||||
# Quick test to ensure system is working
|
||||
python water_scraper_v3.py --test
|
||||
|
||||
# Check for recent gaps
|
||||
python water_scraper_v3.py --check-gaps 1
|
||||
```
|
||||
|
||||
## 🔍 **Monitoring and Alerts**
|
||||
|
||||
### **Gap Detection Output**
|
||||
```
|
||||
Found 22 missing timestamps:
|
||||
2025-07-23: Missing hours [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
|
||||
2025-07-24: Missing hours [0, 20, 21, 22, 23]
|
||||
2025-07-25: Missing hours [0, 9]
|
||||
```
|
||||
|
||||
### **Gap Filling Progress**
|
||||
```
|
||||
Fetching data for 2025-07-24 to fill 5 missing timestamps
|
||||
Successfully fetched 368 data points from API for 2025-07-24
|
||||
Filled 80 data points for 2025-07-24
|
||||
Gap filling completed. Filled 96 missing data points
|
||||
```
|
||||
|
||||
### **Update Detection**
|
||||
```
|
||||
Checking for updates on 2025-07-24
|
||||
Update needed for P.1 at 2025-07-24 15:00:00
|
||||
Updated 5 measurements for 2025-07-24
|
||||
Data update completed. Updated 5 measurements
|
||||
```
|
||||
|
||||
## ⚙️ **Configuration Options**
|
||||
|
||||
### **Environment Variables**
|
||||
```bash
|
||||
# Database configuration
|
||||
export DB_TYPE=sqlite
|
||||
export WATER_DB_PATH=water_monitoring.db
|
||||
|
||||
# Gap filling settings (can be added to config.py)
|
||||
export GAP_FILL_DAYS=7 # Days to check for gaps
|
||||
export UPDATE_DAYS=2 # Days to check for updates
|
||||
export API_DELAY=1 # Seconds between API calls
|
||||
export MAX_RETRIES=3 # Database retry attempts
|
||||
```
|
||||
|
||||
### **Customizable Parameters**
|
||||
- **Gap Check Period**: Default 7 days, configurable via command line
|
||||
- **Update Period**: Default 2 days, configurable via command line
|
||||
- **API Rate Limiting**: 1-second delay between calls (configurable)
|
||||
- **Retry Logic**: 3 attempts with exponential backoff (configurable)
|
||||
|
||||
## 🛠️ **Troubleshooting**
|
||||
|
||||
### **Common Issues**
|
||||
|
||||
#### **Database Locked Errors**
|
||||
```
|
||||
ERROR - Error saving to SQLITE: database is locked
|
||||
```
|
||||
**Solution**: The retry logic now handles this automatically with exponential backoff.
|
||||
|
||||
#### **API Rate Limiting**
|
||||
```
|
||||
WARNING - Too many requests to API
|
||||
```
|
||||
**Solution**: Increase delay between API calls or reduce the number of days processed at once.
|
||||
|
||||
#### **Missing Data Still Present**
|
||||
```
|
||||
Found X missing timestamps after gap filling
|
||||
```
|
||||
**Possible Causes**:
|
||||
- Data not available on the Thai government server for those timestamps
|
||||
- Network issues during API calls
|
||||
- API returned empty data for those specific times
|
||||
|
||||
### **Debug Commands**
|
||||
```bash
|
||||
# Enable debug logging
|
||||
export LOG_LEVEL=DEBUG
|
||||
python water_scraper_v3.py --check-gaps 1
|
||||
|
||||
# Test specific date range
|
||||
python water_scraper_v3.py --fill-gaps 1
|
||||
|
||||
# Check database directly
|
||||
sqlite3 water_monitoring.db "SELECT COUNT(*) FROM water_measurements;"
|
||||
sqlite3 water_monitoring.db "SELECT timestamp, COUNT(*) FROM water_measurements GROUP BY timestamp ORDER BY timestamp DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
## 📈 **Performance Metrics**
|
||||
|
||||
### **Gap Filling Efficiency**
|
||||
- **API Calls**: Grouped by date to minimize requests
|
||||
- **Processing Speed**: ~100-400 data points per API call
|
||||
- **Success Rate**: 86% gap reduction in test case
|
||||
- **Resource Usage**: Minimal memory footprint with selective processing
|
||||
|
||||
### **Database Performance**
|
||||
- **SQLite Optimization**: Connection pooling and timeout handling
|
||||
- **Transaction Efficiency**: Batch inserts with proper transaction management
|
||||
- **Retry Success**: Automatic recovery from temporary lock conditions
|
||||
|
||||
## 🎯 **Best Practices**
|
||||
|
||||
### **Regular Maintenance**
|
||||
1. **Daily**: Run `--check-gaps 1` to monitor recent data quality
|
||||
2. **Weekly**: Run `--fill-gaps 7` to catch any missed data
|
||||
3. **Monthly**: Run `--update-data 7` to ensure data accuracy
|
||||
|
||||
### **Production Deployment**
|
||||
1. **Automated Scheduling**: Use cron or systemd timers for regular gap checks
|
||||
2. **Monitoring**: Set up alerts for excessive missing data
|
||||
3. **Backup**: Regular database backups before major gap-filling operations
|
||||
|
||||
### **Data Quality Assurance**
|
||||
1. **Validation**: Check for reasonable value ranges after gap filling
|
||||
2. **Comparison**: Compare filled data with nearby timestamps for consistency
|
||||
3. **Documentation**: Log all gap-filling activities for audit trails
|
||||
|
||||
This enhanced gap-filling system ensures comprehensive and accurate water level monitoring with minimal data loss and automatic recovery capabilities.
|
475
docs/GEOLOCATION_GUIDE.md
Normal file
475
docs/GEOLOCATION_GUIDE.md
Normal file
@@ -0,0 +1,475 @@
|
||||
# Geolocation Support for Grafana Geomap
|
||||
|
||||
This guide explains the geolocation functionality added to the Thailand Water Monitor for use with Grafana's geomap visualization.
|
||||
|
||||
## ✅ **Implemented Features**
|
||||
|
||||
### **Database Schema Updates**
|
||||
All database adapters now support geolocation fields:
|
||||
- **latitude**: Decimal latitude coordinates (DECIMAL(10,8) for SQL, REAL for SQLite)
|
||||
- **longitude**: Decimal longitude coordinates (DECIMAL(11,8) for SQL, REAL for SQLite)
|
||||
- **geohash**: Geohash string for efficient spatial indexing (VARCHAR(20)/TEXT)
|
||||
|
||||
### **Station Data Enhancement**
|
||||
Station mapping now includes geolocation fields:
|
||||
```python
|
||||
'8': {
|
||||
'code': 'P.1',
|
||||
'thai_name': 'สะพานนวรัฐ',
|
||||
'english_name': 'Nawarat Bridge',
|
||||
'latitude': 15.6944, # Decimal degrees
|
||||
'longitude': 100.2028, # Decimal degrees
|
||||
'geohash': 'w5q6uuhvfcfp25' # Geohash for P.1
|
||||
}
|
||||
```
|
||||
|
||||
## 🗄️ **Database Schema**
|
||||
|
||||
### **Updated Stations Table**
|
||||
```sql
|
||||
CREATE TABLE stations (
|
||||
id INTEGER PRIMARY KEY,
|
||||
station_code TEXT UNIQUE NOT NULL,
|
||||
thai_name TEXT NOT NULL,
|
||||
english_name TEXT NOT NULL,
|
||||
latitude REAL, -- NEW: Latitude coordinate
|
||||
longitude REAL, -- NEW: Longitude coordinate
|
||||
geohash TEXT, -- NEW: Geohash for spatial indexing
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
```
|
||||
|
||||
### **Database Support**
|
||||
- ✅ **SQLite**: REAL columns for coordinates, TEXT for geohash
|
||||
- ✅ **PostgreSQL**: DECIMAL(10,8) and DECIMAL(11,8) for coordinates, VARCHAR(20) for geohash
|
||||
- ✅ **MySQL**: DECIMAL(10,8) and DECIMAL(11,8) for coordinates, VARCHAR(20) for geohash
|
||||
- ✅ **VictoriaMetrics**: Geolocation data included in metric labels
|
||||
|
||||
## 📊 **Current Station Data**
|
||||
|
||||
### **P.1 - Nawarat Bridge (Sample)**
|
||||
- **Station Code**: P.1
|
||||
- **Thai Name**: สะพานนวรัฐ
|
||||
- **English Name**: Nawarat Bridge
|
||||
- **Latitude**: 15.6944
|
||||
- **Longitude**: 100.2028
|
||||
- **Geohash**: w5q6uuhvfcfp25
|
||||
|
||||
### **Remaining Stations**
|
||||
The following stations are ready for geolocation data when coordinates become available:
|
||||
- P.20 - บ้านเชียงดาว (Ban Chiang Dao)
|
||||
- P.75 - บ้านช่อแล (Ban Chai Lat)
|
||||
- P.92 - บ้านเมืองกึ๊ด (Ban Muang Aut)
|
||||
- P.4A - บ้านแม่แตง (Ban Mae Taeng)
|
||||
- P.67 - บ้านแม่แต (Ban Tae)
|
||||
- P.21 - บ้านริมใต้ (Ban Rim Tai)
|
||||
- P.103 - สะพานวงแหวนรอบ 3 (Ring Bridge 3)
|
||||
- P.82 - บ้านสบวิน (Ban Sob win)
|
||||
- P.84 - บ้านพันตน (Ban Panton)
|
||||
- P.81 - บ้านโป่ง (Ban Pong)
|
||||
- P.5 - สะพานท่านาง (Tha Nang Bridge)
|
||||
- P.77 - บ้านสบแม่สะป๊วด (Baan Sop Mae Sapuord)
|
||||
- P.87 - บ้านป่าซาง (Ban Pa Sang)
|
||||
- P.76 - บ้านแม่อีไฮ (Banb Mae I Hai)
|
||||
- P.85 - บ้านหล่ายแก้ว (Baan Lai Kaew)
|
||||
|
||||
## 🗺️ **Grafana Geomap Integration**
|
||||
|
||||
### **Data Source Configuration**
|
||||
The geolocation data is automatically included in all database queries and can be used directly in Grafana:
|
||||
|
||||
#### **SQLite/PostgreSQL/MySQL Query Example**
|
||||
```sql
|
||||
SELECT
|
||||
m.timestamp,
|
||||
s.station_code,
|
||||
s.english_name,
|
||||
s.thai_name,
|
||||
s.latitude,
|
||||
s.longitude,
|
||||
s.geohash,
|
||||
m.water_level,
|
||||
m.discharge,
|
||||
m.discharge_percent
|
||||
FROM water_measurements m
|
||||
JOIN stations s ON m.station_id = s.id
|
||||
WHERE s.latitude IS NOT NULL
|
||||
AND s.longitude IS NOT NULL
|
||||
ORDER BY m.timestamp DESC
|
||||
```
|
||||
|
||||
#### **VictoriaMetrics Query Example**
|
||||
```promql
|
||||
water_level{latitude!="",longitude!=""}
|
||||
```
|
||||
|
||||
### **Geomap Panel Configuration**
|
||||
|
||||
#### **1. Create Geomap Panel**
|
||||
1. Add new panel in Grafana
|
||||
2. Select "Geomap" visualization
|
||||
3. Configure data source (SQLite/PostgreSQL/MySQL/VictoriaMetrics)
|
||||
|
||||
#### **2. Configure Location Fields**
|
||||
- **Latitude Field**: `latitude`
|
||||
- **Longitude Field**: `longitude`
|
||||
- **Alternative**: Use `geohash` field for geohash-based positioning
|
||||
|
||||
#### **3. Configure Display Options**
|
||||
- **Station Labels**: Use `station_code` or `english_name`
|
||||
- **Tooltip Information**: Include `thai_name`, `water_level`, `discharge`
|
||||
- **Color Mapping**: Map to `water_level` or `discharge_percent`
|
||||
|
||||
#### **4. Sample Geomap Configuration**
|
||||
```json
|
||||
{
|
||||
"type": "geomap",
|
||||
"title": "Thailand Water Stations",
|
||||
"targets": [
|
||||
{
|
||||
"rawSql": "SELECT latitude, longitude, station_code, english_name, water_level, discharge_percent FROM stations s JOIN water_measurements m ON s.id = m.station_id WHERE s.latitude IS NOT NULL AND m.timestamp = (SELECT MAX(timestamp) FROM water_measurements WHERE station_id = s.id)",
|
||||
"format": "table"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"color": {
|
||||
"mode": "continuous-GrYlRd",
|
||||
"field": "water_level"
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"view": {
|
||||
"id": "coords",
|
||||
"lat": 15.6944,
|
||||
"lon": 100.2028,
|
||||
"zoom": 8
|
||||
},
|
||||
"controls": {
|
||||
"mouseWheelZoom": true,
|
||||
"showZoom": true,
|
||||
"showAttribution": true
|
||||
},
|
||||
"layers": [
|
||||
{
|
||||
"type": "markers",
|
||||
"config": {
|
||||
"size": {
|
||||
"field": "discharge_percent",
|
||||
"min": 5,
|
||||
"max": 20
|
||||
},
|
||||
"color": {
|
||||
"field": "water_level"
|
||||
},
|
||||
"showLegend": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🔧 **Adding New Station Coordinates**
|
||||
|
||||
### **Method 1: Update Station Mapping**
|
||||
Edit `water_scraper_v3.py` and add coordinates to the station mapping:
|
||||
```python
|
||||
'1': {
|
||||
'code': 'P.20',
|
||||
'thai_name': 'บ้านเชียงดาว',
|
||||
'english_name': 'Ban Chiang Dao',
|
||||
'latitude': 19.3056, # Add actual coordinates
|
||||
'longitude': 98.9264, # Add actual coordinates
|
||||
'geohash': 'w4r6...' # Add actual geohash
|
||||
}
|
||||
```
|
||||
|
||||
### **Method 2: Direct Database Update**
|
||||
```sql
|
||||
UPDATE stations
|
||||
SET latitude = 19.3056, longitude = 98.9264, geohash = 'w4r6uuhvfcfp25'
|
||||
WHERE station_code = 'P.20';
|
||||
```
|
||||
|
||||
### **Method 3: Bulk Update Script**
|
||||
```python
|
||||
import sqlite3
|
||||
|
||||
coordinates = {
|
||||
'P.20': {'lat': 19.3056, 'lon': 98.9264, 'geohash': 'w4r6uuhvfcfp25'},
|
||||
'P.75': {'lat': 18.7756, 'lon': 99.1234, 'geohash': 'w4r5uuhvfcfp25'},
|
||||
# Add more stations...
|
||||
}
|
||||
|
||||
conn = sqlite3.connect('water_monitoring.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
for station_code, coords in coordinates.items():
|
||||
cursor.execute("""
|
||||
UPDATE stations
|
||||
SET latitude = ?, longitude = ?, geohash = ?
|
||||
WHERE station_code = ?
|
||||
""", (coords['lat'], coords['lon'], coords['geohash'], station_code))
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
```
|
||||
|
||||
## 🌐 **Geohash Information**
|
||||
|
||||
### **What is Geohash?**
|
||||
Geohash is a geocoding system that represents geographic coordinates as a short alphanumeric string. It provides:
|
||||
- **Spatial Indexing**: Efficient spatial queries
|
||||
- **Proximity**: Similar geohashes indicate nearby locations
|
||||
- **Hierarchical**: Longer geohashes provide more precision
|
||||
|
||||
### **Geohash Precision Levels**
|
||||
- **5 characters**: ~2.4km precision
|
||||
- **6 characters**: ~610m precision
|
||||
- **7 characters**: ~76m precision
|
||||
- **8 characters**: ~19m precision
|
||||
- **9+ characters**: <5m precision
|
||||
|
||||
### **Example: P.1 Geohash**
|
||||
- **Geohash**: `w5q6uuhvfcfp25`
|
||||
- **Length**: 14 characters
|
||||
- **Precision**: Sub-meter accuracy
|
||||
- **Location**: Nawarat Bridge, Thailand
|
||||
|
||||
## 📈 **Grafana Visualization Examples**
|
||||
|
||||
### **1. Station Location Map**
|
||||
- **Type**: Geomap with markers
|
||||
- **Data**: Current station locations
|
||||
- **Color**: Water level or discharge percentage
|
||||
- **Size**: Discharge volume
|
||||
|
||||
### **2. Regional Water Levels**
|
||||
- **Type**: Geomap with heatmap
|
||||
- **Data**: Water level data across regions
|
||||
- **Visualization**: Color-coded intensity map
|
||||
- **Filters**: Time range, station groups
|
||||
|
||||
### **3. Alert Zones**
|
||||
- **Type**: Geomap with threshold markers
|
||||
- **Data**: Stations exceeding alert thresholds
|
||||
- **Visualization**: Red markers for high water levels
|
||||
- **Alerts**: Automated notifications for critical levels
|
||||
|
||||
## 🔄 **Updating a Running System**
|
||||
|
||||
### **Automated Migration Script**
|
||||
Use the provided migration script to safely add geolocation columns to your existing database:
|
||||
|
||||
```bash
|
||||
# Stop the water monitoring service first
|
||||
sudo systemctl stop water-monitor
|
||||
|
||||
# Run the migration script
|
||||
python migrate_geolocation.py
|
||||
|
||||
# Restart the service
|
||||
sudo systemctl start water-monitor
|
||||
```
|
||||
|
||||
### **Migration Script Features**
|
||||
- ✅ **Auto-detects database type** from environment variables
|
||||
- ✅ **Checks existing columns** to avoid conflicts
|
||||
- ✅ **Supports all database types** (SQLite, PostgreSQL, MySQL)
|
||||
- ✅ **Adds sample data** for P.1 station
|
||||
- ✅ **Safe operation** - won't break existing data
|
||||
|
||||
### **Step-by-Step Migration Process**
|
||||
|
||||
#### **1. Stop the Application**
|
||||
```bash
|
||||
# If running as systemd service
|
||||
sudo systemctl stop water-monitor
|
||||
|
||||
# If running in screen/tmux
|
||||
# Use Ctrl+C to stop the process
|
||||
|
||||
# If running as Docker container
|
||||
docker stop water-monitor
|
||||
```
|
||||
|
||||
#### **2. Backup Your Database**
|
||||
```bash
|
||||
# SQLite backup
|
||||
cp water_monitoring.db water_monitoring.db.backup
|
||||
|
||||
# PostgreSQL backup
|
||||
pg_dump water_monitoring > water_monitoring_backup.sql
|
||||
|
||||
# MySQL backup
|
||||
mysqldump water_monitoring > water_monitoring_backup.sql
|
||||
```
|
||||
|
||||
#### **3. Run Migration Script**
|
||||
```bash
|
||||
# Default (uses environment variables)
|
||||
python migrate_geolocation.py
|
||||
|
||||
# Or specify database path for SQLite
|
||||
SQLITE_DB_PATH=/path/to/water_monitoring.db python migrate_geolocation.py
|
||||
```
|
||||
|
||||
#### **4. Verify Migration**
|
||||
```bash
|
||||
# Check SQLite schema
|
||||
sqlite3 water_monitoring.db ".schema stations"
|
||||
|
||||
# Check PostgreSQL schema
|
||||
psql -d water_monitoring -c "\d stations"
|
||||
|
||||
# Check MySQL schema
|
||||
mysql -e "DESCRIBE water_monitoring.stations"
|
||||
```
|
||||
|
||||
#### **5. Update Application Code**
|
||||
Ensure you have the latest version of the application with geolocation support:
|
||||
```bash
|
||||
# Pull latest code
|
||||
git pull origin main
|
||||
|
||||
# Install any new dependencies
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
#### **6. Restart Application**
|
||||
```bash
|
||||
# Systemd service
|
||||
sudo systemctl start water-monitor
|
||||
|
||||
# Docker container
|
||||
docker start water-monitor
|
||||
|
||||
# Manual execution
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### **Migration Output Example**
|
||||
```
|
||||
2025-07-28 17:30:00,123 - INFO - Starting geolocation column migration...
|
||||
2025-07-28 17:30:00,124 - INFO - Detected database type: SQLITE
|
||||
2025-07-28 17:30:00,125 - INFO - Migrating SQLite database: water_monitoring.db
|
||||
2025-07-28 17:30:00,126 - INFO - Current columns in stations table: ['id', 'station_code', 'thai_name', 'english_name', 'created_at', 'updated_at']
|
||||
2025-07-28 17:30:00,127 - INFO - Added latitude column
|
||||
2025-07-28 17:30:00,128 - INFO - Added longitude column
|
||||
2025-07-28 17:30:00,129 - INFO - Added geohash column
|
||||
2025-07-28 17:30:00,130 - INFO - Successfully added columns: latitude, longitude, geohash
|
||||
2025-07-28 17:30:00,131 - INFO - Updated P.1 station with sample geolocation data
|
||||
2025-07-28 17:30:00,132 - INFO - P.1 station geolocation: ('P.1', 15.6944, 100.2028, 'w5q6uuhvfcfp25')
|
||||
2025-07-28 17:30:00,133 - INFO - ✅ Migration completed successfully!
|
||||
2025-07-28 17:30:00,134 - INFO - You can now restart your water monitoring application
|
||||
2025-07-28 17:30:00,135 - INFO - The system will automatically use the new geolocation columns
|
||||
```
|
||||
|
||||
## 🔍 **Troubleshooting**
|
||||
|
||||
### **Migration Issues**
|
||||
|
||||
#### **Database Locked Error**
|
||||
```bash
|
||||
# Stop all processes using the database
|
||||
sudo systemctl stop water-monitor
|
||||
pkill -f water_scraper
|
||||
|
||||
# Wait a few seconds, then run migration
|
||||
sleep 5
|
||||
python migrate_geolocation.py
|
||||
```
|
||||
|
||||
#### **Permission Denied**
|
||||
```bash
|
||||
# Check database file permissions
|
||||
ls -la water_monitoring.db
|
||||
|
||||
# Fix permissions if needed
|
||||
sudo chown $USER:$USER water_monitoring.db
|
||||
chmod 664 water_monitoring.db
|
||||
```
|
||||
|
||||
#### **Missing Dependencies**
|
||||
```bash
|
||||
# For PostgreSQL
|
||||
pip install psycopg2-binary
|
||||
|
||||
# For MySQL
|
||||
pip install pymysql
|
||||
|
||||
# For all databases
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### **Verification Issues**
|
||||
|
||||
#### **Missing Coordinates**
|
||||
If stations don't appear on the geomap:
|
||||
1. Check if latitude/longitude are NULL in database
|
||||
2. Verify geolocation data in station mapping
|
||||
3. Ensure database schema includes geolocation columns
|
||||
4. Run migration script if columns are missing
|
||||
|
||||
#### **Incorrect Positioning**
|
||||
If stations appear in wrong locations:
|
||||
1. Verify coordinate format (decimal degrees)
|
||||
2. Check latitude/longitude order (lat first, lon second)
|
||||
3. Validate geohash accuracy
|
||||
|
||||
### **Rollback Procedure**
|
||||
If migration causes issues:
|
||||
|
||||
#### **SQLite Rollback**
|
||||
```bash
|
||||
# Stop application
|
||||
sudo systemctl stop water-monitor
|
||||
|
||||
# Restore backup
|
||||
cp water_monitoring.db.backup water_monitoring.db
|
||||
|
||||
# Restart with old version
|
||||
sudo systemctl start water-monitor
|
||||
```
|
||||
|
||||
#### **PostgreSQL Rollback**
|
||||
```sql
|
||||
-- Remove added columns
|
||||
ALTER TABLE stations DROP COLUMN IF EXISTS latitude;
|
||||
ALTER TABLE stations DROP COLUMN IF EXISTS longitude;
|
||||
ALTER TABLE stations DROP COLUMN IF EXISTS geohash;
|
||||
```
|
||||
|
||||
#### **MySQL Rollback**
|
||||
```sql
|
||||
-- Remove added columns
|
||||
ALTER TABLE stations DROP COLUMN latitude;
|
||||
ALTER TABLE stations DROP COLUMN longitude;
|
||||
ALTER TABLE stations DROP COLUMN geohash;
|
||||
```
|
||||
|
||||
## 🎯 **Next Steps**
|
||||
|
||||
### **Immediate Actions**
|
||||
1. **Gather Coordinates**: Collect GPS coordinates for all 16 stations
|
||||
2. **Update Database**: Add coordinates to remaining stations
|
||||
3. **Create Dashboards**: Build Grafana geomap visualizations
|
||||
|
||||
### **Future Enhancements**
|
||||
1. **Automatic Geocoding**: API integration for address-to-coordinate conversion
|
||||
2. **Mobile GPS**: Mobile app for field coordinate collection
|
||||
3. **Satellite Integration**: Satellite imagery overlay in Grafana
|
||||
4. **Geofencing**: Alert zones based on geographic boundaries
|
||||
|
||||
The geolocation functionality is now fully implemented and ready for use with Grafana's geomap visualization. Station P.1 (Nawarat Bridge) serves as a working example with complete coordinate data.
|
295
docs/GITEA_WORKFLOWS.md
Normal file
295
docs/GITEA_WORKFLOWS.md
Normal file
@@ -0,0 +1,295 @@
|
||||
# 🔄 Gitea Actions Workflows - Northern Thailand Ping River Monitor
|
||||
|
||||
## 📋 Overview
|
||||
|
||||
This document describes the Gitea Actions workflows configured for the Northern Thailand Ping River Monitor project. These workflows provide comprehensive CI/CD, security scanning, and documentation generation.
|
||||
|
||||
## 🚀 Available Workflows
|
||||
|
||||
### 1. **CI/CD Pipeline** (`.gitea/workflows/ci.yml`)
|
||||
|
||||
**Triggers:**
|
||||
- Push to `main` or `develop` branches
|
||||
- Pull requests to `main`
|
||||
- Daily scheduled runs at 2 AM UTC
|
||||
|
||||
**Jobs:**
|
||||
- **Test Suite**: Multi-version Python testing (3.9-3.12)
|
||||
- **Code Quality**: Linting, formatting, and type checking
|
||||
- **Build**: Docker image creation and testing
|
||||
- **Integration Test**: Testing with VictoriaMetrics service
|
||||
- **Deploy Staging**: Automatic deployment to staging (develop branch)
|
||||
- **Deploy Production**: Manual deployment to production (main branch)
|
||||
- **Performance Test**: Load testing after production deployment
|
||||
|
||||
**Key Features:**
|
||||
- ✅ Multi-Python version testing
|
||||
- ✅ Docker multi-architecture builds (amd64, arm64)
|
||||
- ✅ Service integration testing
|
||||
- ✅ Automatic staging deployment
|
||||
- ✅ Manual production approval
|
||||
- ✅ Performance validation
|
||||
|
||||
### 2. **Security & Dependency Updates** (`.gitea/workflows/security.yml`)
|
||||
|
||||
**Triggers:**
|
||||
- Daily scheduled runs at 3 AM UTC
|
||||
- Manual dispatch
|
||||
- Changes to requirements files or Dockerfile
|
||||
|
||||
**Jobs:**
|
||||
- **Dependency Scan**: Safety, Bandit, Semgrep security scans
|
||||
- **Docker Security**: Trivy vulnerability scanning
|
||||
- **License Check**: License compliance verification
|
||||
- **Dependency Updates**: Automated update detection
|
||||
- **Code Quality**: Complexity and maintainability analysis
|
||||
|
||||
**Key Features:**
|
||||
- 🔒 Daily security scans
|
||||
- 📦 Dependency vulnerability detection
|
||||
- 📄 License compliance checking
|
||||
- 🔄 Automated update notifications
|
||||
- 📊 Code quality metrics
|
||||
|
||||
### 3. **Release Workflow** (`.gitea/workflows/release.yml`)
|
||||
|
||||
**Triggers:**
|
||||
- Git tags matching `v*.*.*` pattern
|
||||
- Manual dispatch with version input
|
||||
|
||||
**Jobs:**
|
||||
- **Create Release**: Automated release creation with changelog
|
||||
- **Test Release**: Comprehensive testing across Python versions
|
||||
- **Build Release**: Multi-architecture Docker images with proper tags
|
||||
- **Security Scan**: Trivy security scanning of release images
|
||||
- **Deploy Release**: Production deployment with health checks
|
||||
- **Validate Release**: Post-deployment validation and testing
|
||||
|
||||
**Key Features:**
|
||||
- 🏷️ Automated release creation
|
||||
- 📝 Changelog generation
|
||||
- 🐳 Multi-architecture Docker builds
|
||||
- 🔒 Security scanning
|
||||
- ✅ Comprehensive validation
|
||||
|
||||
### 4. **Documentation** (`.gitea/workflows/docs.yml`)
|
||||
|
||||
**Triggers:**
|
||||
- Changes to documentation files
|
||||
- Changes to Python source files
|
||||
- Manual dispatch
|
||||
|
||||
**Jobs:**
|
||||
- **Validate Docs**: Link checking and structure validation
|
||||
- **Generate API Docs**: OpenAPI specification generation
|
||||
- **Build Sphinx Docs**: Comprehensive API documentation
|
||||
- **Documentation Summary**: Build status and artifact summary
|
||||
|
||||
**Key Features:**
|
||||
- 📚 Automated API documentation
|
||||
- 🔗 Link validation
|
||||
- 📖 Sphinx documentation generation
|
||||
- ✅ Documentation completeness checking
|
||||
|
||||
## 🔧 Workflow Configuration
|
||||
|
||||
### **Required Secrets**
|
||||
|
||||
Configure these secrets in your Gitea repository settings:
|
||||
|
||||
```bash
|
||||
GITEA_TOKEN # Gitea access token for container registry
|
||||
SLACK_WEBHOOK_URL # Optional: Slack notifications
|
||||
STAGING_WEBHOOK_URL # Optional: Staging deployment webhook
|
||||
PRODUCTION_WEBHOOK_URL # Optional: Production deployment webhook
|
||||
```
|
||||
|
||||
### **Environment Variables**
|
||||
|
||||
Key environment variables used across workflows:
|
||||
|
||||
```yaml
|
||||
PYTHON_VERSION: '3.11' # Default Python version
|
||||
REGISTRY: git.b4l.co.th # Container registry
|
||||
IMAGE_NAME: grabowski/northern-thailand-ping-river-monitor
|
||||
```
|
||||
|
||||
## 📊 Workflow Status
|
||||
|
||||
### **CI/CD Pipeline Status**
|
||||
- **Test Coverage**: Multi-version Python testing
|
||||
- **Code Quality**: Automated linting and formatting
|
||||
- **Security**: Integrated security scanning
|
||||
- **Deployment**: Automated staging, manual production
|
||||
|
||||
### **Security Monitoring**
|
||||
- **Daily Scans**: Automated vulnerability detection
|
||||
- **Dependency Updates**: Proactive update notifications
|
||||
- **License Compliance**: Automated license checking
|
||||
- **Code Quality**: Continuous quality monitoring
|
||||
|
||||
### **Release Management**
|
||||
- **Automated Releases**: Tag-based release creation
|
||||
- **Multi-Architecture**: Support for amd64 and arm64
|
||||
- **Security Validation**: Pre-deployment security checks
|
||||
- **Health Monitoring**: Post-deployment validation
|
||||
|
||||
## 🚀 Usage Examples
|
||||
|
||||
### **Triggering Workflows**
|
||||
|
||||
**Manual CI/CD Run:**
|
||||
```bash
|
||||
# Push to trigger CI/CD
|
||||
git push origin main
|
||||
|
||||
# Create pull request to trigger testing
|
||||
git checkout -b feature/new-feature
|
||||
git push origin feature/new-feature
|
||||
# Create PR in Gitea UI
|
||||
```
|
||||
|
||||
**Manual Security Scan:**
|
||||
```bash
|
||||
# Trigger via Gitea Actions UI
|
||||
# Go to Actions → Security & Dependency Updates → Run workflow
|
||||
```
|
||||
|
||||
**Creating a Release:**
|
||||
```bash
|
||||
# Create and push a tag
|
||||
git tag v3.1.1
|
||||
git push origin v3.1.1
|
||||
|
||||
# Or use manual dispatch in Gitea Actions UI
|
||||
```
|
||||
|
||||
### **Monitoring Workflow Results**
|
||||
|
||||
**Check Workflow Status:**
|
||||
1. Navigate to your repository in Gitea
|
||||
2. Click on "Actions" tab
|
||||
3. View workflow runs and their status
|
||||
|
||||
**Download Artifacts:**
|
||||
1. Click on a completed workflow run
|
||||
2. Scroll to "Artifacts" section
|
||||
3. Download reports and logs
|
||||
|
||||
**View Security Reports:**
|
||||
1. Go to Security workflow runs
|
||||
2. Download security-reports artifacts
|
||||
3. Review JSON reports for vulnerabilities
|
||||
|
||||
## 🔍 Troubleshooting
|
||||
|
||||
### **Common Issues**
|
||||
|
||||
**Workflow Fails on Dependencies:**
|
||||
```bash
|
||||
# Check requirements.txt for version conflicts
|
||||
pip-compile requirements.in
|
||||
```
|
||||
|
||||
**Docker Build Fails:**
|
||||
```bash
|
||||
# Test Docker build locally
|
||||
make docker-build
|
||||
docker run --rm ping-river-monitor python run.py --test
|
||||
```
|
||||
|
||||
**Security Scan Failures:**
|
||||
```bash
|
||||
# Run security scans locally
|
||||
safety check -r requirements.txt
|
||||
bandit -r src/
|
||||
```
|
||||
|
||||
**Test Failures:**
|
||||
```bash
|
||||
# Run tests locally
|
||||
make test
|
||||
python tests/test_integration.py
|
||||
```
|
||||
|
||||
### **Debugging Workflows**
|
||||
|
||||
**Enable Debug Logging:**
|
||||
Add to workflow file:
|
||||
```yaml
|
||||
env:
|
||||
ACTIONS_STEP_DEBUG: true
|
||||
ACTIONS_RUNNER_DEBUG: true
|
||||
```
|
||||
|
||||
**Check Workflow Logs:**
|
||||
1. Go to failed workflow run
|
||||
2. Click on failed job
|
||||
3. Expand failed step to see detailed logs
|
||||
|
||||
**Validate Workflow Syntax:**
|
||||
```bash
|
||||
# Validate YAML syntax
|
||||
make validate-workflows
|
||||
```
|
||||
|
||||
## 📈 Performance Optimization
|
||||
|
||||
### **Caching Strategy**
|
||||
- **Pip Cache**: Cached across workflow runs
|
||||
- **Docker Layer Cache**: GitHub Actions cache for faster builds
|
||||
- **Dependency Cache**: Cached based on requirements.txt hash
|
||||
|
||||
### **Parallel Execution**
|
||||
- **Matrix Builds**: Multiple Python versions tested in parallel
|
||||
- **Independent Jobs**: Security scans run independently of tests
|
||||
- **Conditional Execution**: Jobs skip when not needed
|
||||
|
||||
### **Resource Management**
|
||||
- **Timeout Settings**: Prevent hanging workflows
|
||||
- **Resource Limits**: Appropriate runner sizing
|
||||
- **Artifact Cleanup**: Automatic cleanup of old artifacts
|
||||
|
||||
## 🔒 Security Best Practices
|
||||
|
||||
### **Secret Management**
|
||||
- Use Gitea repository secrets for sensitive data
|
||||
- Never commit secrets to repository
|
||||
- Rotate secrets regularly
|
||||
- Use least-privilege access tokens
|
||||
|
||||
### **Container Security**
|
||||
- Multi-stage Docker builds for smaller images
|
||||
- Non-root user in containers
|
||||
- Regular base image updates
|
||||
- Vulnerability scanning before deployment
|
||||
|
||||
### **Code Security**
|
||||
- Automated security scanning in CI/CD
|
||||
- Dependency vulnerability monitoring
|
||||
- License compliance checking
|
||||
- Code quality enforcement
|
||||
|
||||
## 📚 Additional Resources
|
||||
|
||||
### **Gitea Actions Documentation**
|
||||
- [Gitea Actions Overview](https://docs.gitea.io/en-us/usage/actions/)
|
||||
- [Workflow Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions)
|
||||
- [Available Actions](https://github.com/marketplace?type=actions)
|
||||
|
||||
### **Project-Specific Resources**
|
||||
- [Contributing Guide](../CONTRIBUTING.md)
|
||||
- [Deployment Checklist](../DEPLOYMENT_CHECKLIST.md)
|
||||
- [Project Structure](PROJECT_STRUCTURE.md)
|
||||
|
||||
### **Monitoring and Alerts**
|
||||
- Workflow status badges in README
|
||||
- Email notifications for failures
|
||||
- Slack/Discord integration for team updates
|
||||
- Grafana dashboards for deployment metrics
|
||||
|
||||
---
|
||||
|
||||
**Workflow Version**: v3.1.0
|
||||
**Last Updated**: 2025-08-12
|
||||
**Maintained By**: Ping River Monitor Team
|
389
docs/HTTPS_CONFIGURATION.md
Normal file
389
docs/HTTPS_CONFIGURATION.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# HTTPS VictoriaMetrics Configuration Guide
|
||||
|
||||
This guide explains how to configure the Thailand Water Monitor to connect to VictoriaMetrics through HTTPS and reverse proxies.
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### 1. Environment Variables for HTTPS
|
||||
|
||||
```bash
|
||||
# Option 1: Full HTTPS URL (Recommended)
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://vm.example.com
|
||||
export VM_PORT=443
|
||||
|
||||
# Option 2: Host and port separately
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=vm.example.com
|
||||
export VM_PORT=443
|
||||
|
||||
# Option 3: Custom port with HTTPS
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://vm.example.com
|
||||
export VM_PORT=8443
|
||||
```
|
||||
|
||||
### 2. Windows PowerShell Configuration
|
||||
|
||||
```powershell
|
||||
# Set environment variables for HTTPS
|
||||
$env:DB_TYPE="victoriametrics"
|
||||
$env:VM_HOST="https://vm.example.com"
|
||||
$env:VM_PORT="443"
|
||||
|
||||
# Run the water monitor
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### 3. Linux/Mac Configuration
|
||||
|
||||
```bash
|
||||
# Set environment variables for HTTPS
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://vm.example.com
|
||||
export VM_PORT=443
|
||||
|
||||
# Run the water monitor
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
## Reverse Proxy Examples
|
||||
|
||||
### 1. Nginx Reverse Proxy
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 443 ssl http2;
|
||||
server_name vm.example.com;
|
||||
|
||||
# SSL Configuration
|
||||
ssl_certificate /path/to/certificate.crt;
|
||||
ssl_certificate_key /path/to/private.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
ssl_ciphers ECDHE-RSA-AES256-GCM-SHA512:DHE-RSA-AES256-GCM-SHA512;
|
||||
|
||||
# Security headers
|
||||
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
|
||||
add_header X-Frame-Options DENY always;
|
||||
add_header X-Content-Type-Options nosniff always;
|
||||
|
||||
# Optional: Basic authentication
|
||||
# auth_basic "VictoriaMetrics";
|
||||
# auth_basic_user_file /etc/nginx/.htpasswd;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:8428;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support (if needed)
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# Timeouts
|
||||
proxy_connect_timeout 60s;
|
||||
proxy_send_timeout 60s;
|
||||
proxy_read_timeout 60s;
|
||||
}
|
||||
}
|
||||
|
||||
# Redirect HTTP to HTTPS
|
||||
server {
|
||||
listen 80;
|
||||
server_name vm.example.com;
|
||||
return 301 https://$server_name$request_uri;
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Apache Reverse Proxy
|
||||
|
||||
```apache
|
||||
<VirtualHost *:443>
|
||||
ServerName vm.example.com
|
||||
|
||||
# SSL Configuration
|
||||
SSLEngine on
|
||||
SSLCertificateFile /path/to/certificate.crt
|
||||
SSLCertificateKeyFile /path/to/private.key
|
||||
SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
|
||||
SSLCipherSuite ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384
|
||||
|
||||
# Security headers
|
||||
Header always set Strict-Transport-Security "max-age=31536000; includeSubDomains"
|
||||
Header always set X-Frame-Options DENY
|
||||
Header always set X-Content-Type-Options nosniff
|
||||
|
||||
# Reverse proxy configuration
|
||||
ProxyPreserveHost On
|
||||
ProxyPass / http://localhost:8428/
|
||||
ProxyPassReverse / http://localhost:8428/
|
||||
|
||||
# Optional: Basic authentication
|
||||
# AuthType Basic
|
||||
# AuthName "VictoriaMetrics"
|
||||
# AuthUserFile /etc/apache2/.htpasswd
|
||||
# Require valid-user
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:80>
|
||||
ServerName vm.example.com
|
||||
Redirect permanent / https://vm.example.com/
|
||||
</VirtualHost>
|
||||
```
|
||||
|
||||
### 3. Traefik Reverse Proxy
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml with Traefik
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
traefik:
|
||||
image: traefik:v2.10
|
||||
command:
|
||||
- --api.dashboard=true
|
||||
- --entrypoints.web.address=:80
|
||||
- --entrypoints.websecure.address=:443
|
||||
- --providers.docker=true
|
||||
- --certificatesresolvers.letsencrypt.acme.tlschallenge=true
|
||||
- --certificatesresolvers.letsencrypt.acme.email=admin@example.com
|
||||
- --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- letsencrypt:/letsencrypt
|
||||
labels:
|
||||
- traefik.http.routers.api.rule=Host(`traefik.example.com`)
|
||||
- traefik.http.routers.api.tls.certresolver=letsencrypt
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
command:
|
||||
- '--storageDataPath=/victoria-metrics-data'
|
||||
- '--retentionPeriod=2y'
|
||||
- '--httpListenAddr=:8428'
|
||||
volumes:
|
||||
- vm_data:/victoria-metrics-data
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.http.routers.vm.rule=Host(`vm.example.com`)
|
||||
- traefik.http.routers.vm.tls.certresolver=letsencrypt
|
||||
- traefik.http.services.vm.loadbalancer.server.port=8428
|
||||
|
||||
volumes:
|
||||
vm_data:
|
||||
letsencrypt:
|
||||
```
|
||||
|
||||
## Testing HTTPS Configuration
|
||||
|
||||
### 1. Test Connection
|
||||
|
||||
```bash
|
||||
# Test HTTPS connection
|
||||
curl -k https://vm.example.com/health
|
||||
|
||||
# Test with specific port
|
||||
curl -k https://vm.example.com:8443/health
|
||||
|
||||
# Test API endpoint
|
||||
curl -k "https://vm.example.com/api/v1/query?query=up"
|
||||
```
|
||||
|
||||
### 2. Test with Water Monitor
|
||||
|
||||
```bash
|
||||
# Set environment variables
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://vm.example.com
|
||||
export VM_PORT=443
|
||||
|
||||
# Test with demo script
|
||||
python demo_databases.py victoriametrics
|
||||
|
||||
# Run full water monitor
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### 3. Verify SSL Certificate
|
||||
|
||||
```bash
|
||||
# Check SSL certificate
|
||||
openssl s_client -connect vm.example.com:443 -servername vm.example.com
|
||||
|
||||
# Check certificate expiration
|
||||
echo | openssl s_client -connect vm.example.com:443 2>/dev/null | openssl x509 -noout -dates
|
||||
```
|
||||
|
||||
## Configuration Examples
|
||||
|
||||
### 1. Production HTTPS Setup
|
||||
|
||||
```bash
|
||||
# Environment variables for production
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://metrics.company.com
|
||||
export VM_PORT=443
|
||||
export LOG_LEVEL=INFO
|
||||
export SCRAPING_INTERVAL_HOURS=1
|
||||
|
||||
# Run water monitor
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### 2. Development with Self-Signed Certificate
|
||||
|
||||
```bash
|
||||
# For development with self-signed certificates
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://dev-vm.local
|
||||
export VM_PORT=443
|
||||
export PYTHONHTTPSVERIFY=0 # Disable SSL verification (dev only)
|
||||
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### 3. Custom Port Configuration
|
||||
|
||||
```bash
|
||||
# Custom HTTPS port
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=https://vm.example.com
|
||||
export VM_PORT=8443
|
||||
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
## Troubleshooting HTTPS Issues
|
||||
|
||||
### 1. SSL Certificate Errors
|
||||
|
||||
```bash
|
||||
# Error: SSL certificate verify failed
|
||||
# Solution: Check certificate validity
|
||||
openssl x509 -in certificate.crt -text -noout
|
||||
|
||||
# Temporary workaround (not recommended for production)
|
||||
export PYTHONHTTPSVERIFY=0
|
||||
```
|
||||
|
||||
### 2. Connection Timeout
|
||||
|
||||
```bash
|
||||
# Error: Connection timeout
|
||||
# Check firewall and network connectivity
|
||||
telnet vm.example.com 443
|
||||
nc -zv vm.example.com 443
|
||||
```
|
||||
|
||||
### 3. DNS Resolution Issues
|
||||
|
||||
```bash
|
||||
# Error: Name resolution failed
|
||||
# Check DNS resolution
|
||||
nslookup vm.example.com
|
||||
dig vm.example.com
|
||||
```
|
||||
|
||||
### 4. Proxy Configuration Issues
|
||||
|
||||
```bash
|
||||
# Check proxy logs
|
||||
# Nginx
|
||||
tail -f /var/log/nginx/error.log
|
||||
|
||||
# Apache
|
||||
tail -f /var/log/apache2/error.log
|
||||
|
||||
# Test direct connection to backend
|
||||
curl http://localhost:8428/health
|
||||
```
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
### 1. SSL/TLS Configuration
|
||||
|
||||
- Use TLS 1.2 or higher
|
||||
- Disable weak ciphers
|
||||
- Enable HSTS headers
|
||||
- Use strong SSL certificates
|
||||
|
||||
### 2. Authentication
|
||||
|
||||
```nginx
|
||||
# Basic authentication in Nginx
|
||||
auth_basic "VictoriaMetrics Access";
|
||||
auth_basic_user_file /etc/nginx/.htpasswd;
|
||||
|
||||
# Create password file
|
||||
htpasswd -c /etc/nginx/.htpasswd username
|
||||
```
|
||||
|
||||
### 3. Network Security
|
||||
|
||||
- Use firewall rules to restrict access
|
||||
- Consider VPN for internal access
|
||||
- Implement rate limiting
|
||||
- Monitor access logs
|
||||
|
||||
### 4. Certificate Management
|
||||
|
||||
```bash
|
||||
# Auto-renewal with Let's Encrypt
|
||||
certbot renew --dry-run
|
||||
|
||||
# Certificate monitoring
|
||||
echo | openssl s_client -connect vm.example.com:443 2>/dev/null | \
|
||||
openssl x509 -noout -dates | grep notAfter
|
||||
```
|
||||
|
||||
## Docker Configuration for HTTPS
|
||||
|
||||
### 1. Docker Compose with HTTPS
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
water-monitor:
|
||||
build: .
|
||||
environment:
|
||||
- DB_TYPE=victoriametrics
|
||||
- VM_HOST=https://vm.example.com
|
||||
- VM_PORT=443
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
ports:
|
||||
- "8428:8428"
|
||||
volumes:
|
||||
- vm_data:/victoria-metrics-data
|
||||
command:
|
||||
- '--storageDataPath=/victoria-metrics-data'
|
||||
- '--retentionPeriod=2y'
|
||||
- '--httpListenAddr=:8428'
|
||||
|
||||
volumes:
|
||||
vm_data:
|
||||
```
|
||||
|
||||
### 2. Environment File (.env)
|
||||
|
||||
```bash
|
||||
# .env file
|
||||
DB_TYPE=victoriametrics
|
||||
VM_HOST=https://vm.example.com
|
||||
VM_PORT=443
|
||||
LOG_LEVEL=INFO
|
||||
SCRAPING_INTERVAL_HOURS=1
|
||||
```
|
||||
|
||||
This configuration guide provides comprehensive instructions for setting up HTTPS connectivity to VictoriaMetrics through reverse proxies, ensuring secure and reliable data transmission for the Thailand Water Monitor.
|
136
docs/MIGRATION_QUICKSTART.md
Normal file
136
docs/MIGRATION_QUICKSTART.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# Geolocation Migration Quick Start
|
||||
|
||||
This is a quick reference guide for updating a running Thailand Water Monitor system to add geolocation support for Grafana geomap.
|
||||
|
||||
## 🚀 **Quick Migration (5 minutes)**
|
||||
|
||||
### **Step 1: Stop Application**
|
||||
```bash
|
||||
# Stop the service (choose your method)
|
||||
sudo systemctl stop water-monitor
|
||||
# OR
|
||||
docker stop water-monitor
|
||||
# OR use Ctrl+C if running manually
|
||||
```
|
||||
|
||||
### **Step 2: Backup Database**
|
||||
```bash
|
||||
# SQLite backup
|
||||
cp water_monitoring.db water_monitoring.db.backup
|
||||
|
||||
# PostgreSQL backup
|
||||
pg_dump water_monitoring > backup.sql
|
||||
|
||||
# MySQL backup
|
||||
mysqldump water_monitoring > backup.sql
|
||||
```
|
||||
|
||||
### **Step 3: Run Migration**
|
||||
```bash
|
||||
# Run the automated migration script
|
||||
python migrate_geolocation.py
|
||||
```
|
||||
|
||||
### **Step 4: Restart Application**
|
||||
```bash
|
||||
# Restart the service
|
||||
sudo systemctl start water-monitor
|
||||
# OR
|
||||
docker start water-monitor
|
||||
# OR
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
## ✅ **Expected Output**
|
||||
```
|
||||
2025-07-28 17:30:00,123 - INFO - Starting geolocation column migration...
|
||||
2025-07-28 17:30:00,124 - INFO - Detected database type: SQLITE
|
||||
2025-07-28 17:30:00,127 - INFO - Added latitude column
|
||||
2025-07-28 17:30:00,128 - INFO - Added longitude column
|
||||
2025-07-28 17:30:00,129 - INFO - Added geohash column
|
||||
2025-07-28 17:30:00,133 - INFO - ✅ Migration completed successfully!
|
||||
```
|
||||
|
||||
## 🗺️ **Verify Geolocation Works**
|
||||
|
||||
### **Check Database**
|
||||
```bash
|
||||
# SQLite
|
||||
sqlite3 water_monitoring.db "SELECT station_code, latitude, longitude, geohash FROM stations WHERE station_code = 'P.1';"
|
||||
|
||||
# Expected output: P.1|15.6944|100.2028|w5q6uuhvfcfp25
|
||||
```
|
||||
|
||||
### **Test Application**
|
||||
```bash
|
||||
# Run a test cycle
|
||||
python water_scraper_v3.py --test
|
||||
|
||||
# Should complete without errors
|
||||
```
|
||||
|
||||
## 🔧 **Grafana Setup**
|
||||
|
||||
### **Query for Geomap**
|
||||
```sql
|
||||
SELECT
|
||||
s.latitude, s.longitude, s.station_code, s.english_name,
|
||||
m.water_level, m.discharge_percent
|
||||
FROM stations s
|
||||
JOIN water_measurements m ON s.id = m.station_id
|
||||
WHERE s.latitude IS NOT NULL
|
||||
AND m.timestamp = (SELECT MAX(timestamp) FROM water_measurements WHERE station_id = s.id)
|
||||
```
|
||||
|
||||
### **Geomap Configuration**
|
||||
1. Create new panel → Select "Geomap"
|
||||
2. Set **Latitude field**: `latitude`
|
||||
3. Set **Longitude field**: `longitude`
|
||||
4. Set **Color field**: `water_level`
|
||||
5. Set **Size field**: `discharge_percent`
|
||||
|
||||
## 🚨 **Troubleshooting**
|
||||
|
||||
### **Database Locked**
|
||||
```bash
|
||||
sudo systemctl stop water-monitor
|
||||
pkill -f water_scraper
|
||||
sleep 5
|
||||
python migrate_geolocation.py
|
||||
```
|
||||
|
||||
### **Permission Error**
|
||||
```bash
|
||||
sudo chown $USER:$USER water_monitoring.db
|
||||
chmod 664 water_monitoring.db
|
||||
```
|
||||
|
||||
### **Missing Dependencies**
|
||||
```bash
|
||||
pip install psycopg2-binary pymysql
|
||||
```
|
||||
|
||||
## 🔄 **Rollback (if needed)**
|
||||
```bash
|
||||
# Stop application
|
||||
sudo systemctl stop water-monitor
|
||||
|
||||
# Restore backup
|
||||
cp water_monitoring.db.backup water_monitoring.db
|
||||
|
||||
# Restart
|
||||
sudo systemctl start water-monitor
|
||||
```
|
||||
|
||||
## 📚 **More Information**
|
||||
- **Full Guide**: See `GEOLOCATION_GUIDE.md`
|
||||
- **Migration Script**: `migrate_geolocation.py`
|
||||
- **Database Schema**: Updated with latitude, longitude, geohash columns
|
||||
|
||||
## 🎯 **What You Get**
|
||||
- ✅ **P.1 Station** ready for geomap (Nawarat Bridge)
|
||||
- ✅ **Database Schema** updated for all 16 stations
|
||||
- ✅ **Grafana Compatible** data structure
|
||||
- ✅ **Backward Compatible** - existing data preserved
|
||||
|
||||
**Total Time**: ~5 minutes for complete migration
|
206
docs/PROJECT_STATUS.md
Normal file
206
docs/PROJECT_STATUS.md
Normal file
@@ -0,0 +1,206 @@
|
||||
# Thailand Water Monitor - Current Project Status
|
||||
|
||||
## 📁 **Clean Project Structure**
|
||||
|
||||
The project has been cleaned up and organized with the following structure:
|
||||
|
||||
```
|
||||
water_level_monitor/
|
||||
├── 📄 .gitignore # Git ignore rules
|
||||
├── 📄 README.md # Main project documentation
|
||||
├── 📄 requirements.txt # Python dependencies
|
||||
├── 📄 config.py # Configuration management
|
||||
├── 📄 water_scraper_v3.py # Main application (15-min scheduler)
|
||||
├── 📄 database_adapters.py # Multi-database support
|
||||
├── 📄 demo_databases.py # Database demonstration
|
||||
├── 📄 Dockerfile # Container configuration
|
||||
├── 📄 docker-compose.victoriametrics.yml # VictoriaMetrics stack
|
||||
├── 📚 Documentation/
|
||||
│ ├── 📄 DATABASE_DEPLOYMENT_GUIDE.md # Multi-database setup guide
|
||||
│ ├── 📄 DEBIAN_TROUBLESHOOTING.md # Linux deployment guide
|
||||
│ ├── 📄 ENHANCED_SCHEDULER_GUIDE.md # 15-minute scheduler guide
|
||||
│ ├── 📄 GAP_FILLING_GUIDE.md # Data gap filling guide
|
||||
│ ├── 📄 HTTPS_CONFIGURATION.md # HTTPS setup guide
|
||||
│ └── 📄 VICTORIAMETRICS_SETUP.md # VictoriaMetrics guide
|
||||
└── 📁 grafana/ # Grafana configuration
|
||||
├── 📁 provisioning/
|
||||
│ ├── 📁 datasources/
|
||||
│ │ └── 📄 victoriametrics.yml # VictoriaMetrics data source
|
||||
│ └── 📁 dashboards/
|
||||
│ └── 📄 dashboard.yml # Dashboard provider config
|
||||
└── 📁 dashboards/
|
||||
└── 📄 water-monitoring-dashboard.json # Pre-built dashboard
|
||||
```
|
||||
|
||||
## 🧹 **Files Removed During Cleanup**
|
||||
|
||||
### **Old Data Files**
|
||||
- ❌ `thailand_water_data_v2.csv` - Old CSV export
|
||||
- ❌ `water_monitor.log` - Log file (regenerated automatically)
|
||||
- ❌ `water_monitoring.db` - SQLite database (recreated automatically)
|
||||
|
||||
### **Outdated Documentation**
|
||||
- ❌ `FINAL_SUMMARY.md` - Contained references to non-existent v2 files
|
||||
- ❌ `PROJECT_SUMMARY.md` - Outdated project information
|
||||
|
||||
### **System Files**
|
||||
- ❌ `__pycache__/` - Python compiled files directory
|
||||
|
||||
## ✅ **Current Features**
|
||||
|
||||
### **Enhanced 15-Minute Scheduler**
|
||||
- **Timing**: Runs every 15 minutes (1:00, 1:15, 1:30, 1:45, 2:00, etc.)
|
||||
- **Full Checks**: At :00 minutes (gap filling + data updates)
|
||||
- **Quick Checks**: At :15, :30, :45 minutes (data fetch only)
|
||||
- **Gap Filling**: Automatically fills missing historical data
|
||||
- **Data Updates**: Updates existing records when values change
|
||||
|
||||
### **Multi-Database Support**
|
||||
- **VictoriaMetrics** (Recommended) - High-performance time-series
|
||||
- **InfluxDB** - Purpose-built time-series database
|
||||
- **PostgreSQL + TimescaleDB** - Relational with time-series optimization
|
||||
- **MySQL** - Traditional relational database
|
||||
- **SQLite** - Local development and testing
|
||||
|
||||
### **Production Features**
|
||||
- **Docker Support**: Complete containerization
|
||||
- **Grafana Integration**: Pre-built dashboards
|
||||
- **HTTPS Configuration**: Secure deployment options
|
||||
- **Health Monitoring**: Comprehensive logging and error handling
|
||||
- **Gap Detection**: Automatic identification of missing data
|
||||
- **Retry Logic**: Database lock handling and network error recovery
|
||||
|
||||
## 🚀 **Quick Start**
|
||||
|
||||
### **1. Basic Setup (SQLite)**
|
||||
```bash
|
||||
cd water_level_monitor
|
||||
pip install -r requirements.txt
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### **2. VictoriaMetrics Setup**
|
||||
```bash
|
||||
# Start VictoriaMetrics + Grafana
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
# Configure environment
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=localhost
|
||||
export VM_PORT=8428
|
||||
|
||||
# Run monitor
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### **3. Test Different Databases**
|
||||
```bash
|
||||
# Test all supported databases
|
||||
python demo_databases.py all
|
||||
|
||||
# Test specific database
|
||||
python demo_databases.py victoriametrics
|
||||
```
|
||||
|
||||
## 📊 **Data Collection**
|
||||
|
||||
### **Station Coverage**
|
||||
- **16 Water Monitoring Stations** across Thailand
|
||||
- **Accurate Station Codes**: P.1, P.20, P.21, P.4A, P.5, P.67, P.75, P.76, P.77, P.81, P.82, P.84, P.85, P.87, P.92, P.103
|
||||
- **Bilingual Names**: Thai and English station identification
|
||||
|
||||
### **Metrics Collected**
|
||||
- 🌊 **Water Level**: Measured in meters (m)
|
||||
- 💧 **Discharge**: Measured in cubic meters per second (cms)
|
||||
- 📊 **Discharge Percentage**: Relative to station capacity
|
||||
- ⏰ **Timestamp**: Hour 24 handling (midnight = 00:00 next day)
|
||||
|
||||
### **Data Frequency**
|
||||
- **Every 15 Minutes**: Continuous monitoring
|
||||
- **~300+ Data Points**: Per collection cycle
|
||||
- **Automatic Gap Filling**: Historical data recovery
|
||||
- **Data Updates**: Changed values detection and correction
|
||||
|
||||
## 🔧 **Command Line Tools**
|
||||
|
||||
### **Main Application**
|
||||
```bash
|
||||
python water_scraper_v3.py # Run continuous monitoring
|
||||
python water_scraper_v3.py --test # Single test cycle
|
||||
python water_scraper_v3.py --help # Show help
|
||||
```
|
||||
|
||||
### **Gap Management**
|
||||
```bash
|
||||
python water_scraper_v3.py --check-gaps [days] # Check for missing data
|
||||
python water_scraper_v3.py --fill-gaps [days] # Fill missing data gaps
|
||||
python water_scraper_v3.py --update-data [days] # Update existing data
|
||||
```
|
||||
|
||||
### **Database Testing**
|
||||
```bash
|
||||
python demo_databases.py # SQLite demo
|
||||
python demo_databases.py victoriametrics # VictoriaMetrics demo
|
||||
python demo_databases.py all # Test all databases
|
||||
```
|
||||
|
||||
## 📈 **Monitoring & Visualization**
|
||||
|
||||
### **Grafana Dashboard**
|
||||
- **URL**: http://localhost:3000 (when using docker-compose)
|
||||
- **Username**: admin
|
||||
- **Password**: admin_password
|
||||
- **Features**: Time series charts, status tables, gauges, alerts
|
||||
|
||||
### **VictoriaMetrics API**
|
||||
- **URL**: http://localhost:8428
|
||||
- **Health**: http://localhost:8428/health
|
||||
- **Metrics**: http://localhost:8428/metrics
|
||||
- **Query API**: http://localhost:8428/api/v1/query
|
||||
|
||||
## 🛡️ **Security & Production**
|
||||
|
||||
### **HTTPS Configuration**
|
||||
- Complete guide in `HTTPS_CONFIGURATION.md`
|
||||
- SSL certificate setup
|
||||
- Reverse proxy configuration
|
||||
- Security best practices
|
||||
|
||||
### **Deployment Options**
|
||||
- **Docker**: Containerized deployment
|
||||
- **Systemd**: Linux service configuration
|
||||
- **Cloud**: AWS, GCP, Azure deployment guides
|
||||
- **Monitoring**: Health checks and alerting
|
||||
|
||||
## 📚 **Documentation**
|
||||
|
||||
### **Available Guides**
|
||||
1. **README.md** - Main project documentation
|
||||
2. **DATABASE_DEPLOYMENT_GUIDE.md** - Multi-database setup
|
||||
3. **ENHANCED_SCHEDULER_GUIDE.md** - 15-minute scheduler details
|
||||
4. **GAP_FILLING_GUIDE.md** - Data integrity and gap filling
|
||||
5. **DEBIAN_TROUBLESHOOTING.md** - Linux deployment troubleshooting
|
||||
6. **VICTORIAMETRICS_SETUP.md** - VictoriaMetrics configuration
|
||||
7. **HTTPS_CONFIGURATION.md** - Secure deployment setup
|
||||
|
||||
### **Key Features Documented**
|
||||
- ✅ Installation and configuration
|
||||
- ✅ Multi-database support
|
||||
- ✅ 15-minute scheduling system
|
||||
- ✅ Gap filling and data integrity
|
||||
- ✅ Production deployment
|
||||
- ✅ Monitoring and troubleshooting
|
||||
- ✅ Security configuration
|
||||
|
||||
## 🎯 **Project Status: PRODUCTION READY**
|
||||
|
||||
The Thailand Water Monitor is now:
|
||||
- ✅ **Clean**: All old and redundant files removed
|
||||
- ✅ **Organized**: Clear project structure with proper documentation
|
||||
- ✅ **Enhanced**: 15-minute scheduling with gap filling
|
||||
- ✅ **Scalable**: Multi-database support with VictoriaMetrics
|
||||
- ✅ **Secure**: HTTPS configuration and security best practices
|
||||
- ✅ **Monitored**: Comprehensive logging and Grafana dashboards
|
||||
- ✅ **Documented**: Complete guides for all features and deployment options
|
||||
|
||||
The project is ready for production deployment with professional-grade monitoring capabilities.
|
272
docs/PROJECT_STRUCTURE.md
Normal file
272
docs/PROJECT_STRUCTURE.md
Normal file
@@ -0,0 +1,272 @@
|
||||
# 🏗️ Project Structure - Northern Thailand Ping River Monitor
|
||||
|
||||
## 📁 Directory Layout
|
||||
|
||||
```
|
||||
Northern-Thailand-Ping-River-Monitor/
|
||||
├── 📁 src/ # Main application source code
|
||||
│ ├── __init__.py # Package initialization
|
||||
│ ├── main.py # CLI entry point and main application
|
||||
│ ├── water_scraper_v3.py # Core data collection engine
|
||||
│ ├── web_api.py # FastAPI web interface
|
||||
│ ├── config.py # Configuration management
|
||||
│ ├── database_adapters.py # Database abstraction layer
|
||||
│ ├── models.py # Data models and type definitions
|
||||
│ ├── exceptions.py # Custom exception classes
|
||||
│ ├── validators.py # Data validation layer
|
||||
│ ├── metrics.py # Metrics collection system
|
||||
│ ├── health_check.py # Health monitoring system
|
||||
│ ├── rate_limiter.py # Rate limiting and request tracking
|
||||
│ └── logging_config.py # Enhanced logging configuration
|
||||
├── 📁 docs/ # Documentation files
|
||||
│ ├── STATION_MANAGEMENT_GUIDE.md # Station management documentation
|
||||
│ ├── ENHANCEMENT_SUMMARY.md # Feature enhancement summary
|
||||
│ └── PROJECT_STRUCTURE.md # This file
|
||||
├── 📁 scripts/ # Utility scripts
|
||||
│ └── migrate_geolocation.py # Database migration script
|
||||
├── 📁 grafana/ # Grafana configuration
|
||||
│ ├── dashboards/ # Dashboard definitions
|
||||
│ └── provisioning/ # Grafana provisioning config
|
||||
├── 📁 tests/ # Test files
|
||||
│ ├── test_integration.py # Integration test suite
|
||||
│ ├── test_station_management.py # Station management tests
|
||||
│ └── test_api.py # API endpoint tests
|
||||
├── 📄 run.py # Simple startup script
|
||||
├── 📄 requirements.txt # Production dependencies
|
||||
├── 📄 requirements-dev.txt # Development dependencies
|
||||
├── 📄 setup.py # Package installation script
|
||||
├── 📄 Dockerfile # Docker container definition
|
||||
├── 📄 docker-compose.victoriametrics.yml # Complete stack deployment
|
||||
├── 📄 Makefile # Common development tasks
|
||||
├── 📄 .env.example # Environment configuration template
|
||||
├── 📄 .gitignore # Git ignore patterns
|
||||
├── 📄 .gitlab-ci.yml # CI/CD pipeline configuration
|
||||
├── 📄 LICENSE # MIT license
|
||||
├── 📄 README.md # Main project documentation
|
||||
└── 📄 CONTRIBUTING.md # Contribution guidelines
|
||||
```
|
||||
|
||||
## 🔧 Core Components
|
||||
|
||||
### **Application Layer**
|
||||
- **`src/main.py`** - Command-line interface and application orchestration
|
||||
- **`src/web_api.py`** - FastAPI web interface with REST endpoints
|
||||
- **`src/water_scraper_v3.py`** - Core data collection and processing engine
|
||||
|
||||
### **Data Layer**
|
||||
- **`src/database_adapters.py`** - Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics)
|
||||
- **`src/models.py`** - Pydantic data models and type definitions
|
||||
- **`src/validators.py`** - Data validation and sanitization
|
||||
|
||||
### **Infrastructure Layer**
|
||||
- **`src/config.py`** - Configuration management with environment variable support
|
||||
- **`src/logging_config.py`** - Structured logging with rotation and colors
|
||||
- **`src/metrics.py`** - Application metrics collection (counters, gauges, histograms)
|
||||
- **`src/health_check.py`** - System health monitoring and status checks
|
||||
|
||||
### **Utility Layer**
|
||||
- **`src/exceptions.py`** - Custom exception hierarchy
|
||||
- **`src/rate_limiter.py`** - API rate limiting and request tracking
|
||||
|
||||
## 🌐 Web API Structure
|
||||
|
||||
### **Endpoints Organization**
|
||||
```
|
||||
/ # Dashboard homepage
|
||||
├── /health # System health status
|
||||
├── /metrics # Application metrics
|
||||
├── /config # Configuration (masked)
|
||||
├── /stations # Station management
|
||||
│ ├── GET / # List all stations
|
||||
│ ├── POST / # Create new station
|
||||
│ ├── GET /{id} # Get specific station
|
||||
│ ├── PUT /{id} # Update station
|
||||
│ └── DELETE /{id} # Delete station
|
||||
├── /measurements # Data access
|
||||
│ ├── /latest # Latest measurements
|
||||
│ └── /station/{code} # Station-specific data
|
||||
└── /scraping # Data collection control
|
||||
├── /trigger # Manual data collection
|
||||
└── /status # Scraping status
|
||||
```
|
||||
|
||||
### **API Models**
|
||||
- **Request Models**: Station creation/update, query parameters
|
||||
- **Response Models**: Station info, measurements, health status
|
||||
- **Error Models**: Standardized error responses
|
||||
|
||||
## 🗄️ Database Architecture
|
||||
|
||||
### **Supported Databases**
|
||||
1. **SQLite** - Local development and testing
|
||||
2. **MySQL** - Traditional relational database
|
||||
3. **PostgreSQL** - Advanced relational with TimescaleDB support
|
||||
4. **InfluxDB** - Purpose-built time-series database
|
||||
5. **VictoriaMetrics** - High-performance metrics storage
|
||||
|
||||
### **Schema Design**
|
||||
```sql
|
||||
-- Stations table
|
||||
stations (
|
||||
id INTEGER PRIMARY KEY,
|
||||
station_code VARCHAR(10) UNIQUE,
|
||||
thai_name VARCHAR(255),
|
||||
english_name VARCHAR(255),
|
||||
latitude DECIMAL(10,8),
|
||||
longitude DECIMAL(11,8),
|
||||
geohash VARCHAR(20),
|
||||
status VARCHAR(20),
|
||||
created_at TIMESTAMP,
|
||||
updated_at TIMESTAMP
|
||||
)
|
||||
|
||||
-- Measurements table
|
||||
water_measurements (
|
||||
id BIGINT PRIMARY KEY,
|
||||
timestamp DATETIME,
|
||||
station_id INTEGER,
|
||||
water_level DECIMAL(10,3),
|
||||
discharge DECIMAL(10,2),
|
||||
discharge_percent DECIMAL(5,2),
|
||||
status VARCHAR(20),
|
||||
created_at TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE(timestamp, station_id)
|
||||
)
|
||||
```
|
||||
|
||||
## 🐳 Docker Architecture
|
||||
|
||||
### **Multi-Stage Build**
|
||||
1. **Builder Stage** - Compile dependencies and build artifacts
|
||||
2. **Production Stage** - Minimal runtime environment
|
||||
|
||||
### **Service Composition**
|
||||
- **ping-river-monitor** - Data collection service
|
||||
- **ping-river-api** - Web API service
|
||||
- **victoriametrics** - Time-series database
|
||||
- **grafana** - Visualization dashboard
|
||||
|
||||
## 📊 Monitoring Architecture
|
||||
|
||||
### **Metrics Collection**
|
||||
- **Counters** - API requests, database operations, scraping cycles
|
||||
- **Gauges** - Current values, connection status, resource usage
|
||||
- **Histograms** - Response times, processing durations
|
||||
|
||||
### **Health Checks**
|
||||
- **Database Health** - Connection status, data freshness
|
||||
- **API Health** - External API availability, response times
|
||||
- **System Health** - Memory usage, disk space, CPU load
|
||||
|
||||
### **Logging Levels**
|
||||
- **DEBUG** - Detailed execution information
|
||||
- **INFO** - General operational messages
|
||||
- **WARNING** - Potential issues and recoverable errors
|
||||
- **ERROR** - Serious problems requiring attention
|
||||
- **CRITICAL** - System-threatening issues
|
||||
|
||||
## 🔧 Configuration Management
|
||||
|
||||
### **Environment Variables**
|
||||
```bash
|
||||
# Database
|
||||
DB_TYPE=victoriametrics
|
||||
VM_HOST=localhost
|
||||
VM_PORT=8428
|
||||
|
||||
# Application
|
||||
SCRAPING_INTERVAL_HOURS=1
|
||||
LOG_LEVEL=INFO
|
||||
DATA_RETENTION_DAYS=365
|
||||
|
||||
# Security
|
||||
SECRET_KEY=your-secret-key
|
||||
API_KEY=your-api-key
|
||||
```
|
||||
|
||||
### **Configuration Hierarchy**
|
||||
1. Environment variables (highest priority)
|
||||
2. .env file
|
||||
3. Default values in config.py (lowest priority)
|
||||
|
||||
## 🧪 Testing Architecture
|
||||
|
||||
### **Test Categories**
|
||||
- **Unit Tests** - Individual component testing
|
||||
- **Integration Tests** - System component interaction
|
||||
- **API Tests** - Endpoint functionality and responses
|
||||
- **Performance Tests** - Load and stress testing
|
||||
|
||||
### **Test Data**
|
||||
- **Mock Data** - Simulated API responses
|
||||
- **Test Database** - Isolated test environment
|
||||
- **Fixtures** - Reusable test data sets
|
||||
|
||||
## 📦 Deployment Architecture
|
||||
|
||||
### **Development**
|
||||
```bash
|
||||
python run.py --web-api # Local development server
|
||||
```
|
||||
|
||||
### **Production**
|
||||
```bash
|
||||
docker-compose up -d # Full stack deployment
|
||||
```
|
||||
|
||||
### **CI/CD Pipeline**
|
||||
1. **Test Stage** - Run all tests and quality checks
|
||||
2. **Build Stage** - Create Docker images
|
||||
3. **Deploy Stage** - Deploy to staging/production
|
||||
4. **Health Check** - Verify deployment success
|
||||
|
||||
## 🔒 Security Architecture
|
||||
|
||||
### **Input Validation**
|
||||
- Pydantic models for API requests
|
||||
- Data range validation for measurements
|
||||
- SQL injection prevention through ORM
|
||||
|
||||
### **Authentication** (Future)
|
||||
- API key authentication
|
||||
- JWT token support
|
||||
- Role-based access control
|
||||
|
||||
### **Data Protection**
|
||||
- Environment variable configuration
|
||||
- Sensitive data masking in logs
|
||||
- HTTPS support for production
|
||||
|
||||
## 📈 Performance Architecture
|
||||
|
||||
### **Optimization Strategies**
|
||||
- Database connection pooling
|
||||
- Query optimization and indexing
|
||||
- Response caching for static data
|
||||
- Async processing for I/O operations
|
||||
|
||||
### **Scalability Considerations**
|
||||
- Horizontal scaling with load balancers
|
||||
- Database read replicas
|
||||
- Microservice architecture readiness
|
||||
- Container orchestration support
|
||||
|
||||
## 🔄 Data Flow Architecture
|
||||
|
||||
### **Collection Flow**
|
||||
```
|
||||
External API → Rate Limiter → Data Validator → Database Adapter → Database
|
||||
```
|
||||
|
||||
### **API Flow**
|
||||
```
|
||||
HTTP Request → FastAPI → Business Logic → Database Adapter → HTTP Response
|
||||
```
|
||||
|
||||
### **Monitoring Flow**
|
||||
```
|
||||
Application Events → Metrics Collector → Health Checks → Monitoring Dashboard
|
||||
```
|
||||
|
||||
This architecture provides a solid foundation for a production-ready water monitoring system with excellent maintainability, scalability, and observability.
|
241
docs/STATION_MANAGEMENT_GUIDE.md
Normal file
241
docs/STATION_MANAGEMENT_GUIDE.md
Normal file
@@ -0,0 +1,241 @@
|
||||
# 🏔️ Station Management Guide - Northern Thailand Ping River Monitor
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
The Northern Thailand Ping River Monitor now includes comprehensive station management capabilities, allowing you to dynamically add, update, and remove monitoring stations through the web API.
|
||||
|
||||
## 🌊 **Current Coverage**
|
||||
|
||||
The system currently monitors **16 water stations** along the Ping River Basin:
|
||||
|
||||
### **Upper Ping River (Chiang Mai Province)**
|
||||
- **P.20** - Ban Chiang Dao (บ้านเชียงดาว)
|
||||
- **P.75** - Ban Chai Lat (บ้านช่อแล)
|
||||
- **P.92** - Ban Muang Aut (บ้านเมืองกึ๊ด)
|
||||
- **P.4A** - Ban Mae Taeng (บ้านแม่แตง)
|
||||
- **P.67** - Ban Tae (บ้านแม่แต)
|
||||
- **P.21** - Ban Rim Tai (บ้านริมใต้)
|
||||
- **P.103** - Ring Bridge 3 (สะพานวงแหวนรอบ 3)
|
||||
|
||||
### **Middle Ping River**
|
||||
- **P.1** - Nawarat Bridge (สะพานนวรัฐ) - *Main reference station*
|
||||
- **P.82** - Ban Sob win (บ้านสบวิน)
|
||||
- **P.84** - Ban Panton (บ้านพันตน)
|
||||
- **P.81** - Ban Pong (บ้านโป่ง)
|
||||
- **P.5** - Tha Nang Bridge (สะพานท่านาง)
|
||||
|
||||
### **Lower Ping River**
|
||||
- **P.77** - Baan Sop Mae Sapuord (บ้านสบแม่สะป๊วด)
|
||||
- **P.87** - Ban Pa Sang (บ้านป่าซาง)
|
||||
- **P.76** - Banb Mae I Hai (บ้านแม่อีไฮ)
|
||||
- **P.85** - Baan Lai Kaew (บ้านหล่ายแก้ว)
|
||||
|
||||
## 🔧 **Station Management API**
|
||||
|
||||
### **List All Stations**
|
||||
```bash
|
||||
GET /stations
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
[
|
||||
{
|
||||
"station_id": 1,
|
||||
"station_code": "P.20",
|
||||
"thai_name": "บ้านเชียงดาว",
|
||||
"english_name": "Ban Chiang Dao",
|
||||
"latitude": 19.36731448032191,
|
||||
"longitude": 98.9688487015384,
|
||||
"geohash": null,
|
||||
"status": "active"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### **Get Specific Station**
|
||||
```bash
|
||||
GET /stations/{station_id}
|
||||
```
|
||||
|
||||
### **Add New Station**
|
||||
```bash
|
||||
POST /stations
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"station_code": "P.NEW",
|
||||
"thai_name": "สถานีใหม่",
|
||||
"english_name": "New Station",
|
||||
"latitude": 18.7875,
|
||||
"longitude": 99.0045,
|
||||
"geohash": "w5q6uuhvfcfp25",
|
||||
"status": "active"
|
||||
}
|
||||
```
|
||||
|
||||
### **Update Station Information**
|
||||
```bash
|
||||
PUT /stations/{station_id}
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"thai_name": "ชื่อใหม่",
|
||||
"english_name": "Updated Name",
|
||||
"latitude": 18.8000,
|
||||
"longitude": 99.0100
|
||||
}
|
||||
```
|
||||
|
||||
### **Delete Station**
|
||||
```bash
|
||||
DELETE /stations/{station_id}
|
||||
```
|
||||
|
||||
## 🧪 **Testing Station Management**
|
||||
|
||||
Use the provided test script to verify station management functionality:
|
||||
|
||||
```bash
|
||||
# Test all station management endpoints
|
||||
python test_station_management.py
|
||||
```
|
||||
|
||||
This will:
|
||||
1. List existing stations
|
||||
2. Create a test station
|
||||
3. Retrieve station details
|
||||
4. Update station information
|
||||
5. Verify changes
|
||||
6. Delete the test station
|
||||
7. Confirm deletion
|
||||
|
||||
## 📊 **Station Data Model**
|
||||
|
||||
### **Required Fields**
|
||||
- `station_code`: Unique identifier (e.g., "P.1", "P.20")
|
||||
- `thai_name`: Thai language name
|
||||
- `english_name`: English language name
|
||||
|
||||
### **Optional Fields**
|
||||
- `latitude`: GPS latitude coordinate (-90 to 90)
|
||||
- `longitude`: GPS longitude coordinate (-180 to 180)
|
||||
- `geohash`: Geohash string for location
|
||||
- `status`: Station status ("active", "inactive", "maintenance", "error")
|
||||
|
||||
### **Validation Rules**
|
||||
- Station codes must be unique
|
||||
- Latitude must be between -90 and 90
|
||||
- Longitude must be between -180 and 180
|
||||
- Names cannot be empty
|
||||
- Status must be valid enum value
|
||||
|
||||
## 🌐 **Web Interface**
|
||||
|
||||
Access the station management interface through the web dashboard:
|
||||
|
||||
1. **Start the API server:**
|
||||
```bash
|
||||
python run.py --web-api
|
||||
```
|
||||
|
||||
2. **Open your browser:**
|
||||
- Dashboard: http://localhost:8000
|
||||
- API Documentation: http://localhost:8000/docs
|
||||
|
||||
3. **Use the interactive API docs** to test station management endpoints
|
||||
|
||||
## 🔄 **Integration with Data Collection**
|
||||
|
||||
- **Dynamic Station Discovery**: New stations are automatically included in data collection
|
||||
- **Real-time Updates**: Station information changes are reflected immediately
|
||||
- **Data Continuity**: Historical data is preserved when updating station details
|
||||
- **Error Handling**: Invalid stations are skipped during data collection
|
||||
|
||||
## 📍 **Geographic Coverage**
|
||||
|
||||
The Ping River Basin monitoring network covers:
|
||||
|
||||
- **Total Distance**: ~400 km from Chiang Dao to Nakhon Sawan
|
||||
- **Elevation Range**: 300m to 1,200m above sea level
|
||||
- **Catchment Area**: ~25,000 km²
|
||||
- **Major Cities**: Chiang Mai, Lamphun, Tak, Nakhon Sawan
|
||||
|
||||
## 🚀 **Usage Examples**
|
||||
|
||||
### **Add a New Upstream Station**
|
||||
```bash
|
||||
curl -X POST "http://localhost:8000/stations" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"station_code": "P.UPSTREAM",
|
||||
"thai_name": "สถานีต้นน้ำ",
|
||||
"english_name": "Upstream Station",
|
||||
"latitude": 19.5000,
|
||||
"longitude": 98.9000,
|
||||
"status": "active"
|
||||
}'
|
||||
```
|
||||
|
||||
### **Update Station Coordinates**
|
||||
```bash
|
||||
curl -X PUT "http://localhost:8000/stations/1" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"latitude": 19.3700,
|
||||
"longitude": 98.9700
|
||||
}'
|
||||
```
|
||||
|
||||
### **Mark Station for Maintenance**
|
||||
```bash
|
||||
curl -X PUT "http://localhost:8000/stations/5" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"status": "maintenance"
|
||||
}'
|
||||
```
|
||||
|
||||
## 🔒 **Best Practices**
|
||||
|
||||
### **Station Naming**
|
||||
- Use consistent code format (P.XX)
|
||||
- Include both Thai and English names
|
||||
- Use descriptive location names
|
||||
|
||||
### **Coordinate Accuracy**
|
||||
- Use high-precision GPS coordinates (6+ decimal places)
|
||||
- Verify coordinates match actual station location
|
||||
- Include geohash for efficient spatial queries
|
||||
|
||||
### **Status Management**
|
||||
- Set status to "maintenance" during repairs
|
||||
- Use "inactive" for temporarily offline stations
|
||||
- Use "error" for stations with data quality issues
|
||||
|
||||
### **Data Integrity**
|
||||
- Test new stations before adding to production
|
||||
- Backup station configuration before major changes
|
||||
- Monitor data quality after station updates
|
||||
|
||||
## 🎯 **Future Enhancements**
|
||||
|
||||
Planned improvements for station management:
|
||||
|
||||
1. **Bulk Operations** - Import/export multiple stations
|
||||
2. **Station Groups** - Organize stations by river section
|
||||
3. **Automated Validation** - GPS coordinate verification
|
||||
4. **Historical Tracking** - Track station configuration changes
|
||||
5. **Alert Integration** - Notifications for station status changes
|
||||
6. **Map Interface** - Visual station management on interactive map
|
||||
|
||||
## 📞 **Support**
|
||||
|
||||
For station management issues:
|
||||
|
||||
1. Check the API documentation at `/docs`
|
||||
2. Run the test script: `python test_station_management.py`
|
||||
3. Review logs for error details
|
||||
4. Verify station data format and validation rules
|
||||
|
||||
The station management system provides flexible control over your monitoring network while maintaining data integrity and system reliability.
|
443
docs/VICTORIAMETRICS_SETUP.md
Normal file
443
docs/VICTORIAMETRICS_SETUP.md
Normal file
@@ -0,0 +1,443 @@
|
||||
# VictoriaMetrics Setup Guide for Thailand Water Monitor
|
||||
|
||||
This guide provides comprehensive instructions for setting up VictoriaMetrics as the time-series database backend for the Thailand Water Monitor.
|
||||
|
||||
## Why VictoriaMetrics?
|
||||
|
||||
VictoriaMetrics is an excellent choice for water monitoring data because:
|
||||
|
||||
- **High Performance**: Up to 10x faster than InfluxDB
|
||||
- **Low Resource Usage**: Uses 10x less RAM than Prometheus
|
||||
- **Better Compression**: 70x better compression than Prometheus
|
||||
- **Prometheus Compatible**: Drop-in replacement for Prometheus
|
||||
- **Easy to Deploy**: Single binary, no dependencies
|
||||
- **Cost Effective**: Open source with commercial support available
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Environment Variables
|
||||
|
||||
Set these environment variables to configure VictoriaMetrics:
|
||||
|
||||
```bash
|
||||
# Windows (PowerShell)
|
||||
$env:DB_TYPE="victoriametrics"
|
||||
$env:VM_HOST="localhost"
|
||||
$env:VM_PORT="8428"
|
||||
|
||||
# Linux/Mac
|
||||
export DB_TYPE=victoriametrics
|
||||
export VM_HOST=localhost
|
||||
export VM_PORT=8428
|
||||
```
|
||||
|
||||
### 2. Start VictoriaMetrics with Docker
|
||||
|
||||
```bash
|
||||
# Simple setup
|
||||
docker run -d \
|
||||
--name victoriametrics \
|
||||
-p 8428:8428 \
|
||||
-v victoria-metrics-data:/victoria-metrics-data \
|
||||
victoriametrics/victoria-metrics:latest \
|
||||
--storageDataPath=/victoria-metrics-data \
|
||||
--retentionPeriod=2y \
|
||||
--httpListenAddr=:8428
|
||||
|
||||
# Verify it's running
|
||||
curl http://localhost:8428/health
|
||||
```
|
||||
|
||||
### 3. Run the Water Monitor
|
||||
|
||||
```bash
|
||||
python water_scraper_v3.py
|
||||
```
|
||||
|
||||
### 4. Access Grafana Dashboard
|
||||
|
||||
```bash
|
||||
# Start with Docker Compose (includes Grafana)
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
# Access Grafana at http://localhost:3000
|
||||
# Username: admin
|
||||
# Password: admin_password
|
||||
```
|
||||
|
||||
## Production Setup
|
||||
|
||||
### Docker Compose Configuration
|
||||
|
||||
Use the provided `docker-compose.victoriametrics.yml` file:
|
||||
|
||||
```bash
|
||||
# Start the complete stack
|
||||
docker-compose -f docker-compose.victoriametrics.yml up -d
|
||||
|
||||
# Check status
|
||||
docker-compose -f docker-compose.victoriametrics.yml ps
|
||||
|
||||
# View logs
|
||||
docker-compose -f docker-compose.victoriametrics.yml logs -f
|
||||
```
|
||||
|
||||
### Manual VictoriaMetrics Configuration
|
||||
|
||||
#### High-Performance Configuration
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name victoriametrics \
|
||||
-p 8428:8428 \
|
||||
-v victoria-metrics-data:/victoria-metrics-data \
|
||||
victoriametrics/victoria-metrics:latest \
|
||||
--storageDataPath=/victoria-metrics-data \
|
||||
--retentionPeriod=2y \
|
||||
--httpListenAddr=:8428 \
|
||||
--maxConcurrentInserts=32 \
|
||||
--search.maxQueryDuration=60s \
|
||||
--search.maxConcurrentRequests=16 \
|
||||
--dedup.minScrapeInterval=30s \
|
||||
--memory.allowedPercent=80 \
|
||||
--loggerLevel=INFO \
|
||||
--loggerFormat=json \
|
||||
--search.maxSeries=1000000 \
|
||||
--search.maxPointsPerTimeseries=100000
|
||||
```
|
||||
|
||||
#### Configuration Parameters Explained
|
||||
|
||||
| Parameter | Description | Recommended Value |
|
||||
|-----------|-------------|-------------------|
|
||||
| `--storageDataPath` | Data storage directory | `/victoria-metrics-data` |
|
||||
| `--retentionPeriod` | How long to keep data | `2y` (2 years) |
|
||||
| `--httpListenAddr` | HTTP listen address | `:8428` |
|
||||
| `--maxConcurrentInserts` | Max concurrent inserts | `32` |
|
||||
| `--search.maxQueryDuration` | Max query duration | `60s` |
|
||||
| `--search.maxConcurrentRequests` | Max concurrent queries | `16` |
|
||||
| `--dedup.minScrapeInterval` | Deduplication interval | `30s` |
|
||||
| `--memory.allowedPercent` | Max memory usage | `80` |
|
||||
| `--loggerLevel` | Log level | `INFO` |
|
||||
| `--search.maxSeries` | Max time series | `1000000` |
|
||||
|
||||
### Kubernetes Deployment
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: victoriametrics
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: victoriametrics
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: victoriametrics
|
||||
spec:
|
||||
containers:
|
||||
- name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:latest
|
||||
ports:
|
||||
- containerPort: 8428
|
||||
args:
|
||||
- --storageDataPath=/victoria-metrics-data
|
||||
- --retentionPeriod=2y
|
||||
- --httpListenAddr=:8428
|
||||
- --maxConcurrentInserts=32
|
||||
volumeMounts:
|
||||
- name: storage
|
||||
mountPath: /victoria-metrics-data
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
limits:
|
||||
memory: "2Gi"
|
||||
cpu: "2000m"
|
||||
volumes:
|
||||
- name: storage
|
||||
persistentVolumeClaim:
|
||||
claimName: victoriametrics-pvc
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: victoriametrics
|
||||
spec:
|
||||
selector:
|
||||
app: victoriametrics
|
||||
ports:
|
||||
- port: 8428
|
||||
targetPort: 8428
|
||||
type: ClusterIP
|
||||
```
|
||||
|
||||
## Data Queries
|
||||
|
||||
### HTTP API Queries
|
||||
|
||||
VictoriaMetrics provides a Prometheus-compatible HTTP API:
|
||||
|
||||
```bash
|
||||
# Current water levels for all stations
|
||||
curl "http://localhost:8428/api/v1/query?query=water_level"
|
||||
|
||||
# Water levels for specific station
|
||||
curl "http://localhost:8428/api/v1/query?query=water_level{station_code=\"P.1\"}"
|
||||
|
||||
# Average discharge over last hour
|
||||
curl "http://localhost:8428/api/v1/query?query=avg_over_time(water_discharge[1h])"
|
||||
|
||||
# High discharge alerts (>80%)
|
||||
curl "http://localhost:8428/api/v1/query?query=water_discharge_percent>80"
|
||||
|
||||
# Time range query (last 6 hours)
|
||||
START=$(date -d '6 hours ago' +%s)
|
||||
END=$(date +%s)
|
||||
curl "http://localhost:8428/api/v1/query_range?query=water_level&start=${START}&end=${END}&step=300"
|
||||
```
|
||||
|
||||
### PromQL Examples
|
||||
|
||||
```promql
|
||||
# Current water levels
|
||||
water_level
|
||||
|
||||
# Water level trends (last 24h)
|
||||
water_level[24h]
|
||||
|
||||
# Discharge rates by station
|
||||
water_discharge{station_code="P.1"}
|
||||
|
||||
# Average discharge across all stations
|
||||
avg(water_discharge)
|
||||
|
||||
# Stations with high discharge (>80%)
|
||||
water_discharge_percent > 80
|
||||
|
||||
# Rate of change in water level
|
||||
rate(water_level[5m])
|
||||
|
||||
# Maximum water level in last hour
|
||||
max_over_time(water_level[1h])
|
||||
|
||||
# Stations with increasing water levels
|
||||
increase(water_level[1h]) > 0
|
||||
```
|
||||
|
||||
## Grafana Integration
|
||||
|
||||
### Data Source Configuration
|
||||
|
||||
1. **Add VictoriaMetrics as Prometheus Data Source**:
|
||||
- URL: `http://localhost:8428` (or `http://victoriametrics:8428` in Docker)
|
||||
- Access: Server (default)
|
||||
- HTTP Method: POST
|
||||
|
||||
2. **Import Dashboard**:
|
||||
- Use the provided `water-monitoring-dashboard.json`
|
||||
- Or create custom dashboards with the queries above
|
||||
|
||||
### Dashboard Panels
|
||||
|
||||
The included dashboard provides:
|
||||
|
||||
- **Time Series**: Water levels and discharge over time
|
||||
- **Table**: Current status of all stations
|
||||
- **Pie Chart**: Discharge percentage distribution
|
||||
- **Gauge**: Average discharge percentage
|
||||
- **Variables**: Filter by station
|
||||
|
||||
## Monitoring and Maintenance
|
||||
|
||||
### Health Checks
|
||||
|
||||
```bash
|
||||
# Check VictoriaMetrics health
|
||||
curl http://localhost:8428/health
|
||||
|
||||
# Check metrics endpoint
|
||||
curl http://localhost:8428/metrics
|
||||
|
||||
# Check configuration
|
||||
curl http://localhost:8428/api/v1/status/config
|
||||
```
|
||||
|
||||
### Performance Monitoring
|
||||
|
||||
```bash
|
||||
# Query performance stats
|
||||
curl http://localhost:8428/api/v1/status/tsdb
|
||||
|
||||
# Memory usage
|
||||
curl http://localhost:8428/api/v1/status/runtime
|
||||
|
||||
# Active queries
|
||||
curl http://localhost:8428/api/v1/status/active_queries
|
||||
```
|
||||
|
||||
### Backup and Restore
|
||||
|
||||
```bash
|
||||
# Create backup
|
||||
docker exec victoriametrics /usr/bin/vmbackup \
|
||||
-storageDataPath=/victoria-metrics-data \
|
||||
-dst=fs:///backup/$(date +%Y%m%d)
|
||||
|
||||
# Restore from backup
|
||||
docker exec victoriametrics /usr/bin/vmrestore \
|
||||
-src=fs:///backup/20250724 \
|
||||
-storageDataPath=/victoria-metrics-data
|
||||
```
|
||||
|
||||
### Log Analysis
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker logs victoriametrics
|
||||
|
||||
# Follow logs
|
||||
docker logs -f victoriametrics
|
||||
|
||||
# Search for errors
|
||||
docker logs victoriametrics 2>&1 | grep ERROR
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Connection Refused**:
|
||||
```bash
|
||||
# Check if VictoriaMetrics is running
|
||||
docker ps | grep victoriametrics
|
||||
|
||||
# Check port binding
|
||||
netstat -tlnp | grep 8428
|
||||
```
|
||||
|
||||
2. **High Memory Usage**:
|
||||
```bash
|
||||
# Reduce memory limit
|
||||
docker run ... --memory.allowedPercent=60 ...
|
||||
```
|
||||
|
||||
3. **Slow Queries**:
|
||||
```bash
|
||||
# Increase query timeout
|
||||
docker run ... --search.maxQueryDuration=120s ...
|
||||
```
|
||||
|
||||
4. **Data Not Appearing**:
|
||||
```bash
|
||||
# Check if data is being written
|
||||
curl "http://localhost:8428/api/v1/query?query=up"
|
||||
|
||||
# Check water monitor logs
|
||||
tail -f water_monitor.log
|
||||
```
|
||||
|
||||
### Performance Tuning
|
||||
|
||||
1. **For High Write Load**:
|
||||
```bash
|
||||
--maxConcurrentInserts=64
|
||||
--insert.maxQueueDuration=60s
|
||||
```
|
||||
|
||||
2. **For High Query Load**:
|
||||
```bash
|
||||
--search.maxConcurrentRequests=32
|
||||
--search.maxQueryDuration=120s
|
||||
```
|
||||
|
||||
3. **For Large Datasets**:
|
||||
```bash
|
||||
--search.maxSeries=10000000
|
||||
--search.maxPointsPerTimeseries=1000000
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
### Authentication
|
||||
|
||||
VictoriaMetrics doesn't have built-in authentication. Use a reverse proxy:
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name victoriametrics.example.com;
|
||||
|
||||
auth_basic "VictoriaMetrics";
|
||||
auth_basic_user_file /etc/nginx/.htpasswd;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:8428;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### TLS/SSL
|
||||
|
||||
```bash
|
||||
# Use nginx or traefik for TLS termination
|
||||
# Or use VictoriaMetrics with TLS:
|
||||
docker run ... \
|
||||
-v /path/to/cert.pem:/cert.pem \
|
||||
-v /path/to/key.pem:/key.pem \
|
||||
victoriametrics/victoria-metrics:latest \
|
||||
--tls \
|
||||
--tlsCertFile=/cert.pem \
|
||||
--tlsKeyFile=/key.pem
|
||||
```
|
||||
|
||||
## Scaling
|
||||
|
||||
### Cluster Setup
|
||||
|
||||
For high availability and horizontal scaling:
|
||||
|
||||
```bash
|
||||
# Start multiple VictoriaMetrics instances
|
||||
docker run -d --name vm1 -p 8428:8428 victoriametrics/victoria-metrics:latest
|
||||
docker run -d --name vm2 -p 8429:8428 victoriametrics/victoria-metrics:latest
|
||||
|
||||
# Use load balancer to distribute queries
|
||||
# Use vminsert/vmselect/vmstorage for true clustering
|
||||
```
|
||||
|
||||
### Resource Requirements
|
||||
|
||||
| Data Points/Hour | RAM | CPU | Storage/Day |
|
||||
|------------------|-----|-----|-------------|
|
||||
| 1,000 | 100MB | 0.1 CPU | 10MB |
|
||||
| 10,000 | 500MB | 0.5 CPU | 100MB |
|
||||
| 100,000 | 2GB | 1 CPU | 1GB |
|
||||
| 1,000,000 | 8GB | 2 CPU | 10GB |
|
||||
|
||||
## Migration
|
||||
|
||||
### From InfluxDB
|
||||
|
||||
```bash
|
||||
# Export from InfluxDB
|
||||
influx -database water_monitoring -execute "SELECT * FROM water_data" -format csv > data.csv
|
||||
|
||||
# Import to VictoriaMetrics (convert to Prometheus format first)
|
||||
# Use vmctl tool for migration
|
||||
```
|
||||
|
||||
### From Prometheus
|
||||
|
||||
```bash
|
||||
# Use vmctl for direct migration
|
||||
vmctl prometheus --prom-snapshot=/path/to/prometheus/data --vm-addr=http://localhost:8428
|
||||
```
|
||||
|
||||
This comprehensive setup guide should help you configure VictoriaMetrics for optimal performance with the Thailand Water Monitor system.
|
179
docs/references/NOTABLE_DOCUMENTS.md
Normal file
179
docs/references/NOTABLE_DOCUMENTS.md
Normal file
@@ -0,0 +1,179 @@
|
||||
# Notable Documents and References
|
||||
|
||||
This document contains important references and external resources related to the Thailand Water Level Monitoring System.
|
||||
|
||||
## 🌊 **Official Thai Government Water Resources**
|
||||
|
||||
### **Royal Irrigation Department (RID) Resources**
|
||||
|
||||
#### **1. Water Level Monitoring Diagram**
|
||||
- **URL**: https://water.rid.go.th/hyd/Diagram/graphic_ping.pdf
|
||||
- **Description**: Official diagram showing the water level monitoring network structure
|
||||
- **Content**: Technical diagrams and network topology for Thailand's water monitoring system
|
||||
- **Language**: Thai
|
||||
- **Format**: PDF
|
||||
- **Usage**: Understanding the official monitoring infrastructure and station relationships
|
||||
|
||||
#### **2. Hourly Water Level Data Portal**
|
||||
- **URL**: https://hyd-app-db.rid.go.th/hydro1h.html
|
||||
- **Description**: Real-time hourly water level data web interface
|
||||
- **Content**: Live data from all 16 monitoring stations across Thailand
|
||||
- **Language**: Thai
|
||||
- **Format**: Web Application
|
||||
- **Usage**: Primary data source for the monitoring system
|
||||
- **API Endpoint**: Used by our scraper to fetch real-time data
|
||||
- **Update Frequency**: Hourly updates
|
||||
- **Data Points**: ~240-384 measurements per hour across all stations
|
||||
|
||||
#### **3. Individual Station Data - P.76 Example**
|
||||
- **URL**: https://www.hydro-1.net/Data/STATION/P.76.html
|
||||
- **Description**: Detailed individual station data page for station P.76
|
||||
- **Content**: Historical data, station details, and specific measurements
|
||||
- **Language**: Thai/English
|
||||
- **Format**: Web Page
|
||||
- **Usage**: Reference for individual station characteristics and historical data patterns
|
||||
- **Station**: P.76 - บ้านแม่อีไฮ (Banb Mae I Hai)
|
||||
|
||||
## 📊 **Data Sources and APIs**
|
||||
|
||||
### **Primary Data Source**
|
||||
- **API Endpoint**: `https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx`
|
||||
- **Method**: POST
|
||||
- **Data Format**: JSON
|
||||
- **Update Schedule**: Hourly (top of each hour)
|
||||
- **Coverage**: All 16 monitoring stations
|
||||
- **Metrics**: Water level (m), Discharge (cms), Discharge percentage (%)
|
||||
|
||||
### **Station Coverage**
|
||||
The system monitors 16 stations across Thailand:
|
||||
- P.1 - สะพานนวรัฐ (Nawarat Bridge)
|
||||
- P.5 - สะพานท่านาง (Tha Nang Bridge)
|
||||
- P.20 - บ้านเชียงดาว (Ban Chiang Dao)
|
||||
- P.21 - บ้านริมใต้ (Ban Rim Tai)
|
||||
- P.4A - บ้านแม่แตง (Ban Mae Taeng)
|
||||
- P.67 - บ้านแม่แต (Ban Tae)
|
||||
- P.75 - บ้านช่อแล (Ban Chai Lat)
|
||||
- P.76 - บ้านแม่อีไฮ (Banb Mae I Hai)
|
||||
- P.77 - บ้านสบแม่สะป๊วด (Baan Sop Mae Sapuord)
|
||||
- P.81 - บ้านโป่ง (Ban Pong)
|
||||
- P.82 - บ้านสบวิน (Ban Sob win)
|
||||
- P.84 - บ้านพันตน (Ban Panton)
|
||||
- P.85 - บ้านหล่ายแก้ว (Baan Lai Kaew)
|
||||
- P.87 - บ้านป่าซาง (Ban Pa Sang)
|
||||
- P.92 - บ้านเมืองกึ๊ด (Ban Muang Aut)
|
||||
- P.103 - สะพานวงแหวนรอบ 3 (Ring Bridge 3)
|
||||
|
||||
## 🔗 **Related Resources**
|
||||
|
||||
### **Technical Documentation**
|
||||
- **Thai Water Resources**: https://water.rid.go.th/
|
||||
- **Hydro Information Network**: https://www.hydro-1.net/
|
||||
- **Royal Irrigation Department**: https://www.rid.go.th/
|
||||
|
||||
### **Data Standards**
|
||||
- **Time Format**: Thai Buddhist calendar (BE) + 24-hour format
|
||||
- **Coordinate System**: WGS84 decimal degrees
|
||||
- **Water Level Units**: Meters (m)
|
||||
- **Discharge Units**: Cubic meters per second (cms)
|
||||
- **Update Frequency**: Hourly at :00 minutes
|
||||
|
||||
### **API Parameters**
|
||||
```javascript
|
||||
{
|
||||
'DW[UtokID]': '1',
|
||||
'DW[BasinID]': '6',
|
||||
'DW[TimeCurrent]': 'DD/MM/YYYY', // Thai Buddhist calendar
|
||||
'_search': 'false',
|
||||
'nd': timestamp_milliseconds,
|
||||
'rows': '100',
|
||||
'page': '1',
|
||||
'sidx': 'indexhourly',
|
||||
'sord': 'asc'
|
||||
}
|
||||
```
|
||||
|
||||
## 📋 **Data Structure Reference**
|
||||
|
||||
### **JSON Response Format**
|
||||
```json
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"hourlytime": "1.00", // Hour (1-24, where 24 = midnight next day)
|
||||
"wlvalues1": "2.45", // Water level for station 1 (meters)
|
||||
"qvalues1": "125.3", // Discharge for station 1 (cms)
|
||||
"QPercent1": "45.2", // Discharge percentage for station 1
|
||||
"wlvalues2": "1.89", // Station 2 data...
|
||||
// ... continues for all 16 stations
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### **Station ID Mapping**
|
||||
- Station 1 → P.20 (Ban Chiang Dao)
|
||||
- Station 2 → P.75 (Ban Chai Lat)
|
||||
- Station 3 → P.92 (Ban Muang Aut)
|
||||
- Station 4 → P.4A (Ban Mae Taeng)
|
||||
- Station 5 → P.67 (Ban Tae)
|
||||
- Station 6 → P.21 (Ban Rim Tai)
|
||||
- Station 7 → P.103 (Ring Bridge 3)
|
||||
- Station 8 → P.1 (Nawarat Bridge)
|
||||
- Station 9 → P.82 (Ban Sob win)
|
||||
- Station 10 → P.84 (Ban Panton)
|
||||
- Station 11 → P.81 (Ban Pong)
|
||||
- Station 12 → P.5 (Tha Nang Bridge)
|
||||
- Station 13 → P.77 (Baan Sop Mae Sapuord)
|
||||
- Station 14 → P.87 (Ban Pa Sang)
|
||||
- Station 15 → P.76 (Banb Mae I Hai)
|
||||
- Station 16 → P.85 (Baan Lai Kaew)
|
||||
|
||||
## 🌐 **Geolocation Reference**
|
||||
|
||||
### **Sample Coordinates (P.1 - Nawarat Bridge)**
|
||||
- **Latitude**: 15.6944°N
|
||||
- **Longitude**: 100.2028°E
|
||||
- **Geohash**: w5q6uuhvfcfp25
|
||||
- **Location**: Nakhon Sawan Province, Thailand
|
||||
|
||||
### **Coordinate System**
|
||||
- **Datum**: WGS84
|
||||
- **Format**: Decimal degrees
|
||||
- **Precision**: 4 decimal places (~11m accuracy)
|
||||
- **Usage**: Grafana geomap visualization
|
||||
|
||||
## 📝 **Usage Notes**
|
||||
|
||||
### **Data Collection**
|
||||
- **Frequency**: Every 15 minutes (full check at :00, quick checks at :15, :30, :45)
|
||||
- **Retention**: 2+ years of historical data
|
||||
- **Gap Filling**: Automatic detection and filling of missing data
|
||||
- **Data Updates**: Checks for changed values in recent data
|
||||
|
||||
### **Time Handling**
|
||||
- **Thai Time**: UTC+7 (Asia/Bangkok)
|
||||
- **Buddhist Calendar**: Thai year = Gregorian year + 543
|
||||
- **Hour 24**: Represents midnight (00:00) of the next day
|
||||
- **API Format**: DD/MM/YYYY (Buddhist calendar)
|
||||
|
||||
### **Data Quality**
|
||||
- **Validation**: Automatic data validation and error detection
|
||||
- **Retry Logic**: 15-minute retry intervals when data is unavailable
|
||||
- **Error Handling**: Comprehensive error logging and recovery
|
||||
- **Monitoring**: Health checks and alert conditions
|
||||
|
||||
## 🔍 **Research and Development**
|
||||
|
||||
### **Future Enhancements**
|
||||
- **Additional Stations**: Potential expansion to more monitoring points
|
||||
- **Real-time Alerts**: Threshold-based notification system
|
||||
- **Predictive Analytics**: Water level forecasting capabilities
|
||||
- **Mobile Integration**: Field data collection and verification
|
||||
|
||||
### **Technical Improvements**
|
||||
- **API Optimization**: Enhanced data fetching efficiency
|
||||
- **Database Performance**: Query optimization and indexing
|
||||
- **Visualization**: Advanced Grafana dashboard features
|
||||
- **Integration**: Connection with other water management systems
|
||||
|
||||
This document serves as a comprehensive reference for understanding the data sources, technical specifications, and official resources that support the Thailand Water Level Monitoring System.
|
543
grafana/dashboards/water-monitoring-dashboard.json
Normal file
543
grafana/dashboards/water-monitoring-dashboard.json
Normal file
@@ -0,0 +1,543 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "m"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_level",
|
||||
"interval": "",
|
||||
"legendFormat": "{{station_code}} - {{station_name_en}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Water Levels by Station",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "cms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_discharge",
|
||||
"interval": "",
|
||||
"legendFormat": "{{station_code}} - {{station_name_en}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Water Discharge by Station",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto",
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Water Level"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "m"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Discharge"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "cms"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Discharge %"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percent"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "Station"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "8.5.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_level",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_discharge",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_discharge_percent",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Current Station Status",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "merge",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"__name__": true,
|
||||
"job": true,
|
||||
"instance": true
|
||||
},
|
||||
"indexByName": {
|
||||
"station_code": 0,
|
||||
"station_name_en": 1,
|
||||
"station_name_th": 2,
|
||||
"Value #A": 3,
|
||||
"Value #B": 4,
|
||||
"Value #C": 5
|
||||
},
|
||||
"renameByName": {
|
||||
"Value #A": "Water Level",
|
||||
"Value #B": "Discharge",
|
||||
"Value #C": "Discharge %",
|
||||
"station_code": "Station",
|
||||
"station_name_en": "English Name",
|
||||
"station_name_th": "Thai Name"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "right"
|
||||
},
|
||||
"pieType": "pie",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "water_discharge_percent",
|
||||
"interval": "",
|
||||
"legendFormat": "{{station_code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Discharge Percentage Distribution",
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 70
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 90
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "8.5.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"expr": "avg(water_discharge_percent)",
|
||||
"interval": "",
|
||||
"legendFormat": "Average Discharge %",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Average Discharge Percentage",
|
||||
"type": "gauge"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"water-monitoring",
|
||||
"thailand",
|
||||
"victoriametrics"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "victoriametrics-uid"
|
||||
},
|
||||
"definition": "label_values(water_level, station_code)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Station",
|
||||
"multi": true,
|
||||
"name": "station",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(water_level, station_code)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "Asia/Bangkok",
|
||||
"title": "Thailand Water Level Monitoring",
|
||||
"uid": "water-monitoring-dashboard",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
12
grafana/provisioning/dashboards/dashboard.yml
Normal file
12
grafana/provisioning/dashboards/dashboard.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'water-monitoring'
|
||||
orgId: 1
|
||||
folder: 'Water Monitoring'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
17
grafana/provisioning/datasources/victoriametrics.yml
Normal file
17
grafana/provisioning/datasources/victoriametrics.yml
Normal file
@@ -0,0 +1,17 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: true
|
||||
editable: true
|
||||
jsonData:
|
||||
httpMethod: POST
|
||||
queryTimeout: 60s
|
||||
timeInterval: 30s
|
||||
customQueryParameters: ''
|
||||
secureJsonData: {}
|
||||
version: 1
|
||||
uid: victoriametrics-uid
|
29
requirements-dev.txt
Normal file
29
requirements-dev.txt
Normal file
@@ -0,0 +1,29 @@
|
||||
# Development dependencies
|
||||
-r requirements.txt
|
||||
|
||||
# Testing
|
||||
pytest==7.4.3
|
||||
pytest-cov==4.1.0
|
||||
pytest-asyncio==0.21.1
|
||||
|
||||
# Code formatting and linting
|
||||
black==23.11.0
|
||||
flake8==6.1.0
|
||||
isort==5.12.0
|
||||
mypy==1.7.1
|
||||
|
||||
# Pre-commit hooks
|
||||
pre-commit==3.5.0
|
||||
|
||||
# Documentation
|
||||
sphinx==7.2.6
|
||||
sphinx-rtd-theme==1.3.0
|
||||
sphinx-autodoc-typehints==1.25.2
|
||||
|
||||
# Development tools
|
||||
ipython==8.17.2
|
||||
jupyter==1.0.0
|
||||
|
||||
# Type stubs
|
||||
types-requests==2.31.0.10
|
||||
types-python-dateutil==2.8.19.14
|
30
requirements.txt
Normal file
30
requirements.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
# Core dependencies
|
||||
requests==2.31.0
|
||||
schedule==1.2.0
|
||||
pandas==2.0.3
|
||||
|
||||
# Web API framework
|
||||
fastapi==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
pydantic==2.5.0
|
||||
|
||||
# Database adapters
|
||||
sqlalchemy==2.0.23
|
||||
influxdb==5.3.1
|
||||
pymysql==1.1.0
|
||||
psycopg2-binary==2.9.9
|
||||
|
||||
# Monitoring and metrics
|
||||
psutil==5.9.6
|
||||
|
||||
# Development dependencies (optional)
|
||||
pytest==7.4.3
|
||||
pytest-cov==4.1.0
|
||||
black==23.11.0
|
||||
flake8==6.1.0
|
||||
mypy==1.7.1
|
||||
pre-commit==3.5.0
|
||||
|
||||
# Documentation
|
||||
sphinx==7.2.6
|
||||
sphinx-rtd-theme==1.3.0
|
14
run.py
Normal file
14
run.py
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple startup script for Thailand Water Monitor
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add src directory to Python path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
||||
|
||||
if __name__ == "__main__":
|
||||
from src.main import main
|
||||
main()
|
51
scripts/generate_badges.py
Normal file
51
scripts/generate_badges.py
Normal file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate status badges for README.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
def generate_badge_url(label, message, color="brightgreen"):
|
||||
"""Generate a shields.io badge URL"""
|
||||
return f"https://img.shields.io/badge/{label}-{message}-{color}"
|
||||
|
||||
def generate_workflow_badge(repo_url, workflow_name, branch="main"):
|
||||
"""Generate workflow status badge"""
|
||||
# For Gitea, you might need to adjust this based on your instance
|
||||
badge_url = f"{repo_url}/actions/workflows/{workflow_name}/badge.svg?branch={branch}"
|
||||
return badge_url
|
||||
|
||||
def main():
|
||||
"""Generate badges for the project"""
|
||||
repo_url = "https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor"
|
||||
|
||||
badges = {
|
||||
"CI/CD": generate_workflow_badge(repo_url, "ci.yml"),
|
||||
"Security": generate_workflow_badge(repo_url, "security.yml"),
|
||||
"Documentation": generate_workflow_badge(repo_url, "docs.yml"),
|
||||
"Python": generate_badge_url("Python", "3.9%2B", "blue"),
|
||||
"FastAPI": generate_badge_url("FastAPI", "0.104%2B", "green"),
|
||||
"Docker": generate_badge_url("Docker", "Ready", "blue"),
|
||||
"License": generate_badge_url("License", "MIT", "green"),
|
||||
"Version": generate_badge_url("Version", "v3.1.0", "blue"),
|
||||
}
|
||||
|
||||
print("# Status Badges")
|
||||
print()
|
||||
print("Add these badges to your README.md:")
|
||||
print()
|
||||
|
||||
for name, url in badges.items():
|
||||
print(f"[]({repo_url})")
|
||||
|
||||
print()
|
||||
print("# Markdown Format")
|
||||
print()
|
||||
|
||||
badge_line = " ".join([f"[]({repo_url})" for name, url in badges.items()])
|
||||
print(badge_line)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
35
scripts/init_git.bat
Normal file
35
scripts/init_git.bat
Normal file
@@ -0,0 +1,35 @@
|
||||
@echo off
|
||||
REM Git initialization script for Northern Thailand Ping River Monitor
|
||||
|
||||
echo 🏔️ Initializing Git repository for Northern Thailand Ping River Monitor
|
||||
|
||||
REM Initialize git repository
|
||||
git init
|
||||
|
||||
REM Add remote origin
|
||||
git remote add origin https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor.git
|
||||
|
||||
REM Add all files
|
||||
git add .
|
||||
|
||||
REM Initial commit
|
||||
git commit -m "Initial commit: Northern Thailand Ping River Monitor v3.1.0
|
||||
|
||||
Features:
|
||||
- Real-time water level monitoring for Ping River Basin
|
||||
- 16 monitoring stations from Chiang Dao to Nakhon Sawan
|
||||
- FastAPI web interface with station management
|
||||
- Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics)
|
||||
- Comprehensive monitoring and health checks
|
||||
- Docker deployment with Grafana integration
|
||||
- Production-ready architecture with CI/CD pipeline"
|
||||
|
||||
echo ✅ Git repository initialized successfully!
|
||||
echo.
|
||||
echo Next steps:
|
||||
echo 1. Review and edit .env file with your configuration
|
||||
echo 2. Push to remote repository:
|
||||
echo git push -u origin main
|
||||
echo.
|
||||
echo 3. Start the application:
|
||||
echo python run.py --web-api
|
89
scripts/init_git.sh
Normal file
89
scripts/init_git.sh
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/bin/bash
|
||||
# Git initialization script for Northern Thailand Ping River Monitor
|
||||
|
||||
echo "🏔️ Initializing Git repository for Northern Thailand Ping River Monitor"
|
||||
|
||||
# Initialize git repository
|
||||
git init
|
||||
|
||||
# Add remote origin
|
||||
git remote add origin https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor.git
|
||||
|
||||
# Create .gitignore if it doesn't exist
|
||||
if [ ! -f .gitignore ]; then
|
||||
echo "Creating .gitignore file..."
|
||||
cat > .gitignore << 'EOF'
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Database files
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Add all files
|
||||
git add .
|
||||
|
||||
# Initial commit
|
||||
git commit -m "Initial commit: Northern Thailand Ping River Monitor v3.1.0
|
||||
|
||||
Features:
|
||||
- Real-time water level monitoring for Ping River Basin
|
||||
- 16 monitoring stations from Chiang Dao to Nakhon Sawan
|
||||
- FastAPI web interface with station management
|
||||
- Multi-database support (SQLite, MySQL, PostgreSQL, InfluxDB, VictoriaMetrics)
|
||||
- Comprehensive monitoring and health checks
|
||||
- Docker deployment with Grafana integration
|
||||
- Production-ready architecture with CI/CD pipeline"
|
||||
|
||||
echo "✅ Git repository initialized successfully!"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Review and edit .env file with your configuration"
|
||||
echo "2. Push to remote repository:"
|
||||
echo " git push -u origin main"
|
||||
echo ""
|
||||
echo "3. Start the application:"
|
||||
echo " make run-api"
|
||||
echo " # or: python run.py --web-api"
|
294
scripts/migrate_geolocation.py
Normal file
294
scripts/migrate_geolocation.py
Normal file
@@ -0,0 +1,294 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to add geolocation columns to existing water monitoring database
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
|
||||
def migrate_sqlite(db_path: str = 'water_monitoring.db') -> bool:
|
||||
"""Migrate SQLite database to add geolocation columns"""
|
||||
try:
|
||||
logging.info(f"Migrating SQLite database: {db_path}")
|
||||
|
||||
# Connect to database
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check if columns already exist
|
||||
cursor.execute("PRAGMA table_info(stations)")
|
||||
columns = [column[1] for column in cursor.fetchall()]
|
||||
|
||||
logging.info(f"Current columns in stations table: {columns}")
|
||||
|
||||
# Add columns if they don't exist
|
||||
columns_added = []
|
||||
|
||||
if 'latitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN latitude REAL")
|
||||
columns_added.append('latitude')
|
||||
logging.info("Added latitude column")
|
||||
|
||||
if 'longitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN longitude REAL")
|
||||
columns_added.append('longitude')
|
||||
logging.info("Added longitude column")
|
||||
|
||||
if 'geohash' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN geohash TEXT")
|
||||
columns_added.append('geohash')
|
||||
logging.info("Added geohash column")
|
||||
|
||||
if columns_added:
|
||||
# Update P.1 station with sample geolocation data
|
||||
cursor.execute("""
|
||||
UPDATE stations
|
||||
SET latitude = 15.6944, longitude = 100.2028, geohash = 'w5q6uuhvfcfp25'
|
||||
WHERE station_code = 'P.1'
|
||||
""")
|
||||
|
||||
# Commit changes
|
||||
conn.commit()
|
||||
logging.info(f"Successfully added columns: {', '.join(columns_added)}")
|
||||
logging.info("Updated P.1 station with sample geolocation data")
|
||||
else:
|
||||
logging.info("All geolocation columns already exist")
|
||||
|
||||
# Verify the changes
|
||||
cursor.execute("SELECT station_code, latitude, longitude, geohash FROM stations WHERE station_code = 'P.1'")
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
logging.info(f"P.1 station geolocation: {result}")
|
||||
|
||||
conn.close()
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error migrating SQLite database: {e}")
|
||||
return False
|
||||
|
||||
def migrate_postgresql(connection_string: str) -> bool:
|
||||
"""Migrate PostgreSQL database to add geolocation columns"""
|
||||
try:
|
||||
import psycopg2
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logging.info("Migrating PostgreSQL database")
|
||||
|
||||
# Parse connection string
|
||||
parsed = urlparse(connection_string)
|
||||
|
||||
# Connect to database
|
||||
conn = psycopg2.connect(
|
||||
host=parsed.hostname,
|
||||
port=parsed.port or 5432,
|
||||
database=parsed.path[1:], # Remove leading slash
|
||||
user=parsed.username,
|
||||
password=parsed.password
|
||||
)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check if columns exist
|
||||
cursor.execute("""
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'stations'
|
||||
""")
|
||||
columns = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
logging.info(f"Current columns in stations table: {columns}")
|
||||
|
||||
# Add columns if they don't exist
|
||||
columns_added = []
|
||||
|
||||
if 'latitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN latitude DECIMAL(10,8)")
|
||||
columns_added.append('latitude')
|
||||
logging.info("Added latitude column")
|
||||
|
||||
if 'longitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN longitude DECIMAL(11,8)")
|
||||
columns_added.append('longitude')
|
||||
logging.info("Added longitude column")
|
||||
|
||||
if 'geohash' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN geohash VARCHAR(20)")
|
||||
columns_added.append('geohash')
|
||||
logging.info("Added geohash column")
|
||||
|
||||
if columns_added:
|
||||
# Update P.1 station with sample geolocation data
|
||||
cursor.execute("""
|
||||
UPDATE stations
|
||||
SET latitude = 15.6944, longitude = 100.2028, geohash = 'w5q6uuhvfcfp25'
|
||||
WHERE station_code = 'P.1'
|
||||
""")
|
||||
|
||||
# Commit changes
|
||||
conn.commit()
|
||||
logging.info(f"Successfully added columns: {', '.join(columns_added)}")
|
||||
logging.info("Updated P.1 station with sample geolocation data")
|
||||
else:
|
||||
logging.info("All geolocation columns already exist")
|
||||
|
||||
conn.close()
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("psycopg2 not installed. Run: pip install psycopg2-binary")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Error migrating PostgreSQL database: {e}")
|
||||
return False
|
||||
|
||||
def migrate_mysql(connection_string: str) -> bool:
|
||||
"""Migrate MySQL database to add geolocation columns"""
|
||||
try:
|
||||
import pymysql
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logging.info("Migrating MySQL database")
|
||||
|
||||
# Parse connection string
|
||||
parsed = urlparse(connection_string)
|
||||
|
||||
# Connect to database
|
||||
conn = pymysql.connect(
|
||||
host=parsed.hostname,
|
||||
port=parsed.port or 3306,
|
||||
database=parsed.path[1:], # Remove leading slash
|
||||
user=parsed.username,
|
||||
password=parsed.password
|
||||
)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check if columns exist
|
||||
cursor.execute("DESCRIBE stations")
|
||||
columns = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
logging.info(f"Current columns in stations table: {columns}")
|
||||
|
||||
# Add columns if they don't exist
|
||||
columns_added = []
|
||||
|
||||
if 'latitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN latitude DECIMAL(10,8)")
|
||||
columns_added.append('latitude')
|
||||
logging.info("Added latitude column")
|
||||
|
||||
if 'longitude' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN longitude DECIMAL(11,8)")
|
||||
columns_added.append('longitude')
|
||||
logging.info("Added longitude column")
|
||||
|
||||
if 'geohash' not in columns:
|
||||
cursor.execute("ALTER TABLE stations ADD COLUMN geohash VARCHAR(20)")
|
||||
columns_added.append('geohash')
|
||||
logging.info("Added geohash column")
|
||||
|
||||
if columns_added:
|
||||
# Update P.1 station with sample geolocation data
|
||||
cursor.execute("""
|
||||
UPDATE stations
|
||||
SET latitude = 15.6944, longitude = 100.2028, geohash = 'w5q6uuhvfcfp25'
|
||||
WHERE station_code = 'P.1'
|
||||
""")
|
||||
|
||||
# Commit changes
|
||||
conn.commit()
|
||||
logging.info(f"Successfully added columns: {', '.join(columns_added)}")
|
||||
logging.info("Updated P.1 station with sample geolocation data")
|
||||
else:
|
||||
logging.info("All geolocation columns already exist")
|
||||
|
||||
conn.close()
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("pymysql not installed. Run: pip install pymysql")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Error migrating MySQL database: {e}")
|
||||
return False
|
||||
|
||||
def load_config_from_env() -> Dict[str, Any]:
|
||||
"""Load database configuration from environment variables"""
|
||||
db_type = os.getenv('DB_TYPE', 'sqlite').lower()
|
||||
|
||||
if db_type == 'postgresql':
|
||||
return {
|
||||
'type': 'postgresql',
|
||||
'connection_string': os.getenv('POSTGRES_CONNECTION_STRING',
|
||||
'postgresql://postgres:password@localhost/water_monitoring')
|
||||
}
|
||||
elif db_type == 'mysql':
|
||||
return {
|
||||
'type': 'mysql',
|
||||
'connection_string': os.getenv('MYSQL_CONNECTION_STRING',
|
||||
'mysql://root:password@localhost/water_monitoring')
|
||||
}
|
||||
elif db_type == 'victoriametrics':
|
||||
logging.info("VictoriaMetrics doesn't require schema migration")
|
||||
return {'type': 'victoriametrics'}
|
||||
elif db_type == 'influxdb':
|
||||
logging.info("InfluxDB doesn't require schema migration")
|
||||
return {'type': 'influxdb'}
|
||||
else:
|
||||
# Default to SQLite
|
||||
return {
|
||||
'type': 'sqlite',
|
||||
'db_path': os.getenv('SQLITE_DB_PATH', 'water_monitoring.db')
|
||||
}
|
||||
|
||||
def main():
|
||||
"""Main migration function"""
|
||||
logging.info("Starting geolocation column migration...")
|
||||
|
||||
# Load configuration
|
||||
config = load_config_from_env()
|
||||
db_type = config['type']
|
||||
|
||||
logging.info(f"Detected database type: {db_type.upper()}")
|
||||
|
||||
success = False
|
||||
|
||||
if db_type == 'sqlite':
|
||||
db_path = config.get('db_path', 'water_monitoring.db')
|
||||
if not os.path.exists(db_path):
|
||||
logging.error(f"Database file not found: {db_path}")
|
||||
sys.exit(1)
|
||||
success = migrate_sqlite(db_path)
|
||||
|
||||
elif db_type == 'postgresql':
|
||||
success = migrate_postgresql(config['connection_string'])
|
||||
|
||||
elif db_type == 'mysql':
|
||||
success = migrate_mysql(config['connection_string'])
|
||||
|
||||
elif db_type in ['victoriametrics', 'influxdb']:
|
||||
logging.info(f"{db_type.upper()} doesn't require schema migration")
|
||||
success = True
|
||||
|
||||
else:
|
||||
logging.error(f"Unsupported database type: {db_type}")
|
||||
sys.exit(1)
|
||||
|
||||
if success:
|
||||
logging.info("✅ Migration completed successfully!")
|
||||
logging.info("You can now restart your water monitoring application")
|
||||
logging.info("The system will automatically use the new geolocation columns")
|
||||
else:
|
||||
logging.error("❌ Migration failed!")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
39
scripts/water-monitor.service
Normal file
39
scripts/water-monitor.service
Normal file
@@ -0,0 +1,39 @@
|
||||
[Unit]
|
||||
Description=Thailand Water Level Monitor
|
||||
Documentation=https://github.com/your-username/thailand-water-monitor
|
||||
After=network.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=water-monitor
|
||||
Group=water-monitor
|
||||
WorkingDirectory=/opt/thailand-water-monitor
|
||||
ExecStart=/opt/thailand-water-monitor/venv/bin/python src/water_scraper_v3.py
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
TimeoutStopSec=30
|
||||
|
||||
# Environment variables
|
||||
Environment=DB_TYPE=victoriametrics
|
||||
Environment=VM_HOST=localhost
|
||||
Environment=VM_PORT=8428
|
||||
Environment=PYTHONPATH=/opt/thailand-water-monitor
|
||||
Environment=PYTHONUNBUFFERED=1
|
||||
|
||||
# Security settings
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=true
|
||||
ReadWritePaths=/opt/thailand-water-monitor
|
||||
CapabilityBoundingSet=
|
||||
|
||||
# Logging
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=water-monitor
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
96
setup.py
Normal file
96
setup.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Setup script for Northern Thailand Ping River Monitor
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
import os
|
||||
|
||||
# Read the README file
|
||||
with open("README.md", "r", encoding="utf-8") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
# Read requirements
|
||||
with open("requirements.txt", "r", encoding="utf-8") as fh:
|
||||
requirements = [line.strip() for line in fh if line.strip() and not line.startswith("#")]
|
||||
|
||||
# Extract core requirements (exclude dev dependencies)
|
||||
core_requirements = []
|
||||
for req in requirements:
|
||||
if not any(dev_keyword in req.lower() for dev_keyword in ['pytest', 'black', 'flake8', 'mypy', 'sphinx']):
|
||||
core_requirements.append(req)
|
||||
|
||||
setup(
|
||||
name="northern-thailand-ping-river-monitor",
|
||||
version="3.1.0",
|
||||
author="Ping River Monitor Team",
|
||||
author_email="contact@example.com",
|
||||
description="Real-time water level monitoring system for the Ping River Basin in Northern Thailand",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor",
|
||||
project_urls={
|
||||
"Bug Tracker": "https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/issues",
|
||||
"Documentation": "https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor/wiki",
|
||||
"Source Code": "https://git.b4l.co.th/B4L/Northern-Thailand-Ping-River-Monitor",
|
||||
},
|
||||
packages=find_packages(),
|
||||
classifiers=[
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Intended Audience :: System Administrators",
|
||||
"Topic :: Scientific/Engineering :: Hydrology",
|
||||
"Topic :: System :: Monitoring",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Operating System :: OS Independent",
|
||||
"Environment :: Web Environment",
|
||||
"Framework :: FastAPI",
|
||||
],
|
||||
python_requires=">=3.9",
|
||||
install_requires=core_requirements,
|
||||
extras_require={
|
||||
"dev": [
|
||||
"pytest>=7.4.3",
|
||||
"pytest-cov>=4.1.0",
|
||||
"black>=23.11.0",
|
||||
"flake8>=6.1.0",
|
||||
"mypy>=1.7.1",
|
||||
"pre-commit>=3.5.0",
|
||||
],
|
||||
"docs": [
|
||||
"sphinx>=7.2.6",
|
||||
"sphinx-rtd-theme>=1.3.0",
|
||||
],
|
||||
"all": [
|
||||
"influxdb>=5.3.1",
|
||||
"pymysql>=1.1.0",
|
||||
"psycopg2-binary>=2.9.9",
|
||||
],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"ping-river-monitor=src.main:main",
|
||||
"ping-river-api=src.web_api:main",
|
||||
],
|
||||
},
|
||||
include_package_data=True,
|
||||
package_data={
|
||||
"src": ["*.py"],
|
||||
},
|
||||
keywords=[
|
||||
"water monitoring",
|
||||
"hydrology",
|
||||
"thailand",
|
||||
"ping river",
|
||||
"environmental monitoring",
|
||||
"time series",
|
||||
"fastapi",
|
||||
"real-time data",
|
||||
],
|
||||
zip_safe=False,
|
||||
)
|
38
src/__init__.py
Normal file
38
src/__init__.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Northern Thailand Ping River Monitor Package
|
||||
|
||||
A comprehensive real-time water level monitoring system for the Ping River Basin
|
||||
in Northern Thailand, covering Royal Irrigation Department (RID) stations.
|
||||
"""
|
||||
|
||||
__version__ = "3.1.0"
|
||||
__author__ = "Ping River Monitor Team"
|
||||
__description__ = "Northern Thailand Ping River Monitoring System"
|
||||
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .database_adapters import create_database_adapter, DatabaseAdapter
|
||||
from .config import Config
|
||||
from .models import WaterMeasurement, StationInfo, DatabaseConfig
|
||||
from .exceptions import (
|
||||
WaterMonitorException,
|
||||
DatabaseConnectionError,
|
||||
APIConnectionError,
|
||||
DataValidationError,
|
||||
ConfigurationError
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'EnhancedWaterMonitorScraper',
|
||||
'create_database_adapter',
|
||||
'DatabaseAdapter',
|
||||
'Config',
|
||||
'WaterMeasurement',
|
||||
'StationInfo',
|
||||
'DatabaseConfig',
|
||||
'WaterMonitorException',
|
||||
'DatabaseConnectionError',
|
||||
'APIConnectionError',
|
||||
'DataValidationError',
|
||||
'ConfigurationError'
|
||||
]
|
191
src/config.py
Normal file
191
src/config.py
Normal file
@@ -0,0 +1,191 @@
|
||||
import os
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
try:
|
||||
from .exceptions import ConfigurationError
|
||||
from .models import DatabaseType, DatabaseConfig
|
||||
except ImportError:
|
||||
# Handle case when running as standalone script
|
||||
class ConfigurationError(Exception):
|
||||
pass
|
||||
|
||||
from enum import Enum
|
||||
|
||||
class DatabaseType(Enum):
|
||||
SQLITE = "sqlite"
|
||||
MYSQL = "mysql"
|
||||
POSTGRESQL = "postgresql"
|
||||
INFLUXDB = "influxdb"
|
||||
VICTORIAMETRICS = "victoriametrics"
|
||||
|
||||
class Config:
|
||||
"""Configuration class for the Water Level Monitor"""
|
||||
|
||||
# Database settings
|
||||
DATABASE_PATH = os.getenv('WATER_DB_PATH', 'water_levels.db')
|
||||
|
||||
# Website settings
|
||||
TARGET_URL = "https://hyd-app-db.rid.go.th/hydro1h.html"
|
||||
API_URL = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
|
||||
REQUEST_TIMEOUT = int(os.getenv('REQUEST_TIMEOUT', '30'))
|
||||
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
|
||||
# Database configuration
|
||||
DB_TYPE = os.getenv('DB_TYPE', 'sqlite').lower()
|
||||
|
||||
# VictoriaMetrics settings
|
||||
VM_HOST = os.getenv('VM_HOST', 'vm.newedge.house')
|
||||
VM_PORT = int(os.getenv('VM_PORT', '443'))
|
||||
|
||||
# Support for HTTPS URLs (e.g., behind reverse proxy)
|
||||
VM_URL = os.getenv('VM_URL') # Full URL override (e.g., https://vm.example.com)
|
||||
|
||||
# InfluxDB settings
|
||||
INFLUX_HOST = os.getenv('INFLUX_HOST', 'localhost')
|
||||
INFLUX_PORT = int(os.getenv('INFLUX_PORT', '8086'))
|
||||
INFLUX_DATABASE = os.getenv('INFLUX_DATABASE', 'water_monitoring')
|
||||
INFLUX_USERNAME = os.getenv('INFLUX_USERNAME')
|
||||
INFLUX_PASSWORD = os.getenv('INFLUX_PASSWORD')
|
||||
|
||||
# PostgreSQL settings
|
||||
POSTGRES_CONNECTION_STRING = os.getenv('POSTGRES_CONNECTION_STRING')
|
||||
|
||||
# MySQL settings
|
||||
MYSQL_CONNECTION_STRING = os.getenv('MYSQL_CONNECTION_STRING')
|
||||
|
||||
# Scheduler settings
|
||||
SCRAPING_INTERVAL_HOURS = int(os.getenv('SCRAPING_INTERVAL_HOURS', '1'))
|
||||
|
||||
# Logging settings
|
||||
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
|
||||
LOG_FILE = os.getenv('LOG_FILE', 'water_monitor.log')
|
||||
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
|
||||
|
||||
# Data retention
|
||||
DATA_RETENTION_DAYS = int(os.getenv('DATA_RETENTION_DAYS', '365'))
|
||||
|
||||
# Retry settings
|
||||
MAX_RETRIES = int(os.getenv('MAX_RETRIES', '3'))
|
||||
RETRY_DELAY_SECONDS = int(os.getenv('RETRY_DELAY_SECONDS', '60'))
|
||||
|
||||
@classmethod
|
||||
def validate_config(cls) -> bool:
|
||||
"""Validate configuration settings"""
|
||||
errors = []
|
||||
|
||||
# Validate database type
|
||||
try:
|
||||
DatabaseType(cls.DB_TYPE)
|
||||
except ValueError:
|
||||
errors.append(f"Invalid DB_TYPE: {cls.DB_TYPE}")
|
||||
|
||||
# Validate database-specific settings
|
||||
if cls.DB_TYPE == 'victoriametrics':
|
||||
if not cls.VM_HOST:
|
||||
errors.append("VM_HOST is required for VictoriaMetrics")
|
||||
if not isinstance(cls.VM_PORT, int) or cls.VM_PORT <= 0:
|
||||
errors.append("VM_PORT must be a positive integer")
|
||||
|
||||
elif cls.DB_TYPE == 'influxdb':
|
||||
if not cls.INFLUX_HOST:
|
||||
errors.append("INFLUX_HOST is required for InfluxDB")
|
||||
if not cls.INFLUX_DATABASE:
|
||||
errors.append("INFLUX_DATABASE is required for InfluxDB")
|
||||
|
||||
elif cls.DB_TYPE in ['postgresql', 'mysql']:
|
||||
connection_string = (cls.POSTGRES_CONNECTION_STRING if cls.DB_TYPE == 'postgresql'
|
||||
else cls.MYSQL_CONNECTION_STRING)
|
||||
if not connection_string:
|
||||
errors.append(f"Connection string is required for {cls.DB_TYPE.upper()}")
|
||||
|
||||
# Validate numeric settings
|
||||
if cls.SCRAPING_INTERVAL_HOURS <= 0:
|
||||
errors.append("SCRAPING_INTERVAL_HOURS must be positive")
|
||||
|
||||
if cls.DATA_RETENTION_DAYS <= 0:
|
||||
errors.append("DATA_RETENTION_DAYS must be positive")
|
||||
|
||||
if errors:
|
||||
raise ConfigurationError(f"Configuration errors: {'; '.join(errors)}")
|
||||
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def get_database_config(cls) -> Dict[str, Any]:
|
||||
"""Returns database configuration based on DB_TYPE"""
|
||||
if cls.DB_TYPE == 'victoriametrics':
|
||||
return {
|
||||
'type': 'victoriametrics',
|
||||
'host': cls.VM_HOST,
|
||||
'port': cls.VM_PORT
|
||||
}
|
||||
elif cls.DB_TYPE == 'influxdb':
|
||||
return {
|
||||
'type': 'influxdb',
|
||||
'host': cls.INFLUX_HOST,
|
||||
'port': cls.INFLUX_PORT,
|
||||
'database': cls.INFLUX_DATABASE,
|
||||
'username': cls.INFLUX_USERNAME,
|
||||
'password': cls.INFLUX_PASSWORD
|
||||
}
|
||||
elif cls.DB_TYPE == 'postgresql':
|
||||
return {
|
||||
'type': 'postgresql',
|
||||
'connection_string': cls.POSTGRES_CONNECTION_STRING or
|
||||
'postgresql://postgres:password@localhost:5432/water_monitoring'
|
||||
}
|
||||
elif cls.DB_TYPE == 'mysql':
|
||||
return {
|
||||
'type': 'mysql',
|
||||
'connection_string': cls.MYSQL_CONNECTION_STRING or
|
||||
'mysql://root:password@localhost:3306/water_monitoring'
|
||||
}
|
||||
else: # sqlite
|
||||
return {
|
||||
'type': 'sqlite',
|
||||
'connection_string': f'sqlite:///{cls.DATABASE_PATH}'
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_all_settings(cls) -> Dict[str, Any]:
|
||||
"""Returns all configuration settings"""
|
||||
return {
|
||||
'DB_TYPE': cls.DB_TYPE,
|
||||
'DATABASE_PATH': cls.DATABASE_PATH,
|
||||
'TARGET_URL': cls.TARGET_URL,
|
||||
'API_URL': cls.API_URL,
|
||||
'REQUEST_TIMEOUT': cls.REQUEST_TIMEOUT,
|
||||
'SCRAPING_INTERVAL_HOURS': cls.SCRAPING_INTERVAL_HOURS,
|
||||
'LOG_LEVEL': cls.LOG_LEVEL,
|
||||
'LOG_FILE': cls.LOG_FILE,
|
||||
'DATA_RETENTION_DAYS': cls.DATA_RETENTION_DAYS,
|
||||
'MAX_RETRIES': cls.MAX_RETRIES,
|
||||
'RETRY_DELAY_SECONDS': cls.RETRY_DELAY_SECONDS,
|
||||
'VM_HOST': cls.VM_HOST,
|
||||
'VM_PORT': cls.VM_PORT,
|
||||
'INFLUX_HOST': cls.INFLUX_HOST,
|
||||
'INFLUX_PORT': cls.INFLUX_PORT,
|
||||
'INFLUX_DATABASE': cls.INFLUX_DATABASE
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def print_settings(cls):
|
||||
"""Prints all current settings"""
|
||||
print("=== Water Level Monitor Configuration ===")
|
||||
for key, value in cls.get_all_settings().items():
|
||||
# Hide sensitive information
|
||||
if 'PASSWORD' in key and value:
|
||||
value = '*' * len(str(value))
|
||||
print(f"{key}: {value}")
|
||||
print("=" * 45)
|
||||
|
||||
print("\nDatabase Configuration:")
|
||||
db_config = cls.get_database_config()
|
||||
for key, value in db_config.items():
|
||||
if 'password' in key and value:
|
||||
value = '*' * len(str(value))
|
||||
print(f" {key}: {value}")
|
||||
print("=" * 45)
|
||||
|
||||
if __name__ == "__main__":
|
||||
Config.print_settings()
|
663
src/database_adapters.py
Normal file
663
src/database_adapters.py
Normal file
@@ -0,0 +1,663 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Database adapters for different storage backends
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from typing import List, Dict, Optional, Any
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
# Base adapter interface
|
||||
class DatabaseAdapter(ABC):
|
||||
@abstractmethod
|
||||
def connect(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
pass
|
||||
|
||||
# InfluxDB Adapter
|
||||
class InfluxDBAdapter(DatabaseAdapter):
|
||||
def __init__(self, host: str = "localhost", port: int = 8086,
|
||||
database: str = "water_monitoring", username: str = None, password: str = None):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.database = database
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.client = None
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
from influxdb import InfluxDBClient
|
||||
self.client = InfluxDBClient(
|
||||
host=self.host,
|
||||
port=self.port,
|
||||
username=self.username,
|
||||
password=self.password,
|
||||
database=self.database
|
||||
)
|
||||
|
||||
# Create database if it doesn't exist
|
||||
databases = self.client.get_list_database()
|
||||
if not any(db['name'] == self.database for db in databases):
|
||||
self.client.create_database(self.database)
|
||||
logging.info(f"Created InfluxDB database: {self.database}")
|
||||
|
||||
# Create retention policy (keep data for 2 years, downsample after 30 days)
|
||||
retention_policies = self.client.get_list_retention_policies(self.database)
|
||||
if not any(rp['name'] == 'water_data_policy' for rp in retention_policies):
|
||||
self.client.create_retention_policy(
|
||||
'water_data_policy',
|
||||
'730d', # 2 years
|
||||
'1', # replication factor
|
||||
database=self.database,
|
||||
default=True
|
||||
)
|
||||
|
||||
logging.info("Connected to InfluxDB successfully")
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("InfluxDB client not installed. Run: pip install influxdb")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to InfluxDB: {e}")
|
||||
return False
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
if not self.client:
|
||||
logging.error("InfluxDB client not connected")
|
||||
return False
|
||||
|
||||
try:
|
||||
points = []
|
||||
for measurement in measurements:
|
||||
point = {
|
||||
"measurement": "water_data",
|
||||
"tags": {
|
||||
"station_code": measurement['station_code'],
|
||||
"station_name_en": measurement['station_name_en'],
|
||||
"station_name_th": measurement['station_name_th']
|
||||
},
|
||||
"time": measurement['timestamp'].isoformat(),
|
||||
"fields": {
|
||||
"water_level": float(measurement['water_level']),
|
||||
"discharge": float(measurement['discharge']),
|
||||
"discharge_percent": float(measurement['discharge_percent']) if measurement['discharge_percent'] else None
|
||||
}
|
||||
}
|
||||
points.append(point)
|
||||
|
||||
success = self.client.write_points(points)
|
||||
if success:
|
||||
logging.info(f"Successfully wrote {len(points)} points to InfluxDB")
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error writing to InfluxDB: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
if not self.client:
|
||||
return []
|
||||
|
||||
try:
|
||||
query = f"""
|
||||
SELECT last("water_level") as water_level,
|
||||
last("discharge") as discharge,
|
||||
last("discharge_percent") as discharge_percent
|
||||
FROM "water_data"
|
||||
GROUP BY "station_code", "station_name_en", "station_name_th"
|
||||
LIMIT {limit}
|
||||
"""
|
||||
|
||||
result = self.client.query(query)
|
||||
measurements = []
|
||||
|
||||
for point in result.get_points():
|
||||
measurements.append({
|
||||
'timestamp': point['time'],
|
||||
'station_code': point.get('station_code'),
|
||||
'station_name_en': point.get('station_name_en'),
|
||||
'station_name_th': point.get('station_name_th'),
|
||||
'water_level': point.get('water_level'),
|
||||
'discharge': point.get('discharge'),
|
||||
'discharge_percent': point.get('discharge_percent')
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying InfluxDB: {e}")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
if not self.client:
|
||||
return []
|
||||
|
||||
try:
|
||||
where_clause = f"time >= '{start_time.isoformat()}' AND time <= '{end_time.isoformat()}'"
|
||||
if station_codes:
|
||||
station_filter = "'" + "','".join(station_codes) + "'"
|
||||
where_clause += f" AND station_code IN ({station_filter})"
|
||||
|
||||
query = f"""
|
||||
SELECT "water_level", "discharge", "discharge_percent", "station_code", "station_name_en", "station_name_th"
|
||||
FROM "water_data"
|
||||
WHERE {where_clause}
|
||||
ORDER BY time DESC
|
||||
"""
|
||||
|
||||
result = self.client.query(query)
|
||||
measurements = []
|
||||
|
||||
for point in result.get_points():
|
||||
measurements.append({
|
||||
'timestamp': point['time'],
|
||||
'station_code': point.get('station_code'),
|
||||
'station_name_en': point.get('station_name_en'),
|
||||
'station_name_th': point.get('station_name_th'),
|
||||
'water_level': point.get('water_level'),
|
||||
'discharge': point.get('discharge'),
|
||||
'discharge_percent': point.get('discharge_percent')
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying InfluxDB: {e}")
|
||||
return []
|
||||
|
||||
# MySQL/PostgreSQL Adapter
|
||||
class SQLAdapter(DatabaseAdapter):
|
||||
def __init__(self, connection_string: str, db_type: str = "mysql"):
|
||||
self.connection_string = connection_string
|
||||
self.db_type = db_type.lower()
|
||||
self.engine = None
|
||||
|
||||
# Add SQLite-specific connection parameters for better concurrency
|
||||
if self.db_type == "sqlite":
|
||||
if "?" not in connection_string:
|
||||
self.connection_string += "?timeout=30&check_same_thread=False"
|
||||
else:
|
||||
self.connection_string += "&timeout=30&check_same_thread=False"
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
self.engine = create_engine(self.connection_string, pool_pre_ping=True)
|
||||
|
||||
# Create tables
|
||||
self._create_tables()
|
||||
|
||||
logging.info(f"Connected to {self.db_type.upper()} successfully")
|
||||
return True
|
||||
|
||||
except ImportError:
|
||||
logging.error("SQLAlchemy not installed. Run: pip install sqlalchemy pymysql")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to {self.db_type.upper()}: {e}")
|
||||
return False
|
||||
|
||||
def _create_tables(self):
|
||||
from sqlalchemy import text
|
||||
|
||||
# Stations table - adjust for different databases
|
||||
if self.db_type == "sqlite":
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id INTEGER PRIMARY KEY,
|
||||
station_code TEXT UNIQUE NOT NULL,
|
||||
thai_name TEXT NOT NULL,
|
||||
english_name TEXT NOT NULL,
|
||||
latitude REAL,
|
||||
longitude REAL,
|
||||
geohash TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp DATETIME NOT NULL,
|
||||
station_id INTEGER NOT NULL,
|
||||
water_level REAL,
|
||||
discharge REAL,
|
||||
discharge_percent REAL,
|
||||
status TEXT DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE(timestamp, station_id)
|
||||
)
|
||||
"""
|
||||
|
||||
# Create indexes separately for SQLite
|
||||
index_sql = [
|
||||
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp)"
|
||||
]
|
||||
|
||||
elif self.db_type == "postgresql":
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id SERIAL PRIMARY KEY,
|
||||
station_code VARCHAR(10) UNIQUE NOT NULL,
|
||||
thai_name VARCHAR(255) NOT NULL,
|
||||
english_name VARCHAR(255) NOT NULL,
|
||||
latitude DECIMAL(10,8),
|
||||
longitude DECIMAL(11,8),
|
||||
geohash VARCHAR(20),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP NOT NULL,
|
||||
station_id INTEGER NOT NULL,
|
||||
water_level NUMERIC(10,3),
|
||||
discharge NUMERIC(10,2),
|
||||
discharge_percent NUMERIC(5,2),
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE(timestamp, station_id)
|
||||
)
|
||||
"""
|
||||
|
||||
index_sql = [
|
||||
"CREATE INDEX IF NOT EXISTS idx_timestamp ON water_measurements(timestamp)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_station_timestamp ON water_measurements(station_id, timestamp DESC)"
|
||||
]
|
||||
|
||||
else: # MySQL
|
||||
stations_sql = """
|
||||
CREATE TABLE IF NOT EXISTS stations (
|
||||
id INT PRIMARY KEY,
|
||||
station_code VARCHAR(10) UNIQUE NOT NULL,
|
||||
thai_name VARCHAR(255) NOT NULL,
|
||||
english_name VARCHAR(255) NOT NULL,
|
||||
latitude DECIMAL(10,8),
|
||||
longitude DECIMAL(11,8),
|
||||
geohash VARCHAR(20),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
|
||||
measurements_sql = """
|
||||
CREATE TABLE IF NOT EXISTS water_measurements (
|
||||
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
||||
timestamp DATETIME NOT NULL,
|
||||
station_id INT NOT NULL,
|
||||
water_level DECIMAL(10,3),
|
||||
discharge DECIMAL(10,2),
|
||||
discharge_percent DECIMAL(5,2),
|
||||
status VARCHAR(20) DEFAULT 'active',
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (station_id) REFERENCES stations(id),
|
||||
UNIQUE KEY unique_measurement (timestamp, station_id),
|
||||
INDEX idx_timestamp (timestamp),
|
||||
INDEX idx_station_timestamp (station_id, timestamp)
|
||||
)
|
||||
"""
|
||||
index_sql = []
|
||||
|
||||
with self.engine.begin() as conn:
|
||||
conn.execute(text(stations_sql))
|
||||
conn.execute(text(measurements_sql))
|
||||
|
||||
# Create indexes for SQLite and PostgreSQL
|
||||
for index in index_sql:
|
||||
conn.execute(text(index))
|
||||
|
||||
# Transaction is automatically committed when context manager exits
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
if not self.engine:
|
||||
return False
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
with self.engine.begin() as conn:
|
||||
# Insert/update stations
|
||||
for measurement in measurements:
|
||||
if self.db_type == "sqlite":
|
||||
station_sql = """
|
||||
INSERT OR REPLACE INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, CURRENT_TIMESTAMP)
|
||||
"""
|
||||
elif self.db_type == "postgresql":
|
||||
station_sql = """
|
||||
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
thai_name = EXCLUDED.thai_name,
|
||||
english_name = EXCLUDED.english_name,
|
||||
latitude = EXCLUDED.latitude,
|
||||
longitude = EXCLUDED.longitude,
|
||||
geohash = EXCLUDED.geohash,
|
||||
updated_at = NOW()
|
||||
"""
|
||||
else: # MySQL
|
||||
station_sql = """
|
||||
INSERT INTO stations (id, station_code, thai_name, english_name, latitude, longitude, geohash, updated_at)
|
||||
VALUES (:station_id, :station_code, :thai_name, :english_name, :latitude, :longitude, :geohash, NOW())
|
||||
ON DUPLICATE KEY UPDATE
|
||||
thai_name = VALUES(thai_name),
|
||||
english_name = VALUES(english_name),
|
||||
latitude = VALUES(latitude),
|
||||
longitude = VALUES(longitude),
|
||||
geohash = VALUES(geohash),
|
||||
updated_at = NOW()
|
||||
"""
|
||||
|
||||
conn.execute(text(station_sql), {
|
||||
'station_id': measurement['station_id'],
|
||||
'station_code': measurement['station_code'],
|
||||
'thai_name': measurement['station_name_th'],
|
||||
'english_name': measurement['station_name_en'],
|
||||
'latitude': measurement.get('latitude'),
|
||||
'longitude': measurement.get('longitude'),
|
||||
'geohash': measurement.get('geohash')
|
||||
})
|
||||
|
||||
# Insert measurements
|
||||
for measurement in measurements:
|
||||
if self.db_type == "sqlite":
|
||||
measurement_sql = """
|
||||
INSERT OR REPLACE INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
"""
|
||||
elif self.db_type == "postgresql":
|
||||
measurement_sql = """
|
||||
INSERT INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
ON CONFLICT (timestamp, station_id) DO UPDATE SET
|
||||
water_level = EXCLUDED.water_level,
|
||||
discharge = EXCLUDED.discharge,
|
||||
discharge_percent = EXCLUDED.discharge_percent,
|
||||
status = EXCLUDED.status
|
||||
"""
|
||||
else: # MySQL
|
||||
measurement_sql = """
|
||||
INSERT INTO water_measurements
|
||||
(timestamp, station_id, water_level, discharge, discharge_percent, status)
|
||||
VALUES (:timestamp, :station_id, :water_level, :discharge, :discharge_percent, :status)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
water_level = VALUES(water_level),
|
||||
discharge = VALUES(discharge),
|
||||
discharge_percent = VALUES(discharge_percent),
|
||||
status = VALUES(status)
|
||||
"""
|
||||
|
||||
conn.execute(text(measurement_sql), {
|
||||
'timestamp': measurement['timestamp'],
|
||||
'station_id': measurement['station_id'],
|
||||
'water_level': measurement['water_level'],
|
||||
'discharge': measurement['discharge'],
|
||||
'discharge_percent': measurement['discharge_percent'],
|
||||
'status': measurement['status']
|
||||
})
|
||||
|
||||
# Transaction is automatically committed when context manager exits
|
||||
logging.info(f"Successfully saved {len(measurements)} measurements to {self.db_type.upper()}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error saving to {self.db_type.upper()}: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
if not self.engine:
|
||||
return []
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
query = """
|
||||
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
|
||||
m.water_level, m.discharge, m.discharge_percent, m.status
|
||||
FROM water_measurements m
|
||||
JOIN stations s ON m.station_id = s.id
|
||||
INNER JOIN (
|
||||
SELECT station_id, MAX(timestamp) as max_timestamp
|
||||
FROM water_measurements
|
||||
GROUP BY station_id
|
||||
) latest ON m.station_id = latest.station_id AND m.timestamp = latest.max_timestamp
|
||||
ORDER BY s.station_code
|
||||
LIMIT :limit
|
||||
"""
|
||||
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(text(query), {'limit': limit})
|
||||
measurements = []
|
||||
|
||||
for row in result:
|
||||
measurements.append({
|
||||
'timestamp': row[0],
|
||||
'station_code': row[1],
|
||||
'station_name_en': row[2],
|
||||
'station_name_th': row[3],
|
||||
'water_level': float(row[4]) if row[4] else None,
|
||||
'discharge': float(row[5]) if row[5] else None,
|
||||
'discharge_percent': float(row[6]) if row[6] else None,
|
||||
'status': row[7]
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying {self.db_type.upper()}: {e}")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
if not self.engine:
|
||||
return []
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
where_clause = "m.timestamp BETWEEN :start_time AND :end_time"
|
||||
params = {'start_time': start_time, 'end_time': end_time}
|
||||
|
||||
if station_codes:
|
||||
placeholders = ','.join([f':station_{i}' for i in range(len(station_codes))])
|
||||
where_clause += f" AND s.station_code IN ({placeholders})"
|
||||
for i, code in enumerate(station_codes):
|
||||
params[f'station_{i}'] = code
|
||||
|
||||
query = f"""
|
||||
SELECT m.timestamp, s.station_code, s.english_name, s.thai_name,
|
||||
m.water_level, m.discharge, m.discharge_percent, m.status
|
||||
FROM water_measurements m
|
||||
JOIN stations s ON m.station_id = s.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY m.timestamp DESC, s.station_code
|
||||
"""
|
||||
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(text(query), params)
|
||||
measurements = []
|
||||
|
||||
for row in result:
|
||||
measurements.append({
|
||||
'timestamp': row[0],
|
||||
'station_code': row[1],
|
||||
'station_name_en': row[2],
|
||||
'station_name_th': row[3],
|
||||
'water_level': float(row[4]) if row[4] else None,
|
||||
'discharge': float(row[5]) if row[5] else None,
|
||||
'discharge_percent': float(row[6]) if row[6] else None,
|
||||
'status': row[7]
|
||||
})
|
||||
|
||||
return measurements
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error querying {self.db_type.upper()}: {e}")
|
||||
return []
|
||||
|
||||
# VictoriaMetrics Adapter (using Prometheus format)
|
||||
class VictoriaMetricsAdapter(DatabaseAdapter):
|
||||
def __init__(self, host: str = "localhost", port: int = 8428):
|
||||
self.host = host
|
||||
self.port = port
|
||||
|
||||
# Handle HTTPS URLs and reverse proxy configurations
|
||||
if host.startswith(('http://', 'https://')):
|
||||
self.base_url = host
|
||||
if port != 80 and port != 443 and not host.endswith(f':{port}'):
|
||||
# Only add port if it's not standard and not already in URL
|
||||
if '://' in host and ':' not in host.split('://')[1]:
|
||||
self.base_url = f"{host}:{port}"
|
||||
else:
|
||||
# Default to HTTP for localhost, HTTPS for remote hosts
|
||||
protocol = "https" if host != "localhost" and not host.startswith("127.") else "http"
|
||||
if (protocol == "https" and port == 443) or (protocol == "http" and port == 80):
|
||||
self.base_url = f"{protocol}://{host}"
|
||||
else:
|
||||
self.base_url = f"{protocol}://{host}:{port}"
|
||||
|
||||
def connect(self):
|
||||
try:
|
||||
import requests
|
||||
# Test connection with SSL verification and timeout
|
||||
response = requests.get(
|
||||
f"{self.base_url}/api/v1/status/config",
|
||||
timeout=10,
|
||||
verify=True # Enable SSL verification for HTTPS
|
||||
)
|
||||
if response.status_code == 200:
|
||||
logging.info(f"Connected to VictoriaMetrics successfully at {self.base_url}")
|
||||
return True
|
||||
else:
|
||||
logging.error(f"VictoriaMetrics connection failed: {response.status_code}")
|
||||
return False
|
||||
except requests.exceptions.SSLError as e:
|
||||
logging.error(f"SSL error connecting to VictoriaMetrics: {e}")
|
||||
return False
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
logging.error(f"Connection error to VictoriaMetrics: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to connect to VictoriaMetrics: {e}")
|
||||
return False
|
||||
|
||||
def save_measurements(self, measurements: List[Dict]) -> bool:
|
||||
try:
|
||||
import requests
|
||||
|
||||
# Convert to Prometheus format
|
||||
metrics_data = []
|
||||
timestamp_ms = int(datetime.datetime.now().timestamp() * 1000)
|
||||
|
||||
for measurement in measurements:
|
||||
# Water level metric
|
||||
metrics_data.append(
|
||||
f'water_level{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["water_level"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Discharge metric
|
||||
metrics_data.append(
|
||||
f'water_discharge{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["discharge"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Discharge percentage metric
|
||||
if measurement["discharge_percent"]:
|
||||
metrics_data.append(
|
||||
f'water_discharge_percent{{station_code="{measurement["station_code"]}",'
|
||||
f'station_name_en="{measurement["station_name_en"]}",'
|
||||
f'station_name_th="{measurement["station_name_th"]}"}} '
|
||||
f'{measurement["discharge_percent"]} {timestamp_ms}'
|
||||
)
|
||||
|
||||
# Send to VictoriaMetrics
|
||||
data = '\n'.join(metrics_data)
|
||||
response = requests.post(
|
||||
f"{self.base_url}/api/v1/import/prometheus",
|
||||
data=data,
|
||||
headers={'Content-Type': 'text/plain'},
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if response.status_code == 204:
|
||||
logging.info(f"Successfully sent {len(measurements)} measurements to VictoriaMetrics")
|
||||
return True
|
||||
else:
|
||||
logging.error(f"VictoriaMetrics import failed: {response.status_code} - {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error sending to VictoriaMetrics: {e}")
|
||||
return False
|
||||
|
||||
def get_latest_measurements(self, limit: int = 100) -> List[Dict]:
|
||||
# VictoriaMetrics queries would be implemented here
|
||||
# This is a simplified version
|
||||
logging.warning("get_latest_measurements not fully implemented for VictoriaMetrics")
|
||||
return []
|
||||
|
||||
def get_measurements_by_timerange(self, start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
station_codes: Optional[List[str]] = None) -> List[Dict]:
|
||||
# VictoriaMetrics range queries would be implemented here
|
||||
logging.warning("get_measurements_by_timerange not fully implemented for VictoriaMetrics")
|
||||
return []
|
||||
|
||||
# Factory function to create appropriate adapter
|
||||
def create_database_adapter(db_type: str, **kwargs) -> DatabaseAdapter:
|
||||
"""
|
||||
Factory function to create database adapter
|
||||
|
||||
Args:
|
||||
db_type: 'influxdb', 'mysql', 'postgresql', 'sqlite', or 'victoriametrics'
|
||||
**kwargs: Database-specific connection parameters
|
||||
"""
|
||||
db_type = db_type.lower()
|
||||
|
||||
if db_type == 'influxdb':
|
||||
return InfluxDBAdapter(**kwargs)
|
||||
elif db_type == 'mysql':
|
||||
return SQLAdapter(db_type='mysql', **kwargs)
|
||||
elif db_type == 'postgresql':
|
||||
return SQLAdapter(db_type='postgresql', **kwargs)
|
||||
elif db_type == 'sqlite':
|
||||
return SQLAdapter(db_type='sqlite', **kwargs)
|
||||
elif db_type == 'victoriametrics':
|
||||
return VictoriaMetricsAdapter(**kwargs)
|
||||
else:
|
||||
raise ValueError(f"Unsupported database type: {db_type}")
|
331
src/demo_databases.py
Normal file
331
src/demo_databases.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Demo script showing different database backend options for water monitoring
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
from water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
|
||||
def demo_sqlite():
|
||||
"""Demo with SQLite (local development)"""
|
||||
print("=" * 60)
|
||||
print("🗄️ SQLite Demo (Local Development)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'sqlite',
|
||||
'connection_string': 'sqlite:///demo_water_sqlite.db'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to SQLite database")
|
||||
|
||||
# Show latest data
|
||||
latest = scraper.get_latest_data(5)
|
||||
print(f"\nLatest 5 measurements:")
|
||||
for measurement in latest:
|
||||
print(f" • {measurement['station_code']} ({measurement['station_name_en']}): "
|
||||
f"{measurement['water_level']:.2f}m, {measurement['discharge']:.1f} cms")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
def demo_influxdb():
|
||||
"""Demo with InfluxDB (requires InfluxDB running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("📊 InfluxDB Demo (Time-Series Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'influxdb',
|
||||
'host': 'localhost',
|
||||
'port': 8086,
|
||||
'database': 'water_monitoring_demo',
|
||||
'username': None, # Set if authentication is enabled
|
||||
'password': None
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.client:
|
||||
print("✓ Connected to InfluxDB")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to InfluxDB")
|
||||
print("💡 You can now query this data in Grafana or InfluxDB CLI")
|
||||
print(" Example query: SELECT * FROM water_data ORDER BY time DESC LIMIT 10")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to InfluxDB")
|
||||
print("💡 Make sure InfluxDB is running: docker run -p 8086:8086 influxdb:1.8")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 InfluxDB might not be running or accessible")
|
||||
|
||||
def demo_postgresql():
|
||||
"""Demo with PostgreSQL (requires PostgreSQL running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🐘 PostgreSQL Demo (Relational Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'postgresql',
|
||||
'connection_string': 'postgresql://postgres:password@localhost:5432/water_monitoring'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.engine:
|
||||
print("✓ Connected to PostgreSQL")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to PostgreSQL")
|
||||
print("💡 You can now query this data with SQL")
|
||||
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to PostgreSQL")
|
||||
print("💡 Make sure PostgreSQL is running with correct credentials")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 PostgreSQL might not be running or credentials might be wrong")
|
||||
|
||||
def demo_mysql():
|
||||
"""Demo with MySQL (requires MySQL running)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🐬 MySQL Demo (Relational Database)")
|
||||
print("=" * 60)
|
||||
|
||||
config = {
|
||||
'type': 'mysql',
|
||||
'connection_string': 'mysql://root:password@localhost:3306/water_monitoring'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter and scraper.db_adapter.engine:
|
||||
print("✓ Connected to MySQL")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to MySQL")
|
||||
print("💡 You can now query this data with SQL")
|
||||
print(" Example: SELECT * FROM water_measurements ORDER BY timestamp DESC LIMIT 10;")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to MySQL")
|
||||
print("💡 Make sure MySQL is running with correct credentials")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 MySQL might not be running or credentials might be wrong")
|
||||
|
||||
def demo_victoriametrics():
|
||||
"""Demo with VictoriaMetrics (supports both local and HTTPS configurations)"""
|
||||
print("\n" + "=" * 60)
|
||||
print("⚡ VictoriaMetrics Demo (High-Performance Metrics)")
|
||||
print("=" * 60)
|
||||
|
||||
# Use configuration from environment or config.py
|
||||
from config import Config
|
||||
db_config = Config.get_database_config()
|
||||
|
||||
if db_config['type'] != 'victoriametrics':
|
||||
# Fallback to default local configuration
|
||||
config = {
|
||||
'type': 'victoriametrics',
|
||||
'host': 'vm.newedge.house',
|
||||
'port': 443
|
||||
}
|
||||
else:
|
||||
config = db_config
|
||||
|
||||
print(f"Connecting to: {config['host']}:{config['port']}")
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(config)
|
||||
|
||||
if scraper.db_adapter:
|
||||
# Test connection using the adapter's connect method
|
||||
if scraper.db_adapter.connect():
|
||||
print("✓ Connected to VictoriaMetrics")
|
||||
|
||||
# Fetch and save data
|
||||
print("Fetching data from API...")
|
||||
data = scraper.fetch_water_data()
|
||||
|
||||
if data:
|
||||
print(f"✓ Fetched {len(data)} data points")
|
||||
success = scraper.save_to_database(data)
|
||||
|
||||
if success:
|
||||
print("✓ Data saved to VictoriaMetrics")
|
||||
print("💡 You can now query this data via Prometheus API")
|
||||
|
||||
# Show appropriate query URL based on configuration
|
||||
base_url = scraper.db_adapter.base_url
|
||||
print(f" Example: {base_url}/api/v1/query?query=water_level")
|
||||
print(f" Health check: {base_url}/health")
|
||||
else:
|
||||
print("✗ Failed to save data")
|
||||
else:
|
||||
print("✗ No data fetched")
|
||||
else:
|
||||
print("✗ Could not connect to VictoriaMetrics")
|
||||
if config['host'] == 'localhost':
|
||||
print("💡 Make sure VictoriaMetrics is running locally:")
|
||||
print(" docker run -p 8428:8428 victoriametrics/victoria-metrics")
|
||||
else:
|
||||
print(f"💡 Check if VictoriaMetrics is accessible at {config['host']}:{config['port']}")
|
||||
print("💡 Verify HTTPS configuration and network connectivity")
|
||||
else:
|
||||
print("✗ Failed to initialize VictoriaMetrics adapter")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
print("💡 Check your VictoriaMetrics configuration and network connectivity")
|
||||
|
||||
def show_recommendations():
|
||||
"""Show database recommendations"""
|
||||
print("\n" + "=" * 60)
|
||||
print("🏆 Database Recommendations")
|
||||
print("=" * 60)
|
||||
|
||||
recommendations = [
|
||||
{
|
||||
'name': 'InfluxDB',
|
||||
'best_for': 'Time-series data, Grafana dashboards',
|
||||
'pros': ['Purpose-built for time-series', 'Great compression', 'Built-in retention'],
|
||||
'cons': ['Learning curve', 'Less flexible for complex queries'],
|
||||
'use_case': 'Recommended for most water monitoring deployments'
|
||||
},
|
||||
{
|
||||
'name': 'PostgreSQL + TimescaleDB',
|
||||
'best_for': 'Complex queries, existing PostgreSQL infrastructure',
|
||||
'pros': ['Mature ecosystem', 'SQL compatibility', 'ACID compliance'],
|
||||
'cons': ['More complex setup', 'Higher resource usage'],
|
||||
'use_case': 'Best for organizations already using PostgreSQL'
|
||||
},
|
||||
{
|
||||
'name': 'VictoriaMetrics',
|
||||
'best_for': 'High-performance metrics, Prometheus compatibility',
|
||||
'pros': ['Extremely fast', 'Low resource usage', 'Better compression'],
|
||||
'cons': ['Newer ecosystem', 'Less tooling'],
|
||||
'use_case': 'Best for high-volume, performance-critical deployments'
|
||||
},
|
||||
{
|
||||
'name': 'MySQL',
|
||||
'best_for': 'Existing MySQL infrastructure, familiar SQL',
|
||||
'pros': ['Familiar', 'Mature', 'Wide support'],
|
||||
'cons': ['Not optimized for time-series', 'Manual optimization needed'],
|
||||
'use_case': 'Good for organizations with existing MySQL expertise'
|
||||
}
|
||||
]
|
||||
|
||||
for rec in recommendations:
|
||||
print(f"\n📊 {rec['name']}")
|
||||
print(f" Best for: {rec['best_for']}")
|
||||
print(f" Pros: {', '.join(rec['pros'])}")
|
||||
print(f" Cons: {', '.join(rec['cons'])}")
|
||||
print(f" 💡 {rec['use_case']}")
|
||||
|
||||
def main():
|
||||
"""Main demo function"""
|
||||
print("🌊 Thailand Water Monitor - Database Backend Demo")
|
||||
print("This demo shows how to use different database backends")
|
||||
|
||||
# Always run SQLite demo (no external dependencies)
|
||||
demo_sqlite()
|
||||
|
||||
# Check for command line arguments to run specific demos
|
||||
if len(sys.argv) > 1:
|
||||
db_type = sys.argv[1].lower()
|
||||
|
||||
if db_type == 'influxdb':
|
||||
demo_influxdb()
|
||||
elif db_type == 'postgresql':
|
||||
demo_postgresql()
|
||||
elif db_type == 'mysql':
|
||||
demo_mysql()
|
||||
elif db_type == 'victoriametrics':
|
||||
demo_victoriametrics()
|
||||
elif db_type == 'all':
|
||||
demo_influxdb()
|
||||
demo_postgresql()
|
||||
demo_mysql()
|
||||
demo_victoriametrics()
|
||||
else:
|
||||
print(f"\nUnknown database type: {db_type}")
|
||||
print("Available options: influxdb, postgresql, mysql, victoriametrics, all")
|
||||
else:
|
||||
print("\n💡 To test other databases, run:")
|
||||
print(" python demo_databases.py influxdb")
|
||||
print(" python demo_databases.py postgresql")
|
||||
print(" python demo_databases.py mysql")
|
||||
print(" python demo_databases.py victoriametrics")
|
||||
print(" python demo_databases.py all")
|
||||
|
||||
# Show recommendations
|
||||
show_recommendations()
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("✅ Demo completed!")
|
||||
print("📖 See DATABASE_DEPLOYMENT_GUIDE.md for production setup instructions")
|
||||
print("=" * 60)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
32
src/exceptions.py
Normal file
32
src/exceptions.py
Normal file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Custom exceptions for water monitoring system
|
||||
"""
|
||||
|
||||
class WaterMonitorException(Exception):
|
||||
"""Base exception for water monitoring system"""
|
||||
pass
|
||||
|
||||
class DatabaseConnectionError(WaterMonitorException):
|
||||
"""Raised when database connection fails"""
|
||||
pass
|
||||
|
||||
class APIConnectionError(WaterMonitorException):
|
||||
"""Raised when API connection fails"""
|
||||
pass
|
||||
|
||||
class DataValidationError(WaterMonitorException):
|
||||
"""Raised when data validation fails"""
|
||||
pass
|
||||
|
||||
class ConfigurationError(WaterMonitorException):
|
||||
"""Raised when configuration is invalid"""
|
||||
pass
|
||||
|
||||
class DataParsingError(WaterMonitorException):
|
||||
"""Raised when data parsing fails"""
|
||||
pass
|
||||
|
||||
class RetryExhaustedError(WaterMonitorException):
|
||||
"""Raised when all retry attempts are exhausted"""
|
||||
pass
|
265
src/health_check.py
Normal file
265
src/health_check.py
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Health check system for water monitoring application
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List, Callable
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class HealthStatus(Enum):
|
||||
HEALTHY = "healthy"
|
||||
DEGRADED = "degraded"
|
||||
UNHEALTHY = "unhealthy"
|
||||
|
||||
@dataclass
|
||||
class HealthCheckResult:
|
||||
"""Result of a health check"""
|
||||
name: str
|
||||
status: HealthStatus
|
||||
message: str
|
||||
timestamp: datetime
|
||||
response_time_ms: Optional[float] = None
|
||||
details: Optional[Dict[str, Any]] = None
|
||||
|
||||
class HealthCheck:
|
||||
"""Base health check class"""
|
||||
|
||||
def __init__(self, name: str, timeout_seconds: int = 30):
|
||||
self.name = name
|
||||
self.timeout_seconds = timeout_seconds
|
||||
|
||||
def check(self) -> HealthCheckResult:
|
||||
"""Perform the health check"""
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
result = self._perform_check()
|
||||
response_time = (time.time() - start_time) * 1000
|
||||
|
||||
return HealthCheckResult(
|
||||
name=self.name,
|
||||
status=result.get('status', HealthStatus.HEALTHY),
|
||||
message=result.get('message', 'OK'),
|
||||
timestamp=datetime.now(),
|
||||
response_time_ms=response_time,
|
||||
details=result.get('details')
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
response_time = (time.time() - start_time) * 1000
|
||||
logger.error(f"Health check {self.name} failed: {e}")
|
||||
|
||||
return HealthCheckResult(
|
||||
name=self.name,
|
||||
status=HealthStatus.UNHEALTHY,
|
||||
message=f"Check failed: {str(e)}",
|
||||
timestamp=datetime.now(),
|
||||
response_time_ms=response_time
|
||||
)
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
"""Override this method to implement the actual check"""
|
||||
raise NotImplementedError
|
||||
|
||||
class DatabaseHealthCheck(HealthCheck):
|
||||
"""Health check for database connectivity"""
|
||||
|
||||
def __init__(self, db_adapter, name: str = "database"):
|
||||
super().__init__(name)
|
||||
self.db_adapter = db_adapter
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
if not self.db_adapter:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': 'Database adapter not initialized'
|
||||
}
|
||||
|
||||
try:
|
||||
# Try to connect
|
||||
if hasattr(self.db_adapter, 'connect'):
|
||||
connected = self.db_adapter.connect()
|
||||
if not connected:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': 'Database connection failed'
|
||||
}
|
||||
|
||||
# Try to get latest data
|
||||
latest_data = self.db_adapter.get_latest_measurements(limit=1)
|
||||
|
||||
if latest_data:
|
||||
latest_timestamp = latest_data[0].get('timestamp')
|
||||
if isinstance(latest_timestamp, str):
|
||||
latest_timestamp = datetime.fromisoformat(latest_timestamp.replace('Z', '+00:00'))
|
||||
|
||||
# Check if data is recent (within last 2 hours)
|
||||
if datetime.now() - latest_timestamp.replace(tzinfo=None) > timedelta(hours=2):
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'Latest data is old: {latest_timestamp}',
|
||||
'details': {'latest_data_timestamp': str(latest_timestamp)}
|
||||
}
|
||||
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'Database connection OK',
|
||||
'details': {
|
||||
'latest_data_count': len(latest_data),
|
||||
'latest_timestamp': str(latest_data[0].get('timestamp')) if latest_data else None
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'Database check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class APIHealthCheck(HealthCheck):
|
||||
"""Health check for external API connectivity"""
|
||||
|
||||
def __init__(self, api_url: str, session, name: str = "api"):
|
||||
super().__init__(name)
|
||||
self.api_url = api_url
|
||||
self.session = session
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
try:
|
||||
# Simple GET request to check API availability
|
||||
response = self.session.get(self.api_url, timeout=self.timeout_seconds)
|
||||
|
||||
if response.status_code == 200:
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'API connection OK',
|
||||
'details': {
|
||||
'status_code': response.status_code,
|
||||
'response_size': len(response.content)
|
||||
}
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'API returned status {response.status_code}',
|
||||
'details': {'status_code': response.status_code}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'API check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class MemoryHealthCheck(HealthCheck):
|
||||
"""Health check for memory usage"""
|
||||
|
||||
def __init__(self, max_memory_mb: int = 1000, name: str = "memory"):
|
||||
super().__init__(name)
|
||||
self.max_memory_mb = max_memory_mb
|
||||
|
||||
def _perform_check(self) -> Dict[str, Any]:
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
memory_info = process.memory_info()
|
||||
memory_mb = memory_info.rss / 1024 / 1024
|
||||
|
||||
if memory_mb > self.max_memory_mb:
|
||||
return {
|
||||
'status': HealthStatus.DEGRADED,
|
||||
'message': f'High memory usage: {memory_mb:.1f}MB',
|
||||
'details': {'memory_mb': memory_mb, 'max_memory_mb': self.max_memory_mb}
|
||||
}
|
||||
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': f'Memory usage OK: {memory_mb:.1f}MB',
|
||||
'details': {'memory_mb': memory_mb}
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return {
|
||||
'status': HealthStatus.HEALTHY,
|
||||
'message': 'Memory check skipped (psutil not available)'
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
'status': HealthStatus.UNHEALTHY,
|
||||
'message': f'Memory check failed: {str(e)}'
|
||||
}
|
||||
|
||||
class HealthCheckManager:
|
||||
"""Manages multiple health checks"""
|
||||
|
||||
def __init__(self):
|
||||
self.checks: List[HealthCheck] = []
|
||||
self.last_results: Dict[str, HealthCheckResult] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def add_check(self, health_check: HealthCheck):
|
||||
"""Add a health check"""
|
||||
with self._lock:
|
||||
self.checks.append(health_check)
|
||||
|
||||
def run_all_checks(self) -> Dict[str, HealthCheckResult]:
|
||||
"""Run all health checks"""
|
||||
results = {}
|
||||
|
||||
for check in self.checks:
|
||||
try:
|
||||
result = check.check()
|
||||
results[check.name] = result
|
||||
|
||||
with self._lock:
|
||||
self.last_results[check.name] = result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error running health check {check.name}: {e}")
|
||||
results[check.name] = HealthCheckResult(
|
||||
name=check.name,
|
||||
status=HealthStatus.UNHEALTHY,
|
||||
message=f"Check execution failed: {str(e)}",
|
||||
timestamp=datetime.now()
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def get_overall_status(self) -> HealthStatus:
|
||||
"""Get overall system health status"""
|
||||
if not self.last_results:
|
||||
return HealthStatus.UNHEALTHY
|
||||
|
||||
statuses = [result.status for result in self.last_results.values()]
|
||||
|
||||
if any(status == HealthStatus.UNHEALTHY for status in statuses):
|
||||
return HealthStatus.UNHEALTHY
|
||||
elif any(status == HealthStatus.DEGRADED for status in statuses):
|
||||
return HealthStatus.DEGRADED
|
||||
else:
|
||||
return HealthStatus.HEALTHY
|
||||
|
||||
def get_health_summary(self) -> Dict[str, Any]:
|
||||
"""Get a summary of system health"""
|
||||
overall_status = self.get_overall_status()
|
||||
|
||||
return {
|
||||
'overall_status': overall_status.value,
|
||||
'timestamp': datetime.now().isoformat(),
|
||||
'checks': {
|
||||
name: {
|
||||
'status': result.status.value,
|
||||
'message': result.message,
|
||||
'response_time_ms': result.response_time_ms,
|
||||
'timestamp': result.timestamp.isoformat()
|
||||
}
|
||||
for name, result in self.last_results.items()
|
||||
}
|
||||
}
|
135
src/logging_config.py
Normal file
135
src/logging_config.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Centralized logging configuration for water monitoring system
|
||||
"""
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
"""Colored console formatter"""
|
||||
|
||||
COLORS = {
|
||||
'DEBUG': '\033[36m', # Cyan
|
||||
'INFO': '\033[32m', # Green
|
||||
'WARNING': '\033[33m', # Yellow
|
||||
'ERROR': '\033[31m', # Red
|
||||
'CRITICAL': '\033[35m', # Magenta
|
||||
'RESET': '\033[0m' # Reset
|
||||
}
|
||||
|
||||
def format(self, record):
|
||||
if hasattr(record, 'levelname'):
|
||||
color = self.COLORS.get(record.levelname, self.COLORS['RESET'])
|
||||
record.levelname = f"{color}{record.levelname}{self.COLORS['RESET']}"
|
||||
return super().format(record)
|
||||
|
||||
def setup_logging(
|
||||
log_level: str = "INFO",
|
||||
log_file: Optional[str] = None,
|
||||
max_file_size: int = 10 * 1024 * 1024, # 10MB
|
||||
backup_count: int = 5,
|
||||
enable_console: bool = True,
|
||||
enable_colors: bool = True
|
||||
) -> logging.Logger:
|
||||
"""
|
||||
Setup comprehensive logging configuration
|
||||
|
||||
Args:
|
||||
log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
|
||||
log_file: Path to log file (optional)
|
||||
max_file_size: Maximum size of log file before rotation
|
||||
backup_count: Number of backup files to keep
|
||||
enable_console: Whether to enable console logging
|
||||
enable_colors: Whether to enable colored console output
|
||||
|
||||
Returns:
|
||||
Configured logger instance
|
||||
"""
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
if log_file:
|
||||
log_dir = os.path.dirname(log_file)
|
||||
if log_dir and not os.path.exists(log_dir):
|
||||
os.makedirs(log_dir)
|
||||
|
||||
# Configure root logger
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(getattr(logging, log_level.upper()))
|
||||
|
||||
# Clear existing handlers
|
||||
logger.handlers.clear()
|
||||
|
||||
# Create formatters
|
||||
detailed_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
|
||||
simple_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(levelname)s - %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
|
||||
# Console handler
|
||||
if enable_console:
|
||||
console_handler = logging.StreamHandler()
|
||||
if enable_colors and os.name != 'nt': # Don't use colors on Windows
|
||||
console_formatter = ColoredFormatter(
|
||||
'%(asctime)s - %(levelname)s - %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
else:
|
||||
console_formatter = simple_formatter
|
||||
|
||||
console_handler.setFormatter(console_formatter)
|
||||
console_handler.setLevel(getattr(logging, log_level.upper()))
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# File handler with rotation
|
||||
if log_file:
|
||||
file_handler = logging.handlers.RotatingFileHandler(
|
||||
log_file,
|
||||
maxBytes=max_file_size,
|
||||
backupCount=backup_count,
|
||||
encoding='utf-8'
|
||||
)
|
||||
file_handler.setFormatter(detailed_formatter)
|
||||
file_handler.setLevel(logging.DEBUG) # Always log everything to file
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# Add performance logger for metrics
|
||||
perf_logger = logging.getLogger('performance')
|
||||
if log_file:
|
||||
perf_file = log_file.replace('.log', '_performance.log')
|
||||
perf_handler = logging.handlers.RotatingFileHandler(
|
||||
perf_file,
|
||||
maxBytes=max_file_size,
|
||||
backupCount=backup_count,
|
||||
encoding='utf-8'
|
||||
)
|
||||
perf_formatter = logging.Formatter(
|
||||
'%(asctime)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
perf_handler.setFormatter(perf_formatter)
|
||||
perf_logger.addHandler(perf_handler)
|
||||
perf_logger.setLevel(logging.INFO)
|
||||
perf_logger.propagate = False
|
||||
|
||||
return logger
|
||||
|
||||
def log_performance_metric(operation: str, duration: float, additional_info: Optional[str] = None):
|
||||
"""Log performance metrics"""
|
||||
perf_logger = logging.getLogger('performance')
|
||||
message = f"PERF: {operation} took {duration:.3f}s"
|
||||
if additional_info:
|
||||
message += f" - {additional_info}"
|
||||
perf_logger.info(message)
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""Get a logger with the specified name"""
|
||||
return logging.getLogger(name)
|
337
src/main.py
Normal file
337
src/main.py
Normal file
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Main entry point for the Thailand Water Monitor system
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
import signal
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from .config import Config
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .logging_config import setup_logging, get_logger
|
||||
from .exceptions import ConfigurationError, DatabaseConnectionError
|
||||
from .metrics import get_metrics_collector
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
def setup_signal_handlers(scraper: Optional[EnhancedWaterMonitorScraper] = None):
|
||||
"""Setup signal handlers for graceful shutdown"""
|
||||
def signal_handler(signum, frame):
|
||||
logger.info(f"Received signal {signum}, shutting down gracefully...")
|
||||
if scraper:
|
||||
logger.info("Stopping scraper...")
|
||||
sys.exit(0)
|
||||
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
def run_test_cycle():
|
||||
"""Run a single test cycle"""
|
||||
logger.info("Running test cycle...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Run single scraping cycle
|
||||
result = scraper.run_scraping_cycle()
|
||||
|
||||
if result:
|
||||
logger.info("✅ Test cycle completed successfully")
|
||||
|
||||
# Show some statistics
|
||||
latest_data = scraper.get_latest_data(5)
|
||||
if latest_data:
|
||||
logger.info(f"Latest data points: {len(latest_data)}")
|
||||
for data in latest_data[:3]: # Show first 3
|
||||
logger.info(f" • {data['station_code']}: {data['water_level']:.2f}m, {data['discharge']:.1f} cms")
|
||||
else:
|
||||
logger.warning("⚠️ Test cycle completed but no new data was found")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Test cycle failed: {e}")
|
||||
return False
|
||||
|
||||
def run_continuous_monitoring():
|
||||
"""Run continuous monitoring with scheduling"""
|
||||
logger.info("Starting continuous monitoring...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Setup signal handlers
|
||||
setup_signal_handlers(scraper)
|
||||
|
||||
logger.info(f"Monitoring started with {Config.SCRAPING_INTERVAL_HOURS}h interval")
|
||||
logger.info("Press Ctrl+C to stop")
|
||||
|
||||
# Run initial cycle
|
||||
logger.info("Running initial data collection...")
|
||||
scraper.run_scraping_cycle()
|
||||
|
||||
# Start scheduled monitoring
|
||||
import schedule
|
||||
|
||||
schedule.every(Config.SCRAPING_INTERVAL_HOURS).hours.do(scraper.run_scraping_cycle)
|
||||
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(60) # Check every minute
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Monitoring stopped by user")
|
||||
except Exception as e:
|
||||
logger.error(f"Monitoring failed: {e}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def run_gap_filling(days_back: int):
|
||||
"""Run gap filling for missing data"""
|
||||
logger.info(f"Checking for data gaps in the last {days_back} days...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Fill gaps
|
||||
filled_count = scraper.fill_data_gaps(days_back)
|
||||
|
||||
if filled_count > 0:
|
||||
logger.info(f"✅ Filled {filled_count} missing data points")
|
||||
else:
|
||||
logger.info("✅ No data gaps found")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Gap filling failed: {e}")
|
||||
return False
|
||||
|
||||
def run_data_update(days_back: int):
|
||||
"""Update existing data with latest values"""
|
||||
logger.info(f"Updating existing data for the last {days_back} days...")
|
||||
|
||||
try:
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Update data
|
||||
updated_count = scraper.update_existing_data(days_back)
|
||||
|
||||
if updated_count > 0:
|
||||
logger.info(f"✅ Updated {updated_count} data points")
|
||||
else:
|
||||
logger.info("✅ No data updates needed")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Data update failed: {e}")
|
||||
return False
|
||||
|
||||
def run_web_api():
|
||||
"""Run the FastAPI web interface"""
|
||||
logger.info("Starting web API server...")
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
from .web_api import app
|
||||
|
||||
# Validate configuration
|
||||
Config.validate_config()
|
||||
|
||||
# Run the server
|
||||
uvicorn.run(
|
||||
app,
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
log_config=None # Use our custom logging
|
||||
)
|
||||
|
||||
except ImportError:
|
||||
logger.error("FastAPI not installed. Run: pip install fastapi uvicorn")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Web API failed: {e}")
|
||||
return False
|
||||
|
||||
def show_status():
|
||||
"""Show current system status"""
|
||||
logger.info("=== Northern Thailand Ping River Monitor Status ===")
|
||||
|
||||
try:
|
||||
# Show configuration
|
||||
Config.print_settings()
|
||||
|
||||
# Test database connection
|
||||
logger.info("\n=== Database Connection Test ===")
|
||||
db_config = Config.get_database_config()
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
if scraper.db_adapter:
|
||||
logger.info("✅ Database connection successful")
|
||||
|
||||
# Show latest data
|
||||
latest_data = scraper.get_latest_data(3)
|
||||
if latest_data:
|
||||
logger.info(f"\n=== Latest Data ({len(latest_data)} points) ===")
|
||||
for data in latest_data:
|
||||
timestamp = data['timestamp']
|
||||
if isinstance(timestamp, str):
|
||||
timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
|
||||
logger.info(f" • {data['station_code']} ({timestamp}): {data['water_level']:.2f}m")
|
||||
else:
|
||||
logger.info("No data found in database")
|
||||
else:
|
||||
logger.error("❌ Database connection failed")
|
||||
|
||||
# Show metrics if available
|
||||
metrics_collector = get_metrics_collector()
|
||||
metrics = metrics_collector.get_all_metrics()
|
||||
|
||||
if any(metrics.values()):
|
||||
logger.info("\n=== Metrics Summary ===")
|
||||
for metric_type, values in metrics.items():
|
||||
if values:
|
||||
logger.info(f"{metric_type.title()}: {len(values)} metrics")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Status check failed: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
"""Main entry point"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Northern Thailand Ping River Monitor",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
%(prog)s --test # Run single test cycle
|
||||
%(prog)s # Run continuous monitoring
|
||||
%(prog)s --web-api # Start web API server
|
||||
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
||||
%(prog)s --update-data 2 # Update existing data for last 2 days
|
||||
%(prog)s --status # Show system status
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
action="store_true",
|
||||
help="Run a single test cycle"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--web-api",
|
||||
action="store_true",
|
||||
help="Start the web API server"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--fill-gaps",
|
||||
type=int,
|
||||
metavar="DAYS",
|
||||
help="Fill missing data gaps for the specified number of days back"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--update-data",
|
||||
type=int,
|
||||
metavar="DAYS",
|
||||
help="Update existing data for the specified number of days back"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--status",
|
||||
action="store_true",
|
||||
help="Show current system status"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||
default=Config.LOG_LEVEL,
|
||||
help="Set logging level"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-file",
|
||||
default=Config.LOG_FILE,
|
||||
help="Log file path"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
setup_logging(
|
||||
log_level=args.log_level,
|
||||
log_file=args.log_file,
|
||||
enable_console=True,
|
||||
enable_colors=True
|
||||
)
|
||||
|
||||
logger.info("🏔️ Northern Thailand Ping River Monitor starting...")
|
||||
logger.info(f"Version: 3.1.0")
|
||||
logger.info(f"Log level: {args.log_level}")
|
||||
|
||||
try:
|
||||
success = False
|
||||
|
||||
if args.test:
|
||||
success = run_test_cycle()
|
||||
elif args.web_api:
|
||||
success = run_web_api()
|
||||
elif args.fill_gaps is not None:
|
||||
success = run_gap_filling(args.fill_gaps)
|
||||
elif args.update_data is not None:
|
||||
success = run_data_update(args.update_data)
|
||||
elif args.status:
|
||||
success = show_status()
|
||||
else:
|
||||
success = run_continuous_monitoring()
|
||||
|
||||
if success:
|
||||
logger.info("✅ Operation completed successfully")
|
||||
sys.exit(0)
|
||||
else:
|
||||
logger.error("❌ Operation failed")
|
||||
sys.exit(1)
|
||||
|
||||
except ConfigurationError as e:
|
||||
logger.error(f"Configuration error: {e}")
|
||||
sys.exit(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Operation cancelled by user")
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
171
src/metrics.py
Normal file
171
src/metrics.py
Normal file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Metrics collection and monitoring for water monitoring system
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List
|
||||
from dataclasses import dataclass, field
|
||||
from collections import defaultdict, deque
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
|
||||
class MetricPoint:
|
||||
"""Single metric data point"""
|
||||
timestamp: datetime
|
||||
value: float
|
||||
labels: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
class MetricsCollector:
|
||||
"""Collects and manages application metrics"""
|
||||
|
||||
def __init__(self, retention_hours: int = 24):
|
||||
self.retention_hours = retention_hours
|
||||
self.metrics: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))
|
||||
self.counters: Dict[str, float] = defaultdict(float)
|
||||
self.gauges: Dict[str, float] = defaultdict(float)
|
||||
self.histograms: Dict[str, List[float]] = defaultdict(list)
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# Start cleanup thread
|
||||
self._cleanup_thread = threading.Thread(target=self._cleanup_old_metrics, daemon=True)
|
||||
self._cleanup_thread.start()
|
||||
|
||||
def increment_counter(self, name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
|
||||
"""Increment a counter metric"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.counters[key] += value
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), self.counters[key], labels or {}))
|
||||
|
||||
def set_gauge(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Set a gauge metric"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.gauges[key] = value
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
|
||||
|
||||
def record_histogram(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Record a histogram value"""
|
||||
with self._lock:
|
||||
key = self._make_key(name, labels)
|
||||
self.histograms[key].append(value)
|
||||
# Keep only recent values
|
||||
if len(self.histograms[key]) > 1000:
|
||||
self.histograms[key] = self.histograms[key][-1000:]
|
||||
|
||||
self.metrics[key].append(MetricPoint(datetime.now(), value, labels or {}))
|
||||
|
||||
def get_counter(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
|
||||
"""Get current counter value"""
|
||||
key = self._make_key(name, labels)
|
||||
return self.counters.get(key, 0.0)
|
||||
|
||||
def get_gauge(self, name: str, labels: Optional[Dict[str, str]] = None) -> float:
|
||||
"""Get current gauge value"""
|
||||
key = self._make_key(name, labels)
|
||||
return self.gauges.get(key, 0.0)
|
||||
|
||||
def get_histogram_stats(self, name: str, labels: Optional[Dict[str, str]] = None) -> Dict[str, float]:
|
||||
"""Get histogram statistics"""
|
||||
key = self._make_key(name, labels)
|
||||
values = self.histograms.get(key, [])
|
||||
|
||||
if not values:
|
||||
return {'count': 0, 'sum': 0, 'avg': 0, 'min': 0, 'max': 0}
|
||||
|
||||
return {
|
||||
'count': len(values),
|
||||
'sum': sum(values),
|
||||
'avg': sum(values) / len(values),
|
||||
'min': min(values),
|
||||
'max': max(values)
|
||||
}
|
||||
|
||||
def get_all_metrics(self) -> Dict[str, Any]:
|
||||
"""Get all current metrics"""
|
||||
with self._lock:
|
||||
return {
|
||||
'counters': dict(self.counters),
|
||||
'gauges': dict(self.gauges),
|
||||
'histograms': {k: self.get_histogram_stats(k) for k in self.histograms}
|
||||
}
|
||||
|
||||
def _make_key(self, name: str, labels: Optional[Dict[str, str]]) -> str:
|
||||
"""Create a unique key for metric with labels"""
|
||||
if not labels:
|
||||
return name
|
||||
|
||||
label_str = ','.join(f"{k}={v}" for k, v in sorted(labels.items()))
|
||||
return f"{name}{{{label_str}}}"
|
||||
|
||||
def _cleanup_old_metrics(self):
|
||||
"""Clean up old metric data points"""
|
||||
while True:
|
||||
try:
|
||||
cutoff_time = datetime.now() - timedelta(hours=self.retention_hours)
|
||||
|
||||
with self._lock:
|
||||
for metric_name, points in self.metrics.items():
|
||||
# Remove old points
|
||||
while points and points[0].timestamp < cutoff_time:
|
||||
points.popleft()
|
||||
|
||||
time.sleep(3600) # Run cleanup every hour
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in metrics cleanup: {e}")
|
||||
time.sleep(60) # Wait a minute before retrying
|
||||
|
||||
# Global metrics collector instance
|
||||
_metrics_collector = None
|
||||
|
||||
def get_metrics_collector() -> MetricsCollector:
|
||||
"""Get the global metrics collector instance"""
|
||||
global _metrics_collector
|
||||
if _metrics_collector is None:
|
||||
_metrics_collector = MetricsCollector()
|
||||
return _metrics_collector
|
||||
|
||||
# Convenience functions
|
||||
def increment_counter(name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None):
|
||||
"""Increment a counter metric"""
|
||||
get_metrics_collector().increment_counter(name, value, labels)
|
||||
|
||||
def set_gauge(name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Set a gauge metric"""
|
||||
get_metrics_collector().set_gauge(name, value, labels)
|
||||
|
||||
def record_histogram(name: str, value: float, labels: Optional[Dict[str, str]] = None):
|
||||
"""Record a histogram value"""
|
||||
get_metrics_collector().record_histogram(name, value, labels)
|
||||
|
||||
class Timer:
|
||||
"""Context manager for timing operations"""
|
||||
|
||||
def __init__(self, metric_name: str, labels: Optional[Dict[str, str]] = None):
|
||||
self.metric_name = metric_name
|
||||
self.labels = labels
|
||||
self.start_time = None
|
||||
|
||||
def __enter__(self):
|
||||
self.start_time = time.time()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if self.start_time:
|
||||
duration = time.time() - self.start_time
|
||||
record_histogram(self.metric_name, duration, self.labels)
|
||||
|
||||
def timer(metric_name: str, labels: Optional[Dict[str, str]] = None):
|
||||
"""Decorator for timing function execution"""
|
||||
def decorator(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
with Timer(metric_name, labels):
|
||||
return func(*args, **kwargs)
|
||||
return wrapper
|
||||
return decorator
|
107
src/models.py
Normal file
107
src/models.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data models for water monitoring system
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
class DatabaseType(Enum):
|
||||
SQLITE = "sqlite"
|
||||
MYSQL = "mysql"
|
||||
POSTGRESQL = "postgresql"
|
||||
INFLUXDB = "influxdb"
|
||||
VICTORIAMETRICS = "victoriametrics"
|
||||
|
||||
class StationStatus(Enum):
|
||||
ACTIVE = "active"
|
||||
INACTIVE = "inactive"
|
||||
MAINTENANCE = "maintenance"
|
||||
ERROR = "error"
|
||||
|
||||
@dataclass
|
||||
class StationInfo:
|
||||
"""Station information model"""
|
||||
station_id: int
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
@dataclass
|
||||
class WaterMeasurement:
|
||||
"""Water measurement data model"""
|
||||
timestamp: datetime
|
||||
station_info: StationInfo
|
||||
water_level: float
|
||||
discharge: float
|
||||
water_level_unit: str = "m"
|
||||
discharge_unit: str = "cms"
|
||||
discharge_percent: Optional[float] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for database storage"""
|
||||
return {
|
||||
'timestamp': self.timestamp,
|
||||
'station_id': self.station_info.station_id,
|
||||
'station_code': self.station_info.station_code,
|
||||
'station_name_en': self.station_info.english_name,
|
||||
'station_name_th': self.station_info.thai_name,
|
||||
'latitude': self.station_info.latitude,
|
||||
'longitude': self.station_info.longitude,
|
||||
'geohash': self.station_info.geohash,
|
||||
'water_level': self.water_level,
|
||||
'water_level_unit': self.water_level_unit,
|
||||
'discharge': self.discharge,
|
||||
'discharge_unit': self.discharge_unit,
|
||||
'discharge_percent': self.discharge_percent,
|
||||
'status': self.status.value
|
||||
}
|
||||
|
||||
@dataclass
|
||||
class DatabaseConfig:
|
||||
"""Database configuration model"""
|
||||
db_type: DatabaseType
|
||||
connection_string: Optional[str] = None
|
||||
host: Optional[str] = None
|
||||
port: Optional[int] = None
|
||||
database: Optional[str] = None
|
||||
username: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
additional_params: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@dataclass
|
||||
class ScrapingResult:
|
||||
"""Result of a scraping operation"""
|
||||
success: bool
|
||||
measurements_count: int
|
||||
error_message: Optional[str] = None
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
processing_time_seconds: Optional[float] = None
|
||||
|
||||
@dataclass
|
||||
class StationCreateRequest:
|
||||
"""Request model for creating a new station"""
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: StationStatus = StationStatus.ACTIVE
|
||||
|
||||
@dataclass
|
||||
class StationUpdateRequest:
|
||||
"""Request model for updating an existing station"""
|
||||
thai_name: Optional[str] = None
|
||||
english_name: Optional[str] = None
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: Optional[StationStatus] = None
|
167
src/rate_limiter.py
Normal file
167
src/rate_limiter.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rate limiting utilities for API requests
|
||||
"""
|
||||
|
||||
import time
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
from collections import deque
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RateLimiter:
|
||||
"""Token bucket rate limiter"""
|
||||
|
||||
def __init__(self, max_requests: int, time_window_seconds: int):
|
||||
"""
|
||||
Initialize rate limiter
|
||||
|
||||
Args:
|
||||
max_requests: Maximum number of requests allowed
|
||||
time_window_seconds: Time window in seconds
|
||||
"""
|
||||
self.max_requests = max_requests
|
||||
self.time_window = time_window_seconds
|
||||
self.requests = deque()
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def is_allowed(self) -> bool:
|
||||
"""Check if a request is allowed"""
|
||||
with self._lock:
|
||||
now = time.time()
|
||||
|
||||
# Remove old requests outside the time window
|
||||
while self.requests and self.requests[0] <= now - self.time_window:
|
||||
self.requests.popleft()
|
||||
|
||||
# Check if we can make a new request
|
||||
if len(self.requests) < self.max_requests:
|
||||
self.requests.append(now)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def wait_time(self) -> float:
|
||||
"""Get the time to wait before next request is allowed"""
|
||||
with self._lock:
|
||||
if len(self.requests) < self.max_requests:
|
||||
return 0.0
|
||||
|
||||
# Time until the oldest request expires
|
||||
oldest_request = self.requests[0]
|
||||
return max(0.0, (oldest_request + self.time_window) - time.time())
|
||||
|
||||
def wait_if_needed(self):
|
||||
"""Block until a request is allowed"""
|
||||
wait_time = self.wait_time()
|
||||
if wait_time > 0:
|
||||
logger.info(f"Rate limit reached, waiting {wait_time:.2f} seconds")
|
||||
time.sleep(wait_time)
|
||||
|
||||
class AdaptiveRateLimiter:
|
||||
"""Adaptive rate limiter that adjusts based on response times"""
|
||||
|
||||
def __init__(self, initial_rate: float = 1.0, min_rate: float = 0.1, max_rate: float = 10.0):
|
||||
"""
|
||||
Initialize adaptive rate limiter
|
||||
|
||||
Args:
|
||||
initial_rate: Initial requests per second
|
||||
min_rate: Minimum requests per second
|
||||
max_rate: Maximum requests per second
|
||||
"""
|
||||
self.current_rate = initial_rate
|
||||
self.min_rate = min_rate
|
||||
self.max_rate = max_rate
|
||||
self.last_request_time = 0.0
|
||||
self.response_times = deque(maxlen=10)
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def wait_and_record(self, response_time: Optional[float] = None):
|
||||
"""Wait for rate limit and record response time"""
|
||||
with self._lock:
|
||||
now = time.time()
|
||||
|
||||
# Calculate wait time based on current rate
|
||||
time_since_last = now - self.last_request_time
|
||||
min_interval = 1.0 / self.current_rate
|
||||
|
||||
if time_since_last < min_interval:
|
||||
wait_time = min_interval - time_since_last
|
||||
time.sleep(wait_time)
|
||||
now = time.time()
|
||||
|
||||
self.last_request_time = now
|
||||
|
||||
# Record response time and adjust rate
|
||||
if response_time is not None:
|
||||
self.response_times.append(response_time)
|
||||
self._adjust_rate()
|
||||
|
||||
def _adjust_rate(self):
|
||||
"""Adjust rate based on recent response times"""
|
||||
if len(self.response_times) < 3:
|
||||
return
|
||||
|
||||
avg_response_time = sum(self.response_times) / len(self.response_times)
|
||||
|
||||
# Decrease rate if responses are slow
|
||||
if avg_response_time > 5.0: # 5 seconds
|
||||
self.current_rate = max(self.min_rate, self.current_rate * 0.8)
|
||||
logger.info(f"Decreased rate to {self.current_rate:.2f} req/s due to slow responses")
|
||||
|
||||
# Increase rate if responses are fast
|
||||
elif avg_response_time < 1.0: # 1 second
|
||||
self.current_rate = min(self.max_rate, self.current_rate * 1.1)
|
||||
logger.debug(f"Increased rate to {self.current_rate:.2f} req/s")
|
||||
|
||||
class RequestTracker:
|
||||
"""Track API request statistics"""
|
||||
|
||||
def __init__(self):
|
||||
self.total_requests = 0
|
||||
self.successful_requests = 0
|
||||
self.failed_requests = 0
|
||||
self.total_response_time = 0.0
|
||||
self.last_request_time = None
|
||||
self.error_count_by_type = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def record_request(self, success: bool, response_time: float, error_type: Optional[str] = None):
|
||||
"""Record a request"""
|
||||
with self._lock:
|
||||
self.total_requests += 1
|
||||
self.total_response_time += response_time
|
||||
self.last_request_time = datetime.now()
|
||||
|
||||
if success:
|
||||
self.successful_requests += 1
|
||||
else:
|
||||
self.failed_requests += 1
|
||||
if error_type:
|
||||
self.error_count_by_type[error_type] = self.error_count_by_type.get(error_type, 0) + 1
|
||||
|
||||
def get_stats(self) -> Dict[str, any]:
|
||||
"""Get request statistics"""
|
||||
with self._lock:
|
||||
if self.total_requests == 0:
|
||||
return {
|
||||
'total_requests': 0,
|
||||
'success_rate': 0.0,
|
||||
'average_response_time': 0.0,
|
||||
'last_request_time': None,
|
||||
'error_breakdown': {}
|
||||
}
|
||||
|
||||
return {
|
||||
'total_requests': self.total_requests,
|
||||
'successful_requests': self.successful_requests,
|
||||
'failed_requests': self.failed_requests,
|
||||
'success_rate': self.successful_requests / self.total_requests,
|
||||
'average_response_time': self.total_response_time / self.total_requests,
|
||||
'last_request_time': self.last_request_time.isoformat() if self.last_request_time else None,
|
||||
'error_breakdown': dict(self.error_count_by_type)
|
||||
}
|
116
src/validators.py
Normal file
116
src/validators.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data validation utilities for water monitoring system
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
import logging
|
||||
from .exceptions import DataValidationError
|
||||
from .models import WaterMeasurement, StationInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DataValidator:
|
||||
"""Validates water measurement data"""
|
||||
|
||||
# Reasonable ranges for water measurements
|
||||
WATER_LEVEL_MIN = -10.0 # meters
|
||||
WATER_LEVEL_MAX = 50.0 # meters
|
||||
DISCHARGE_MIN = 0.0 # cms
|
||||
DISCHARGE_MAX = 10000.0 # cms
|
||||
DISCHARGE_PERCENT_MIN = 0.0
|
||||
DISCHARGE_PERCENT_MAX = 200.0 # Allow some overflow
|
||||
|
||||
@classmethod
|
||||
def validate_measurement(cls, measurement: Dict[str, Any]) -> bool:
|
||||
"""Validate a single measurement"""
|
||||
try:
|
||||
# Check required fields
|
||||
required_fields = ['timestamp', 'station_id', 'water_level', 'discharge']
|
||||
for field in required_fields:
|
||||
if field not in measurement:
|
||||
logger.warning(f"Missing required field: {field}")
|
||||
return False
|
||||
|
||||
# Validate timestamp
|
||||
if not isinstance(measurement['timestamp'], datetime):
|
||||
logger.warning(f"Invalid timestamp type: {type(measurement['timestamp'])}")
|
||||
return False
|
||||
|
||||
# Validate water level
|
||||
water_level = float(measurement['water_level'])
|
||||
if not (cls.WATER_LEVEL_MIN <= water_level <= cls.WATER_LEVEL_MAX):
|
||||
logger.warning(f"Water level out of range: {water_level}")
|
||||
return False
|
||||
|
||||
# Validate discharge
|
||||
discharge = float(measurement['discharge'])
|
||||
if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX):
|
||||
logger.warning(f"Discharge out of range: {discharge}")
|
||||
return False
|
||||
|
||||
# Validate discharge percent if present
|
||||
if measurement.get('discharge_percent') is not None:
|
||||
discharge_percent = float(measurement['discharge_percent'])
|
||||
if not (cls.DISCHARGE_PERCENT_MIN <= discharge_percent <= cls.DISCHARGE_PERCENT_MAX):
|
||||
logger.warning(f"Discharge percent out of range: {discharge_percent}")
|
||||
return False
|
||||
|
||||
# Validate station ID
|
||||
station_id = measurement['station_id']
|
||||
if not isinstance(station_id, int) or station_id < 1 or station_id > 16:
|
||||
logger.warning(f"Invalid station ID: {station_id}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Data validation error: {e}")
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def validate_measurements(cls, measurements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Validate and filter a list of measurements"""
|
||||
valid_measurements = []
|
||||
invalid_count = 0
|
||||
|
||||
for measurement in measurements:
|
||||
if cls.validate_measurement(measurement):
|
||||
valid_measurements.append(measurement)
|
||||
else:
|
||||
invalid_count += 1
|
||||
|
||||
if invalid_count > 0:
|
||||
logger.warning(f"Filtered out {invalid_count} invalid measurements")
|
||||
|
||||
return valid_measurements
|
||||
|
||||
@classmethod
|
||||
def validate_station_info(cls, station_info: Dict[str, Any]) -> bool:
|
||||
"""Validate station information"""
|
||||
try:
|
||||
required_fields = ['station_id', 'station_code', 'thai_name', 'english_name']
|
||||
for field in required_fields:
|
||||
if field not in station_info or not station_info[field]:
|
||||
logger.warning(f"Missing or empty station field: {field}")
|
||||
return False
|
||||
|
||||
# Validate coordinates if present
|
||||
if station_info.get('latitude') is not None:
|
||||
lat = float(station_info['latitude'])
|
||||
if not (-90 <= lat <= 90):
|
||||
logger.warning(f"Invalid latitude: {lat}")
|
||||
return False
|
||||
|
||||
if station_info.get('longitude') is not None:
|
||||
lon = float(station_info['longitude'])
|
||||
if not (-180 <= lon <= 180):
|
||||
logger.warning(f"Invalid longitude: {lon}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Station validation error: {e}")
|
||||
return False
|
539
src/water_scraper_v3.py
Normal file
539
src/water_scraper_v3.py
Normal file
@@ -0,0 +1,539 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced Water Monitor Scraper with multiple database backend support
|
||||
"""
|
||||
|
||||
import requests
|
||||
import datetime
|
||||
import time
|
||||
import schedule
|
||||
import json
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
try:
|
||||
from .database_adapters import create_database_adapter, DatabaseAdapter
|
||||
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationStatus
|
||||
from .validators import DataValidator
|
||||
from .exceptions import APIConnectionError, DataValidationError, DatabaseConnectionError
|
||||
from .metrics import increment_counter, set_gauge, record_histogram, Timer
|
||||
from .rate_limiter import RateLimiter, RequestTracker
|
||||
from .logging_config import get_logger
|
||||
except ImportError:
|
||||
# Handle case when running as standalone script
|
||||
from database_adapters import create_database_adapter, DatabaseAdapter
|
||||
import logging
|
||||
|
||||
def get_logger(name):
|
||||
return logging.getLogger(name)
|
||||
|
||||
def increment_counter(*args, **kwargs):
|
||||
pass
|
||||
|
||||
def set_gauge(*args, **kwargs):
|
||||
pass
|
||||
|
||||
def record_histogram(*args, **kwargs):
|
||||
pass
|
||||
|
||||
class Timer:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *args):
|
||||
pass
|
||||
|
||||
class RateLimiter:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
def wait_if_needed(self):
|
||||
pass
|
||||
|
||||
class RequestTracker:
|
||||
def __init__(self):
|
||||
pass
|
||||
def record_request(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
class DataValidator:
|
||||
@staticmethod
|
||||
def validate_measurements(measurements):
|
||||
return measurements
|
||||
|
||||
# Get logger instance
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class EnhancedWaterMonitorScraper:
|
||||
def __init__(self, db_config: Dict):
|
||||
"""
|
||||
Initialize scraper with database configuration
|
||||
|
||||
Args:
|
||||
db_config: Database configuration dictionary
|
||||
"""
|
||||
self.api_url = "https://hyd-app-db.rid.go.th/webservice/getGroupHourlyWaterLevelReportAllHL.ashx"
|
||||
self.db_config = db_config.copy() # Make a copy to avoid modifying original
|
||||
self.db_adapter = None
|
||||
|
||||
# Scheduler state tracking
|
||||
self.last_successful_update = None
|
||||
self.retry_mode = False
|
||||
self.next_hourly_check = None
|
||||
|
||||
# Rate limiting and request tracking
|
||||
self.rate_limiter = RateLimiter(max_requests=10, time_window_seconds=60)
|
||||
self.request_tracker = RequestTracker()
|
||||
|
||||
# HTTP session for API requests
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
# Station mapping with correct names and geolocation data
|
||||
self.station_mapping = {
|
||||
'1': {
|
||||
'code': 'P.20',
|
||||
'thai_name': 'บ้านเชียงดาว',
|
||||
'english_name': 'Ban Chiang Dao',
|
||||
'latitude': 19.36731448032191,
|
||||
'longitude': 98.9688487015384,
|
||||
'geohash': None
|
||||
},
|
||||
'2': {
|
||||
'code': 'P.75',
|
||||
'thai_name': 'บ้านช่อแล',
|
||||
'english_name': 'Ban Chai Lat',
|
||||
'latitude': 19.145972935976225,
|
||||
'longitude': 99.00735727149247,
|
||||
'geohash': None
|
||||
},
|
||||
'3': {
|
||||
'code': 'P.92',
|
||||
'thai_name': 'บ้านเมืองกึ๊ด',
|
||||
'english_name': 'Ban Muang Aut',
|
||||
'latitude': 19.220518985435646,
|
||||
'longitude': 98.84733127007874,
|
||||
'geohash': None
|
||||
},
|
||||
'4': {
|
||||
'code': 'P.4A',
|
||||
'thai_name': 'บ้านแม่แตง',
|
||||
'english_name': 'Ban Mae Taeng',
|
||||
'latitude': 19.1222679952378,
|
||||
'longitude': 98.94437462084075,
|
||||
'geohash': None
|
||||
},
|
||||
'5': {
|
||||
'code': 'P.67',
|
||||
'thai_name': 'บ้านแม่แต',
|
||||
'english_name': 'Ban Tae',
|
||||
'latitude': 19.009762080002453,
|
||||
'longitude': 98.95978297135508,
|
||||
'geohash': None
|
||||
},
|
||||
'6': {
|
||||
'code': 'P.21',
|
||||
'thai_name': 'บ้านริมใต้',
|
||||
'english_name': 'Ban Rim Tai',
|
||||
'latitude': 18.917459157963293,
|
||||
'longitude': 98.97018092996231,
|
||||
'geohash': None
|
||||
},
|
||||
'7': {
|
||||
'code': 'P.103',
|
||||
'thai_name': 'สะพานวงแหวนรอบ 3',
|
||||
'english_name': 'Ring Bridge 3',
|
||||
'latitude': 18.86664807441675,
|
||||
'longitude': 98.9781107622432,
|
||||
'geohash': None
|
||||
},
|
||||
'8': {
|
||||
'code': 'P.1',
|
||||
'thai_name': 'สะพานนวรัฐ',
|
||||
'english_name': 'Nawarat Bridge',
|
||||
'latitude': 18.7875,
|
||||
'longitude': 99.0045,
|
||||
'geohash': 'w5q6uuhvfcfp25'
|
||||
},
|
||||
'9': {
|
||||
'code': 'P.82',
|
||||
'thai_name': 'บ้านสบวิน',
|
||||
'english_name': 'Ban Sob win',
|
||||
'latitude': 18.6519444,
|
||||
'longitude': 98.69,
|
||||
'geohash': None
|
||||
},
|
||||
'10': {
|
||||
'code': 'P.84',
|
||||
'thai_name': 'บ้านพันตน',
|
||||
'english_name': 'Ban Panton',
|
||||
'latitude': 18.591315274591334,
|
||||
'longitude': 98.79657058508496,
|
||||
'geohash': None
|
||||
},
|
||||
'11': {
|
||||
'code': 'P.81',
|
||||
'thai_name': 'บ้านโป่ง',
|
||||
'english_name': 'Ban Pong',
|
||||
'latitude': 13.805661820610888,
|
||||
'longitude': 99.87174946122846,
|
||||
'geohash': None
|
||||
},
|
||||
'12': {
|
||||
'code': 'P.5',
|
||||
'thai_name': 'สะพานท่านาง',
|
||||
'english_name': 'Tha Nang Bridge',
|
||||
'latitude': 18.580269437546555,
|
||||
'longitude': 99.01021397084362,
|
||||
'geohash': None
|
||||
},
|
||||
'13': {
|
||||
'code': 'P.77',
|
||||
'thai_name': 'บ้านสบแม่สะป๊วด',
|
||||
'english_name': 'Baan Sop Mae Sapuord',
|
||||
'latitude': 18.433347475179602,
|
||||
'longitude': 99.08510036666527,
|
||||
'geohash': None
|
||||
},
|
||||
'14': {
|
||||
'code': 'P.87',
|
||||
'thai_name': 'บ้านป่าซาง',
|
||||
'english_name': 'Ban Pa Sang',
|
||||
'latitude': 18.519121825282486,
|
||||
'longitude': 98.94224374138238,
|
||||
'geohash': None
|
||||
},
|
||||
'15': {
|
||||
'code': 'P.76',
|
||||
'thai_name': 'บ้านแม่อีไฮ',
|
||||
'english_name': 'Banb Mae I Hai',
|
||||
'latitude': 18.141465831254404,
|
||||
'longitude': 98.89642508267181,
|
||||
'geohash': None
|
||||
},
|
||||
'16': {
|
||||
'code': 'P.85',
|
||||
'thai_name': 'บ้านหล่ายแก้ว',
|
||||
'english_name': 'Baan Lai Kaew',
|
||||
'latitude': 18.17856361002219,
|
||||
'longitude': 98.63023114782287,
|
||||
'geohash': None
|
||||
}
|
||||
}
|
||||
|
||||
self.init_database()
|
||||
|
||||
def init_database(self):
|
||||
"""Initialize database connection"""
|
||||
try:
|
||||
# Extract db_type and pass remaining config as kwargs
|
||||
db_config_copy = self.db_config.copy()
|
||||
db_type = db_config_copy.pop('type')
|
||||
self.db_adapter = create_database_adapter(db_type, **db_config_copy)
|
||||
success = self.db_adapter.connect()
|
||||
|
||||
if success:
|
||||
logger.info(f"Successfully connected to {db_type.upper()} database")
|
||||
set_gauge("database_connected", 1)
|
||||
increment_counter("database_connections_successful")
|
||||
else:
|
||||
logger.error(f"Failed to connect to {db_type.upper()} database")
|
||||
set_gauge("database_connected", 0)
|
||||
increment_counter("database_connections_failed")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing database: {e}")
|
||||
set_gauge("database_connected", 0)
|
||||
increment_counter("database_connections_failed")
|
||||
self.db_adapter = None
|
||||
|
||||
def fetch_water_data_for_date(self, target_date: datetime.datetime) -> Optional[List[Dict]]:
|
||||
"""Fetch water levels and discharge data from API for a specific date"""
|
||||
with Timer("api_request_duration"):
|
||||
try:
|
||||
logger.info(f"Starting data fetch from API for date: {target_date.strftime('%Y-%m-%d')}")
|
||||
|
||||
# Rate limiting
|
||||
self.rate_limiter.wait_if_needed()
|
||||
|
||||
# Create Thai format date (Buddhist calendar)
|
||||
thai_year = target_date.year + 543
|
||||
thai_date = f"{target_date.day:02d}/{target_date.month:02d}/{thai_year}"
|
||||
|
||||
# API parameters
|
||||
payload = {
|
||||
'DW[UtokID]': '1',
|
||||
'DW[BasinID]': '6',
|
||||
'DW[TimeCurrent]': thai_date,
|
||||
'_search': 'false',
|
||||
'nd': str(int(time.time() * 1000)),
|
||||
'rows': '100',
|
||||
'page': '1',
|
||||
'sidx': 'indexhourly',
|
||||
'sord': 'asc'
|
||||
}
|
||||
|
||||
logger.debug(f"API parameters: {payload}")
|
||||
|
||||
# POST request to API
|
||||
start_time = time.time()
|
||||
response = self.session.post(self.api_url, data=payload, timeout=30)
|
||||
response_time = time.time() - start_time
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
# Record successful request
|
||||
self.request_tracker.record_request(True, response_time)
|
||||
increment_counter("api_requests_successful")
|
||||
record_histogram("api_response_time", response_time)
|
||||
|
||||
# Parse JSON response
|
||||
try:
|
||||
json_data = response.json()
|
||||
logger.debug(f"API response received: {len(str(json_data))} characters")
|
||||
except ValueError as e:
|
||||
logger.error(f"Error parsing JSON response: {e}")
|
||||
self.request_tracker.record_request(False, response_time, "json_parse_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
|
||||
water_data = []
|
||||
|
||||
# Parse JSON data
|
||||
if json_data and isinstance(json_data, dict) and 'rows' in json_data:
|
||||
for row in json_data['rows']:
|
||||
try:
|
||||
# Parse timestamp
|
||||
time_str = row.get('hourlytime', '')
|
||||
if not time_str:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Format: "1.00", "2.00", ..., "24.00"
|
||||
api_hour = int(float(time_str))
|
||||
if api_hour < 1 or api_hour > 24:
|
||||
continue
|
||||
|
||||
if api_hour == 24:
|
||||
# Hour 24 = midnight (00:00) of the next day
|
||||
data_time = target_date.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
data_time = data_time + datetime.timedelta(days=1)
|
||||
else:
|
||||
# Hours 1-23 = 01:00-23:00 of the same day
|
||||
data_time = target_date.replace(hour=api_hour, minute=0, second=0, microsecond=0)
|
||||
|
||||
except (ValueError, IndexError):
|
||||
logger.warning(f"Could not parse timestamp: {time_str}")
|
||||
continue
|
||||
|
||||
# Parse all water levels and discharge values
|
||||
station_count = 0
|
||||
for station_num in range(1, 17): # Stations 1-16
|
||||
wl_key = f'wlvalues{station_num}'
|
||||
q_key = f'qvalues{station_num}'
|
||||
qp_key = f'QPercent{station_num}'
|
||||
|
||||
# Check if both water level and discharge data exist
|
||||
if wl_key in row and q_key in row:
|
||||
try:
|
||||
water_level = row[wl_key]
|
||||
discharge = row[q_key]
|
||||
discharge_percent = row.get(qp_key)
|
||||
|
||||
# Skip if values are None or invalid
|
||||
if water_level is None or discharge is None:
|
||||
continue
|
||||
|
||||
# Convert to float
|
||||
water_level = float(water_level)
|
||||
discharge = float(discharge)
|
||||
discharge_percent = float(discharge_percent) if discharge_percent is not None else None
|
||||
|
||||
station_info = self.station_mapping.get(str(station_num), {
|
||||
'code': f'P.{19+station_num}',
|
||||
'thai_name': f'Station {station_num}',
|
||||
'english_name': f'Station {station_num}'
|
||||
})
|
||||
|
||||
water_data.append({
|
||||
'timestamp': data_time,
|
||||
'station_id': station_num,
|
||||
'station_code': station_info['code'],
|
||||
'station_name_en': station_info['english_name'],
|
||||
'station_name_th': station_info['thai_name'],
|
||||
'latitude': station_info.get('latitude'),
|
||||
'longitude': station_info.get('longitude'),
|
||||
'geohash': station_info.get('geohash'),
|
||||
'water_level': water_level,
|
||||
'water_level_unit': 'm',
|
||||
'discharge': discharge,
|
||||
'discharge_unit': 'cms',
|
||||
'discharge_percent': discharge_percent,
|
||||
'status': 'active'
|
||||
})
|
||||
|
||||
station_count += 1
|
||||
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.warning(f"Could not parse data for station {station_num}: {e}")
|
||||
continue
|
||||
|
||||
logger.debug(f"Processed {station_count} stations for time {time_str}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing data row: {e}")
|
||||
continue
|
||||
|
||||
# Validate data
|
||||
water_data = DataValidator.validate_measurements(water_data)
|
||||
|
||||
logger.info(f"Successfully fetched {len(water_data)} data points from API for {target_date.strftime('%Y-%m-%d')}")
|
||||
return water_data
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Network error fetching API data: {e}")
|
||||
self.request_tracker.record_request(False, 0, "network_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error fetching API data: {e}")
|
||||
self.request_tracker.record_request(False, 0, "unexpected_error")
|
||||
increment_counter("api_requests_failed")
|
||||
return None
|
||||
|
||||
def fetch_water_data(self) -> Optional[List[Dict]]:
|
||||
"""Fetch water levels and discharge data from API for current date"""
|
||||
current_date = datetime.datetime.now()
|
||||
return self.fetch_water_data_for_date(current_date)
|
||||
|
||||
def save_to_database(self, water_data: List[Dict], max_retries: int = 3) -> bool:
|
||||
"""Save water measurements to database with retry logic"""
|
||||
if not self.db_adapter:
|
||||
logger.error("Database adapter not initialized")
|
||||
return False
|
||||
|
||||
if not water_data:
|
||||
logger.warning("No data to save")
|
||||
return False
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
success = self.db_adapter.save_measurements(water_data)
|
||||
if success:
|
||||
logger.info(f"Successfully saved {len(water_data)} measurements to database")
|
||||
increment_counter("database_saves_successful")
|
||||
set_gauge("last_save_timestamp", time.time())
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Save attempt {attempt + 1} failed, retrying...")
|
||||
|
||||
except Exception as e:
|
||||
if "database is locked" in str(e).lower() and attempt < max_retries - 1:
|
||||
logger.warning(f"Database locked on attempt {attempt + 1}, retrying in {2 ** attempt} seconds...")
|
||||
time.sleep(2 ** attempt) # Exponential backoff
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Error saving to database (attempt {attempt + 1}): {e}")
|
||||
if attempt == max_retries - 1:
|
||||
increment_counter("database_saves_failed")
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def get_latest_data(self, limit: int = 100) -> List[Dict]:
|
||||
"""Get latest data from database"""
|
||||
if not self.db_adapter:
|
||||
return []
|
||||
|
||||
try:
|
||||
return self.db_adapter.get_latest_measurements(limit=limit)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting latest data: {e}")
|
||||
return []
|
||||
|
||||
def run_scraping_cycle(self) -> bool:
|
||||
"""Run a complete scraping cycle"""
|
||||
logger.info("Starting scraping cycle...")
|
||||
|
||||
try:
|
||||
# Fetch current data
|
||||
water_data = self.fetch_water_data()
|
||||
if water_data:
|
||||
success = self.save_to_database(water_data)
|
||||
if success:
|
||||
logger.info("Scraping cycle completed successfully")
|
||||
increment_counter("scraping_cycles_successful")
|
||||
return True
|
||||
else:
|
||||
logger.error("Failed to save data")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
else:
|
||||
logger.warning("No data fetched")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Scraping cycle failed: {e}")
|
||||
increment_counter("scraping_cycles_failed")
|
||||
return False
|
||||
|
||||
# Main execution for standalone usage
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
# Configure basic logging for standalone usage
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler('water_monitor.log'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Thailand Water Monitor")
|
||||
parser.add_argument("--test", action="store_true", help="Run single test cycle")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Default SQLite configuration
|
||||
db_config = {
|
||||
'type': 'sqlite',
|
||||
'connection_string': 'sqlite:///water_levels.db'
|
||||
}
|
||||
|
||||
try:
|
||||
scraper = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
if args.test:
|
||||
logger.info("Running test cycle...")
|
||||
result = scraper.run_scraping_cycle()
|
||||
if result:
|
||||
logger.info("✅ Test completed successfully")
|
||||
sys.exit(0)
|
||||
else:
|
||||
logger.error("❌ Test failed")
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.info("Starting continuous monitoring...")
|
||||
schedule.every(1).hours.do(scraper.run_scraping_cycle)
|
||||
|
||||
# Run initial cycle
|
||||
scraper.run_scraping_cycle()
|
||||
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(60)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Monitoring stopped by user")
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
sys.exit(1)
|
620
src/web_api.py
Normal file
620
src/web_api.py
Normal file
@@ -0,0 +1,620 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
FastAPI web interface for water monitoring system
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict, Any, Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .water_scraper_v3 import EnhancedWaterMonitorScraper
|
||||
from .config import Config
|
||||
from .models import WaterMeasurement, StationInfo, ScrapingResult, StationCreateRequest, StationUpdateRequest, StationStatus
|
||||
from .health_check import HealthCheckManager, DatabaseHealthCheck, APIHealthCheck, MemoryHealthCheck
|
||||
from .metrics import get_metrics_collector, increment_counter, set_gauge
|
||||
from .logging_config import setup_logging, get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Pydantic models for API responses
|
||||
class StationResponse(BaseModel):
|
||||
station_id: int
|
||||
station_code: str
|
||||
thai_name: str
|
||||
english_name: str
|
||||
latitude: Optional[float] = None
|
||||
longitude: Optional[float] = None
|
||||
geohash: Optional[str] = None
|
||||
status: str = "active"
|
||||
|
||||
class StationCreateModel(BaseModel):
|
||||
station_code: str = Field(..., description="Station code (e.g., P.1, P.20)")
|
||||
thai_name: str = Field(..., description="Thai name of the station")
|
||||
english_name: str = Field(..., description="English name of the station")
|
||||
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
|
||||
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
|
||||
geohash: Optional[str] = Field(None, description="Geohash for the location")
|
||||
status: str = Field("active", description="Station status")
|
||||
|
||||
class StationUpdateModel(BaseModel):
|
||||
thai_name: Optional[str] = Field(None, description="Thai name of the station")
|
||||
english_name: Optional[str] = Field(None, description="English name of the station")
|
||||
latitude: Optional[float] = Field(None, ge=-90, le=90, description="Latitude coordinate")
|
||||
longitude: Optional[float] = Field(None, ge=-180, le=180, description="Longitude coordinate")
|
||||
geohash: Optional[str] = Field(None, description="Geohash for the location")
|
||||
status: Optional[str] = Field(None, description="Station status")
|
||||
|
||||
class MeasurementResponse(BaseModel):
|
||||
timestamp: datetime
|
||||
station_code: str
|
||||
station_name_en: str
|
||||
station_name_th: str
|
||||
water_level: float
|
||||
discharge: float
|
||||
discharge_percent: Optional[float] = None
|
||||
status: str = "active"
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
overall_status: str
|
||||
timestamp: str
|
||||
checks: Dict[str, Dict[str, Any]]
|
||||
|
||||
class MetricsResponse(BaseModel):
|
||||
counters: Dict[str, float]
|
||||
gauges: Dict[str, float]
|
||||
histograms: Dict[str, Dict[str, float]]
|
||||
|
||||
class ScrapingStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
last_run: Optional[datetime] = None
|
||||
next_run: Optional[datetime] = None
|
||||
total_runs: int = 0
|
||||
successful_runs: int = 0
|
||||
failed_runs: int = 0
|
||||
|
||||
# Global application state
|
||||
app_state = {
|
||||
"scraper": None,
|
||||
"health_manager": None,
|
||||
"scraping_task": None,
|
||||
"is_scraping": False,
|
||||
"scraping_stats": {
|
||||
"total_runs": 0,
|
||||
"successful_runs": 0,
|
||||
"failed_runs": 0,
|
||||
"last_run": None,
|
||||
"next_run": None
|
||||
}
|
||||
}
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan manager"""
|
||||
# Startup
|
||||
logger.info("Starting Water Monitor API...")
|
||||
|
||||
# Initialize configuration
|
||||
try:
|
||||
Config.validate_config()
|
||||
logger.info("Configuration validated successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Configuration validation failed: {e}")
|
||||
raise
|
||||
|
||||
# Initialize scraper
|
||||
db_config = Config.get_database_config()
|
||||
app_state["scraper"] = EnhancedWaterMonitorScraper(db_config)
|
||||
|
||||
# Initialize health checks
|
||||
health_manager = HealthCheckManager()
|
||||
health_manager.add_check(DatabaseHealthCheck(app_state["scraper"].db_adapter))
|
||||
health_manager.add_check(APIHealthCheck(Config.API_URL, app_state["scraper"].session))
|
||||
health_manager.add_check(MemoryHealthCheck(max_memory_mb=1000))
|
||||
app_state["health_manager"] = health_manager
|
||||
|
||||
# Start background scraping task
|
||||
app_state["scraping_task"] = asyncio.create_task(background_scraping_task())
|
||||
|
||||
logger.info("Water Monitor API started successfully")
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Shutting down Water Monitor API...")
|
||||
|
||||
if app_state["scraping_task"]:
|
||||
app_state["scraping_task"].cancel()
|
||||
try:
|
||||
await app_state["scraping_task"]
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
logger.info("Water Monitor API shutdown complete")
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="Northern Thailand Ping River Monitor API",
|
||||
description="Real-time water level monitoring system for Northern Thailand's Ping River Basin stations",
|
||||
version="3.1.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# Add CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # Configure appropriately for production
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
async def background_scraping_task():
|
||||
"""Background task for periodic data scraping"""
|
||||
while True:
|
||||
try:
|
||||
if not app_state["is_scraping"]:
|
||||
app_state["is_scraping"] = True
|
||||
|
||||
# Run scraping cycle
|
||||
scraper = app_state["scraper"]
|
||||
if scraper:
|
||||
logger.info("Starting background scraping cycle")
|
||||
start_time = datetime.now()
|
||||
|
||||
try:
|
||||
result = scraper.run_scraping_cycle()
|
||||
|
||||
# Update stats
|
||||
app_state["scraping_stats"]["total_runs"] += 1
|
||||
app_state["scraping_stats"]["last_run"] = start_time
|
||||
|
||||
if result:
|
||||
app_state["scraping_stats"]["successful_runs"] += 1
|
||||
increment_counter("scraping_cycles_successful")
|
||||
logger.info("Background scraping cycle completed successfully")
|
||||
else:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("scraping_cycles_failed")
|
||||
logger.warning("Background scraping cycle completed with no new data")
|
||||
|
||||
# Update metrics
|
||||
set_gauge("last_scraping_timestamp", start_time.timestamp())
|
||||
|
||||
except Exception as e:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("scraping_cycles_failed")
|
||||
logger.error(f"Background scraping cycle failed: {e}")
|
||||
|
||||
app_state["is_scraping"] = False
|
||||
|
||||
# Calculate next run time
|
||||
interval_seconds = Config.SCRAPING_INTERVAL_HOURS * 3600
|
||||
app_state["scraping_stats"]["next_run"] = datetime.now() + timedelta(seconds=interval_seconds)
|
||||
|
||||
# Wait for next cycle
|
||||
await asyncio.sleep(interval_seconds)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Background scraping task cancelled")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background scraping task: {e}")
|
||||
await asyncio.sleep(60) # Wait a minute before retrying
|
||||
|
||||
# API Routes
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
|
||||
async def root():
|
||||
"""Root endpoint with basic dashboard"""
|
||||
html_content = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Northern Thailand Ping River Monitor</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; margin: 40px; }
|
||||
.header { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
|
||||
.section { margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }
|
||||
.status-healthy { color: #27ae60; }
|
||||
.status-degraded { color: #f39c12; }
|
||||
.status-unhealthy { color: #e74c3c; }
|
||||
.endpoint { background: #f8f9fa; padding: 10px; margin: 5px 0; border-radius: 3px; }
|
||||
.endpoint code { color: #2c3e50; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
|
||||
<h1>🏔️ Northern Thailand Ping River Monitor API</h1>
|
||||
<p>Real-time water level monitoring system for the Ping River Basin in Northern Thailand</p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>📊 Quick Status</h2>
|
||||
<p>API is running and monitoring 16 water stations along the Ping River</p>
|
||||
<p>Coverage: From Chiang Dao to Nakhon Sawan</p>
|
||||
<p>Data collection interval: Every hour</p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>🔗 API Endpoints</h2>
|
||||
<div class="endpoint"><code>GET /health</code> - System health status</div>
|
||||
<div class="endpoint"><code>GET /metrics</code> - Application metrics</div>
|
||||
<div class="endpoint"><code>GET /stations</code> - List all monitoring stations</div>
|
||||
<div class="endpoint"><code>POST /stations</code> - Add new monitoring station</div>
|
||||
<div class="endpoint"><code>PUT /stations/{station_id}</code> - Update station information</div>
|
||||
<div class="endpoint"><code>GET /measurements/latest</code> - Latest measurements</div>
|
||||
<div class="endpoint"><code>GET /measurements/station/{station_code}</code> - Station-specific data</div>
|
||||
<div class="endpoint"><code>POST /scrape/trigger</code> - Trigger manual data collection</div>
|
||||
<div class="endpoint"><code>GET /scraping/status</code> - Scraping status</div>
|
||||
<div class="endpoint"><code>GET /docs</code> - Interactive API documentation</div>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>📈 Monitoring</h2>
|
||||
<p>• Grafana dashboards available for data visualization</p>
|
||||
<p>• Health checks monitor database, API, and system resources</p>
|
||||
<p>• Metrics collection for performance monitoring</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return HTMLResponse(content=html_content)
|
||||
|
||||
@app.get("/health", response_model=HealthResponse)
|
||||
async def get_health():
|
||||
"""Get system health status"""
|
||||
increment_counter("api_requests", labels={"endpoint": "health"})
|
||||
|
||||
health_manager = app_state["health_manager"]
|
||||
if not health_manager:
|
||||
raise HTTPException(status_code=503, detail="Health manager not initialized")
|
||||
|
||||
# Run health checks
|
||||
results = health_manager.run_all_checks()
|
||||
summary = health_manager.get_health_summary()
|
||||
|
||||
return HealthResponse(**summary)
|
||||
|
||||
@app.get("/metrics", response_model=MetricsResponse)
|
||||
async def get_metrics():
|
||||
"""Get application metrics"""
|
||||
increment_counter("api_requests", labels={"endpoint": "metrics"})
|
||||
|
||||
metrics_collector = get_metrics_collector()
|
||||
metrics = metrics_collector.get_all_metrics()
|
||||
|
||||
return MetricsResponse(**metrics)
|
||||
|
||||
@app.get("/stations", response_model=List[StationResponse])
|
||||
async def get_stations():
|
||||
"""Get list of all monitoring stations"""
|
||||
increment_counter("api_requests", labels={"endpoint": "stations"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
stations = []
|
||||
for station_id, station_info in scraper.station_mapping.items():
|
||||
stations.append(StationResponse(
|
||||
station_id=int(station_id),
|
||||
station_code=station_info["code"],
|
||||
thai_name=station_info["thai_name"],
|
||||
english_name=station_info["english_name"],
|
||||
latitude=station_info.get("latitude"),
|
||||
longitude=station_info.get("longitude"),
|
||||
status="active"
|
||||
))
|
||||
|
||||
return stations
|
||||
|
||||
@app.post("/stations", response_model=StationResponse)
|
||||
async def create_station(station: StationCreateModel):
|
||||
"""Create a new monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "create_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
try:
|
||||
# Find next available station ID
|
||||
existing_ids = [int(sid) for sid in scraper.station_mapping.keys()]
|
||||
new_station_id = max(existing_ids) + 1 if existing_ids else 1
|
||||
|
||||
# Add to station mapping
|
||||
scraper.station_mapping[str(new_station_id)] = {
|
||||
'code': station.station_code,
|
||||
'thai_name': station.thai_name,
|
||||
'english_name': station.english_name,
|
||||
'latitude': station.latitude,
|
||||
'longitude': station.longitude,
|
||||
'geohash': station.geohash
|
||||
}
|
||||
|
||||
logger.info(f"Created new station: {station.station_code} ({station.english_name})")
|
||||
|
||||
return StationResponse(
|
||||
station_id=new_station_id,
|
||||
station_code=station.station_code,
|
||||
thai_name=station.thai_name,
|
||||
english_name=station.english_name,
|
||||
latitude=station.latitude,
|
||||
longitude=station.longitude,
|
||||
geohash=station.geohash,
|
||||
status=station.status
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating station: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.put("/stations/{station_id}", response_model=StationResponse)
|
||||
async def update_station(station_id: int, updates: StationUpdateModel):
|
||||
"""Update an existing monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "update_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
try:
|
||||
station_info = scraper.station_mapping[station_key]
|
||||
|
||||
# Update fields if provided
|
||||
if updates.thai_name is not None:
|
||||
station_info['thai_name'] = updates.thai_name
|
||||
if updates.english_name is not None:
|
||||
station_info['english_name'] = updates.english_name
|
||||
if updates.latitude is not None:
|
||||
station_info['latitude'] = updates.latitude
|
||||
if updates.longitude is not None:
|
||||
station_info['longitude'] = updates.longitude
|
||||
if updates.geohash is not None:
|
||||
station_info['geohash'] = updates.geohash
|
||||
|
||||
logger.info(f"Updated station {station_id}: {station_info['code']}")
|
||||
|
||||
return StationResponse(
|
||||
station_id=station_id,
|
||||
station_code=station_info['code'],
|
||||
thai_name=station_info['thai_name'],
|
||||
english_name=station_info['english_name'],
|
||||
latitude=station_info.get('latitude'),
|
||||
longitude=station_info.get('longitude'),
|
||||
geohash=station_info.get('geohash'),
|
||||
status=updates.status or "active"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating station {station_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/stations/{station_id}")
|
||||
async def delete_station(station_id: int):
|
||||
"""Delete a monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "delete_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
try:
|
||||
station_info = scraper.station_mapping.pop(station_key)
|
||||
logger.info(f"Deleted station {station_id}: {station_info['code']}")
|
||||
|
||||
return {"message": f"Station {station_info['code']} deleted successfully"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting station {station_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/stations/{station_id}", response_model=StationResponse)
|
||||
async def get_station(station_id: int):
|
||||
"""Get details of a specific monitoring station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "get_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
station_key = str(station_id)
|
||||
if station_key not in scraper.station_mapping:
|
||||
raise HTTPException(status_code=404, detail="Station not found")
|
||||
|
||||
station_info = scraper.station_mapping[station_key]
|
||||
|
||||
return StationResponse(
|
||||
station_id=station_id,
|
||||
station_code=station_info['code'],
|
||||
thai_name=station_info['thai_name'],
|
||||
english_name=station_info['english_name'],
|
||||
latitude=station_info.get('latitude'),
|
||||
longitude=station_info.get('longitude'),
|
||||
geohash=station_info.get('geohash'),
|
||||
status="active"
|
||||
)
|
||||
|
||||
@app.get("/measurements/latest", response_model=List[MeasurementResponse])
|
||||
async def get_latest_measurements(limit: int = 100):
|
||||
"""Get latest measurements from all stations"""
|
||||
increment_counter("api_requests", labels={"endpoint": "measurements_latest"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper or not scraper.db_adapter:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
measurements = scraper.get_latest_data(limit=limit)
|
||||
|
||||
response = []
|
||||
for measurement in measurements:
|
||||
response.append(MeasurementResponse(
|
||||
timestamp=measurement["timestamp"],
|
||||
station_code=measurement["station_code"],
|
||||
station_name_en=measurement["station_name_en"],
|
||||
station_name_th=measurement["station_name_th"],
|
||||
water_level=measurement["water_level"],
|
||||
discharge=measurement["discharge"],
|
||||
discharge_percent=measurement.get("discharge_percent"),
|
||||
status=measurement.get("status", "active")
|
||||
))
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching latest measurements: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/measurements/station/{station_code}", response_model=List[MeasurementResponse])
|
||||
async def get_station_measurements(
|
||||
station_code: str,
|
||||
hours: int = 24,
|
||||
limit: int = 1000
|
||||
):
|
||||
"""Get measurements for a specific station"""
|
||||
increment_counter("api_requests", labels={"endpoint": "measurements_station"})
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper or not scraper.db_adapter:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
# Get measurements for the specified time range
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(hours=hours)
|
||||
|
||||
measurements = scraper.db_adapter.get_measurements_by_timerange(
|
||||
start_time, end_time, station_codes=[station_code]
|
||||
)
|
||||
|
||||
# Limit results
|
||||
measurements = measurements[:limit]
|
||||
|
||||
response = []
|
||||
for measurement in measurements:
|
||||
response.append(MeasurementResponse(
|
||||
timestamp=measurement["timestamp"],
|
||||
station_code=measurement["station_code"],
|
||||
station_name_en=measurement["station_name_en"],
|
||||
station_name_th=measurement["station_name_th"],
|
||||
water_level=measurement["water_level"],
|
||||
discharge=measurement["discharge"],
|
||||
discharge_percent=measurement.get("discharge_percent"),
|
||||
status=measurement.get("status", "active")
|
||||
))
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching station measurements: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/scrape/trigger")
|
||||
async def trigger_scraping(background_tasks: BackgroundTasks):
|
||||
"""Trigger manual data scraping"""
|
||||
increment_counter("api_requests", labels={"endpoint": "scrape_trigger"})
|
||||
|
||||
if app_state["is_scraping"]:
|
||||
raise HTTPException(status_code=409, detail="Scraping already in progress")
|
||||
|
||||
scraper = app_state["scraper"]
|
||||
if not scraper:
|
||||
raise HTTPException(status_code=503, detail="Scraper not initialized")
|
||||
|
||||
def run_scraping():
|
||||
"""Background task to run scraping"""
|
||||
try:
|
||||
app_state["is_scraping"] = True
|
||||
logger.info("Manual scraping triggered via API")
|
||||
|
||||
result = scraper.run_scraping_cycle()
|
||||
|
||||
# Update stats
|
||||
app_state["scraping_stats"]["total_runs"] += 1
|
||||
app_state["scraping_stats"]["last_run"] = datetime.now()
|
||||
|
||||
if result:
|
||||
app_state["scraping_stats"]["successful_runs"] += 1
|
||||
increment_counter("manual_scraping_successful")
|
||||
else:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("manual_scraping_failed")
|
||||
|
||||
except Exception as e:
|
||||
app_state["scraping_stats"]["failed_runs"] += 1
|
||||
increment_counter("manual_scraping_failed")
|
||||
logger.error(f"Manual scraping failed: {e}")
|
||||
finally:
|
||||
app_state["is_scraping"] = False
|
||||
|
||||
background_tasks.add_task(run_scraping)
|
||||
|
||||
return {"message": "Scraping triggered", "status": "started"}
|
||||
|
||||
@app.get("/scraping/status", response_model=ScrapingStatusResponse)
|
||||
async def get_scraping_status():
|
||||
"""Get current scraping status"""
|
||||
increment_counter("api_requests", labels={"endpoint": "scraping_status"})
|
||||
|
||||
stats = app_state["scraping_stats"]
|
||||
|
||||
return ScrapingStatusResponse(
|
||||
is_running=app_state["is_scraping"],
|
||||
last_run=stats["last_run"],
|
||||
next_run=stats["next_run"],
|
||||
total_runs=stats["total_runs"],
|
||||
successful_runs=stats["successful_runs"],
|
||||
failed_runs=stats["failed_runs"]
|
||||
)
|
||||
|
||||
@app.get("/config")
|
||||
async def get_config():
|
||||
"""Get current configuration (sensitive data masked)"""
|
||||
increment_counter("api_requests", labels={"endpoint": "config"})
|
||||
|
||||
config = Config.get_all_settings()
|
||||
|
||||
# Mask sensitive information
|
||||
for key in config:
|
||||
if 'password' in key.lower() or 'secret' in key.lower():
|
||||
if config[key]:
|
||||
config[key] = '*' * 8
|
||||
|
||||
return config
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
# Setup logging
|
||||
setup_logging(
|
||||
log_level=Config.LOG_LEVEL,
|
||||
log_file=Config.LOG_FILE,
|
||||
enable_console=True,
|
||||
enable_colors=True
|
||||
)
|
||||
|
||||
# Run the API server
|
||||
uvicorn.run(
|
||||
"web_api:app",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
reload=False,
|
||||
log_config=None # Use our custom logging
|
||||
)
|
61
tests/test_api.py
Normal file
61
tests/test_api.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple API test script
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
|
||||
def test_api_endpoints():
|
||||
"""Test the main API endpoints"""
|
||||
base_url = "http://localhost:8000"
|
||||
|
||||
endpoints = [
|
||||
"/health",
|
||||
"/metrics",
|
||||
"/stations",
|
||||
"/measurements/latest?limit=5",
|
||||
"/scraping/status",
|
||||
"/config"
|
||||
]
|
||||
|
||||
print("🧪 Testing API endpoints...")
|
||||
print("Make sure the API server is running: python run.py --web-api")
|
||||
print()
|
||||
|
||||
for endpoint in endpoints:
|
||||
try:
|
||||
print(f"Testing {endpoint}...")
|
||||
response = requests.get(f"{base_url}{endpoint}", timeout=5)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
print(f"✅ {endpoint} - Status: {response.status_code}")
|
||||
|
||||
# Show some sample data
|
||||
if endpoint == "/stations":
|
||||
print(f" Found {len(data)} stations")
|
||||
elif endpoint == "/measurements/latest?limit=5":
|
||||
print(f" Found {len(data)} measurements")
|
||||
if data:
|
||||
latest = data[0]
|
||||
print(f" Latest: {latest['station_code']} - {latest['water_level']}m")
|
||||
elif endpoint == "/health":
|
||||
print(f" Overall status: {data.get('overall_status', 'unknown')}")
|
||||
elif endpoint == "/scraping/status":
|
||||
print(f" Is running: {data.get('is_running', False)}")
|
||||
print(f" Total runs: {data.get('total_runs', 0)}")
|
||||
|
||||
else:
|
||||
print(f"❌ {endpoint} - Status: {response.status_code}")
|
||||
|
||||
except requests.exceptions.ConnectionError:
|
||||
print(f"❌ {endpoint} - Connection failed (is the server running?)")
|
||||
except Exception as e:
|
||||
print(f"❌ {endpoint} - Error: {e}")
|
||||
|
||||
print()
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_api_endpoints()
|
210
tests/test_integration.py
Normal file
210
tests/test_integration.py
Normal file
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration test script for the enhanced water monitoring system
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# Add project root to Python path
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
def test_imports():
|
||||
"""Test that all modules can be imported"""
|
||||
print("Testing imports...")
|
||||
|
||||
try:
|
||||
from src.config import Config
|
||||
from src.models import WaterMeasurement, StationInfo
|
||||
from src.exceptions import WaterMonitorException
|
||||
from src.validators import DataValidator
|
||||
from src.metrics import get_metrics_collector
|
||||
from src.health_check import HealthCheckManager
|
||||
from src.rate_limiter import RateLimiter
|
||||
from src.logging_config import setup_logging
|
||||
print("✅ All imports successful")
|
||||
return True
|
||||
except ImportError as e:
|
||||
print(f"❌ Import failed: {e}")
|
||||
return False
|
||||
|
||||
def test_configuration():
|
||||
"""Test configuration validation"""
|
||||
print("Testing configuration...")
|
||||
|
||||
try:
|
||||
from src.config import Config
|
||||
|
||||
# Test configuration loading
|
||||
settings = Config.get_all_settings()
|
||||
print(f"✅ Configuration loaded: {len(settings)} settings")
|
||||
|
||||
# Test database config
|
||||
db_config = Config.get_database_config()
|
||||
print(f"✅ Database config: {db_config['type']}")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"❌ Configuration test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_metrics():
|
||||
"""Test metrics collection"""
|
||||
print("Testing metrics...")
|
||||
|
||||
try:
|
||||
from src.metrics import increment_counter, set_gauge, record_histogram, get_metrics_collector
|
||||
|
||||
# Test metrics
|
||||
increment_counter("test_counter", 5)
|
||||
set_gauge("test_gauge", 42.0)
|
||||
record_histogram("test_histogram", 1.5)
|
||||
|
||||
# Get metrics
|
||||
collector = get_metrics_collector()
|
||||
metrics = collector.get_all_metrics()
|
||||
|
||||
print(f"✅ Metrics collected: {len(metrics['counters'])} counters, {len(metrics['gauges'])} gauges")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"❌ Metrics test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_validation():
|
||||
"""Test data validation"""
|
||||
print("Testing data validation...")
|
||||
|
||||
try:
|
||||
from src.validators import DataValidator
|
||||
|
||||
# Test valid measurement
|
||||
valid_measurement = {
|
||||
'timestamp': datetime.now(),
|
||||
'station_id': 1,
|
||||
'water_level': 5.5,
|
||||
'discharge': 100.0,
|
||||
'discharge_percent': 75.0
|
||||
}
|
||||
|
||||
result = DataValidator.validate_measurement(valid_measurement)
|
||||
if result:
|
||||
print("✅ Valid measurement passed validation")
|
||||
else:
|
||||
print("❌ Valid measurement failed validation")
|
||||
return False
|
||||
|
||||
# Test invalid measurement
|
||||
invalid_measurement = {
|
||||
'timestamp': datetime.now(),
|
||||
'station_id': 999, # Invalid station ID
|
||||
'water_level': -999.0, # Invalid water level
|
||||
'discharge': -50.0 # Invalid discharge
|
||||
}
|
||||
|
||||
result = DataValidator.validate_measurement(invalid_measurement)
|
||||
if not result:
|
||||
print("✅ Invalid measurement correctly rejected")
|
||||
else:
|
||||
print("❌ Invalid measurement incorrectly accepted")
|
||||
return False
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"❌ Validation test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_rate_limiter():
|
||||
"""Test rate limiting"""
|
||||
print("Testing rate limiter...")
|
||||
|
||||
try:
|
||||
from src.rate_limiter import RateLimiter
|
||||
|
||||
# Create rate limiter (2 requests per second)
|
||||
limiter = RateLimiter(max_requests=2, time_window_seconds=1)
|
||||
|
||||
# Test allowed requests
|
||||
for i in range(2):
|
||||
if not limiter.is_allowed():
|
||||
print(f"❌ Request {i+1} should be allowed")
|
||||
return False
|
||||
|
||||
# Test blocked request
|
||||
if limiter.is_allowed():
|
||||
print("❌ Third request should be blocked")
|
||||
return False
|
||||
|
||||
print("✅ Rate limiter working correctly")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"❌ Rate limiter test failed: {e}")
|
||||
return False
|
||||
|
||||
def test_logging():
|
||||
"""Test logging configuration"""
|
||||
print("Testing logging...")
|
||||
|
||||
try:
|
||||
from src.logging_config import setup_logging, get_logger
|
||||
|
||||
# Setup logging
|
||||
logger = setup_logging(log_level="INFO", enable_console=False)
|
||||
|
||||
# Get a logger
|
||||
test_logger = get_logger("test")
|
||||
test_logger.info("Test log message")
|
||||
|
||||
print("✅ Logging setup successful")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"❌ Logging test failed: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
"""Run all tests"""
|
||||
print("🧪 Running integration tests for Northern Thailand Ping River Monitor v3.1.0")
|
||||
print("=" * 60)
|
||||
|
||||
tests = [
|
||||
test_imports,
|
||||
test_configuration,
|
||||
test_metrics,
|
||||
test_validation,
|
||||
test_rate_limiter,
|
||||
test_logging
|
||||
]
|
||||
|
||||
passed = 0
|
||||
failed = 0
|
||||
|
||||
for test in tests:
|
||||
try:
|
||||
if test():
|
||||
passed += 1
|
||||
else:
|
||||
failed += 1
|
||||
except Exception as e:
|
||||
print(f"❌ Test {test.__name__} crashed: {e}")
|
||||
failed += 1
|
||||
print()
|
||||
|
||||
print("=" * 60)
|
||||
print(f"Test Results: {passed} passed, {failed} failed")
|
||||
|
||||
if failed == 0:
|
||||
print("🎉 All tests passed! The system is ready to use.")
|
||||
print("\nNext steps:")
|
||||
print("1. Run 'python run.py --test' to test data collection")
|
||||
print("2. Run 'python run.py --web-api' to start the web interface")
|
||||
print("3. Visit http://localhost:8000 for the dashboard")
|
||||
return True
|
||||
else:
|
||||
print("❌ Some tests failed. Please check the errors above.")
|
||||
return False
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = main()
|
||||
sys.exit(0 if success else 1)
|
138
tests/test_station_management.py
Normal file
138
tests/test_station_management.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for station management API endpoints
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
def test_station_management():
|
||||
"""Test the station management endpoints"""
|
||||
base_url = "http://localhost:8000"
|
||||
|
||||
print("🧪 Testing Station Management API")
|
||||
print("Make sure the API server is running: python run.py --web-api")
|
||||
print()
|
||||
|
||||
# Test data for new station
|
||||
new_station = {
|
||||
"station_code": "P.TEST",
|
||||
"thai_name": "สถานีทดสอบ",
|
||||
"english_name": "Test Station",
|
||||
"latitude": 18.7875,
|
||||
"longitude": 99.0045,
|
||||
"geohash": "w5q6uuhvfcfp25",
|
||||
"status": "active"
|
||||
}
|
||||
|
||||
try:
|
||||
# 1. List existing stations
|
||||
print("1. Listing existing stations...")
|
||||
response = requests.get(f"{base_url}/stations")
|
||||
if response.status_code == 200:
|
||||
stations = response.json()
|
||||
print(f"✅ Found {len(stations)} existing stations")
|
||||
initial_count = len(stations)
|
||||
else:
|
||||
print(f"❌ Failed to list stations: {response.status_code}")
|
||||
return
|
||||
|
||||
# 2. Create new station
|
||||
print("\n2. Creating new test station...")
|
||||
response = requests.post(
|
||||
f"{base_url}/stations",
|
||||
json=new_station,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
created_station = response.json()
|
||||
station_id = created_station["station_id"]
|
||||
print(f"✅ Created station with ID: {station_id}")
|
||||
print(f" Code: {created_station['station_code']}")
|
||||
print(f" Name: {created_station['english_name']}")
|
||||
else:
|
||||
print(f"❌ Failed to create station: {response.status_code}")
|
||||
print(f" Response: {response.text}")
|
||||
return
|
||||
|
||||
# 3. Get specific station
|
||||
print(f"\n3. Getting station details for ID {station_id}...")
|
||||
response = requests.get(f"{base_url}/stations/{station_id}")
|
||||
if response.status_code == 200:
|
||||
station_details = response.json()
|
||||
print(f"✅ Retrieved station details")
|
||||
print(f" Thai name: {station_details['thai_name']}")
|
||||
print(f" Coordinates: {station_details['latitude']}, {station_details['longitude']}")
|
||||
else:
|
||||
print(f"❌ Failed to get station: {response.status_code}")
|
||||
|
||||
# 4. Update station
|
||||
print(f"\n4. Updating station {station_id}...")
|
||||
update_data = {
|
||||
"english_name": "Updated Test Station",
|
||||
"thai_name": "สถานีทดสอบที่อัปเดต"
|
||||
}
|
||||
|
||||
response = requests.put(
|
||||
f"{base_url}/stations/{station_id}",
|
||||
json=update_data,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
updated_station = response.json()
|
||||
print(f"✅ Updated station successfully")
|
||||
print(f" New name: {updated_station['english_name']}")
|
||||
else:
|
||||
print(f"❌ Failed to update station: {response.status_code}")
|
||||
print(f" Response: {response.text}")
|
||||
|
||||
# 5. Verify station count increased
|
||||
print("\n5. Verifying station count...")
|
||||
response = requests.get(f"{base_url}/stations")
|
||||
if response.status_code == 200:
|
||||
stations = response.json()
|
||||
new_count = len(stations)
|
||||
if new_count == initial_count + 1:
|
||||
print(f"✅ Station count increased from {initial_count} to {new_count}")
|
||||
else:
|
||||
print(f"⚠️ Unexpected station count: {new_count} (expected {initial_count + 1})")
|
||||
|
||||
# 6. Delete test station
|
||||
print(f"\n6. Deleting test station {station_id}...")
|
||||
response = requests.delete(f"{base_url}/stations/{station_id}")
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
print(f"✅ Deleted station: {result['message']}")
|
||||
else:
|
||||
print(f"❌ Failed to delete station: {response.status_code}")
|
||||
|
||||
# 7. Verify station was deleted
|
||||
print("\n7. Verifying station deletion...")
|
||||
response = requests.get(f"{base_url}/stations/{station_id}")
|
||||
if response.status_code == 404:
|
||||
print("✅ Station successfully deleted (404 as expected)")
|
||||
else:
|
||||
print(f"⚠️ Station still exists: {response.status_code}")
|
||||
|
||||
# 8. Final station count check
|
||||
response = requests.get(f"{base_url}/stations")
|
||||
if response.status_code == 200:
|
||||
stations = response.json()
|
||||
final_count = len(stations)
|
||||
if final_count == initial_count:
|
||||
print(f"✅ Station count restored to original: {final_count}")
|
||||
else:
|
||||
print(f"⚠️ Station count mismatch: {final_count} (expected {initial_count})")
|
||||
|
||||
print("\n🎉 Station management tests completed!")
|
||||
|
||||
except requests.exceptions.ConnectionError:
|
||||
print("❌ Connection failed - is the API server running?")
|
||||
print("Start it with: python run.py --web-api")
|
||||
except Exception as e:
|
||||
print(f"❌ Test failed with error: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_station_management()
|
Reference in New Issue
Block a user