Add historical data import functionality

- Add import_historical_data() method to EnhancedWaterMonitorScraper
- Support date range imports with Buddhist calendar API format
- Add CLI arguments --import-historical and --force-overwrite
- Include API rate limiting and skip existing data option
- Enable importing years of historical water level data

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-28 14:46:54 +07:00
parent bd812ca5ca
commit 0ff58ecb13
2 changed files with 115 additions and 6 deletions

View File

@@ -140,29 +140,68 @@ def run_gap_filling(days_back: int):
def run_data_update(days_back: int):
    """Refresh already-stored readings for the last ``days_back`` days.

    Validates the configuration, builds an ``EnhancedWaterMonitorScraper``
    from the database configuration and calls ``update_existing_data`` on it.

    Args:
        days_back: Number of days back, forwarded to
            ``update_existing_data``.

    Returns:
        True when the update ran to completion (whether or not any data
        point changed), False when any exception was raised on the way.
    """
    logger.info(f"Updating existing data for the last {days_back} days...")

    try:
        # Validate the configuration before constructing the scraper.
        Config.validate_config()
        scraper = EnhancedWaterMonitorScraper(Config.get_database_config())

        num_updated = scraper.update_existing_data(days_back)
        summary = (
            f"✅ Updated {num_updated} data points"
            if num_updated > 0
            else "✅ No data updates needed"
        )
        logger.info(summary)
    except Exception as exc:
        # Top-level CLI boundary: report the failure through the return value.
        logger.error(f"❌ Data update failed: {exc}")
        return False

    return True
def run_historical_import(start_date_str: str, end_date_str: str, skip_existing: bool = True):
    """Import historical data for a date range.

    Args:
        start_date_str: Start of the range, formatted ``YYYY-MM-DD``.
        end_date_str: End of the range, formatted ``YYYY-MM-DD``. Must not
            be earlier than ``start_date_str``.
        skip_existing: Forwarded to ``import_historical_data``. When true, a
            log line announces that dates which already have data are
            skipped.

    Returns:
        True when the import ran to completion (even if nothing new was
        imported). False when a date is malformed, the range is inverted,
        or any exception was raised during the import.
    """
    # Parse the dates in their own narrow try block. A ValueError raised
    # later (configuration validation, scraper, API parsing) must not be
    # misreported as a date-format problem.
    try:
        start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
        end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
    except (TypeError, ValueError) as e:
        # TypeError covers non-string input, so it is reported, not raised.
        logger.error(f"❌ Invalid date format. Use YYYY-MM-DD: {e}")
        return False

    if start_date > end_date:
        logger.error("Start date must be before or equal to end date")
        return False

    try:
        logger.info(f"Importing historical data from {start_date.date()} to {end_date.date()}")
        if skip_existing:
            logger.info("Skipping dates that already have data")

        # Validate configuration
        Config.validate_config()

        # Initialize scraper
        db_config = Config.get_database_config()
        scraper = EnhancedWaterMonitorScraper(db_config)

        # Import historical data
        imported_count = scraper.import_historical_data(start_date, end_date, skip_existing)
        if imported_count > 0:
            logger.info(f"✅ Imported {imported_count} historical data points")
        else:
            logger.info("✅ No new data imported")
        return True
    except Exception as e:
        # Top-level CLI boundary: report the failure through the return value.
        logger.error(f"❌ Historical import failed: {e}")
        return False
def run_web_api():
"""Run the FastAPI web interface"""
logger.info("Starting web API server...")
@@ -320,6 +359,7 @@ Examples:
%(prog)s --web-api # Start web API server
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
%(prog)s --update-data 2 # Update existing data for last 2 days
%(prog)s --import-historical 2024-01-01 2024-01-31 # Import historical data
%(prog)s --status # Show system status
%(prog)s --alert-check # Check water levels and send alerts
%(prog)s --alert-test # Send test Matrix message
@@ -352,6 +392,19 @@ Examples:
help="Update existing data for the specified number of days back"
)
parser.add_argument(
"--import-historical",
nargs=2,
metavar=("START_DATE", "END_DATE"),
help="Import historical data for date range (YYYY-MM-DD format)"
)
parser.add_argument(
"--force-overwrite",
action="store_true",
help="Overwrite existing data when importing historical data"
)
parser.add_argument(
"--status",
action="store_true",
@@ -408,6 +461,10 @@ Examples:
success = run_gap_filling(args.fill_gaps)
elif args.update_data is not None:
success = run_data_update(args.update_data)
elif args.import_historical is not None:
start_date, end_date = args.import_historical
skip_existing = not args.force_overwrite
success = run_historical_import(start_date, end_date, skip_existing)
elif args.status:
success = show_status()
elif args.alert_check: