Add historical data import functionality

- Add import_historical_data() method to EnhancedWaterMonitorScraper
- Support date range imports with Buddhist calendar API format
- Add CLI arguments --import-historical and --force-overwrite
- Rate-limit API requests and skip dates that already have data by default
- Enable importing years of historical water level data

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-28 14:46:54 +07:00
parent bd812ca5ca
commit 0ff58ecb13
2 changed files with 115 additions and 6 deletions

View File

@@ -163,6 +163,45 @@ def run_data_update(days_back: int):
logger.error(f"❌ Data update failed: {e}")
return False
def run_historical_import(start_date_str: str, end_date_str: str, skip_existing: bool = True):
    """Import historical data for an inclusive date range.

    Args:
        start_date_str: First day to import, formatted as YYYY-MM-DD.
        end_date_str: Last day to import, formatted as YYYY-MM-DD.
        skip_existing: Passed through to the scraper; when True, dates that
            already have data are skipped instead of being re-imported.

    Returns:
        True when the import ran to completion (even if nothing new was
        imported); False when the dates are invalid, the range is reversed,
        or the import raised an exception.
    """
    # Parse the dates in their own try block so that a ValueError raised
    # later on (for example during configuration validation or inside the
    # scraper) is not misreported as a date-format problem.
    try:
        start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
        end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
    except (TypeError, ValueError) as e:
        # TypeError covers non-string input, which strptime rejects before
        # it ever looks at the format.
        logger.error(f"❌ Invalid date format. Use YYYY-MM-DD: {e}")
        return False

    if start_date > end_date:
        logger.error("Start date must be before or equal to end date")
        return False

    try:
        logger.info(f"Importing historical data from {start_date.date()} to {end_date.date()}")
        if skip_existing:
            logger.info("Skipping dates that already have data")

        # Validate configuration
        Config.validate_config()

        # Initialize scraper
        db_config = Config.get_database_config()
        scraper = EnhancedWaterMonitorScraper(db_config)

        # Import historical data
        imported_count = scraper.import_historical_data(start_date, end_date, skip_existing)
        if imported_count > 0:
            logger.info(f"✅ Imported {imported_count} historical data points")
        else:
            logger.info("✅ No new data imported")
        return True
    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it via the
        # return value rather than letting the exception escape.
        logger.error(f"❌ Historical import failed: {e}")
        return False
def run_web_api():
"""Run the FastAPI web interface"""
logger.info("Starting web API server...")
@@ -320,6 +359,7 @@ Examples:
%(prog)s --web-api # Start web API server
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
%(prog)s --update-data 2 # Update existing data for last 2 days
%(prog)s --import-historical 2024-01-01 2024-01-31 # Import historical data
%(prog)s --status # Show system status
%(prog)s --alert-check # Check water levels and send alerts
%(prog)s --alert-test # Send test Matrix message
@@ -352,6 +392,19 @@ Examples:
help="Update existing data for the specified number of days back"
)
parser.add_argument(
"--import-historical",
nargs=2,
metavar=("START_DATE", "END_DATE"),
help="Import historical data for date range (YYYY-MM-DD format)"
)
parser.add_argument(
"--force-overwrite",
action="store_true",
help="Overwrite existing data when importing historical data"
)
parser.add_argument(
"--status",
action="store_true",
@@ -408,6 +461,10 @@ Examples:
success = run_gap_filling(args.fill_gaps)
elif args.update_data is not None:
success = run_data_update(args.update_data)
elif args.import_historical is not None:
start_date, end_date = args.import_historical
skip_existing = not args.force_overwrite
success = run_historical_import(start_date, end_date, skip_existing)
elif args.status:
success = show_status()
elif args.alert_check:

View File

@@ -576,6 +576,58 @@ class EnhancedWaterMonitorScraper:
logger.debug(f"Error checking data existence: {e}")
return False
def import_historical_data(self, start_date: datetime.datetime, end_date: datetime.datetime,
                           skip_existing: bool = True) -> int:
    """
    Import historical data for a date range, one day at a time.

    Each day from start_date through end_date (inclusive) is fetched and
    saved independently. An error on one day is logged and the import
    moves on to the next day.

    Args:
        start_date: Start date for historical import
        end_date: End date for historical import
        skip_existing: Skip dates that already have data (default: True)

    Returns:
        Number of data points imported (sum of the sizes of the daily
        batches that were saved successfully)
    """
    logger.info(f"Starting historical data import from {start_date.date()} to {end_date.date()}")

    total_imported = 0
    one_day = datetime.timedelta(days=1)
    current_date = start_date

    while current_date <= end_date:
        # Tracks whether this iteration actually hit the API, so the
        # throttle below is applied only when a request was made.
        api_called = False
        try:
            # Check if data already exists for this date
            if skip_existing and self._check_data_exists_for_date(current_date):
                logger.info(f"Data already exists for {current_date.date()}, skipping...")
                continue

            logger.info(f"Importing data for {current_date.date()}...")

            # Fetch data for this date
            api_called = True
            data = self.fetch_water_data_for_date(current_date)

            if data:
                # Save to database
                if self.save_to_database(data):
                    total_imported += len(data)
                    logger.info(f"Successfully imported {len(data)} data points for {current_date.date()}")
                else:
                    logger.warning(f"Failed to save data for {current_date.date()}")
            else:
                logger.warning(f"No data available for {current_date.date()}")
        except Exception as e:
            # Best-effort import: log the failure and carry on with the next day.
            logger.error(f"Error importing data for {current_date.date()}: {e}")
        finally:
            # Throttle after every API request, including ones that raised,
            # so a failing API is not hit with back-to-back requests.
            if api_called:
                time.sleep(1)
            # Single place where the loop advances; also runs for `continue`.
            current_date += one_day

    logger.info(f"Historical import completed. Total data points imported: {total_imported}")
    return total_imported
# Main execution for standalone usage
if __name__ == "__main__":
import argparse