Add historical data import functionality
- Add import_historical_data() method to EnhancedWaterMonitorScraper
- Support date range imports with the Buddhist calendar API format
- Add CLI arguments --import-historical and --force-overwrite
- Include API rate limiting and an option to skip existing data
- Enable importing years of historical water level data

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
69
src/main.py
69
src/main.py
@@ -140,29 +140,68 @@ def run_gap_filling(days_back: int):
|
||||
def run_data_update(days_back: int):
    """Refresh already-stored readings for the most recent ``days_back`` days.

    Validates the configuration, builds a scraper from the database
    configuration and asks it to update its existing data.

    Args:
        days_back: Number of days back from now to re-check.

    Returns:
        True when the update ran to completion (whether or not anything
        changed), False when any step raised an exception.
    """
    logger.info(f"Updating existing data for the last {days_back} days...")

    try:
        # Configuration problems surface here, before any scraper work starts.
        Config.validate_config()

        scraper = EnhancedWaterMonitorScraper(Config.get_database_config())
        updated_count = scraper.update_existing_data(days_back)

        summary = (
            f"✅ Updated {updated_count} data points"
            if updated_count > 0
            else "✅ No data updates needed"
        )
        logger.info(summary)
        return True

    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it to the caller.
        logger.error(f"❌ Data update failed: {e}")
        return False
|
||||
|
||||
def run_historical_import(start_date_str: str, end_date_str: str, skip_existing: bool = True):
    """Import historical data for a date range.

    Args:
        start_date_str: First day to import, formatted as YYYY-MM-DD.
        end_date_str: Last day to import, formatted as YYYY-MM-DD.
        skip_existing: Forwarded to the scraper; when True, dates that
            already have data are skipped instead of being re-imported.

    Returns:
        True if the import ran to completion (even when nothing new was
        imported). False on a malformed date, a start date after the end
        date, or any error raised during configuration validation, scraper
        setup or the import itself.
    """
    try:
        # Parse the dates in their own try block so that only a genuine
        # parsing failure is reported as "Invalid date format". A ValueError
        # raised later (configuration, scraper) must not be mislabelled.
        try:
            start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
            end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
        except ValueError as e:
            logger.error(f"❌ Invalid date format. Use YYYY-MM-DD: {e}")
            return False

        if start_date > end_date:
            logger.error("Start date must be before or equal to end date")
            return False

        logger.info(f"Importing historical data from {start_date.date()} to {end_date.date()}")
        if skip_existing:
            logger.info("Skipping dates that already have data")

        # Validate configuration
        Config.validate_config()

        # Initialize scraper
        db_config = Config.get_database_config()
        scraper = EnhancedWaterMonitorScraper(db_config)

        # Import historical data
        imported_count = scraper.import_historical_data(start_date, end_date, skip_existing)

        if imported_count > 0:
            logger.info(f"✅ Imported {imported_count} historical data points")
        else:
            logger.info("✅ No new data imported")

        return True

    except Exception as e:
        # Top-level CLI boundary: everything that is not a date-parsing
        # problem is reported as an import failure.
        logger.error(f"❌ Historical import failed: {e}")
        return False
|
||||
|
||||
def run_web_api():
|
||||
"""Run the FastAPI web interface"""
|
||||
logger.info("Starting web API server...")
|
||||
@@ -320,6 +359,7 @@ Examples:
|
||||
%(prog)s --web-api # Start web API server
|
||||
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
||||
%(prog)s --update-data 2 # Update existing data for last 2 days
|
||||
%(prog)s --import-historical 2024-01-01 2024-01-31 # Import historical data
|
||||
%(prog)s --status # Show system status
|
||||
%(prog)s --alert-check # Check water levels and send alerts
|
||||
%(prog)s --alert-test # Send test Matrix message
|
||||
@@ -352,6 +392,19 @@ Examples:
|
||||
help="Update existing data for the specified number of days back"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--import-historical",
|
||||
nargs=2,
|
||||
metavar=("START_DATE", "END_DATE"),
|
||||
help="Import historical data for date range (YYYY-MM-DD format)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--force-overwrite",
|
||||
action="store_true",
|
||||
help="Overwrite existing data when importing historical data"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--status",
|
||||
action="store_true",
|
||||
@@ -408,6 +461,10 @@ Examples:
|
||||
success = run_gap_filling(args.fill_gaps)
|
||||
elif args.update_data is not None:
|
||||
success = run_data_update(args.update_data)
|
||||
elif args.import_historical is not None:
|
||||
start_date, end_date = args.import_historical
|
||||
skip_existing = not args.force_overwrite
|
||||
success = run_historical_import(start_date, end_date, skip_existing)
|
||||
elif args.status:
|
||||
success = show_status()
|
||||
elif args.alert_check:
|
||||
|
@@ -576,6 +576,58 @@ class EnhancedWaterMonitorScraper:
|
||||
logger.debug(f"Error checking data existence: {e}")
|
||||
return False
|
||||
|
||||
def import_historical_data(self, start_date: datetime.datetime, end_date: datetime.datetime,
                           skip_existing: bool = True) -> int:
    """
    Import historical data for a date range.

    Walks from ``start_date`` to ``end_date`` (both included) one day at a
    time. For each day the data is fetched and saved to the database. An
    error on one day is logged and does not stop the remaining days.

    Args:
        start_date: Start date for historical import
        end_date: End date for historical import
        skip_existing: Skip dates that already have data (default: True)

    Returns:
        Number of data points imported (sum of the sizes of the daily
        batches that were saved successfully)
    """
    logger.info(f"Starting historical data import from {start_date.date()} to {end_date.date()}")

    total_imported = 0
    one_day = datetime.timedelta(days=1)
    current_date = start_date

    while current_date <= end_date:
        # Set only once we are about to call the API, so skipped days
        # do not incur the rate-limit pause.
        fetch_attempted = False

        try:
            if skip_existing and self._check_data_exists_for_date(current_date):
                logger.info(f"Data already exists for {current_date.date()}, skipping...")
            else:
                logger.info(f"Importing data for {current_date.date()}...")

                # Fetch data for this date
                fetch_attempted = True
                data = self.fetch_water_data_for_date(current_date)

                if data:
                    # Save to database
                    if self.save_to_database(data):
                        total_imported += len(data)
                        logger.info(f"Successfully imported {len(data)} data points for {current_date.date()}")
                    else:
                        logger.warning(f"Failed to save data for {current_date.date()}")
                else:
                    logger.warning(f"No data available for {current_date.date()}")

        except Exception as e:
            logger.error(f"Error importing data for {current_date.date()}: {e}")

        # Add small delay to be respectful to the API. This runs after
        # failed attempts too, so a failing API is not retried for the
        # next day without any pause.
        if fetch_attempted:
            time.sleep(1)

        current_date += one_day

    logger.info(f"Historical import completed. Total data points imported: {total_imported}")
    return total_imported
|
||||
|
||||
# Main execution for standalone usage
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
Reference in New Issue
Block a user