diff --git a/src/main.py b/src/main.py
index 1f50d5c..a82d51c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -140,29 +140,68 @@ def run_gap_filling(days_back: int):
 def run_data_update(days_back: int):
     """Update existing data with latest values"""
     logger.info(f"Updating existing data for the last {days_back} days...")
-    
+
     try:
         # Validate configuration
         Config.validate_config()
-        
+
         # Initialize scraper
         db_config = Config.get_database_config()
         scraper = EnhancedWaterMonitorScraper(db_config)
-        
+
         # Update data
         updated_count = scraper.update_existing_data(days_back)
-        
+
         if updated_count > 0:
             logger.info(f"✅ Updated {updated_count} data points")
         else:
             logger.info("✅ No data updates needed")
-        
+
         return True
-        
+
     except Exception as e:
         logger.error(f"❌ Data update failed: {e}")
         return False
 
+def run_historical_import(start_date_str: str, end_date_str: str, skip_existing: bool = True):
+    """Import historical data for a date range"""
+    try:
+        # Parse dates
+        start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
+        end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
+
+        if start_date > end_date:
+            logger.error("Start date must be before or equal to end date")
+            return False
+
+        logger.info(f"Importing historical data from {start_date.date()} to {end_date.date()}")
+        if skip_existing:
+            logger.info("Skipping dates that already have data")
+
+        # Validate configuration
+        Config.validate_config()
+
+        # Initialize scraper
+        db_config = Config.get_database_config()
+        scraper = EnhancedWaterMonitorScraper(db_config)
+
+        # Import historical data
+        imported_count = scraper.import_historical_data(start_date, end_date, skip_existing)
+
+        if imported_count > 0:
+            logger.info(f"✅ Imported {imported_count} historical data points")
+        else:
+            logger.info("✅ No new data imported")
+
+        return True
+
+    except ValueError as e:
+        logger.error(f"❌ Invalid date format. Use YYYY-MM-DD: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Historical import failed: {e}")
+        return False
+
 def run_web_api():
     """Run the FastAPI web interface"""
     logger.info("Starting web API server...")
@@ -320,6 +359,7 @@ Examples:
   %(prog)s --web-api                    # Start web API server
   %(prog)s --fill-gaps 7                # Fill missing data for last 7 days
   %(prog)s --update-data 2              # Update existing data for last 2 days
+  %(prog)s --import-historical 2024-01-01 2024-01-31  # Import historical data
   %(prog)s --status                     # Show system status
   %(prog)s --alert-check                # Check water levels and send alerts
   %(prog)s --alert-test                 # Send test Matrix message
@@ -352,6 +392,19 @@ Examples:
         help="Update existing data for the specified number of days back"
     )
 
+    parser.add_argument(
+        "--import-historical",
+        nargs=2,
+        metavar=("START_DATE", "END_DATE"),
+        help="Import historical data for date range (YYYY-MM-DD format)"
+    )
+
+    parser.add_argument(
+        "--force-overwrite",
+        action="store_true",
+        help="Overwrite existing data when importing historical data"
+    )
+
     parser.add_argument(
         "--status",
         action="store_true",
@@ -408,6 +461,10 @@ Examples:
         success = run_gap_filling(args.fill_gaps)
     elif args.update_data is not None:
         success = run_data_update(args.update_data)
+    elif args.import_historical is not None:
+        start_date, end_date = args.import_historical
+        skip_existing = not args.force_overwrite
+        success = run_historical_import(start_date, end_date, skip_existing)
     elif args.status:
         success = show_status()
     elif args.alert_check:
diff --git a/src/water_scraper_v3.py b/src/water_scraper_v3.py
index 9ad7f80..658a37f 100644
--- a/src/water_scraper_v3.py
+++ b/src/water_scraper_v3.py
@@ -576,6 +576,58 @@ class EnhancedWaterMonitorScraper:
             logger.debug(f"Error checking data existence: {e}")
             return False
 
+    def import_historical_data(self, start_date: datetime.datetime, end_date: datetime.datetime,
+                               skip_existing: bool = True) -> int:
+        """
+        Import historical data for a date range
+
+        Args:
+            start_date: Start date for historical import
+            end_date: End date for historical import
+            skip_existing: Skip dates that already have data (default: True)
+
+        Returns:
+            Number of data points imported
+        """
+        logger.info(f"Starting historical data import from {start_date.date()} to {end_date.date()}")
+
+        total_imported = 0
+        current_date = start_date
+
+        while current_date <= end_date:
+            try:
+                # Check if data already exists for this date
+                if skip_existing and self._check_data_exists_for_date(current_date):
+                    logger.info(f"Data already exists for {current_date.date()}, skipping...")
+                    current_date += datetime.timedelta(days=1)
+                    continue
+
+                logger.info(f"Importing data for {current_date.date()}...")
+
+                # Fetch data for this date
+                data = self.fetch_water_data_for_date(current_date)
+
+                if data:
+                    # Save to database
+                    if self.save_to_database(data):
+                        total_imported += len(data)
+                        logger.info(f"Successfully imported {len(data)} data points for {current_date.date()}")
+                    else:
+                        logger.warning(f"Failed to save data for {current_date.date()}")
+                else:
+                    logger.warning(f"No data available for {current_date.date()}")
+
+                # Add small delay to be respectful to the API
+                time.sleep(1)
+
+            except Exception as e:
+                logger.error(f"Error importing data for {current_date.date()}: {e}")
+
+            current_date += datetime.timedelta(days=1)
+
+        logger.info(f"Historical import completed. Total data points imported: {total_imported}")
+        return total_imported
+
 # Main execution for standalone usage
 if __name__ == "__main__":
     import argparse
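
With the patch applied, the new code path can be exercised from the command line. This is a sketch that assumes main.py is invoked directly, as in the argparse examples above; adjust the invocation to your actual entry point:

    # Import January 2024, skipping dates that already have data (the default)
    python src/main.py --import-historical 2024-01-01 2024-01-31

    # Re-import the same range, including dates that already have data
    python src/main.py --import-historical 2024-01-01 2024-01-31 --force-overwrite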