Add historical data import functionality
- Add import_historical_data() method to EnhancedWaterMonitorScraper
- Support date range imports with Buddhist calendar API format
- Add CLI arguments --import-historical and --force-overwrite
- Include API rate limiting and skip existing data option
- Enable importing years of historical water level data

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
57
src/main.py
57
src/main.py
@@ -163,6 +163,45 @@ def run_data_update(days_back: int):
|
|||||||
logger.error(f"❌ Data update failed: {e}")
|
logger.error(f"❌ Data update failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def run_historical_import(start_date_str: str, end_date_str: str, skip_existing: bool = True) -> bool:
    """Import historical data for an inclusive date range.

    Args:
        start_date_str: First day to import, formatted as YYYY-MM-DD.
        end_date_str: Last day to import, formatted as YYYY-MM-DD.
        skip_existing: Passed through to the scraper; when True, dates that
            already have data are skipped instead of being imported again.

    Returns:
        True when the import ran to completion (even if nothing new was
        imported); False when the dates are malformed, the range is
        reversed, or any step of the import raised.
    """
    try:
        # Parse the dates in their own try block so that only a genuine
        # parsing failure is reported as a date-format problem. A ValueError
        # raised later (configuration validation, scraper, database) must
        # not be mislabelled as a bad date.
        try:
            start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
            end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
        except ValueError as e:
            logger.error(f"❌ Invalid date format. Use YYYY-MM-DD: {e}")
            return False

        if start_date > end_date:
            logger.error("Start date must be before or equal to end date")
            return False

        logger.info(f"Importing historical data from {start_date.date()} to {end_date.date()}")
        if skip_existing:
            logger.info("Skipping dates that already have data")

        # Validate configuration
        Config.validate_config()

        # Initialize scraper
        db_config = Config.get_database_config()
        scraper = EnhancedWaterMonitorScraper(db_config)

        # Import historical data
        imported_count = scraper.import_historical_data(start_date, end_date, skip_existing)

        if imported_count > 0:
            logger.info(f"✅ Imported {imported_count} historical data points")
        else:
            logger.info("✅ No new data imported")

        return True

    except Exception as e:
        # Top-level boundary for the CLI command: log and report failure
        # through the return value rather than crashing the process.
        logger.error(f"❌ Historical import failed: {e}")
        return False
|
||||||
|
|
||||||
def run_web_api():
|
def run_web_api():
|
||||||
"""Run the FastAPI web interface"""
|
"""Run the FastAPI web interface"""
|
||||||
logger.info("Starting web API server...")
|
logger.info("Starting web API server...")
|
||||||
@@ -320,6 +359,7 @@ Examples:
|
|||||||
%(prog)s --web-api # Start web API server
|
%(prog)s --web-api # Start web API server
|
||||||
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
%(prog)s --fill-gaps 7 # Fill missing data for last 7 days
|
||||||
%(prog)s --update-data 2 # Update existing data for last 2 days
|
%(prog)s --update-data 2 # Update existing data for last 2 days
|
||||||
|
%(prog)s --import-historical 2024-01-01 2024-01-31 # Import historical data
|
||||||
%(prog)s --status # Show system status
|
%(prog)s --status # Show system status
|
||||||
%(prog)s --alert-check # Check water levels and send alerts
|
%(prog)s --alert-check # Check water levels and send alerts
|
||||||
%(prog)s --alert-test # Send test Matrix message
|
%(prog)s --alert-test # Send test Matrix message
|
||||||
@@ -352,6 +392,19 @@ Examples:
|
|||||||
help="Update existing data for the specified number of days back"
|
help="Update existing data for the specified number of days back"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--import-historical",
|
||||||
|
nargs=2,
|
||||||
|
metavar=("START_DATE", "END_DATE"),
|
||||||
|
help="Import historical data for date range (YYYY-MM-DD format)"
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--force-overwrite",
|
||||||
|
action="store_true",
|
||||||
|
help="Overwrite existing data when importing historical data"
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--status",
|
"--status",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -408,6 +461,10 @@ Examples:
|
|||||||
success = run_gap_filling(args.fill_gaps)
|
success = run_gap_filling(args.fill_gaps)
|
||||||
elif args.update_data is not None:
|
elif args.update_data is not None:
|
||||||
success = run_data_update(args.update_data)
|
success = run_data_update(args.update_data)
|
||||||
|
elif args.import_historical is not None:
|
||||||
|
start_date, end_date = args.import_historical
|
||||||
|
skip_existing = not args.force_overwrite
|
||||||
|
success = run_historical_import(start_date, end_date, skip_existing)
|
||||||
elif args.status:
|
elif args.status:
|
||||||
success = show_status()
|
success = show_status()
|
||||||
elif args.alert_check:
|
elif args.alert_check:
|
||||||
|
@@ -576,6 +576,58 @@ class EnhancedWaterMonitorScraper:
|
|||||||
logger.debug(f"Error checking data existence: {e}")
|
logger.debug(f"Error checking data existence: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def import_historical_data(self, start_date: datetime.datetime, end_date: datetime.datetime,
                           skip_existing: bool = True) -> int:
    """
    Import historical data for a date range, one day at a time.

    Each day from start_date through end_date (inclusive) is fetched with
    fetch_water_data_for_date() and stored with save_to_database(). A
    failure on one day is logged and the import continues with the next day.

    Args:
        start_date: Start date for historical import
        end_date: End date for historical import
        skip_existing: Skip dates that already have data (default: True)

    Returns:
        Number of data points imported (sum of len(data) over the days
        whose save_to_database() call reported success)
    """
    logger.info(f"Starting historical data import from {start_date.date()} to {end_date.date()}")

    total_imported = 0
    one_day = datetime.timedelta(days=1)
    current_date = start_date

    while current_date <= end_date:
        # Tracks whether this iteration reached the API, so the delay below
        # is applied after failed requests too but not after skipped dates.
        attempted_fetch = False

        try:
            # Check if data already exists for this date
            if skip_existing and self._check_data_exists_for_date(current_date):
                logger.info(f"Data already exists for {current_date.date()}, skipping...")
            else:
                logger.info(f"Importing data for {current_date.date()}...")

                # Fetch data for this date
                attempted_fetch = True
                data = self.fetch_water_data_for_date(current_date)

                if data:
                    # Save to database
                    if self.save_to_database(data):
                        total_imported += len(data)
                        logger.info(f"Successfully imported {len(data)} data points for {current_date.date()}")
                    else:
                        logger.warning(f"Failed to save data for {current_date.date()}")
                else:
                    logger.warning(f"No data available for {current_date.date()}")

        except Exception as e:
            # Best effort: one bad day must not abort the whole range.
            logger.error(f"Error importing data for {current_date.date()}: {e}")

        if attempted_fetch:
            # Add small delay to be respectful to the API. This runs even
            # when the fetch or save raised, so a run of failing days does
            # not turn into back-to-back requests.
            time.sleep(1)

        current_date += one_day

    logger.info(f"Historical import completed. Total data points imported: {total_imported}")
    return total_imported
|
||||||
|
|
||||||
# Main execution for standalone usage
|
# Main execution for standalone usage
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import argparse
|
import argparse
|
||||||
|
Reference in New Issue
Block a user