Implement intelligent data freshness detection for adaptive scheduler

- Add _check_data_freshness() method to detect stale vs fresh data
- Consider data fresh only if the latest timestamp is within 2 hours of the current time
- Modify run_scraping_cycle() to check data freshness, not just existence
- Return False for stale data to trigger adaptive scheduler retry mode
- Add detailed logging for data age and freshness decisions

This solves the issue where the scheduler stayed in hourly mode despite
receiving stale data from the API. Now it correctly detects when the API
returns old data and switches to retry mode until fresh data becomes available.

Example behavior (see the sketch after this list):
- Fresh data (0.6 hours old): Returns True, stays in hourly mode
- Stale data (68.6 hours old): Returns False, switches to retry mode
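A minimal standalone sketch of that threshold decision (CUTOFF_HOURS and
is_fresh() are illustrative names, not the scraper's actual API):

    import datetime

    CUTOFF_HOURS = 2.0  # the same 2-hour threshold described above

    def is_fresh(latest_timestamp, now=None):
        """Return True if the latest timestamp falls within the cutoff."""
        now = now or datetime.datetime.now()
        hours_old = (now - latest_timestamp).total_seconds() / 3600
        return hours_old <= CUTOFF_HOURS

    now = datetime.datetime.now()
    print(is_fresh(now - datetime.timedelta(hours=0.6)))   # True  -> stay in hourly mode
    print(is_fresh(now - datetime.timedelta(hours=68.6)))  # False -> switch to retry mode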

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 1c023369b3 (parent 60e70c2192)
Date: 2025-09-28 20:35:59 +07:00

@@ -499,28 +499,71 @@ class EnhancedWaterMonitorScraper:
             logger.error(f"Error getting latest data: {e}")
             return []
 
+    def _check_data_freshness(self, water_data: List[Dict]) -> bool:
+        """Check if the fetched data contains recent/fresh timestamps"""
+        if not water_data:
+            return False
+
+        current_time = datetime.datetime.now()
+
+        # Find the most recent timestamp in the data
+        latest_timestamp = None
+        for data_point in water_data:
+            timestamp = data_point.get('timestamp')
+            if timestamp and (latest_timestamp is None or timestamp > latest_timestamp):
+                latest_timestamp = timestamp
+
+        if latest_timestamp is None:
+            logger.warning("No valid timestamps found in data")
+            return False
+
+        # Check if the latest data is within the last 2 hours
+        time_diff = current_time - latest_timestamp
+        hours_old = time_diff.total_seconds() / 3600
+        logger.info(f"Latest data timestamp: {latest_timestamp}, age: {hours_old:.1f} hours")
+
+        # Consider data fresh if it's less than 2 hours old
+        is_fresh = hours_old <= 2.0
+        if not is_fresh:
+            logger.warning(f"Data is stale ({hours_old:.1f} hours old), switching to retry mode")
+        else:
+            logger.info(f"Data is fresh ({hours_old:.1f} hours old)")
+        return is_fresh
+
     def run_scraping_cycle(self) -> bool:
-        """Run a complete scraping cycle"""
+        """Run a complete scraping cycle with freshness check"""
         logger.info("Starting scraping cycle...")
         try:
             # Fetch current data
             water_data = self.fetch_water_data()
             if water_data:
-                success = self.save_to_database(water_data)
-                if success:
-                    logger.info("Scraping cycle completed successfully")
-                    increment_counter("scraping_cycles_successful")
-                    return True
+                # Check if data is fresh/recent
+                is_fresh = self._check_data_freshness(water_data)
+                if is_fresh:
+                    success = self.save_to_database(water_data)
+                    if success:
+                        logger.info("Scraping cycle completed successfully with fresh data")
+                        increment_counter("scraping_cycles_successful")
+                        return True
+                    else:
+                        logger.error("Failed to save data")
+                        increment_counter("scraping_cycles_failed")
+                        return False
                 else:
-                    logger.error("Failed to save data")
+                    # Data exists but is stale
+                    logger.warning("Data fetched but is stale - treating as no fresh data available")
                     increment_counter("scraping_cycles_failed")
                     return False
             else:
                 logger.warning("No data fetched")
                 increment_counter("scraping_cycles_failed")
                 return False
         except Exception as e:
             logger.error(f"Scraping cycle failed: {e}")
             increment_counter("scraping_cycles_failed")
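The adaptive scheduler itself is not part of this diff. As a rough sketch of
how it might consume the boolean returned by run_scraping_cycle() (the
interval constants and scheduler_loop() are assumptions for illustration, not
code from this repository):

    import time

    HOURLY_SECONDS = 3600  # assumed normal polling interval
    RETRY_SECONDS = 600    # assumed shorter retry interval

    def scheduler_loop(scraper):
        """Re-run the scraping cycle, adapting the wait to its result."""
        while True:
            got_fresh_data = scraper.run_scraping_cycle()
            # True  -> fresh data was saved: wait the full hour
            # False -> stale data, no data, or save failure: retry sooner
            time.sleep(HOURLY_SECONDS if got_fresh_data else RETRY_SECONDS)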