Enhance freshness detection to check for current hour data availability
- Modify _check_data_freshness() to verify that current-hour data exists
- If running at 20:00 but the data only extends to 19:xx, consider it stale
- Add tolerance: accept previous-hour data if within the first 10 minutes of the hour
- Combine the current-hour check with an age limit (≤2 hours) for robustness
- Add detailed logging for the current vs. latest hour comparison

This solves the core issue where the scheduler stayed in hourly mode despite missing the expected current-hour data from the API.

Example scenarios:
- 20:57 with data up to 20:xx: Fresh (has current hour)
- 20:57 with data up to 19:xx: Stale (missing current hour) → Retry mode
- 20:05 with data up to 19:xx: Fresh (tolerance for early hour)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -500,11 +500,12 @@ class EnhancedWaterMonitorScraper:
|
||||
return []
|
||||
|
||||
def _check_data_freshness(self, water_data: List[Dict]) -> bool:
|
||||
"""Check if the fetched data contains recent/fresh timestamps"""
|
||||
"""Check if the fetched data contains expected current hour data"""
|
||||
if not water_data:
|
||||
return False
|
||||
|
||||
current_time = datetime.datetime.now()
|
||||
current_hour = current_time.hour
|
||||
|
||||
# Find the most recent timestamp in the data
|
||||
latest_timestamp = None
|
||||
@@ -517,19 +518,37 @@ class EnhancedWaterMonitorScraper:
|
||||
logger.warning("No valid timestamps found in data")
|
||||
return False
|
||||
|
||||
# Check if the latest data is within the last 2 hours
|
||||
latest_hour = latest_timestamp.hour
|
||||
time_diff = current_time - latest_timestamp
|
||||
hours_old = time_diff.total_seconds() / 3600
|
||||
|
||||
logger.info(f"Latest data timestamp: {latest_timestamp}, age: {hours_old:.1f} hours")
|
||||
logger.info(f"Current time: {current_time.strftime('%H:%M')}, Latest data: {latest_timestamp.strftime('%H:%M')}")
|
||||
logger.info(f"Current hour: {current_hour}, Latest data hour: {latest_hour}, Age: {hours_old:.1f} hours")
|
||||
|
||||
# Consider data fresh if it's less than 2 hours old
|
||||
is_fresh = hours_old <= 2.0
|
||||
# Check if we have data for the current hour or the previous hour
|
||||
# If it's 20:00 and we only have data up to 19:xx, that's stale
|
||||
expected_hour = current_hour
|
||||
has_current_hour = latest_hour >= expected_hour
|
||||
|
||||
# Allow some tolerance: if it's early in the hour (first 10 minutes),
|
||||
# accept data from the previous hour
|
||||
if current_time.minute <= 10 and latest_hour == (current_hour - 1):
|
||||
has_current_hour = True
|
||||
logger.info(f"Early in hour {current_hour}, accepting previous hour {latest_hour} data")
|
||||
|
||||
# Also check that data isn't too old (backup check)
|
||||
not_too_old = hours_old <= 2.0
|
||||
|
||||
is_fresh = has_current_hour and not_too_old
|
||||
|
||||
if not is_fresh:
|
||||
logger.warning(f"Data is stale ({hours_old:.1f} hours old), switching to retry mode")
|
||||
if not has_current_hour:
|
||||
logger.warning(f"Missing current hour data - expected hour {expected_hour}, got {latest_hour}")
|
||||
if not not_too_old:
|
||||
logger.warning(f"Data is too old ({hours_old:.1f} hours)")
|
||||
logger.warning("Data is stale, switching to retry mode")
|
||||
else:
|
||||
logger.info(f"Data is fresh ({hours_old:.1f} hours old)")
|
||||
logger.info(f"Data is fresh - has current/recent hour data")
|
||||
|
||||
return is_fresh
|
||||
|
||||
|
Reference in New Issue
Block a user