Enhance freshness detection to check for current hour data availability

- Modify _check_data_freshness() to verify current hour data exists
- If running during the 20:00 hour but data only goes up to 19:xx, consider it stale
- Add tolerance: accept previous-hour data early in the hour (minute <= 10)
- Combine current hour check with age limit (≤2 hours) for robustness
- Add detailed logging for current vs latest hour comparison

This solves the core issue where the scheduler stayed in hourly mode even
though the expected current-hour data was missing from the API.

Example scenarios:
- 20:57 with data up to 20:xx: Fresh (has current hour)
- 20:57 with data up to 19:xx: Stale (missing current hour) → Retry mode
- 20:05 with data up to 19:xx: Fresh (tolerance for early hour)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-28 20:58:19 +07:00
parent 1c023369b3
commit 5c6a41b2b9

View File

@@ -500,11 +500,12 @@ class EnhancedWaterMonitorScraper:
return []
def _check_data_freshness(self, water_data: List[Dict]) -> bool:
"""Check if the fetched data contains recent/fresh timestamps"""
"""Check if the fetched data contains expected current hour data"""
if not water_data:
return False
current_time = datetime.datetime.now()
current_hour = current_time.hour
# Find the most recent timestamp in the data
latest_timestamp = None
@@ -517,19 +518,37 @@ class EnhancedWaterMonitorScraper:
logger.warning("No valid timestamps found in data")
return False
# Check if the latest data is within the last 2 hours
latest_hour = latest_timestamp.hour
time_diff = current_time - latest_timestamp
hours_old = time_diff.total_seconds() / 3600
logger.info(f"Latest data timestamp: {latest_timestamp}, age: {hours_old:.1f} hours")
logger.info(f"Current time: {current_time.strftime('%H:%M')}, Latest data: {latest_timestamp.strftime('%H:%M')}")
logger.info(f"Current hour: {current_hour}, Latest data hour: {latest_hour}, Age: {hours_old:.1f} hours")
# Consider data fresh if it's less than 2 hours old
is_fresh = hours_old <= 2.0
# Check if we have data for the current hour or the previous hour
# If it's 20:00 and we only have data up to 19:xx, that's stale
expected_hour = current_hour
has_current_hour = latest_hour >= expected_hour
# Allow some tolerance: if it's early in the hour (first 10 minutes),
# accept data from the previous hour
if current_time.minute <= 10 and latest_hour == (current_hour - 1):
has_current_hour = True
logger.info(f"Early in hour {current_hour}, accepting previous hour {latest_hour} data")
# Also check that data isn't too old (backup check)
not_too_old = hours_old <= 2.0
is_fresh = has_current_hour and not_too_old
if not is_fresh:
logger.warning(f"Data is stale ({hours_old:.1f} hours old), switching to retry mode")
if not has_current_hour:
logger.warning(f"Missing current hour data - expected hour {expected_hour}, got {latest_hour}")
if not not_too_old:
logger.warning(f"Data is too old ({hours_old:.1f} hours)")
logger.warning("Data is stale, switching to retry mode")
else:
logger.info(f"Data is fresh ({hours_old:.1f} hours old)")
logger.info(f"Data is fresh - has current/recent hour data")
return is_fresh