diff --git a/src/water_scraper_v3.py b/src/water_scraper_v3.py index 21b5b52..6f3f5ee 100644 --- a/src/water_scraper_v3.py +++ b/src/water_scraper_v3.py @@ -500,11 +500,12 @@ class EnhancedWaterMonitorScraper: return [] def _check_data_freshness(self, water_data: List[Dict]) -> bool: - """Check if the fetched data contains recent/fresh timestamps""" + """Check if the fetched data contains expected current hour data""" if not water_data: return False current_time = datetime.datetime.now() + current_hour = current_time.hour # Find the most recent timestamp in the data latest_timestamp = None @@ -517,19 +518,37 @@ class EnhancedWaterMonitorScraper: logger.warning("No valid timestamps found in data") return False - # Check if the latest data is within the last 2 hours + latest_hour = latest_timestamp.hour time_diff = current_time - latest_timestamp hours_old = time_diff.total_seconds() / 3600 - logger.info(f"Latest data timestamp: {latest_timestamp}, age: {hours_old:.1f} hours") + logger.info(f"Current time: {current_time.strftime('%H:%M')}, Latest data: {latest_timestamp.strftime('%H:%M')}") + logger.info(f"Current hour: {current_hour}, Latest data hour: {latest_hour}, Age: {hours_old:.1f} hours") - # Consider data fresh if it's less than 2 hours old - is_fresh = hours_old <= 2.0 + # Check if we have data for the current hour or the previous hour + # If it's 20:00 and we only have data up to 19:xx, that's stale + expected_hour = current_hour + has_current_hour = latest_hour >= expected_hour + + # Allow some tolerance: if it's early in the hour (first 10 minutes), + # accept data from the previous hour + if current_time.minute <= 10 and latest_hour == (current_hour - 1): + has_current_hour = True + logger.info(f"Early in hour {current_hour}, accepting previous hour {latest_hour} data") + + # Also check that data isn't too old (backup check) + not_too_old = hours_old <= 2.0 + + is_fresh = has_current_hour and not_too_old if not is_fresh: - logger.warning(f"Data is stale ({hours_old:.1f} hours old), switching to retry mode") + if not has_current_hour: + logger.warning(f"Missing current hour data - expected hour {expected_hour}, got {latest_hour}") + if not not_too_old: + logger.warning(f"Data is too old ({hours_old:.1f} hours)") + logger.warning("Data is stale, switching to retry mode") else: - logger.info(f"Data is fresh ({hours_old:.1f} hours old)") + logger.info(f"Data is fresh - has current/recent hour data") return is_fresh