From dff4dd067d756c130c07059a8e3449578ce70bc4 Mon Sep 17 00:00:00 2001
From: grabowski <berwn@buildfor.life>
Date: Sun, 28 Sep 2025 21:03:03 +0700
Subject: [PATCH] Implement strict freshness detection without grace periods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove tolerance windows and grace periods from data freshness checks
- Require data from current hour only - no exceptions or fallbacks
- If the hourly check runs at 21:xx but only has data up to 20:xx, switch to retry mode immediately
- Simplify logic: data is fresh only when latest_hour >= current_hour
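- Remove the early-hour tolerance window and the age-based (<= 2 hours) backup check;
  the data age is still computed, but only for logging (now reported in minutes)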

This ensures the scheduler immediately detects when new hourly data
is not yet available and switches to minute-based retries without delay.

Behavior:
- 21:02 with data up to 21:xx → Fresh (continue hourly)
- 21:02 with data up to 20:xx → Stale (immediate retry mode)
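- No grace periods, no tolerance windows, strict hour-based detection
- Caveat (assuming current_hour is the hour of day of the current time): only
  hour values are compared, with no date or maximum-age check, so just after
  midnight (e.g. 00:02 with data up to 23:xx) latest_hour >= current_hour still
  holds and the old data is reported as fresh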

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/water_scraper_v3.py | 39 ++++++++++++---------------------------
 1 file changed, 12 insertions(+), 27 deletions(-)

diff --git a/src/water_scraper_v3.py b/src/water_scraper_v3.py
index 6f3f5ee..02c6426 100644
--- a/src/water_scraper_v3.py
+++ b/src/water_scraper_v3.py
@@ -500,7 +500,7 @@ class EnhancedWaterMonitorScraper:
             return []
     
     def _check_data_freshness(self, water_data: List[Dict]) -> bool:
-        """Check if the fetched data contains expected current hour data"""
+        """Check if the fetched data contains new data for the current hour"""
         if not water_data:
             return False
 
@@ -520,37 +520,22 @@ class EnhancedWaterMonitorScraper:
 
         latest_hour = latest_timestamp.hour
         time_diff = current_time - latest_timestamp
-        hours_old = time_diff.total_seconds() / 3600
+        minutes_old = time_diff.total_seconds() / 60
 
         logger.info(f"Current time: {current_time.strftime('%H:%M')}, Latest data: {latest_timestamp.strftime('%H:%M')}")
-        logger.info(f"Current hour: {current_hour}, Latest data hour: {latest_hour}, Age: {hours_old:.1f} hours")
+        logger.info(f"Current hour: {current_hour}, Latest data hour: {latest_hour}, Age: {minutes_old:.1f} minutes")
 
-        # Check if we have data for the current hour or the previous hour
-        # If it's 20:00 and we only have data up to 19:xx, that's stale
-        expected_hour = current_hour
-        has_current_hour = latest_hour >= expected_hour
+        # Strict check: we need data from the current hour
+        # If it's 20:xx and we only have data up to 19:xx, that's stale - go to retry mode
+        has_current_hour_data = latest_hour >= current_hour
 
-        # Allow some tolerance: if it's early in the hour (first 10 minutes),
-        # accept data from the previous hour
-        if current_time.minute <= 10 and latest_hour == (current_hour - 1):
-            has_current_hour = True
-            logger.info(f"Early in hour {current_hour}, accepting previous hour {latest_hour} data")
-
-        # Also check that data isn't too old (backup check)
-        not_too_old = hours_old <= 2.0
-
-        is_fresh = has_current_hour and not_too_old
-
-        if not is_fresh:
-            if not has_current_hour:
-                logger.warning(f"Missing current hour data - expected hour {expected_hour}, got {latest_hour}")
-            if not not_too_old:
-                logger.warning(f"Data is too old ({hours_old:.1f} hours)")
-            logger.warning("Data is stale, switching to retry mode")
+        if not has_current_hour_data:
+            logger.warning(f"No new data available - expected hour {current_hour}, got {latest_hour}")
+            logger.warning("Switching to retry mode until new data becomes available")
+            return False
         else:
-            logger.info(f"Data is fresh - has current/recent hour data")
-
-        return is_fresh
+            logger.info(f"Fresh data available for current hour {current_hour}")
+            return True
 
     def run_scraping_cycle(self) -> bool:
         """Run a complete scraping cycle with freshness check"""