diff --git a/src/water_scraper_v3.py b/src/water_scraper_v3.py
index 7bab6d9..21b5b52 100644
--- a/src/water_scraper_v3.py
+++ b/src/water_scraper_v3.py
@@ -499,28 +499,71 @@ class EnhancedWaterMonitorScraper:
             logger.error(f"Error getting latest data: {e}")
             return []
 
+    def _check_data_freshness(self, water_data: List[Dict]) -> bool:
+        """Check if the fetched data contains recent/fresh timestamps"""
+        if not water_data:
+            return False
+
+        current_time = datetime.datetime.now()
+
+        # Find the most recent timestamp in the data
+        latest_timestamp = None
+        for data_point in water_data:
+            timestamp = data_point.get('timestamp')
+            if timestamp and (latest_timestamp is None or timestamp > latest_timestamp):
+                latest_timestamp = timestamp
+
+        if latest_timestamp is None:
+            logger.warning("No valid timestamps found in data")
+            return False
+
+        # Check if the latest data is within the last 2 hours
+        time_diff = current_time - latest_timestamp
+        hours_old = time_diff.total_seconds() / 3600
+
+        logger.info(f"Latest data timestamp: {latest_timestamp}, age: {hours_old:.1f} hours")
+
+        # Consider data fresh if it's less than 2 hours old
+        is_fresh = hours_old <= 2.0
+
+        if not is_fresh:
+            logger.warning(f"Data is stale ({hours_old:.1f} hours old), switching to retry mode")
+        else:
+            logger.info(f"Data is fresh ({hours_old:.1f} hours old)")
+
+        return is_fresh
+
     def run_scraping_cycle(self) -> bool:
-        """Run a complete scraping cycle"""
+        """Run a complete scraping cycle with freshness check"""
         logger.info("Starting scraping cycle...")
-
+
         try:
             # Fetch current data
             water_data = self.fetch_water_data()
             if water_data:
-                success = self.save_to_database(water_data)
-                if success:
-                    logger.info("Scraping cycle completed successfully")
-                    increment_counter("scraping_cycles_successful")
-                    return True
+                # Check if data is fresh/recent
+                is_fresh = self._check_data_freshness(water_data)
+
+                if is_fresh:
+                    success = self.save_to_database(water_data)
+                    if success:
+                        logger.info("Scraping cycle completed successfully with fresh data")
+                        increment_counter("scraping_cycles_successful")
+                        return True
+                    else:
+                        logger.error("Failed to save data")
+                        increment_counter("scraping_cycles_failed")
+                        return False
                 else:
-                    logger.error("Failed to save data")
+                    # Data exists but is stale
+                    logger.warning("Data fetched but is stale - treating as no fresh data available")
                     increment_counter("scraping_cycles_failed")
                     return False
             else:
                 logger.warning("No data fetched")
                 increment_counter("scraping_cycles_failed")
                 return False
-
+
         except Exception as e:
             logger.error(f"Scraping cycle failed: {e}")
             increment_counter("scraping_cycles_failed")
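As a review aid, here is a minimal standalone sketch of the freshness logic with a self-test. The names `check_data_freshness` and `FRESHNESS_WINDOW_HOURS` are illustrative, not part of the patch, and the sketch assumes `timestamp` values are naive `datetime` objects, as the subtraction against `datetime.datetime.now()` in the patch implies.

```python
import datetime
from typing import Dict, List, Optional

FRESHNESS_WINDOW_HOURS = 2.0  # mirrors the 2-hour threshold in the patch


def check_data_freshness(water_data: List[Dict]) -> bool:
    """Standalone equivalent of _check_data_freshness (logging omitted)."""
    if not water_data:
        return False

    # Find the most recent timestamp across all data points.
    latest: Optional[datetime.datetime] = None
    for point in water_data:
        ts = point.get('timestamp')
        if ts and (latest is None or ts > latest):
            latest = ts

    if latest is None:
        return False

    # Age in hours relative to "now"; both sides are naive datetimes.
    age_hours = (datetime.datetime.now() - latest).total_seconds() / 3600
    return age_hours <= FRESHNESS_WINDOW_HOURS


if __name__ == "__main__":
    now = datetime.datetime.now()
    fresh = [{'timestamp': now - datetime.timedelta(minutes=30)}]
    stale = [{'timestamp': now - datetime.timedelta(hours=3)}]
    assert check_data_freshness(fresh)                 # 0.5 h old -> fresh
    assert not check_data_freshness(stale)             # 3 h old -> stale
    assert not check_data_freshness([])                # empty -> not fresh
    assert not check_data_freshness([{'value': 1.2}])  # no timestamps -> not fresh
    print("freshness check behaves as expected")
```

One caveat worth flagging in review: because `_check_data_freshness` compares against naive `datetime.datetime.now()`, a timezone-aware `timestamp` would raise `TypeError` on the subtraction, so this presumes the scraper's upstream parsing produces naive datetimes.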