diff --git a/src/water_scraper_v3.py b/src/water_scraper_v3.py index 6ed3ba9..7bab6d9 100644 --- a/src/water_scraper_v3.py +++ b/src/water_scraper_v3.py @@ -337,29 +337,47 @@ class EnhancedWaterMonitorScraper: wl_key = f'wlvalues{station_num}' q_key = f'qvalues{station_num}' qp_key = f'QPercent{station_num}' - - # Check if both water level and discharge data exist - if wl_key in row and q_key in row: + + # Check if water level data exists (required) + if wl_key in row: try: water_level = row[wl_key] - discharge = row[q_key] - discharge_percent = row.get(qp_key) - - # Skip if values are None or invalid - if water_level is None or discharge is None: + + # Skip if water level is None or invalid + if water_level is None: continue - - # Convert to float + + # Convert water level to float (required) water_level = float(water_level) - discharge = float(discharge) - discharge_percent = float(discharge_percent) if discharge_percent is not None else None - + + # Try to parse discharge data (optional) + discharge = None + discharge_percent = None + + if q_key in row: + try: + discharge_raw = row[q_key] + if discharge_raw is not None and discharge_raw != "***": + discharge = float(discharge_raw) + + # Only parse discharge percent if discharge is valid + discharge_percent_raw = row.get(qp_key) + if discharge_percent_raw is not None: + try: + discharge_percent = float(discharge_percent_raw) + except (ValueError, TypeError): + discharge_percent = None + else: + logger.debug(f"Skipping malformed discharge data for station {station_num}: {discharge_raw}") + except (ValueError, TypeError) as e: + logger.debug(f"Could not parse discharge for station {station_num}: {e}") + station_info = self.station_mapping.get(str(station_num), { 'code': f'P.{19+station_num}', 'thai_name': f'Station {station_num}', 'english_name': f'Station {station_num}' }) - + water_data.append({ 'timestamp': data_time, 'station_id': station_num, @@ -376,11 +394,11 @@ class EnhancedWaterMonitorScraper: 'discharge_percent': discharge_percent, 'status': 'active' }) - + station_count += 1 - + except (ValueError, TypeError) as e: - logger.warning(f"Could not parse data for station {station_num}: {e}") + logger.warning(f"Could not parse water level for station {station_num}: {e}") continue logger.debug(f"Processed {station_count} stations for time {time_str}")