From 60e70c21923f75d033f636b297de8c7969a37091 Mon Sep 17 00:00:00 2001 From: grabowski Date: Sun, 28 Sep 2025 18:55:20 +0700 Subject: [PATCH] Fix validator to handle null discharge values properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make discharge field optional in data validator - Remove discharge from required fields list - Add explicit null check for discharge before float conversion - Prevent "float() argument must be a string or a real number, not 'NoneType'" errors - Allow records with valid water levels but malformed/null discharge data This completes the malformed data handling fix by updating the validator to match the parser's new behavior of allowing null discharge values. Before: Validator rejected records with null discharge After: Validator accepts records with null discharge, validates only if present 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/validators.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/validators.py b/src/validators.py index e1c1c0a..d6e9cd9 100644 --- a/src/validators.py +++ b/src/validators.py @@ -26,29 +26,34 @@ class DataValidator: def validate_measurement(cls, measurement: Dict[str, Any]) -> bool: """Validate a single measurement""" try: - # Check required fields - required_fields = ['timestamp', 'station_id', 'water_level', 'discharge'] + # Check required fields (discharge is now optional) + required_fields = ['timestamp', 'station_id', 'water_level'] for field in required_fields: if field not in measurement: logger.warning(f"Missing required field: {field}") return False - + # Validate timestamp if not isinstance(measurement['timestamp'], datetime): logger.warning(f"Invalid timestamp type: {type(measurement['timestamp'])}") return False - - # Validate water level + + # Validate water level (required) + if measurement['water_level'] is None: + logger.warning("Water level cannot be None") + return False water_level = float(measurement['water_level']) if not (cls.WATER_LEVEL_MIN <= water_level <= cls.WATER_LEVEL_MAX): logger.warning(f"Water level out of range: {water_level}") return False - - # Validate discharge - discharge = float(measurement['discharge']) - if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX): - logger.warning(f"Discharge out of range: {discharge}") - return False + + # Validate discharge (optional - can be None) + discharge_value = measurement.get('discharge') + if discharge_value is not None: + discharge = float(discharge_value) + if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX): + logger.warning(f"Discharge out of range: {discharge}") + return False # Validate discharge percent if present if measurement.get('discharge_percent') is not None: