Fix validator to handle null discharge values properly

- Make discharge field optional in data validator
- Remove discharge from required fields list
- Add explicit null check for discharge before float conversion
- Prevent "float() argument must be a string or a real number, not 'NoneType'" errors
- Allow records with valid water levels whose discharge is null (presumably malformed discharge that the parser now stores as null)

This completes the malformed data handling fix by updating the validator
to match the parser's new behavior of allowing null discharge values.

Before: Validator rejected records with null discharge
After: Validator accepts records whose discharge is missing or null, and range-checks discharge only when a non-null value is present

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-28 18:55:20 +07:00
parent cc5c4522b8
commit 60e70c2192

View File

@@ -26,29 +26,34 @@ class DataValidator:
def validate_measurement(cls, measurement: Dict[str, Any]) -> bool:
"""Validate a single measurement"""
try:
# Check required fields
required_fields = ['timestamp', 'station_id', 'water_level', 'discharge']
# Check required fields (discharge is now optional)
required_fields = ['timestamp', 'station_id', 'water_level']
for field in required_fields:
if field not in measurement:
logger.warning(f"Missing required field: {field}")
return False
# Validate timestamp
if not isinstance(measurement['timestamp'], datetime):
logger.warning(f"Invalid timestamp type: {type(measurement['timestamp'])}")
return False
# Validate water level
# Validate water level (required)
if measurement['water_level'] is None:
logger.warning("Water level cannot be None")
return False
water_level = float(measurement['water_level'])
if not (cls.WATER_LEVEL_MIN <= water_level <= cls.WATER_LEVEL_MAX):
logger.warning(f"Water level out of range: {water_level}")
return False
# Validate discharge
discharge = float(measurement['discharge'])
if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX):
logger.warning(f"Discharge out of range: {discharge}")
return False
# Validate discharge (optional - can be None)
discharge_value = measurement.get('discharge')
if discharge_value is not None:
discharge = float(discharge_value)
if not (cls.DISCHARGE_MIN <= discharge <= cls.DISCHARGE_MAX):
logger.warning(f"Discharge out of range: {discharge}")
return False
# Validate discharge percent if present
if measurement.get('discharge_percent') is not None: