diff --git a/basin3d/plugins/usgs.py b/basin3d/plugins/usgs.py index acc5c60..cc551dd 100644 --- a/basin3d/plugins/usgs.py +++ b/basin3d/plugins/usgs.py @@ -67,10 +67,11 @@ URL_USGS_HUC = "https://water.usgs.gov/GIS/new_huc_rdb.txt" -def convert_discharge(data, parameter, units): +def convert_discharge(data, data_str, parameter, units): """ Convert the River Discharge to m^3 :param data: + :param data_str: :param parameter: :param units: :return: @@ -78,7 +79,10 @@ def convert_discharge(data, parameter, units): if parameter in ['00060', '00061']: # Hardcode conversion from ft^3 to m^3 # for River discharge - data *= 0.028316847 + if data_str == '-999999': + data = int(data_str) + else: + data *= 0.028316847 units = "m^3/s" return data, units @@ -608,8 +612,9 @@ def list(self, query: QueryMeasurementTimeseriesTVP): try: try: data: Optional[float] = float(value['value']) + data_str = value['value'] # Hardcoded unit conversion for river discharge parameters - data, unit_of_measurement = convert_discharge(data, parameter, unit_of_measurement) + data, unit_of_measurement = convert_discharge(data, data_str, parameter, unit_of_measurement) if data: result_quality.add(result_point_quality) diff --git a/tests/resources/usgs_get_data_09110000_missing_vals.json b/tests/resources/usgs_get_data_09110000_missing_vals.json new file mode 100644 index 0000000..c5ddfc3 --- /dev/null +++ b/tests/resources/usgs_get_data_09110000_missing_vals.json @@ -0,0 +1,250 @@ +{ + "name": "ns1:timeSeriesResponseType", + "declaredType": "org.cuahsi.waterml.TimeSeriesResponseType", + "scope": "javax.xml.bind.JAXBElement$GlobalScope", + "value": { + "queryInfo": { + "queryURL": "http://waterservices.usgs.gov/nwis/dvstartDT=2023-04-01&endDT=2023-04-10&parameterCd=00060&siteStatus=all&sites=09110000&siteType=ST&format=json", + "criteria": { + "locationParam": "[ALL:09110000]", + "variableParam": "[00060]", + "timeParam": { + "beginDateTime": "2023-04-01T00:00:00.000", + "endDateTime": 
"2023-04-10T00:00:00.000" + }, + "parameter": [] + }, + "note": [ + { + "value": "[ALL:09110000]", + "title": "filter:sites" + }, + { + "value": "[ST]", + "title": "filter:siteType" + }, + { + "value": "[mode=RANGE, modifiedSince=null] interval={INTERVAL[2023-04-01T00:00:00.000-04:00/2023-04-10T00:00:00.000-04:00]}", + "title": "filter:timeRange" + }, + { + "value": "methodIds=[ALL]", + "title": "filter:methodId" + }, + { + "value": "2023-12-20T19:40:46.928Z", + "title": "requestDT" + }, + { + "value": "abe6a5f0-9f6f-11ee-8c98-2cea7f58f5ca", + "title": "requestId" + }, + { + "value": "Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.", + "title": "disclaimer" + }, + { + "value": "vaas01", + "title": "server" + } + ] + }, + "timeSeries": [ + { + "sourceInfo": { + "siteName": "TAYLOR RIVER AT ALMONT, CO.", + "siteCode": [ + { + "value": "09110000", + "network": "NWIS", + "agencyCode": "USGS" + } + ], + "timeZoneInfo": { + "defaultTimeZone": { + "zoneOffset": "-07:00", + "zoneAbbreviation": "MST" + }, + "daylightSavingsTimeZone": { + "zoneOffset": "-06:00", + "zoneAbbreviation": "MDT" + }, + "siteUsesDaylightSavingsTime": true + }, + "geoLocation": { + "geogLocation": { + "srs": "EPSG:4326", + "latitude": 38.66443715, + "longitude": -106.8453172 + }, + "localSiteXY": [] + }, + "note": [], + "siteType": [], + "siteProperty": [ + { + "value": "ST", + "name": "siteTypeCd" + }, + { + "value": "14020001", + "name": "hucCd" + }, + { + "value": "08", + "name": "stateCd" + }, + { + "value": "08051", + "name": "countyCd" + } + ] + }, + "variable": { + "variableCode": [ + { + "value": "00060", + "network": "NWIS", + "vocabulary": "NWIS:UnitValues", + "variableID": 45807197, + "default": true + } + ], + "variableName": "Streamflow, ft³/s", + "variableDescription": "Discharge, cubic feet per second", + "valueType": "Derived Value", + "unit": { + "unitCode": "ft3/s" + }, + "options": { + "option": [ + { + 
"value": "Mean", + "name": "Statistic", + "optionCode": "00003" + } + ] + }, + "note": [], + "noDataValue": -999999, + "variableProperty": [], + "oid": "45807197" + }, + "values": [ + { + "value": [ + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-01T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-02T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-03T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-04T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-05T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-06T00:00:00.000" + }, + { + "value": "-999999", + "qualifiers": [ + "P", + "Ice" + ], + "dateTime": "2023-04-07T00:00:00.000" + }, + { + "value": "111", + "qualifiers": [ + "P" + ], + "dateTime": "2023-04-08T00:00:00.000" + }, + { + "value": "114", + "qualifiers": [ + "P" + ], + "dateTime": "2023-04-09T00:00:00.000" + }, + { + "value": "122", + "qualifiers": [ + "P" + ], + "dateTime": "2023-04-10T00:00:00.000" + } + ], + "qualifier": [ + { + "qualifierCode": "Ice", + "qualifierDescription": "Value is affected by ice at the measurement site.", + "qualifierID": 0, + "network": "NWIS", + "vocabulary": "uv_rmk_cd" + }, + { + "qualifierCode": "P", + "qualifierDescription": "Provisional data subject to revision.", + "qualifierID": 1, + "network": "NWIS", + "vocabulary": "uv_rmk_cd" + } + ], + "qualityControlLevel": [], + "method": [ + { + "methodDescription": "", + "methodID": 18840 + } + ], + "source": [], + "offset": [], + "sample": [], + "censorCode": [] + } + ], + "name": "USGS:09110000:00060:00003" + } + ] + }, + "nil": false, + "globalScope": true, + "typeSubstituted": false +} diff --git a/tests/test_plugins_usgs.py b/tests/test_plugins_usgs.py 
index f59953a..2c27d60 100644 --- a/tests/test_plugins_usgs.py +++ b/tests/test_plugins_usgs.py @@ -70,18 +70,24 @@ def test_measurement_timeseries_tvp_observations_usgs_errors(additional_query_pa @pytest.mark.parametrize('additional_filters, usgs_response, expected_results', [ # all-good - ({"monitoring_feature": ["USGS-09110990", "USGS-09111250"], "observed_property": ["RDC"], "result_quality": [ResultQualityEnum.VALIDATED]}, - "usgs_nwis_dv_p00060_l09110990_l09111250.json", {"statistic": StatisticEnum.MEAN, "result_quality": [ResultQualityEnum.VALIDATED], "count": 2}), + ({"monitoring_feature": ["USGS-09110990", "USGS-09111250"], "observed_property": ["RDC"], "result_quality": [ResultQualityEnum.VALIDATED], "start_date": "2020-04-01", "end_date": "2020-04-30"}, + "usgs_nwis_dv_p00060_l09110990_l09111250.json", {"statistic": StatisticEnum.MEAN, "result_quality": [ResultQualityEnum.VALIDATED], "mvp_count": 2, "result_count": [30, 30], "missing_values_count": [0, 0]}), # some-quality-filtered-data - ({"monitoring_feature": ["USGS-09110990"], "observed_property": ["WT"], "result_quality": [ResultQualityEnum.UNVALIDATED]}, + ({"monitoring_feature": ["USGS-09110990"], "observed_property": ["WT"], "result_quality": [ResultQualityEnum.UNVALIDATED], "start_date": "2020-04-01", "end_date": "2020-04-30"}, "usgs_get_data_09110000_VALIDATED_UNVALIDATED_WT_only.json", - {"statistic": StatisticEnum.MEAN, "result_quality": [ResultQualityEnum.UNVALIDATED], "count": 1, "synthesis_msgs": ['09110000 - 00010: 2 timestamps did not match data quality query.']}), + {"statistic": StatisticEnum.MEAN, "result_quality": [ResultQualityEnum.UNVALIDATED], "mvp_count": 1, "result_count": [2], "missing_values_count": [0], + "synthesis_msgs": ['09110000 - 00010: 2 timestamps did not match data quality query.']}), # all-data-filtered - ({"monitoring_feature": ["USGS-09110990"], "observed_property": ["WT"], "result_quality": [ResultQualityEnum.REJECTED]}, + ({"monitoring_feature": 
["USGS-09110990"], "observed_property": ["WT"], "result_quality": [ResultQualityEnum.REJECTED], "start_date": "2020-04-01", "end_date": "2020-04-30"}, "usgs_get_data_09110000_VALIDATED_UNVALIDATED_WT_only.json", - {"count": 0, "synthesis_msgs": []}) + {"mvp_count": 0, "result_count": [0], "missing_values_count": [0], "synthesis_msgs": []}), + # missing-values + ({"monitoring_feature": ["USGS-09110990"], "observed_property": ["RDC"], "start_date": "2023-04-01", "end_date": "2023-04-10"}, + "usgs_get_data_09110000_missing_vals.json", + {"statistic": StatisticEnum.MEAN, "result_quality": [ResultQualityEnum.UNVALIDATED], "mvp_count": 1, "result_count": [10], "missing_values_count": [7], + "synthesis_msgs": []}), ], - ids=['all-good', 'some-quality-filtered-data', 'missing-mapping']) + ids=['all-good', 'some-quality-filtered-data', 'missing-mapping', 'missing-values']) def test_measurement_timeseries_tvp_observations_usgs(additional_filters, usgs_response, expected_results, monkeypatch): """ Test USGS Timeseries data query""" @@ -92,8 +98,6 @@ def test_measurement_timeseries_tvp_observations_usgs(additional_filters, usgs_r synthesizer = register(['basin3d.plugins.usgs.USGSDataSourcePlugin']) query = { - "start_date": "2020-04-01", - "end_date": "2020-04-30", "aggregation_duration": TimeFrequencyEnum.DAY, **additional_filters } @@ -102,14 +106,22 @@ def test_measurement_timeseries_tvp_observations_usgs(additional_filters, usgs_r # loop through generator and serialized the object, get actual object and compare if isinstance(measurement_timeseries_tvp_observations, Iterator): - count = 0 + mvp_count = 0 for timeseries in measurement_timeseries_tvp_observations: data = json.loads(timeseries.to_json()) - count += 1 assert data["statistic"]["attr_mapping"]["basin3d_vocab"] == expected_results.get("statistic") for idx, result_quality in enumerate(data["result_quality"]): assert result_quality["attr_mapping"]["basin3d_vocab"] == expected_results.get("result_quality")[idx] - 
assert count == expected_results.get("count") + result_count = 0 + missing_value_count = 0 + for result_value in data["result"]["value"]: + result_count += 1 + if result_value[1] == -999999: + missing_value_count += 1 + assert result_count == expected_results.get("result_count")[mvp_count] + assert missing_value_count == expected_results.get("missing_values_count")[mvp_count] + mvp_count += 1 + assert mvp_count == expected_results.get("mvp_count") if expected_results.get('synthesis_msgs'): expected_msgs = expected_results.get('synthesis_msgs') msgs = measurement_timeseries_tvp_observations.synthesis_response.messages