diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 1e6d0aa0..47d5fd52 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -768,7 +768,7 @@ def get_time_series_metadata( unit_of_measure: str | Iterable[str] | None = None, computation_period_identifier: str | Iterable[str] | None = None, computation_identifier: str | Iterable[str] | None = None, - thresholds: int | None = None, + thresholds: float | list[float] | None = None, sublocation_identifier: str | Iterable[str] | None = None, primary: str | Iterable[str] | None = None, parent_time_series_id: str | Iterable[str] | None = None, @@ -1213,7 +1213,7 @@ def get_latest_continuous( approval_status: str | Iterable[str] | None = None, unit_of_measure: str | Iterable[str] | None = None, qualifier: str | Iterable[str] | None = None, - value: int | None = None, + value: str | Iterable[str] | None = None, last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, time: str | Iterable[str] | None = None, @@ -1407,7 +1407,7 @@ def get_latest_daily( approval_status: str | Iterable[str] | None = None, unit_of_measure: str | Iterable[str] | None = None, qualifier: str | Iterable[str] | None = None, - value: int | None = None, + value: str | Iterable[str] | None = None, last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, time: str | Iterable[str] | None = None, diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 0a9f1c71..c8aebd3f 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -86,6 +86,16 @@ "time-series-metadata": "time_series_id", } +# Every service's output id EXCEPT the two that are genuinely user-facing +# (``monitoring_location_id`` and ``time_series_id``). The rest are synthetic +# per-record ids that ``_arrange_cols`` moves to the end of a result frame. +# Derived from ``_OUTPUT_ID_BY_SERVICE`` so adding a service can't silently +# leave a stray id column at the front again. +_EXTRA_ID_COLS = set(_OUTPUT_ID_BY_SERVICE.values()) - { + "monitoring_location_id", + "time_series_id", +} + def _switch_arg_id(ls: dict[str, Any], id_name: str, service: str): """ @@ -803,7 +813,7 @@ def _next_req_url( continue href = link.get("href") if not href: - return href + return None # Refuse to follow a next-page link to a different host — # the request's headers/auth were minted for the original # host and shouldn't leak to whatever a poisoned response @@ -905,7 +915,9 @@ def _get_resp_data( # Organize json into geodataframe and make sure id column comes along. df = gpd.GeoDataFrame.from_features(features) - df["id"] = pd.json_normalize(features)["id"].values + # Mirror the non-geopandas branch's defensive ``f.get("id")`` so a feature + # missing a top-level ``id`` yields None rather than a KeyError. + df["id"] = [f.get("id") for f in features] df = df[["id"] + [col for col in df.columns if col != "id"]] # If no geometry present, then return pandas dataframe. A geodataframe @@ -1292,15 +1304,7 @@ def _arrange_cols( # Move meaningless-to-user, extra id columns to the end # of the dataframe, if they exist - extra_id_col = set(df.columns).intersection( - { - "latest_continuous_id", - "latest_daily_id", - "daily_id", - "continuous_id", - "field_measurement_id", - } - ) + extra_id_col = set(df.columns).intersection(_EXTRA_ID_COLS) # If the arbitrary id column is returned (either due to properties # being none or NaN), then move it to the end of the dataframe, but