diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py index ab079070..04a25171 100644 --- a/dataretrieval/waterdata/chunking.py +++ b/dataretrieval/waterdata/chunking.py @@ -871,13 +871,22 @@ def __init__( self.canonical_url: str | None = None axes = _extract_axes(args) - # No chunkable axes → skip ``build_request`` entirely; the - # common Water Data call shape shouldn't pay for an unused - # request prep on the passthrough hot path. The fetcher - # will run with the user's args verbatim; if that produces - # an over-budget URL, the server (or httpx itself) rejects. if not axes: - return + # No chunkable axis: nothing to split. If the single request fits, + # run it verbatim (the common passthrough). If it's over budget we + # can't chunk it (e.g. a single large CQL ``IN`` clause with no + # top-level ``OR``, or one oversized value), so raise an actionable + # error instead of shipping it for the server to reject with an + # opaque HTTP 414. ``_safe_request_bytes`` treats an un-constructable + # URL (httpx.InvalidURL, > 64 KB) as over budget. + if _safe_request_bytes(build_request, args, url_limit) <= url_limit: + return + raise RequestTooLarge( + f"Request exceeds {url_limit} bytes (URL + body) and has no " + f"chunkable multi-value argument to split (e.g. a single large " + f"CQL `IN` clause, or one oversized value). Narrow the query, " + f"simplify the filter, or split the call manually." + ) # Constructing the initial request can itself trip # ``httpx.InvalidURL`` (URL > 64 KB) — that's the canonical diff --git a/tests/waterdata_chunking_test.py b/tests/waterdata_chunking_test.py index a435a4de..98ed70c9 100644 --- a/tests/waterdata_chunking_test.py +++ b/tests/waterdata_chunking_test.py @@ -161,15 +161,24 @@ def test_extract_axes_skips_singletons_and_never_chunk_params(): def test_chunk_plan_returns_passthrough_when_no_chunkable_axes(): - """Scalar args with nothing to chunk → passthrough, even at a - URL limit the request technically exceeds (the server may 414, - but ``ChunkPlan`` has nothing to split).""" + """Scalar args with nothing to chunk and a request within the limit → + passthrough (no axes).""" args = {"monitoring_location_id": "scalar-only"} - plan = ChunkPlan(args, _fake_build, url_limit=10) + plan = ChunkPlan(args, _fake_build, url_limit=1000) assert plan.axes == [] assert plan.total == 1 +def test_chunk_plan_raises_when_unchunkable_request_exceeds_limit(): + """A request with nothing to chunk that still exceeds the byte limit (e.g. + a single large CQL ``IN`` clause with no top-level ``OR``) raises + RequestTooLarge instead of being shipped for the server to reject with an + opaque HTTP 414.""" + args = {"monitoring_location_id": "scalar-only"} + with pytest.raises(RequestTooLarge): + ChunkPlan(args, _fake_build, url_limit=10) + + def test_chunk_plan_greedy_halving_targets_largest_axis_chunk(): """The biggest chunk across all axes halves first — when one list axis dominates URL bytes, only it gets split until it stops being