Commit ba96199

🚧 added initial http error handling
1 parent 53c3a67 commit ba96199

7 files changed

Lines changed: 481 additions & 34 deletions

nemoguardrails/actions/llm/utils.py

Lines changed: 33 additions & 3 deletions
@@ -26,7 +26,7 @@
     reasoning_trace_var,
     tool_calls_var,
 )
-from nemoguardrails.exceptions import LLMCallException
+from nemoguardrails.exceptions import LLMCallException, LLMClientError
 from nemoguardrails.logging.explain import LLMCallInfo
 from nemoguardrails.logging.llm_tracker import track_llm_call
 from nemoguardrails.types import ChatMessage, LLMModel, LLMResponse, LLMResponseChunk, UsageInfo
@@ -311,6 +311,34 @@ def _update_token_stats_from_chunk(chunk: LLMResponseChunk) -> None:
         llm_stats.inc("total_completion_tokens", chunk.usage.output_tokens)


+def _extract_http_status(exception: BaseException) -> Optional[int]:
+    """Extract an HTTP status code from a provider exception, if present.
+
+    Checks, in order:
+    1. ``LLMClientError.status_code`` (NeMo Guardrails client layer).
+    2. ``exception.status_code`` (OpenAI SDK, httpx).
+    3. ``exception.response.status_code`` (requests-style wrappers).
+
+    Returns ``None`` when no status can be determined or when the status
+    is ``0`` (used by ``LLMTimeoutError`` / ``LLMConnectionError`` for
+    client-side failures where no HTTP response was received).
+    """
+    if isinstance(exception, LLMClientError):
+        return exception.status_code if exception.status_code > 0 else None
+
+    status = getattr(exception, "status_code", None)
+    if isinstance(status, int) and status > 0:
+        return status
+
+    response = getattr(exception, "response", None)
+    if response is not None:
+        status = getattr(response, "status_code", None)
+        if isinstance(status, int) and status > 0:
+            return status
+
+    return None
+
+
 def _raise_llm_call_exception(
     exception: Exception,
     model: LLMModel,
@@ -328,11 +356,13 @@ def _raise_llm_call_exception(
     if endpoint_url:
         context_parts.append(f"endpoint={endpoint_url}")

+    status = _extract_http_status(exception)
+
     if context_parts:
         detail = f"Error invoking LLM ({', '.join(context_parts)})"
-        raise LLMCallException(exception, detail=detail) from exception
+        raise LLMCallException(exception, detail=detail, status=status) from exception
     else:
-        raise LLMCallException(exception) from exception
+        raise LLMCallException(exception, status=status) from exception


 def _store_reasoning_traces(response: LLMResponse) -> None:
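A standalone sketch of the lookup chain above, using hypothetical stand-in exception classes instead of real provider SDK errors (the LLMClientError fast path behaves the same way via its status_code):

from typing import Optional


class FakeSDKError(Exception):
    """Stand-in for an OpenAI/httpx-style error exposing .status_code directly."""

    def __init__(self, status_code: int):
        super().__init__(f"HTTP {status_code}")
        self.status_code = status_code


class FakeResponse:
    def __init__(self, status_code: int):
        self.status_code = status_code


class FakeRequestsError(Exception):
    """Stand-in for a requests-style wrapper exposing .response.status_code."""

    def __init__(self, response: FakeResponse):
        super().__init__(f"HTTP {response.status_code}")
        self.response = response


def extract_http_status(exception: BaseException) -> Optional[int]:
    # Mirrors _extract_http_status above, minus the LLMClientError fast path.
    status = getattr(exception, "status_code", None)
    if isinstance(status, int) and status > 0:
        return status
    response = getattr(exception, "response", None)
    if response is not None:
        status = getattr(response, "status_code", None)
        if isinstance(status, int) and status > 0:
            return status
    return None


assert extract_http_status(FakeSDKError(429)) == 429
assert extract_http_status(FakeSDKError(0)) is None  # status 0 means "no HTTP response received"
assert extract_http_status(FakeRequestsError(FakeResponse(503))) == 503
assert extract_http_status(ValueError("no status attached")) is None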

nemoguardrails/exceptions.py

Lines changed: 12 additions & 9 deletions
@@ -69,25 +69,28 @@ class StreamingNotSupportedError(InvalidRailsConfigurationError):
 class LLMCallException(Exception):
     """A wrapper around the LLM call invocation exception.

-    This is used to propagate the exception out of the `generate_async` call. The default behavior is to
-    catch it and return an "Internal server error." message.
+    This is used to propagate the exception out of the ``generate_async`` call.
+    When the inner exception carries an HTTP status code (e.g. a
+    :class:`LLMClientError`), callers can inspect :attr:`status` to decide
+    which HTTP response code to return to the upstream client.
     """

     inner_exception: Union[BaseException, str]
     detail: Optional[str]
+    status: Optional[int]

-    def __init__(self, inner_exception: Union[BaseException, str], detail: Optional[str] = None):
-        """Initialize LLMCallException.
-
-        Args:
-            inner_exception: The original exception that occurred
-            detail: Optional context to prepend (for example, the model name or endpoint)
-        """
+    def __init__(
+        self,
+        inner_exception: Union[BaseException, str],
+        detail: Optional[str] = None,
+        status: Optional[int] = None,
+    ):
         message = f"{detail or 'LLM Call Exception'}: {str(inner_exception)}"
         super().__init__(message)

         self.inner_exception = inner_exception
         self.detail = detail
+        self.status = status


 class LLMClientError(Exception):
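A small usage sketch against the constructor signature shown above; the model name and message are illustrative only:

from nemoguardrails.exceptions import LLMCallException

try:
    raise LLMCallException(
        RuntimeError("429 Too Many Requests"),
        detail="Error invoking LLM (model=example-model)",
        status=429,  # carried through so the server layer can return 429 upstream
    )
except LLMCallException as exc:
    http_status = exc.status or 500  # fall back to 500 when no status is known
    print(http_status, exc)
    # 429 Error invoking LLM (model=example-model): 429 Too Many Requests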

nemoguardrails/guardrails/iorails.py

Lines changed: 13 additions & 3 deletions
@@ -53,6 +53,7 @@
     stream_active_metric,
     traced_request,
 )
+from nemoguardrails.llm.clients._errors import _redact_secrets
 from nemoguardrails.llm.taskmanager import LLMTaskManager
 from nemoguardrails.rails.llm.buffer import get_buffer_strategy
 from nemoguardrails.rails.llm.config import RailsConfig
@@ -413,8 +414,15 @@ async def _generation_task(request_span):
                 # streaming path.
                 if self._metrics_enabled:
                     record_request_error(e)
+                status = getattr(e, "status", None)
                 error_payload = json.dumps(
-                    {"error": {"message": str(e), "type": _GENERATION_ERROR_TYPE, "code": "generation_failed"}}
+                    {
+                        "error": {
+                            "message": _redact_secrets(str(e)),
+                            "type": "downstream_error" if status is not None else _GENERATION_ERROR_TYPE,
+                            "code": status if status is not None else "generation_failed",
+                        }
+                    }
                 )
                 await streaming_handler.push_chunk(error_payload)
                 await streaming_handler.push_chunk(END_OF_STREAM)  # type: ignore[arg-type]
@@ -521,12 +529,14 @@ async def _run_output_rails_in_streaming(
             user_output_chunks = chunk_batch.user_output_chunks
             bot_response_chunk = buffer_strategy.format_chunks(chunk_batch.processing_context)

-            # If the batch contains a generation error from _generation_task,
+            # If the batch contains an error chunk (generation or downstream HTTP),
             # yield it directly and stop — don't feed error JSON through output rails.
             for chunk in user_output_chunks:
                 try:
                     parsed = json.loads(chunk)
-                    if isinstance(parsed, dict) and parsed.get("error", {}).get("type") == _GENERATION_ERROR_TYPE:
+                    error_obj = parsed.get("error") if isinstance(parsed, dict) else None
+                    error_type = error_obj.get("type") if isinstance(error_obj, dict) else None
+                    if error_type in (_GENERATION_ERROR_TYPE, "downstream_error"):
                         yield chunk
                         return
                 except (json.JSONDecodeError, TypeError):
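The widened check now recognizes both error types and tolerates a non-dict "error" value. A self-contained sketch of the same detection, assuming _GENERATION_ERROR_TYPE is the string "generation_error":

import json

# Assumed value of _GENERATION_ERROR_TYPE for this sketch.
GENERATION_ERROR_TYPE = "generation_error"


def is_error_chunk(chunk: str) -> bool:
    try:
        parsed = json.loads(chunk)
    except (json.JSONDecodeError, TypeError):
        return False
    error_obj = parsed.get("error") if isinstance(parsed, dict) else None
    error_type = error_obj.get("type") if isinstance(error_obj, dict) else None
    return error_type in (GENERATION_ERROR_TYPE, "downstream_error")


assert is_error_chunk('{"error": {"message": "boom", "type": "generation_error", "code": "generation_failed"}}')
assert is_error_chunk('{"error": {"message": "rate limited", "type": "downstream_error", "code": 429}}')
assert not is_error_chunk("Hello, a normal text chunk")
assert not is_error_chunk('{"error": "a bare string, not a dict"}')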

nemoguardrails/guardrails/rail_action.py

Lines changed: 11 additions & 2 deletions
@@ -26,14 +26,17 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, Optional, Union

+from nemoguardrails.guardrails.api_engine import APIEngineError
 from nemoguardrails.guardrails.engine_registry import EngineRegistry
 from nemoguardrails.guardrails.guardrails_types import (
     LLMMessages,
     RailResult,
     get_request_id,
     truncate,
 )
+from nemoguardrails.guardrails.model_engine import ModelEngineError
 from nemoguardrails.guardrails.telemetry import action_span, record_span_error
+from nemoguardrails.llm.clients._errors import _redact_secrets
 from nemoguardrails.llm.taskmanager import LLMTaskManager
 from nemoguardrails.rails.llm.config import _get_flow_model, _get_flow_name
 from nemoguardrails.types import LLMResponse
@@ -101,11 +104,17 @@ async def run(
             response = await self._get_response(model_type, prompt)
             log.debug("[%s] %s response: %s", req_id, base_flow, truncate(response))
             return self._parse_response(response)
+        except (ModelEngineError, APIEngineError) as e:
+            record_span_error(span, e)
+            if e.status is not None:
+                log.error("[%s] %s failed (HTTP %d): %s", req_id, base_flow, e.status, e)
+                raise
+            log.error("[%s] %s failed: %s", req_id, base_flow, e)
+            return RailResult(is_safe=False, reason=_redact_secrets(f"{base_flow} error: {e}"))
         except Exception as e:
-            # Record an error on the OTEL span
             record_span_error(span, e)
             log.error("[%s] %s failed: %s", req_id, base_flow, e)
-            return RailResult(is_safe=False, reason=f"{base_flow} error: {e}")
+            return RailResult(is_safe=False, reason=_redact_secrets(f"{base_flow} error: {e}"))

     def _get_model_type(self, flow: str) -> Optional[str]:
         """Extract model from the flow's ``$model=`` parameter, falling back to :attr:`fallback_model`."""

nemoguardrails/rails/llm/llmrails.py

Lines changed: 42 additions & 11 deletions
@@ -77,6 +77,7 @@
 )
 from nemoguardrails.kb.kb import KnowledgeBase
 from nemoguardrails.llm.cache import CacheInterface, LFUCache
+from nemoguardrails.llm.clients._errors import _redact_secrets
 from nemoguardrails.llm.models.initializer import (
     ModelInitializationError,
     init_llm_model,
@@ -899,14 +900,9 @@ async def generate_async(
             log.error("Error in generate_async: %s", e, exc_info=True)
             streaming_handler = streaming_handler_var.get()
             if streaming_handler:
-                # Push an error chunk instead of None.
-                error_message = str(e)
-                error_dict = extract_error_json(error_message)
-                error_payload: str = json.dumps(error_dict)
+                error_payload: str = _build_streaming_error_payload(e)
                 await streaming_handler.push_chunk(error_payload)
-                # push a termination signal
                 await streaming_handler.push_chunk(END_OF_STREAM)  # type: ignore
-                # Re-raise the exact exception
                 raise
             else:
                 # In generation mode, by default the bot response is an instant action.
@@ -1265,12 +1261,8 @@ async def _generation_task():
                         state=state,
                     )
                 except Exception as e:
-                    # If an exception occurs during generation, push it to the streaming handler as a json string
-                    # This ensures the streaming pipeline is properly terminated
                     log.error(f"Error in generation task: {e}", exc_info=True)
-                    error_message = str(e)
-                    error_dict = extract_error_json(error_message)
-                    error_payload = json.dumps(error_dict)
+                    error_payload = _build_streaming_error_payload(e)
                     await streaming_handler.push_chunk(error_payload)
                     await streaming_handler.push_chunk(END_OF_STREAM)  # type: ignore
@@ -1931,3 +1923,42 @@ def _get_last_response_content(response: "GenerationResponse") -> str:
     if isinstance(response.response, str):
         return response.response
     return ""
+
+
+def _build_streaming_error_payload(e: Exception) -> str:
+    """Build a JSON error payload for SSE streaming from an exception.
+
+    Normalizes all error shapes from extract_error_json into the
+    {"error": {"message", "type", "code"}} format that iorails.py
+    expects for error chunk detection.
+    """
+    error_dict = extract_error_json(str(e))
+    if not isinstance(error_dict, dict):
+        error_dict = {}
+    error_val = error_dict.get("error")
+    status = getattr(e, "status", None)
+    error_type = "downstream_error" if status is not None else "generation_error"
+    error_code = status if status is not None else "generation_failed"
+
+    if isinstance(error_val, dict):
+        error_val["message"] = _redact_secrets(error_val.get("message", ""))
+        if status is not None:
+            error_val["code"] = status
+            error_val["type"] = "downstream_error"
+        else:
+            error_val.setdefault("type", error_type)
+            error_val.setdefault("code", error_code)
+    elif isinstance(error_val, str):
+        error_dict["error"] = {
+            "message": _redact_secrets(error_val),
+            "type": error_type,
+            "code": error_code,
+        }
+    else:
+        error_dict["error"] = {
+            "message": _redact_secrets(str(e)),
+            "type": error_type,
+            "code": error_code,
+        }
+
+    return json.dumps(error_dict)
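A condensed sketch of what the helper emits in the two cases, skipping extract_error_json and _redact_secrets (internal helpers this sketch does not reimplement) and using a hypothetical exception that carries .status:

import json
from typing import Optional


class FakeLLMCallException(Exception):
    """Stand-in for LLMCallException with the new .status attribute."""

    def __init__(self, message: str, status: Optional[int] = None):
        super().__init__(message)
        self.status = status


def build_payload(e: Exception) -> str:
    # Condensed mirror of _build_streaming_error_payload for the common case
    # where extract_error_json yields no structured error dict.
    status = getattr(e, "status", None)
    return json.dumps(
        {
            "error": {
                "message": str(e),
                "type": "downstream_error" if status is not None else "generation_error",
                "code": status if status is not None else "generation_failed",
            }
        }
    )


print(build_payload(FakeLLMCallException("model endpoint returned 503", status=503)))
# {"error": {"message": "model endpoint returned 503", "type": "downstream_error", "code": 503}}
print(build_payload(ValueError("output parsing failed")))
# {"error": {"message": "output parsing failed", "type": "generation_error", "code": "generation_failed"}}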

nemoguardrails/server/api.py

Lines changed: 24 additions & 6 deletions
@@ -31,9 +31,13 @@
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
 from pydantic import BaseModel, ValidationError
-from starlette.responses import RedirectResponse, StreamingResponse
+from starlette.responses import JSONResponse, RedirectResponse, StreamingResponse

 from nemoguardrails import LLMRails, RailsConfig, utils
+from nemoguardrails.exceptions import LLMCallException
+from nemoguardrails.guardrails.api_engine import APIEngineError
+from nemoguardrails.guardrails.model_engine import ModelEngineError
+from nemoguardrails.llm.clients._errors import _redact_secrets
 from nemoguardrails.rails.llm.config import Model
 from nemoguardrails.rails.llm.options import GenerationResponse
 from nemoguardrails.server.datastore.datastore import DataStore
@@ -364,7 +368,7 @@ class ChunkErrorMetadata(BaseModel):
     message: str
     type: Optional[str] = None
     param: Optional[str] = None
-    code: Optional[str] = None
+    code: Union[str, int, None] = None


 class ChunkError(BaseModel):
@@ -589,12 +593,26 @@ async def chat_completion(body: GuardrailsChatCompletionRequest, request: Reques

     except HTTPException:
         raise
+    except (LLMCallException, ModelEngineError, APIEngineError) as ex:
+        log.exception(ex)
+        status = getattr(ex, "status", None) or 500
+        return JSONResponse(
+            status_code=status,
+            content=create_error_chat_completion(
+                model=body.model,
+                error_message=_redact_secrets(str(ex)),
+                config_id=config_ids[0] if config_ids else None,
+            ).model_dump(),
+        )
     except Exception as ex:
         log.exception(ex)
-        return create_error_chat_completion(
-            model=body.model,
-            error_message="Internal server error",
-            config_id=config_ids[0] if config_ids else None,
+        return JSONResponse(
+            status_code=500,
+            content=create_error_chat_completion(
+                model=body.model,
+                error_message="Internal server error",
+                config_id=config_ids[0] if config_ids else None,
+            ).model_dump(),
         )
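With this handler, a 429 or 503 from the downstream model is mirrored to the client instead of a blanket 500. A sketch of the status resolution, assuming only that the caught exception may carry a .status that can be None:

from typing import Optional


def resolve_response_status(status: Optional[int]) -> int:
    # Mirrors `getattr(ex, "status", None) or 500`: None and 0 both fall back to 500.
    return status or 500


assert resolve_response_status(429) == 429   # downstream rate limit mirrored
assert resolve_response_status(503) == 503   # downstream outage mirrored
assert resolve_response_status(None) == 500  # no HTTP status known
assert resolve_response_status(0) == 500     # status 0 (timeout/connection) also maps to 500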