launchdarkly · andrewklatzke · May 18, 2026 · May 6, 2026 · May 6, 2026 · May 7, 2026
@@ -1,7 +1,7 @@
 [project]
-name = "ldai_optimizer"
+name = "launchdarkly-ai-optimizer"
 version = "0.1.0"  # x-release-please-version
-description = "LaunchDarkly AI tool — optimizer"
+description = "LaunchDarkly AI tool — Optimization"
 authors = [{name = "LaunchDarkly", email = "dev@launchdarkly.com"}]
 license = {text = "Apache-2.0"}
 readme = "README.md"

@@ -43,6 +43,7 @@ class JudgeResult:
     rationale: Optional[str] = None
     duration_ms: Optional[float] = None
     usage: Optional[TokenUsage] = None
+    estimated_cost_usd: Optional[float] = None
 
     def to_json(self) -> Dict[str, Any]:
         """
@@ -61,6 +62,8 @@ def to_json(self) -> Dict[str, Any]:
                 "input": self.usage.input,
                 "output": self.usage.output,
             }
+        if self.estimated_cost_usd is not None:
+            result["estimated_cost_usd"] = self.estimated_cost_usd
         return result
 
 
@@ -217,6 +220,8 @@ class OptimizationContext:
     iteration: int = 0  # current iteration number
     duration_ms: Optional[float] = None  # wall-clock time for the agent call in milliseconds
     usage: Optional[TokenUsage] = None  # token usage reported by the agent for this iteration
+    estimated_cost_usd: Optional[float] = None  # estimated cost in USD; None when usage or pricing data is unavailable
+    accumulated_token_usage: Optional[int] = None  # single running total across ALL calls in this run (generation + judges + variation)
 
     def copy_without_history(self) -> OptimizationContext:
         """
@@ -236,6 +241,8 @@ def copy_without_history(self) -> OptimizationContext:
             iteration=self.iteration,
             duration_ms=self.duration_ms,
             usage=self.usage,
+            estimated_cost_usd=self.estimated_cost_usd,
+            accumulated_token_usage=self.accumulated_token_usage,
         )
 
     def to_json(self) -> Dict[str, Any]:
@@ -261,6 +268,8 @@ def to_json(self) -> Dict[str, Any]:
             "history": history_list,
             "iteration": self.iteration,
             "duration_ms": self.duration_ms,
+            "estimated_cost_usd": self.estimated_cost_usd,
+            "accumulated_token_usage": self.accumulated_token_usage,
         }
         if self.usage is not None:
             result["usage"] = {

@@ -118,7 +118,7 @@ class AgentOptimizationResultPatch(TypedDict, total=False):
     completionResponse: str
     scores: Dict[str, Any]
     generationLatency: int
-    generationTokens: Dict[str, int]
+    generationTokens: Dict[str, Any]
     evaluationLatencies: Dict[str, float]
     evaluationTokens: Dict[str, Dict[str, int]]
     variation: Dict[str, Any]

@@ -16,6 +16,13 @@
     re.IGNORECASE,
 )
 
+# Cost-related vocabulary searched for in judge acceptance statements by
+# _acceptance_criteria_implies_cost_optimization. The \b anchors restrict
+# matches to whole words and re.IGNORECASE makes the search case-insensitive.
+_COST_KEYWORDS = re.compile(
+    r"\b(cheap|cheaper|cheapest|costs?|costly|expensive|budget|affordable|"
+    r"spend|spending|economical|cost-effective|frugal|"
+    r"price|pricing|bill|billing)\b",
+    re.IGNORECASE,
+)
+
 
 def _acceptance_criteria_implies_duration_optimization(
     judges: Optional[Dict[str, OptimizationJudge]],
@@ -39,6 +46,28 @@ def _acceptance_criteria_implies_duration_optimization(
     return False
 
 
+def _acceptance_criteria_implies_cost_optimization(
+    judges: Optional[Dict[str, OptimizationJudge]],
+) -> bool:
+    """Report whether any judge's acceptance statement mentions cost.
+
+    Each judge's ``acceptance_statement`` is searched with ``_COST_KEYWORDS``
+    (a case-insensitive pattern). A ``None`` or empty ``judges`` mapping, and
+    judges that carry no acceptance statement, never produce a match.
+
+    :param judges: Judge configuration dict from OptimizationOptions, or None.
+    :return: True if cost optimization should be applied.
+    """
+    if not judges:
+        return False
+    statements = (judge.acceptance_statement for judge in judges.values())
+    # Empty/None statements are skipped before the regex is consulted.
+    return any(
+        bool(text) and _COST_KEYWORDS.search(text) is not None
+        for text in statements
+    )
+
+
 def build_message_history_text(
     history: List[OptimizationContext],
     input_text: str,
@@ -114,6 +143,8 @@ def build_new_variation_prompt(
     variable_choices: List[Dict[str, Any]],
     initial_instructions: str,
     optimize_for_duration: bool = False,
+    optimize_for_cost: bool = False,
+    quality_already_passing: bool = False,
 ) -> str:
     """
     Build the LLM prompt for generating an improved agent configuration.
@@ -133,6 +164,11 @@ def build_new_variation_prompt(
     :param initial_instructions: The original unmodified instructions template
     :param optimize_for_duration: When True, appends a duration optimization section
         instructing the LLM to prefer faster models and simpler instructions.
+    :param optimize_for_cost: When True, appends a cost optimization section
+        instructing the LLM to prefer cheaper models and reduce token usage.
+    :param quality_already_passing: When True, signals that all judge criteria are
+        currently passing and the cost optimization section should instruct the LLM
+        to preserve existing behavior while only reducing cost.
     :return: The assembled prompt string
     """
     sections = [
@@ -147,6 +183,7 @@ def build_new_variation_prompt(
             history, model_choices, variable_choices, initial_instructions
         ),
         variation_prompt_duration_optimization(model_choices) if optimize_for_duration else "",
+        variation_prompt_cost_optimization(model_choices, quality_already_passing=quality_already_passing) if optimize_for_cost else "",
     ]
 
     return "\n\n".join(s for s in sections if s)
@@ -248,6 +285,8 @@ def variation_prompt_configuration(
         lines.append(f"Agent response: <untrusted>{previous_ctx.completion_response}</untrusted>")
         if previous_ctx.duration_ms is not None:
             lines.append(f"Agent duration: {previous_ctx.duration_ms:.0f}ms")
+        if previous_ctx.estimated_cost_usd is not None:
+            lines.append(f"Estimated agent cost: ${previous_ctx.estimated_cost_usd:.6f}")
         return "\n".join(lines)
     else:
         return "\n".join(
@@ -301,6 +340,8 @@ def variation_prompt_feedback(
                 lines.append(feedback_line)
         if ctx.duration_ms is not None:
             lines.append(f"Agent duration: {ctx.duration_ms:.0f}ms")
+        if ctx.estimated_cost_usd is not None:
+            lines.append(f"Estimated agent cost: ${ctx.estimated_cost_usd:.6f}")
     return "\n".join(lines)
 
 
@@ -556,3 +597,76 @@ def variation_prompt_duration_optimization(model_choices: List[str]) -> str:
             "Quality criteria remain the primary objective — do not sacrifice passing scores to achieve lower latency.",
         ]
     )
+
+
+def variation_prompt_cost_optimization(
+    model_choices: List[str],
+    quality_already_passing: bool = False,
+) -> str:
+    """
+    Build the cost optimization section of the variation prompt.
+
+    The section always opens with the same heading and statement of intent,
+    followed by a framing paragraph and then a fixed list of cost-reduction
+    strategies (input tokens, output tokens, model selection). Quality is
+    described as the primary objective, with cost as a secondary one.
+
+    The framing paragraph depends on ``quality_already_passing``:
+
+    - False: the LLM is asked to reduce cost in addition to improving quality.
+    - True: the LLM is told that all quality criteria already pass and that it
+      must preserve the agent's behavior, changing only what reduces cost.
+
+    :param model_choices: List of model IDs the LLM may select from; rendered
+        verbatim into the "Available models" line.
+    :param quality_already_passing: When True, emit the preserve-behavior
+        framing instead of the improve-quality-and-cost framing.
+    :return: The cost optimization prompt block, lines joined by newlines.
+    """
+    # Heading and intent statement are common to both framings.
+    prompt_lines = [
+        "## Cost Optimization:",
+        "The acceptance criteria for this optimization implies that token usage / cost should be reduced.",
+    ]
+
+    if not quality_already_passing:
+        prompt_lines.append(
+            "In addition to improving quality, generate a variation that aims to reduce the agent's cost."
+        )
+    else:
+        prompt_lines.extend(
+            (
+                "*** IMPORTANT: All quality acceptance criteria are currently passing. ***",
+                "The goal of this variation is to reduce cost WITHOUT changing the behavior or quality of the agent's responses.",
+                "Do NOT alter the instructions in ways that would change what the agent says or how it reasons.",
+                "Only apply changes that reduce token usage or switch to a cheaper model while preserving the same output quality.",
+                "If you cannot reduce cost without risking quality, keep the instructions unchanged and only consider a cheaper model.",
+            )
+        )
+    # Blank line separating the framing from the strategy list.
+    prompt_lines.append("")
+
+    # Strategy guidance shared by both framings.
+    prompt_lines.extend(
+        (
+            "Cost is driven by two factors: (1) the number of tokens processed, and (2) the per-token price of the model.",
+            "Target both factors with the strategies below.",
+            "",
+            "### Reducing token usage (input tokens):",
+            "- Remove redundant, verbose, or repeated phrasing from the instructions.",
+            "- Collapse multi-sentence explanations into a single concise directive.",
+            "- Remove examples or few-shot demonstrations unless they are essential for accuracy.",
+            "- Eliminate instructional scaffolding that the model does not need (e.g. 'You are a helpful assistant that...').",
+            "- Use bullet points instead of prose where possible — they are more token-efficient.",
+            "",
+            "### Reducing token usage (output tokens):",
+            "- Instruct the agent to be concise and avoid unnecessary elaboration.",
+            "- Specify the exact format and length of the expected response (e.g. 'Respond in one sentence.').",
+            "- Set or reduce max_tokens if the current value allows longer responses than needed.",
+            "- Avoid instructions that encourage the agent to 'explain its reasoning' unless required by the acceptance criteria.",
+            "",
+            "### Reducing per-token cost via model selection:",
+            "- Consider switching to a cheaper model from the available choices if quality requirements can still be met.",
+            f"  Available models: {model_choices}",
+            "  Use your knowledge of relative model pricing to prefer lower-cost options.",
+            "  Only switch models if the cheaper model is capable of satisfying the acceptance criteria.",
+            "",
+            "Quality criteria remain the primary objective — do not sacrifice passing scores to achieve lower cost.",
+            "Apply cost-reduction changes incrementally: prefer the smallest change that measurably reduces cost.",
+        )
+    )
+
+    return "\n".join(prompt_lines)
@@ -5,7 +5,10 @@
 import logging
 import random
 import re
-from typing import Any, Awaitable, Dict, List, Optional, Tuple, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple, TypeVar, Union
+
+if TYPE_CHECKING:
+    from ldai.tracker import TokenUsage
 
 from ldai_optimizer._slug_words import _ADJECTIVES, _NOUNS
 
@@ -313,3 +316,44 @@ def judge_passed(score: float, threshold: float, is_inverted: bool) -> bool:
     the score must stay at or below the threshold: ``score <= threshold``.
     """
     return score <= threshold if is_inverted else score >= threshold
+
+
+def estimate_cost(
+    usage: Optional["TokenUsage"],
+    model_config: Optional[Dict[str, Any]],
+) -> Optional[float]:
+    """Estimate the USD cost of one agent call from token counts and prices.
+
+    Multiplies ``usage.input`` by ``costPerInputToken`` and ``usage.output``
+    by ``costPerOutputToken`` (both read from ``model_config``) and sums the
+    products. A side only contributes when both its price and its token count
+    are present, so the result may cover just one side.
+
+    The result is either a USD amount or ``None`` — never a raw token count —
+    so values stay comparable across iterations whose models may or may not
+    carry pricing data.
+
+    ``costPerCachedInputToken`` is not read; only input and output tokens are
+    priced.
+
+    :param usage: Token usage from the agent call. When ``None``, returns ``None``.
+    :param model_config: Model config dict from ``get_model_configs()``, or ``None``.
+    :return: Estimated cost in USD, or ``None`` when ``usage`` is ``None``, when
+        no pricing field is present, or when no priced side has a token count.
+    """
+    if usage is None:
+        return None
+
+    pricing = model_config or {}
+    total = 0.0
+    priced_any = False
+    for token_attr, price_key in (
+        ("input", "costPerInputToken"),
+        ("output", "costPerOutputToken"),
+    ):
+        unit_price = pricing.get(price_key)
+        if unit_price is None:
+            continue
+        # Token counts are only read for sides that actually have a price.
+        token_count = getattr(usage, token_attr)
+        if token_count is None:
+            continue
+        total += token_count * unit_price
+        priced_any = True
+
+    return total if priced_any else None