Skip to content
Merged
4 changes: 2 additions & 2 deletions packages/optimization/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "ldai_optimizer"
name = "launchdarkly-ai-optimizer"
version = "0.1.0" # x-release-please-version
description = "LaunchDarkly AI tool — optimizer"
description = "LaunchDarkly AI tool — Optimization"
authors = [{name = "LaunchDarkly", email = "dev@launchdarkly.com"}]
license = {text = "Apache-2.0"}
readme = "README.md"
Expand Down
589 changes: 514 additions & 75 deletions packages/optimization/src/ldai_optimizer/client.py

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions packages/optimization/src/ldai_optimizer/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class JudgeResult:
rationale: Optional[str] = None
duration_ms: Optional[float] = None
usage: Optional[TokenUsage] = None
estimated_cost_usd: Optional[float] = None

def to_json(self) -> Dict[str, Any]:
"""
Expand All @@ -61,6 +62,8 @@ def to_json(self) -> Dict[str, Any]:
"input": self.usage.input,
"output": self.usage.output,
}
if self.estimated_cost_usd is not None:
result["estimated_cost_usd"] = self.estimated_cost_usd
return result


Expand Down Expand Up @@ -217,6 +220,8 @@ class OptimizationContext:
iteration: int = 0 # current iteration number
duration_ms: Optional[float] = None # wall-clock time for the agent call in milliseconds
usage: Optional[TokenUsage] = None # token usage reported by the agent for this iteration
estimated_cost_usd: Optional[float] = None # estimated cost in USD; None when no estimate is available
accumulated_token_usage: Optional[int] = None # single running total across ALL calls in this run (generation + judges + variation)

def copy_without_history(self) -> OptimizationContext:
"""
Expand All @@ -236,6 +241,8 @@ def copy_without_history(self) -> OptimizationContext:
iteration=self.iteration,
duration_ms=self.duration_ms,
usage=self.usage,
estimated_cost_usd=self.estimated_cost_usd,
accumulated_token_usage=self.accumulated_token_usage,
)

def to_json(self) -> Dict[str, Any]:
Expand All @@ -261,6 +268,8 @@ def to_json(self) -> Dict[str, Any]:
"history": history_list,
"iteration": self.iteration,
"duration_ms": self.duration_ms,
"estimated_cost_usd": self.estimated_cost_usd,
"accumulated_token_usage": self.accumulated_token_usage,
}
if self.usage is not None:
result["usage"] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class AgentOptimizationResultPatch(TypedDict, total=False):
completionResponse: str
scores: Dict[str, Any]
generationLatency: int
generationTokens: Dict[str, int]
generationTokens: Dict[str, Any]
evaluationLatencies: Dict[str, float]
evaluationTokens: Dict[str, Dict[str, int]]
variation: Dict[str, Any]
Expand Down
114 changes: 114 additions & 0 deletions packages/optimization/src/ldai_optimizer/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
re.IGNORECASE,
)

# Whole-word, case-insensitive pattern listing cost-related vocabulary
# ("cheap", "budget", "pricing", ...). It is searched against judge
# acceptance statements to decide whether cost optimization guidance should
# be added to the variation prompt.
_COST_KEYWORDS = re.compile(
r"\b(cheap|cheaper|cheapest|costs?|costly|expensive|budget|affordable|"
r"spend|spending|economical|cost-effective|frugal|"
r"price|pricing|bill|billing)\b",
re.IGNORECASE,
)
Comment thread
cursor[bot] marked this conversation as resolved.


def _acceptance_criteria_implies_duration_optimization(
judges: Optional[Dict[str, OptimizationJudge]],
Expand All @@ -39,6 +46,28 @@ def _acceptance_criteria_implies_duration_optimization(
return False


def _acceptance_criteria_implies_cost_optimization(
    judges: Optional[Dict[str, OptimizationJudge]],
) -> bool:
    """Report whether any judge's acceptance statement expresses a cost goal.

    Each judge's ``acceptance_statement`` is searched with the
    case-insensitive ``_COST_KEYWORDS`` pattern. A missing or empty
    ``judges`` mapping, and judges without an acceptance statement, never
    produce a match.

    :param judges: Judge configuration dict from OptimizationOptions, or None.
    :return: True if at least one acceptance statement contains a cost keyword.
    """
    if not judges:
        return False
    # Generator keeps the scan lazy: it stops at the first matching statement.
    return any(
        bool(judge.acceptance_statement)
        and _COST_KEYWORDS.search(judge.acceptance_statement) is not None
        for judge in judges.values()
    )


def build_message_history_text(
history: List[OptimizationContext],
input_text: str,
Expand Down Expand Up @@ -114,6 +143,8 @@ def build_new_variation_prompt(
variable_choices: List[Dict[str, Any]],
initial_instructions: str,
optimize_for_duration: bool = False,
optimize_for_cost: bool = False,
quality_already_passing: bool = False,
) -> str:
"""
Build the LLM prompt for generating an improved agent configuration.
Expand All @@ -133,6 +164,11 @@ def build_new_variation_prompt(
:param initial_instructions: The original unmodified instructions template
:param optimize_for_duration: When True, appends a duration optimization section
instructing the LLM to prefer faster models and simpler instructions.
:param optimize_for_cost: When True, appends a cost optimization section
instructing the LLM to prefer cheaper models and reduce token usage.
:param quality_already_passing: When True, signals that all judge criteria are
currently passing and the cost optimization section should instruct the LLM
to preserve existing behavior while only reducing cost.
:return: The assembled prompt string
"""
sections = [
Expand All @@ -147,6 +183,7 @@ def build_new_variation_prompt(
history, model_choices, variable_choices, initial_instructions
),
variation_prompt_duration_optimization(model_choices) if optimize_for_duration else "",
variation_prompt_cost_optimization(model_choices, quality_already_passing=quality_already_passing) if optimize_for_cost else "",
]

return "\n\n".join(s for s in sections if s)
Expand Down Expand Up @@ -248,6 +285,8 @@ def variation_prompt_configuration(
lines.append(f"Agent response: <untrusted>{previous_ctx.completion_response}</untrusted>")
if previous_ctx.duration_ms is not None:
lines.append(f"Agent duration: {previous_ctx.duration_ms:.0f}ms")
if previous_ctx.estimated_cost_usd is not None:
lines.append(f"Estimated agent cost: ${previous_ctx.estimated_cost_usd:.6f}")
Comment thread
cursor[bot] marked this conversation as resolved.
return "\n".join(lines)
else:
return "\n".join(
Expand Down Expand Up @@ -301,6 +340,8 @@ def variation_prompt_feedback(
lines.append(feedback_line)
if ctx.duration_ms is not None:
lines.append(f"Agent duration: {ctx.duration_ms:.0f}ms")
if ctx.estimated_cost_usd is not None:
lines.append(f"Estimated agent cost: ${ctx.estimated_cost_usd:.6f}")
return "\n".join(lines)


Expand Down Expand Up @@ -556,3 +597,76 @@ def variation_prompt_duration_optimization(model_choices: List[str]) -> str:
"Quality criteria remain the primary objective — do not sacrifice passing scores to achieve lower latency.",
]
)


def variation_prompt_cost_optimization(
    model_choices: List[str],
    quality_already_passing: bool = False,
) -> str:
    """
    Build the cost optimization section of the variation prompt.

    The section tells the LLM that cost is a secondary objective behind the
    quality criteria and lists concrete strategies: trimming input tokens,
    limiting output tokens, and choosing a cheaper model.

    The opening lines depend on ``quality_already_passing``. When it is True
    the text states that all quality criteria currently pass and directs the
    LLM to reduce cost without changing the agent's behavior. Otherwise it
    asks for a variation that reduces cost in addition to improving quality.

    :param model_choices: Model IDs the LLM may select from; interpolated
        verbatim into the "Available models" line.
    :param quality_already_passing: Selects the "preserve behavior, only cut
        cost" framing when True.
    :return: The cost optimization prompt block, lines joined with newlines.
    """
    # Both framings open with the same heading and premise.
    prompt_lines = [
        "## Cost Optimization:",
        "The acceptance criteria for this optimization implies that token usage / cost should be reduced.",
    ]

    if not quality_already_passing:
        prompt_lines.append(
            "In addition to improving quality, generate a variation that aims to reduce the agent's cost."
        )
    else:
        prompt_lines.extend(
            (
                "*** IMPORTANT: All quality acceptance criteria are currently passing. ***",
                "The goal of this variation is to reduce cost WITHOUT changing the behavior or quality of the agent's responses.",
                "Do NOT alter the instructions in ways that would change what the agent says or how it reasons.",
                "Only apply changes that reduce token usage or switch to a cheaper model while preserving the same output quality.",
                "If you cannot reduce cost without risking quality, keep the instructions unchanged and only consider a cheaper model.",
            )
        )
    # Blank separator between the framing and the shared strategy list.
    prompt_lines.append("")

    # Strategy guidance that is identical for both framings.
    prompt_lines.extend(
        (
            "Cost is driven by two factors: (1) the number of tokens processed, and (2) the per-token price of the model.",
            "Target both factors with the strategies below.",
            "",
            "### Reducing token usage (input tokens):",
            "- Remove redundant, verbose, or repeated phrasing from the instructions.",
            "- Collapse multi-sentence explanations into a single concise directive.",
            "- Remove examples or few-shot demonstrations unless they are essential for accuracy.",
            "- Eliminate instructional scaffolding that the model does not need (e.g. 'You are a helpful assistant that...').",
            "- Use bullet points instead of prose where possible — they are more token-efficient.",
            "",
            "### Reducing token usage (output tokens):",
            "- Instruct the agent to be concise and avoid unnecessary elaboration.",
            "- Specify the exact format and length of the expected response (e.g. 'Respond in one sentence.').",
            "- Set or reduce max_tokens if the current value allows longer responses than needed.",
            "- Avoid instructions that encourage the agent to 'explain its reasoning' unless required by the acceptance criteria.",
            "",
            "### Reducing per-token cost via model selection:",
            "- Consider switching to a cheaper model from the available choices if quality requirements can still be met.",
            f" Available models: {model_choices}",
            " Use your knowledge of relative model pricing to prefer lower-cost options.",
            " Only switch models if the cheaper model is capable of satisfying the acceptance criteria.",
            "",
            "Quality criteria remain the primary objective — do not sacrifice passing scores to achieve lower cost.",
            "Apply cost-reduction changes incrementally: prefer the smallest change that measurably reduces cost.",
        )
    )

    return "\n".join(prompt_lines)
46 changes: 45 additions & 1 deletion packages/optimization/src/ldai_optimizer/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import logging
import random
import re
from typing import Any, Awaitable, Dict, List, Optional, Tuple, TypeVar, Union
from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple, TypeVar, Union

if TYPE_CHECKING:
from ldai.tracker import TokenUsage

from ldai_optimizer._slug_words import _ADJECTIVES, _NOUNS

Expand Down Expand Up @@ -313,3 +316,44 @@ def judge_passed(score: float, threshold: float, is_inverted: bool) -> bool:
the score must stay at or below the threshold: ``score <= threshold``.
"""
return score <= threshold if is_inverted else score >= threshold


def estimate_cost(
    usage: Optional["TokenUsage"],
    model_config: Optional[Dict[str, Any]],
) -> Optional[float]:
    """Estimate the USD cost of one agent call from token usage and pricing.

    Pricing is read from the ``costPerInputToken`` and ``costPerOutputToken``
    keys of ``model_config``. Each side (input / output) contributes only when
    both its price and its token count are present. The result is either a
    USD amount or ``None`` — never a raw token count — so values stay
    comparable across iterations even when pricing availability differs.

    ``costPerCachedInputToken`` is deliberately not consulted.

    :param usage: Token usage from the agent call. When ``None``, returns ``None``.
    :param model_config: Model config dict from ``get_model_configs()``, or ``None``.
    :return: Estimated cost in USD, or ``None`` when usage is absent, when
        neither pricing key is set, or when no priced side has a token count.
    """
    # A missing/empty config carries no pricing, so the outcome is the same
    # as having no usage at all.
    if usage is None or not model_config:
        return None

    rate_in = model_config.get("costPerInputToken")
    rate_out = model_config.get("costPerOutputToken")
    if rate_in is None and rate_out is None:
        return None

    # Collect each side that has both a price and a token count.
    components = []
    if rate_in is not None and usage.input is not None:
        components.append(usage.input * rate_in)
    if rate_out is not None and usage.output is not None:
        components.append(usage.output * rate_out)

    if not components:
        return None

    # Accumulate from a float zero so the result is always a float.
    total = 0.0
    for amount in components:
        total += amount
    return total
Loading
Loading