From 1fde6cf69e003d9def885d6217767600f2b28366 Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sat, 30 May 2026 16:19:58 +0000
Subject: [PATCH 1/6] feat(speech-gateway): expose Prometheus metrics

Instrument the /tts and /stt handlers with request count, latency, in-flight, upstream-call latency, and rejection-reason metrics, and serve them at GET /metrics (ungated by the concurrency semaphore).

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 examples/speech-gateway/.gitignore            |   2 +-
 examples/speech-gateway/README.md             |  25 ++++
 examples/speech-gateway/cmd/gateway/main.go   |  19 ++-
 .../speech-gateway/cmd/gateway/metrics.go     | 108 ++++++++++++++++++
 .../cmd/gateway/metrics_test.go               |  62 ++++++++++
 examples/speech-gateway/go.mod                |  15 +++
 examples/speech-gateway/go.sum                |  46 ++++++++
 7 files changed, 271 insertions(+), 6 deletions(-)
 create mode 100644 examples/speech-gateway/cmd/gateway/metrics.go
 create mode 100644 examples/speech-gateway/cmd/gateway/metrics_test.go
 create mode 100644 examples/speech-gateway/go.sum

diff --git a/examples/speech-gateway/.gitignore b/examples/speech-gateway/.gitignore
index ad230ccfe..9064eb2e2 100644
--- a/examples/speech-gateway/.gitignore
+++ b/examples/speech-gateway/.gitignore
@@ -1 +1 @@
-gateway
+/gateway
diff --git a/examples/speech-gateway/README.md b/examples/speech-gateway/README.md
index 7589bf712..2c8c73e87 100644
--- a/examples/speech-gateway/README.md
+++ b/examples/speech-gateway/README.md
@@ -54,3 +54,28 @@ curl -H "Content-Type: text/plain" --data 'Hello from StreamKit' http://127.0.0.
 ```
 
 Response is `audio/ogg` (Opus mono).
+
+## Metrics
+
+The gateway exposes Prometheus metrics at `GET /metrics` (via `promhttp`). This
+route is **not** gated by `GATEWAY_MAX_CONCURRENCY`, so it stays scrapable even
+when all request slots are in use. A public/hosted instance (e.g. behind
+`tts.streamkit.dev` / `stt.streamkit.dev`) may choose not to expose `/metrics`
+externally — scrape it from inside the trust boundary instead.
+
+Every metric carries an `endpoint` label whose value is exactly `tts` or `stt`.
+
+| Metric | Type | Labels | Description |
+|--------|------|--------|-------------|
+| `gateway_requests_total` | counter | `endpoint`, `method`, `code` | Requests served, by HTTP method and status code. |
+| `gateway_request_duration_seconds` | histogram | `endpoint` | Total handler latency. |
+| `gateway_inflight_requests` | gauge | `endpoint` | Requests currently being handled. |
+| `gateway_upstream_duration_seconds` | histogram | `endpoint` | Time spent calling the skit backend `/api/v1/process`. |
+| `gateway_rejected_total` | counter | `endpoint`, `reason` | Rejected requests; `reason` ∈ `bad_content_type` (415), `too_large` (413), `upstream_error` (502). |
+
+Histogram buckets are tuned for multi-second STT/TTS workloads:
+`0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30` seconds.
+
+```sh
+curl http://127.0.0.1:8080/metrics
+```
diff --git a/examples/speech-gateway/cmd/gateway/main.go b/examples/speech-gateway/cmd/gateway/main.go
index 1e6e79432..557e1954e 100644
--- a/examples/speech-gateway/cmd/gateway/main.go
+++ b/examples/speech-gateway/cmd/gateway/main.go
@@ -22,6 +22,8 @@ import (
 	"strings"
 	"time"
 	"unicode/utf8"
+
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
 
 const (
@@ -126,8 +128,11 @@ func main() {
 	}
 
 	mux := http.NewServeMux()
-	mux.HandleFunc("/stt", gw.handleSTT)
-	mux.HandleFunc("/tts", gw.handleTTS)
+	mux.HandleFunc("/stt", instrument("stt", gw.handleSTT))
+	mux.HandleFunc("/tts", instrument("tts", gw.handleTTS))
+	// /metrics is intentionally not gated by the concurrency semaphore so it
+	// stays scrapable while request slots are saturated.
+	mux.Handle("/metrics", promhttp.Handler())
 
 	server := &http.Server{
 		Addr:              cfg.listenAddr,
@@ -244,7 +249,7 @@ func (gw *gateway) handleSTT(w http.ResponseWriter, r *http.Request) {
 	defer release()
 	r.Body = http.MaxBytesReader(w, r.Body, gw.maxBodySize)
 	useBuffer := r.ContentLength > 0 && r.ContentLength <= gw.maxBodySize
-	if err := gw.proxyMultipart(w, r, sttPipelineYAML, "media", "audio/ogg", useBuffer); err != nil {
+	if err := gw.proxyMultipart(w, r, "stt", sttPipelineYAML, "media", "audio/ogg", useBuffer); err != nil {
 		log.Printf("stt error: %v", err)
 		if !errors.Is(err, context.Canceled) {
 			http.Error(w, "upstream error", http.StatusBadGateway)
@@ -315,7 +320,7 @@ func (gw *gateway) handleTTS(w http.ResponseWriter, r *http.Request) {
 	r.Body = io.NopCloser(bytes.NewReader(textBytes))
 
 	useBuffer := true // We've already buffered it
-	if err := gw.proxyMultipart(w, r, ttsPipelineYAML, "media", "text/plain", useBuffer); err != nil {
+	if err := gw.proxyMultipart(w, r, "tts", ttsPipelineYAML, "media", "text/plain", useBuffer); err != nil {
 		log.Printf("tts error: %v", err)
 		if !errors.Is(err, context.Canceled) {
 			http.Error(w, "upstream error", http.StatusBadGateway)
@@ -323,7 +328,7 @@ func (gw *gateway) handleTTS(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, pipelineYAML, mediaField, mediaContentType string, bufferBody bool) error {
+func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, endpoint, pipelineYAML, mediaField, mediaContentType string, bufferBody bool) error {
 	ctx := r.Context()
 
 	// Optionally buffer the request body for finite uploads (helps curl -T file).
@@ -376,6 +381,10 @@ func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, pipeli
 		req.Header.Set("Authorization", "Bearer "+gw.authToken)
 	}
 
+	upstreamStart := time.Now()
+	defer func() {
+		upstreamDuration.WithLabelValues(endpoint).Observe(time.Since(upstreamStart).Seconds())
+	}()
 	resp, err := gw.client.Do(req)
 	if err != nil {
 		log.Printf("call skit failed: %v", err)
diff --git a/examples/speech-gateway/cmd/gateway/metrics.go b/examples/speech-gateway/cmd/gateway/metrics.go
new file mode 100644
index 000000000..c149c8132
--- /dev/null
+++ b/examples/speech-gateway/cmd/gateway/metrics.go
@@ -0,0 +1,108 @@
+// SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+//
+// SPDX-License-Identifier: MPL-2.0
+
+package main
+
+import (
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+// Buckets span sub-100ms rejections up to multi-second STT/TTS synthesis.
+var latencyBuckets = []float64{0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
+
+var (
+	requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "gateway_requests_total",
+		Help: "Total speech-gateway requests by endpoint, method, and HTTP status code.",
+	}, []string{"endpoint", "method", "code"})
+
+	requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Name:    "gateway_request_duration_seconds",
+		Help:    "Total handler latency in seconds by endpoint.",
+		Buckets: latencyBuckets,
+	}, []string{"endpoint"})
+
+	inflightRequests = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "gateway_inflight_requests",
+		Help: "In-flight speech-gateway requests by endpoint.",
+	}, []string{"endpoint"})
+
+	upstreamDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Name:    "gateway_upstream_duration_seconds",
+		Help:    "Time spent calling the skit backend /api/v1/process by endpoint.",
+		Buckets: latencyBuckets,
+	}, []string{"endpoint"})
+
+	rejectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "gateway_rejected_total",
+		Help: "Rejected requests by endpoint and reason.",
+	}, []string{"endpoint", "reason"})
+)
+
+// statusRecorder captures the response status code while preserving
+// http.Flusher so proxied responses keep streaming incrementally.
+type statusRecorder struct {
+	http.ResponseWriter
+	code        int
+	wroteHeader bool
+}
+
+func (s *statusRecorder) WriteHeader(code int) {
+	if !s.wroteHeader {
+		s.code = code
+		s.wroteHeader = true
+	}
+	s.ResponseWriter.WriteHeader(code)
+}
+
+func (s *statusRecorder) Write(b []byte) (int, error) {
+	if !s.wroteHeader {
+		s.code = http.StatusOK
+		s.wroteHeader = true
+	}
+	return s.ResponseWriter.Write(b)
+}
+
+func (s *statusRecorder) Flush() {
+	if f, ok := s.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+}
+
+// instrument records request count, latency, and in-flight gauge per endpoint,
+// and maps rejection status codes to gateway_rejected_total reasons.
+func instrument(endpoint string, next http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		inflightRequests.WithLabelValues(endpoint).Inc()
+		defer inflightRequests.WithLabelValues(endpoint).Dec()
+
+		rec := &statusRecorder{ResponseWriter: w, code: http.StatusOK}
+		start := time.Now()
+		next(rec, r)
+
+		requestDuration.WithLabelValues(endpoint).Observe(time.Since(start).Seconds())
+		requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(rec.code)).Inc()
+		if reason := rejectionReason(rec.code); reason != "" {
+			rejectedTotal.WithLabelValues(endpoint, reason).Inc()
+		}
+	}
+}
+
+func rejectionReason(code int) string {
+	switch code {
+	case http.StatusUnsupportedMediaType:
+		return "bad_content_type"
+	case http.StatusRequestEntityTooLarge:
+		return "too_large"
+	case http.StatusBadGateway:
+		return "upstream_error"
+	default:
+		return ""
+	}
+}
diff --git a/examples/speech-gateway/cmd/gateway/metrics_test.go b/examples/speech-gateway/cmd/gateway/metrics_test.go
new file mode 100644
index 000000000..290aa5069
--- /dev/null
+++ b/examples/speech-gateway/cmd/gateway/metrics_test.go
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+//
+// SPDX-License-Identifier: MPL-2.0
+
+package main
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+func TestRejectionReason(t *testing.T) {
+	cases := map[int]string{
+		http.StatusUnsupportedMediaType:  "bad_content_type",
+		http.StatusRequestEntityTooLarge: "too_large",
+		http.StatusBadGateway:            "upstream_error",
+		http.StatusOK:                    "",
+		http.StatusBadRequest:            "",
+	}
+	for code, want := range cases {
+		if got := rejectionReason(code); got != want {
+			t.Errorf("rejectionReason(%d) = %q, want %q", code, got, want)
+		}
+	}
+}
+
+func TestInstrumentRecordsRejection(t *testing.T) {
+	h := instrument("tts", func(w http.ResponseWriter, _ *http.Request) {
+		http.Error(w, "nope", http.StatusUnsupportedMediaType)
+	})
+
+	rec := httptest.NewRecorder()
+	h(rec, httptest.NewRequest(http.MethodPost, "/tts", nil))
+	if rec.Code != http.StatusUnsupportedMediaType {
+		t.Fatalf("status = %d, want 415", rec.Code)
+	}
+
+	body := scrapeMetrics(t)
+	for _, want := range []string{
+		`gateway_requests_total{code="415",endpoint="tts",method="POST"}`,
+		`gateway_rejected_total{endpoint="tts",reason="bad_content_type"}`,
+		`gateway_request_duration_seconds_bucket{endpoint="tts"`,
+	} {
+		if !strings.Contains(body, want) {
+			t.Errorf("metrics output missing %q", want)
+		}
+	}
+}
+
+func scrapeMetrics(t *testing.T) string {
+	t.Helper()
+	rec := httptest.NewRecorder()
+	promhttp.Handler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("/metrics status = %d, want 200", rec.Code)
+	}
+	return rec.Body.String()
+}
diff --git a/examples/speech-gateway/go.mod b/examples/speech-gateway/go.mod
index a9805a17a..861841153 100644
--- a/examples/speech-gateway/go.mod
+++ b/examples/speech-gateway/go.mod
@@ -1,3 +1,18 @@
 module github.com/streamer45/streamkit/examples/speech-gateway
 
 go 1.24
+
+require github.com/prometheus/client_golang v1.23.2
+
+require (
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/prometheus/client_model v0.6.2 // indirect
+	github.com/prometheus/common v0.66.1 // indirect
+	github.com/prometheus/procfs v0.16.1 // indirect
+	go.yaml.in/yaml/v2 v2.4.2 // indirect
+	golang.org/x/sys v0.35.0 // indirect
+	google.golang.org/protobuf v1.36.8 // indirect
+)
diff --git a/examples/speech-gateway/go.sum b/examples/speech-gateway/go.sum
new file mode 100644
index 000000000..d6b8ca98b
--- /dev/null
+++ b/examples/speech-gateway/go.sum
@@ -0,0 +1,46 @@
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
+github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
+github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
+github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
+github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
+github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
+github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
+golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
+google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From 5ccf96a3ca0cb16a9e6b850245a1ce58f5c38a46 Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sat, 30 May 2026 16:24:18 +0000
Subject: [PATCH 2/6] feat(speech-gateway): forward X-StreamKit-Service to
 backend

Lets the skit oneshot_pipeline metric split by tts/stt service.

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 examples/speech-gateway/cmd/gateway/main.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/speech-gateway/cmd/gateway/main.go b/examples/speech-gateway/cmd/gateway/main.go
index 557e1954e..2f8a4253d 100644
--- a/examples/speech-gateway/cmd/gateway/main.go
+++ b/examples/speech-gateway/cmd/gateway/main.go
@@ -377,6 +377,8 @@ func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, endpoi
 	}
 	req.Header.Set("Content-Type", mw.FormDataContentType())
 	req.Close = true
+	// Lets the backend split oneshot_pipeline metrics by {tts,stt,other}.
+	req.Header.Set("X-StreamKit-Service", endpoint)
 	if gw.authToken != "" {
 		req.Header.Set("Authorization", "Bearer "+gw.authToken)
 	}

From 458f023a2b424bbd48c06e24b3516dc8fa3904a1 Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sat, 30 May 2026 16:32:10 +0000
Subject: [PATCH 3/6] fix(speech-gateway): record aborted requests as 499 not
 200

Handlers return without writing on context.Canceled; default the metric to 499 (client closed) unless a status was written, so cancellations are not counted as successes.

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 examples/speech-gateway/cmd/gateway/metrics.go     | 12 ++++++++++--
 .../speech-gateway/cmd/gateway/metrics_test.go     | 14 ++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/examples/speech-gateway/cmd/gateway/metrics.go b/examples/speech-gateway/cmd/gateway/metrics.go
index c149c8132..734e0b3cc 100644
--- a/examples/speech-gateway/cmd/gateway/metrics.go
+++ b/examples/speech-gateway/cmd/gateway/metrics.go
@@ -16,6 +16,10 @@ import (
 // Buckets span sub-100ms rejections up to multi-second STT/TTS synthesis.
 var latencyBuckets = []float64{0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
 
+// codeClientClosed mirrors nginx's 499: the handler returned without writing a
+// response, typically because the client disconnected mid-request.
+const codeClientClosed = 499
+
 var (
 	requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
 		Name: "gateway_requests_total",
@@ -86,9 +90,13 @@ func instrument(endpoint string, next http.HandlerFunc) http.HandlerFunc {
 		start := time.Now()
 		next(rec, r)
 
+		code := rec.code
+		if !rec.wroteHeader {
+			code = codeClientClosed
+		}
 		requestDuration.WithLabelValues(endpoint).Observe(time.Since(start).Seconds())
-		requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(rec.code)).Inc()
-		if reason := rejectionReason(rec.code); reason != "" {
+		requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(code)).Inc()
+		if reason := rejectionReason(code); reason != "" {
 			rejectedTotal.WithLabelValues(endpoint, reason).Inc()
 		}
 	}
diff --git a/examples/speech-gateway/cmd/gateway/metrics_test.go b/examples/speech-gateway/cmd/gateway/metrics_test.go
index 290aa5069..4564a8045 100644
--- a/examples/speech-gateway/cmd/gateway/metrics_test.go
+++ b/examples/speech-gateway/cmd/gateway/metrics_test.go
@@ -51,6 +51,20 @@ func TestInstrumentRecordsRejection(t *testing.T) {
 	}
 }
 
+func TestInstrumentUnwrittenResponseCountedAsClientClosed(t *testing.T) {
+	// Handler returns without writing, mirroring the context.Canceled path
+	// in handleSTT/handleTTS; it must not be counted as a 200.
+	h := instrument("stt", func(http.ResponseWriter, *http.Request) {})
+
+	rec := httptest.NewRecorder()
+	h(rec, httptest.NewRequest(http.MethodPost, "/stt", nil))
+
+	body := scrapeMetrics(t)
+	if !strings.Contains(body, `gateway_requests_total{code="499",endpoint="stt",method="POST"}`) {
+		t.Errorf("expected unwritten response to be counted as code=499; got:\n%s", body)
+	}
+}
+
 func scrapeMetrics(t *testing.T) string {
 	t.Helper()
 	rec := httptest.NewRecorder()

From 9ea4f9a098334098925338d00c06d7ed875f24ca Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sun, 31 May 2026 08:10:00 +0000
Subject: [PATCH 4/6] docs: add local observability stack sample, guide, and
 skill

- Add samples/observability/ compose stack (skit + Prometheus + Grafana,
  optional speech gateway) that auto-provisions both dashboards and ships a
  traffic generator, so the metrics/dashboards can be validated locally.
- Document the plugin/oneshot/gateway metric families and a 'run the stack
  locally' section in the observability guide.
- Add an observability-stack agent skill capturing the validation procedure
  and the demo-image gotchas.
- Add a Dockerfile to the speech-gateway example.

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 .agents/skills/observability-stack/SKILL.md   |  87 +++++++++++++++
 .claude/skills/observability-stack            |   1 +
 docs/src/content/docs/guides/observability.md |  38 +++++++
 examples/speech-gateway/Dockerfile            |  13 +++
 samples/observability/README.md               | 100 ++++++++++++++++++
 samples/observability/docker-compose.yml      |  96 +++++++++++++++++
 samples/observability/generate-traffic.sh     |  45 ++++++++
 .../provisioning/dashboards/streamkit.yml     |   9 ++
 .../provisioning/datasources/prometheus.yml   |  11 ++
 .../observability/pipelines/stt-whisper.yml   |  27 +++++
 .../observability/pipelines/tts-kokoro.yml    |  26 +++++
 samples/observability/prometheus.yml          |  12 +++
 samples/observability/skit/entrypoint.sh      |  31 ++++++
 13 files changed, 496 insertions(+)
 create mode 100644 .agents/skills/observability-stack/SKILL.md
 create mode 120000 .claude/skills/observability-stack
 create mode 100644 examples/speech-gateway/Dockerfile
 create mode 100644 samples/observability/README.md
 create mode 100644 samples/observability/docker-compose.yml
 create mode 100755 samples/observability/generate-traffic.sh
 create mode 100644 samples/observability/grafana/provisioning/dashboards/streamkit.yml
 create mode 100644 samples/observability/grafana/provisioning/datasources/prometheus.yml
 create mode 100644 samples/observability/pipelines/stt-whisper.yml
 create mode 100644 samples/observability/pipelines/tts-kokoro.yml
 create mode 100644 samples/observability/prometheus.yml
 create mode 100755 samples/observability/skit/entrypoint.sh

diff --git a/.agents/skills/observability-stack/SKILL.md b/.agents/skills/observability-stack/SKILL.md
new file mode 100644
index 000000000..f02a69b0c
--- /dev/null
+++ b/.agents/skills/observability-stack/SKILL.md
@@ -0,0 +1,87 @@
+---
+name: observability-stack
+description: >-
+  Spin up StreamKit's local observability stack (skit + Prometheus + Grafana,
+  optional speech gateway) and validate the Grafana dashboards end-to-end. Use
+  when testing metrics/dashboards, debugging empty dashboard panels, or
+  reproducing the speech-gateway monitoring setup locally.
+license: MPL-2.0
+---
+
+# Observability stack (local)
+
+`samples/observability/` is a `docker compose` stack that runs skit + Prometheus
++ Grafana (and an optional speech gateway), auto-provisioning both bundled
+dashboards. Use it to validate metrics and dashboards without any cloud setup.
+
+## Run it
+
+```bash
+cd samples/observability
+docker compose up -d
+./generate-traffic.sh                 # direct-to-skit TTS+STT
+# optional gateway row:
+docker compose --profile gateway up -d --build
+./generate-traffic.sh --gateway
+```
+
+Grafana: <http://localhost:3000> (anonymous admin). Prometheus:
+<http://localhost:9090>. skit: <http://localhost:4545>.
+
+## How metrics flow
+
+- **skit → Prometheus via OTLP push.** Prometheus runs with
+  `--web.enable-otlp-receiver`; skit's `SK_TELEMETRY__OTLP_ENDPOINT` points at
+  `…/api/v1/otlp/v1/metrics`. There is **no scrape job** for skit.
+- **gateway → Prometheus via scrape** of the gateway's `/metrics`.
+
+## Validate dashboards (don't just eyeball)
+
+OTLP renames dotted metrics and appends unit suffixes, so verify the metric
+names/labels the panels query actually exist before trusting a panel:
+
+```bash
+# list all metric names Prometheus knows about
+curl -s localhost:9090/api/v1/label/__name__/values | jq -r '.data[]' | sort
+# run a panel's exact PromQL and count series (0 == panel will be "No data")
+curl -s --data-urlencode 'query=<promql>' localhost:9090/api/v1/query \
+  | jq '.data.result | length'
+# inspect a metric's labels
+curl -s 'localhost:9090/api/v1/series?match[]=<metric>' | jq
+```
+
+Key name/label facts:
+
+- Plugin metrics: `plugin_call_duration_seconds_*` (unit suffix present),
+  `plugin_calls_total`; labels `plugin_kind`, `op`.
+- `oneshot_pipeline_duration_*` has **no** `_seconds` suffix (no unit set);
+  labels `status`, and `service` only when an `X-StreamKit-Service` header is
+  forwarded by a service-label-aware skit.
+- Gateway: `gateway_requests_total{endpoint,code}`,
+  `gateway_request_duration_seconds`, `gateway_rejected_total{reason}` (only
+  appears after a 413/415/502 actually occurs).
+
+## Expected "No data" (not bugs)
+
+- Plugin failure panels (`plugin_errors_total` etc.) — counters don't exist
+  until a failure happens.
+- Oneshot "by Service" panels — empty unless the skit build emits the `service`
+  label.
+- Video / MoQ / codec panels — only populate when you run those pipelines.
+
+## Gotchas (most-common causes of empty dashboards)
+
+- **`latest-demo` is stale.** Pin a versioned `-demo` tag; `latest-demo` can
+  predate metrics like `plugin.call.duration`, leaving the Plugins row empty.
+- **Demo-image plugin layout.** `-demo` images ship bare `.so` files but the
+  loader wants `plugins/native/<id>/` bundles; `skit/entrypoint.sh` reassembles
+  them. Symptom: "no plugins found" / "node kind not found in registry".
+- **Model-name mismatch.** A pipeline's `model_path` must exist in the image's
+  `models/`. The stack's `pipelines/` use the names the `-demo` image ships.
+- **Grafana datasource input.** Committed dashboards use `${DS_PROMETHEUS}`;
+  the `dashboard-prep` step rewrites it to the provisioned uid. In compose
+  command strings, escape it as `$${DS_PROMETHEUS}` so compose doesn't
+  interpolate it.
+- **Local auth.** skit needs `SK_AUTH__MODE=disabled` +
+  `SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH=true` to start unauthenticated on a
+  non-loopback bind. Local only.
diff --git a/.claude/skills/observability-stack b/.claude/skills/observability-stack
new file mode 120000
index 000000000..288055401
--- /dev/null
+++ b/.claude/skills/observability-stack
@@ -0,0 +1 @@
+../../.agents/skills/observability-stack
\ No newline at end of file
diff --git a/docs/src/content/docs/guides/observability.md b/docs/src/content/docs/guides/observability.md
index 52b44674c..0c6dddf3a 100644
--- a/docs/src/content/docs/guides/observability.md
+++ b/docs/src/content/docs/guides/observability.md
@@ -66,6 +66,44 @@ Import [`samples/grafana-dashboard.json`](https://github.com/streamer45/streamki
 
 ![Grafana Dashboard](/screenshots/grafana_dashboard.png)
 
+### What's measured
+
+Beyond HTTP and engine/node throughput, a few metric families are especially
+useful for speech and ML workloads:
+
+- **Plugin / ML inference** — native plugins emit per-call metrics labelled by
+  `plugin_kind` (e.g. `whisper`, `kokoro`) and `op`: `plugin_call_duration_seconds`
+  (histogram), `plugin_calls_total`, and `plugin_errors_total` /
+  `plugin_timeouts_total` / `plugin_panics_total`. This is where inference
+  latency and failures show up — usually the dominant cost of a speech pipeline.
+- **Oneshot pipelines** — `oneshot_pipeline_duration` (histogram) is labelled by
+  `status` (`ok`/`error`). Because every oneshot request hits the same
+  `POST /api/v1/process` endpoint, splitting TTS vs STT requires a trusted
+  `service` label (sent via the `X-StreamKit-Service` header); without it all
+  oneshot traffic collapses into one series.
+- **Speech gateway** — the [speech gateway example](https://github.com/streamer45/streamkit/tree/main/examples/speech-gateway)
+  exposes Prometheus metrics for the front door it puts in front of skit:
+  per-endpoint request rate/latency (`gateway_requests_total`,
+  `gateway_request_duration_seconds`), in-flight gauge, upstream latency, and
+  rejections by reason (`gateway_rejected_total`).
+
+### Run the full stack locally
+
+To see all of the above on the dashboards without any cloud setup, use the
+[`samples/observability`](https://github.com/streamer45/streamkit/tree/main/samples/observability)
+compose stack — it wires skit (OTLP push) + the gateway (scrape) into Prometheus
+and auto-provisions both dashboards in Grafana:
+
+```bash
+cd samples/observability
+docker compose up -d
+./generate-traffic.sh
+# Grafana: http://localhost:3000
+```
+
+See its README for the wiring details and known gotchas (demo-image tag/plugin
+layout, model-name matching, the Prometheus OTLP receiver, and local auth).
+
 ## Traces (OTLP)
 
 Tracing export is controlled by:
diff --git a/examples/speech-gateway/Dockerfile b/examples/speech-gateway/Dockerfile
new file mode 100644
index 000000000..8c86de183
--- /dev/null
+++ b/examples/speech-gateway/Dockerfile
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+#
+# SPDX-License-Identifier: MPL-2.0
+
+FROM golang:1.24-bookworm AS build
+WORKDIR /src
+COPY . .
+RUN CGO_ENABLED=0 go build -o /gateway ./cmd/gateway
+
+FROM gcr.io/distroless/static-debian12
+COPY --from=build /gateway /gateway
+EXPOSE 8080
+ENTRYPOINT ["/gateway"]
diff --git a/samples/observability/README.md b/samples/observability/README.md
new file mode 100644
index 000000000..0c2e663b9
--- /dev/null
+++ b/samples/observability/README.md
@@ -0,0 +1,100 @@
+<!--
+SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+
+SPDX-License-Identifier: MPL-2.0
+-->
+
+# Local observability stack
+
+A `docker compose` stack that runs **skit + Prometheus + Grafana** (and an
+optional **speech gateway**) so you can see StreamKit's metrics on the bundled
+Grafana dashboards locally — no cloud, no manual import.
+
+## Quick start
+
+```bash
+cd samples/observability
+docker compose up -d            # skit + Prometheus + Grafana
+./generate-traffic.sh           # drive ~20 TTS + STT requests through skit
+```
+
+Then open Grafana at <http://localhost:3000> (anonymous admin, no login). Two
+dashboards are auto-provisioned:
+
+- **StreamKit Performance Dashboard** — the repo's main dashboard
+  ([`samples/grafana-dashboard.json`](../grafana-dashboard.json)), including the
+  **Plugins / ML inference** row.
+- **StreamKit Speech Gateway Dashboard** — the gateway/oneshot dashboard
+  ([`examples/speech-gateway/grafana-dashboard.json`](../../examples/speech-gateway/grafana-dashboard.json)).
+
+| Service    | URL                     |
+| ---------- | ----------------------- |
+| Grafana    | <http://localhost:3000> |
+| Prometheus | <http://localhost:9090> |
+| skit API   | <http://localhost:4545> |
+| gateway    | <http://localhost:8080> (gateway profile only) |
+
+## How metrics get to Prometheus
+
+Two different paths, both visible on the dashboards:
+
+- **skit → Prometheus (OTLP push).** skit exports OTLP metrics to Prometheus'
+  native OTLP receiver, which is enabled with `--web.enable-otlp-receiver`.
+  Configured via `SK_TELEMETRY__OTLP_ENDPOINT` pointing at
+  `http://prometheus:9090/api/v1/otlp/v1/metrics`. This feeds the HTTP, engine,
+  oneshot, and **plugin** metrics.
+- **gateway → Prometheus (scrape).** The speech gateway exposes a classic
+  `/metrics` endpoint that Prometheus scrapes (see `prometheus.yml`). This feeds
+  the **Speech Gateway** row.
+
+## Speech Gateway row
+
+The gateway is behind a compose profile because it requires the gateway
+**metrics** instrumentation:
+
+```bash
+docker compose --profile gateway up -d --build
+./generate-traffic.sh --gateway   # route traffic through the gateway
+```
+
+Notes:
+
+- The gateway's `/metrics` endpoint and the `gateway_*` metrics require the
+  metrics-instrumented gateway. The Speech Gateway dashboard row stays empty
+  until those metrics are present and the gateway has served traffic.
+- The gateway's default STT pipeline targets a Whisper model that must exist on
+  the skit it talks to. The bundled `-demo` image ships `ggml-tiny-q5_1.bin`; if
+  the gateway points at a different model, STT through the gateway will fail
+  while TTS still works. The direct-to-skit traffic path (the default
+  `generate-traffic.sh`) avoids this by shipping its own pipelines under
+  `pipelines/`.
+
+## Known gotchas
+
+These are the sharp edges worth knowing when wiring this up yourself:
+
+- **Pin a versioned `-demo` tag.** `latest-demo` can lag behind released
+  versions and predate metrics like `plugin.call.duration`, which leaves the
+  Plugins / ML inference row empty. This stack pins `v0.5.0-demo`.
+- **Demo image plugin layout.** Current `-demo` images ship native plugins as
+  bare `.so` files under `plugins/native/`, but the loader expects directory
+  bundles (`plugins/native/<id>/` with a `plugin.yml` + the `.so`). `skit serve`
+  otherwise logs "no plugins found" and pipelines fail with "node kind not
+  found". `skit/entrypoint.sh` reassembles the expected layout at startup from
+  the in-repo manifests (mounted at `/repo-manifests`).
+- **Model names must match.** Pipelines reference model files by path; the file
+  must actually be present in the image/`models/` dir. The pipelines under
+  `pipelines/` use the model names the `-demo` image actually ships.
+- **Local auth override.** skit refuses to start unauthenticated on a
+  non-loopback bind unless you opt in. This stack sets
+  `SK_AUTH__MODE=disabled` + `SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH=true`.
+  **Local testing only** — never do this on an exposed instance.
+- **Grafana dashboard datasource.** The committed dashboards use a
+  `${DS_PROMETHEUS}` datasource input. The `dashboard-prep` step rewrites it to
+  the provisioned datasource uid so the dashboards load without a manual import.
+
+## Cleanup
+
+```bash
+docker compose --profile gateway down -v
+```
diff --git a/samples/observability/docker-compose.yml b/samples/observability/docker-compose.yml
new file mode 100644
index 000000000..305057bc9
--- /dev/null
+++ b/samples/observability/docker-compose.yml
@@ -0,0 +1,96 @@
+# Local observability stack for StreamKit: skit + Prometheus + Grafana, with an
+# optional speech gateway. See README.md for the walkthrough and known gotchas.
+#
+# Usage:
+#   docker compose up -d                  # skit + Prometheus + Grafana
+#   docker compose --profile gateway up -d # also build & run the speech gateway
+#
+# Grafana:    http://localhost:3000  (anonymous admin, no login)
+# Prometheus: http://localhost:9090
+# skit API:   http://localhost:4545
+# gateway:    http://localhost:8080  (gateway profile only)
+
+services:
+  skit:
+    image: ghcr.io/streamer45/streamkit:v0.5.0-demo
+    # Pinned to a versioned -demo tag on purpose: `latest-demo` can lag behind
+    # and predate metrics like plugin.call.duration, leaving dashboard rows empty.
+    entrypoint: ["/entrypoint.sh"]
+    environment:
+      SK_AUTH__MODE: disabled
+      SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH: "true"
+      SK_PLUGINS__DIRECTORY: /opt/streamkit/np
+      SK_TELEMETRY__ENABLE: "true"
+      SK_TELEMETRY__OTLP_ENDPOINT: http://prometheus:9090/api/v1/otlp/v1/metrics
+    volumes:
+      - ./skit/entrypoint.sh:/entrypoint.sh:ro
+      - ../../plugins/native:/repo-manifests:ro
+    ports:
+      - "4545:4545"
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:4545/healthz"]
+      interval: 5s
+      timeout: 3s
+      retries: 20
+
+  prometheus:
+    image: prom/prometheus:v3.1.0
+    command:
+      - --config.file=/etc/prometheus/prometheus.yml
+      - --web.enable-otlp-receiver
+      - --storage.tsdb.path=/prometheus
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
+    ports:
+      - "9090:9090"
+
+  dashboard-prep:
+    image: alpine:3.21
+    # Copies the in-repo dashboards into Grafana's provisioning dir, resolving
+    # the ${DS_PROMETHEUS} template input to the provisioned datasource uid so
+    # the dashboards load without manual import.
+    command:
+      - sh
+      - -c
+      - |
+        set -e
+        for f in /in/*.json; do
+          sed 's/$${DS_PROMETHEUS}/prometheus/g' "$$f" > "/out/$$(basename "$$f")"
+        done
+        echo "prepared dashboards:"; ls -1 /out
+    volumes:
+      - ../../samples/grafana-dashboard.json:/in/streamkit.json:ro
+      - ../../examples/speech-gateway/grafana-dashboard.json:/in/speech-gateway.json:ro
+      - grafana-dashboards:/out
+
+  grafana:
+    image: grafana/grafana:11.4.0
+    environment:
+      GF_AUTH_ANONYMOUS_ENABLED: "true"
+      GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
+      GF_AUTH_DISABLE_LOGIN_FORM: "true"
+      GF_SECURITY_ADMIN_PASSWORD: admin
+    volumes:
+      - ./grafana/provisioning:/etc/grafana/provisioning:ro
+      - grafana-dashboards:/var/lib/grafana/dashboards:ro
+    ports:
+      - "3000:3000"
+    depends_on:
+      - prometheus
+      - dashboard-prep
+
+  gateway:
+    profiles: ["gateway"]
+    build:
+      context: ../../examples/speech-gateway
+    environment:
+      GATEWAY_LISTEN: ":8080"
+      SKIT_URL: http://skit:4545
+    ports:
+      - "8080:8080"
+    depends_on:
+      skit:
+        condition: service_healthy
+
+volumes:
+  grafana-dashboards:
diff --git a/samples/observability/generate-traffic.sh b/samples/observability/generate-traffic.sh
new file mode 100755
index 000000000..87bb74f50
--- /dev/null
+++ b/samples/observability/generate-traffic.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+#
+# SPDX-License-Identifier: MPL-2.0
+
+# Drives TTS + STT traffic so the dashboards have data. By default it calls
+# skit's oneshot /api/v1/process directly (no gateway required). Pass --gateway
+# to route through the speech gateway instead (requires the `gateway` profile),
+# which also populates the Speech Gateway dashboard row.
+set -euo pipefail
+
+ROUNDS="${ROUNDS:-20}"
+SKIT_URL="${SKIT_URL:-http://localhost:4545}"
+GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}"
+HERE="$(cd "$(dirname "$0")" && pwd)"
+MODE="direct"
+[ "${1:-}" = "--gateway" ] && MODE="gateway"
+
+tmp="$(mktemp -d)"
+trap 'rm -rf "$tmp"' EXIT
+
+echo "mode=$MODE rounds=$ROUNDS"
+for i in $(seq 1 "$ROUNDS"); do
+  text="StreamKit observability sample, round $i: the quick brown fox."
+  if [ "$MODE" = "gateway" ]; then
+    curl -fsS -o "$tmp/a.ogg" -d "$text" "$GATEWAY_URL/tts"
+    curl -fsS -o /dev/null --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt" || true
+  else
+    printf '%s' "$text" > "$tmp/in.txt"
+    # X-StreamKit-Service lets a service-label-aware skit (see PR #545) split
+    # oneshot metrics by {tts,stt}; older builds simply ignore the header.
+    curl -fsS -o "$tmp/a.ogg" \
+      -H 'X-StreamKit-Service: tts' \
+      -F "config=<$HERE/pipelines/tts-kokoro.yml" \
+      -F "media=@$tmp/in.txt;type=text/plain;filename=media" \
+      "$SKIT_URL/api/v1/process"
+    curl -fsS -o /dev/null \
+      -H 'X-StreamKit-Service: stt' \
+      -F "config=<$HERE/pipelines/stt-whisper.yml" \
+      -F "media=@$tmp/a.ogg;type=audio/ogg;filename=media" \
+      "$SKIT_URL/api/v1/process" || true
+  fi
+  printf '.'
+done
+echo " done"
diff --git a/samples/observability/grafana/provisioning/dashboards/streamkit.yml b/samples/observability/grafana/provisioning/dashboards/streamkit.yml
new file mode 100644
index 000000000..721b3cba6
--- /dev/null
+++ b/samples/observability/grafana/provisioning/dashboards/streamkit.yml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+providers:
+  - name: streamkit
+    type: file
+    allowUiUpdates: true
+    options:
+      path: /var/lib/grafana/dashboards
+      foldersFromFilesStructure: false
diff --git a/samples/observability/grafana/provisioning/datasources/prometheus.yml b/samples/observability/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 000000000..c70b848dd
--- /dev/null
+++ b/samples/observability/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,11 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    uid: prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    jsonData:
+      timeInterval: 5s
diff --git a/samples/observability/pipelines/stt-whisper.yml b/samples/observability/pipelines/stt-whisper.yml
new file mode 100644
index 000000000..37f6d1d26
--- /dev/null
+++ b/samples/observability/pipelines/stt-whisper.yml
@@ -0,0 +1,27 @@
+name: stt
+description: Speech-to-Text (Whisper) for the observability sample
+mode: oneshot
+steps:
+  - kind: streamkit::http_input
+  - kind: containers::ogg::demuxer
+  - kind: audio::opus::decoder
+  - kind: audio::resampler
+    params:
+      chunk_frames: 960
+      output_frame_size: 960
+      target_sample_rate: 16000
+  - kind: plugin::native::whisper
+    params:
+      model_path: models/ggml-tiny-q5_1.bin
+      language: en
+      vad_model_path: models/silero_vad.onnx
+      vad_threshold: 0.5
+      min_silence_duration_ms: 700
+      max_segment_duration_secs: 30.0
+  - kind: core::json_serialize
+    params:
+      pretty: false
+      newline_delimited: true
+  - kind: streamkit::http_output
+    params:
+      content_type: application/json
diff --git a/samples/observability/pipelines/tts-kokoro.yml b/samples/observability/pipelines/tts-kokoro.yml
new file mode 100644
index 000000000..cf6435006
--- /dev/null
+++ b/samples/observability/pipelines/tts-kokoro.yml
@@ -0,0 +1,26 @@
+name: tts
+description: Text-to-Speech (Kokoro) for the observability sample
+mode: oneshot
+steps:
+  - kind: streamkit::http_input
+  - kind: core::text_chunker
+    params:
+      min_length: 10
+  - kind: plugin::native::kokoro
+    params:
+      model_dir: "models/kokoro-multi-lang-v1_1"
+      speaker_id: 0
+      speed: 1.0
+      num_threads: 4
+  - kind: audio::resampler
+    params:
+      chunk_frames: 960
+      output_frame_size: 960
+      target_sample_rate: 48000
+  - kind: audio::opus::encoder
+  - kind: containers::webm::muxer
+    params:
+      channels: 1
+      sample_rate: 48000
+      streaming_mode: live
+  - kind: streamkit::http_output
diff --git a/samples/observability/prometheus.yml b/samples/observability/prometheus.yml
new file mode 100644
index 000000000..5296e6f4d
--- /dev/null
+++ b/samples/observability/prometheus.yml
@@ -0,0 +1,12 @@
+global:
+  scrape_interval: 5s
+  evaluation_interval: 5s
+
+# skit pushes metrics via OTLP to Prometheus' native OTLP receiver
+# (enabled with --web.enable-otlp-receiver in docker-compose.yml), so skit
+# does not need a scrape_config here. The speech gateway exposes a classic
+# Prometheus /metrics endpoint and is scraped below.
+scrape_configs:
+  - job_name: speech-gateway
+    static_configs:
+      - targets: ['gateway:8080']
diff --git a/samples/observability/skit/entrypoint.sh b/samples/observability/skit/entrypoint.sh
new file mode 100755
index 000000000..7f58ce5a6
--- /dev/null
+++ b/samples/observability/skit/entrypoint.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-FileCopyrightText: © 2025 StreamKit Contributors
+#
+# SPDX-License-Identifier: MPL-2.0
+
+# The -demo images currently ship native plugins as bare `.so` files under
+# plugins/native/, but the loader expects directory bundles
+# (plugins/native/<id>/ with a plugin.yml + the .so). Without this, `skit serve`
+# logs "no plugins found" and TTS/STT pipelines fail with "node kind not found".
+# We assemble the expected layout from the repo manifests (mounted at
+# /repo-manifests) plus the .so files baked into the image, then start the
+# server. Tracked upstream; remove once the demo image ships bundles directly.
+set -e
+
+SRC=/opt/streamkit/plugins/native
+DST=/opt/streamkit/np/native
+mkdir -p "$DST"
+
+for manifest in /repo-manifests/*/plugin.yml; do
+  [ -f "$manifest" ] || continue
+  id=$(basename "$(dirname "$manifest")")
+  so=$(awk '/^entrypoint:/{print $2}' "$manifest")
+  if [ -n "$so" ] && [ -f "$SRC/$so" ]; then
+    mkdir -p "$DST/$id"
+    cp "$manifest" "$DST/$id/plugin.yml"
+    cp "$SRC/$so" "$DST/$id/$so"
+    echo "assembled plugin bundle: $id ($so)"
+  fi
+done
+
+exec skit serve

From 732b42574e32dec265c12741e97630b7dc250ab8 Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sun, 31 May 2026 08:17:24 +0000
Subject: [PATCH 5/6] fix(observability): TTS sample must mux Ogg so STT
 round-trip works

The TTS pipeline muxed WebM while generate-traffic.sh posts the result to the
STT pipeline as audio/ogg (demuxed by containers::ogg::demuxer), so STT traffic
was silently dropped. Mux Ogg/Opus to match, as the gateway's own TTS pipeline does.

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 samples/observability/pipelines/tts-kokoro.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/samples/observability/pipelines/tts-kokoro.yml b/samples/observability/pipelines/tts-kokoro.yml
index cf6435006..d5f9fb188 100644
--- a/samples/observability/pipelines/tts-kokoro.yml
+++ b/samples/observability/pipelines/tts-kokoro.yml
@@ -18,9 +18,11 @@ steps:
       output_frame_size: 960
       target_sample_rate: 48000
   - kind: audio::opus::encoder
-  - kind: containers::webm::muxer
+  - kind: containers::ogg::muxer
     params:
       channels: 1
-      sample_rate: 48000
-      streaming_mode: live
+      codec: opus
+      chunk_size: 32768
   - kind: streamkit::http_output
+    params:
+      content_type: audio/ogg

From 0b6a9b0c3c797ae72af7adb604d675c413f57820 Mon Sep 17 00:00:00 2001
From: streamkit-devin <devin@streamkit.dev>
Date: Sun, 31 May 2026 08:22:39 +0000
Subject: [PATCH 6/6] chore(observability): surface gateway STT model mismatch
 once, not per round

Signed-off-by: streamkit-devin <devin@streamkit.dev>
---
 samples/observability/generate-traffic.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/samples/observability/generate-traffic.sh b/samples/observability/generate-traffic.sh
index 87bb74f50..d253d88ea 100755
--- a/samples/observability/generate-traffic.sh
+++ b/samples/observability/generate-traffic.sh
@@ -24,7 +24,13 @@ for i in $(seq 1 "$ROUNDS"); do
   text="StreamKit observability sample, round $i: the quick brown fox."
   if [ "$MODE" = "gateway" ]; then
     curl -fsS -o "$tmp/a.ogg" -d "$text" "$GATEWAY_URL/tts"
-    curl -fsS -o /dev/null --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt" || true
+    # The gateway's built-in STT pipeline targets a Whisper model the -demo
+    # image may not ship, so STT can return 5xx; surface it once, not per round.
+    code=$(curl -s -o /dev/null -w '%{http_code}' --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt")
+    case "$code" in
+      2*) ;;
+      *) [ -n "${stt_warned:-}" ] || { printf '\nnote: gateway STT -> HTTP %s; its built-in pipeline needs a Whisper model the demo image may not ship (see README / #553). TTS still populates the gateway row.\n' "$code"; stt_warned=1; } ;;
+    esac
   else
     printf '%s' "$text" > "$tmp/in.txt"
     # X-StreamKit-Service lets a service-label-aware skit (see PR #545) split