From 1fde6cf69e003d9def885d6217767600f2b28366 Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sat, 30 May 2026 16:19:58 +0000 Subject: [PATCH 1/6] feat(speech-gateway): expose Prometheus metrics Instrument the /tts and /stt handlers with request count, latency, in-flight, upstream-call latency, and rejection-reason metrics, and serve them at GET /metrics (ungated by the concurrency semaphore). Signed-off-by: streamkit-devin --- examples/speech-gateway/.gitignore | 2 +- examples/speech-gateway/README.md | 25 ++++ examples/speech-gateway/cmd/gateway/main.go | 19 ++- .../speech-gateway/cmd/gateway/metrics.go | 108 ++++++++++++++++++ .../cmd/gateway/metrics_test.go | 62 ++++++++++ examples/speech-gateway/go.mod | 15 +++ examples/speech-gateway/go.sum | 46 ++++++++ 7 files changed, 271 insertions(+), 6 deletions(-) create mode 100644 examples/speech-gateway/cmd/gateway/metrics.go create mode 100644 examples/speech-gateway/cmd/gateway/metrics_test.go create mode 100644 examples/speech-gateway/go.sum diff --git a/examples/speech-gateway/.gitignore b/examples/speech-gateway/.gitignore index ad230ccfe..9064eb2e2 100644 --- a/examples/speech-gateway/.gitignore +++ b/examples/speech-gateway/.gitignore @@ -1 +1 @@ -gateway +/gateway diff --git a/examples/speech-gateway/README.md b/examples/speech-gateway/README.md index 7589bf712..2c8c73e87 100644 --- a/examples/speech-gateway/README.md +++ b/examples/speech-gateway/README.md @@ -54,3 +54,28 @@ curl -H "Content-Type: text/plain" --data 'Hello from StreamKit' http://127.0.0. ``` Response is `audio/ogg` (Opus mono). + +## Metrics + +The gateway exposes Prometheus metrics at `GET /metrics` (via `promhttp`). This +route is **not** gated by `GATEWAY_MAX_CONCURRENCY`, so it stays scrapable even +when all request slots are in use. A public/hosted instance (e.g. behind +`tts.streamkit.dev` / `stt.streamkit.dev`) may choose not to expose `/metrics` +externally — scrape it from inside the trust boundary instead. + +Every metric carries an `endpoint` label whose value is exactly `tts` or `stt`. + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `gateway_requests_total` | counter | `endpoint`, `method`, `code` | Requests served, by HTTP method and status code. | +| `gateway_request_duration_seconds` | histogram | `endpoint` | Total handler latency. | +| `gateway_inflight_requests` | gauge | `endpoint` | Requests currently being handled. | +| `gateway_upstream_duration_seconds` | histogram | `endpoint` | Time spent calling the skit backend `/api/v1/process`. | +| `gateway_rejected_total` | counter | `endpoint`, `reason` | Rejected requests; `reason` ∈ `bad_content_type` (415), `too_large` (413), `upstream_error` (502). | + +Histogram buckets are tuned for multi-second STT/TTS workloads: +`0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30` seconds. + +```sh +curl http://127.0.0.1:8080/metrics +``` diff --git a/examples/speech-gateway/cmd/gateway/main.go b/examples/speech-gateway/cmd/gateway/main.go index 1e6e79432..557e1954e 100644 --- a/examples/speech-gateway/cmd/gateway/main.go +++ b/examples/speech-gateway/cmd/gateway/main.go @@ -22,6 +22,8 @@ import ( "strings" "time" "unicode/utf8" + + "github.com/prometheus/client_golang/prometheus/promhttp" ) const ( @@ -126,8 +128,11 @@ func main() { } mux := http.NewServeMux() - mux.HandleFunc("/stt", gw.handleSTT) - mux.HandleFunc("/tts", gw.handleTTS) + mux.HandleFunc("/stt", instrument("stt", gw.handleSTT)) + mux.HandleFunc("/tts", instrument("tts", gw.handleTTS)) + // /metrics is intentionally not gated by the concurrency semaphore so it + // stays scrapable while request slots are saturated. + mux.Handle("/metrics", promhttp.Handler()) server := &http.Server{ Addr: cfg.listenAddr, @@ -244,7 +249,7 @@ func (gw *gateway) handleSTT(w http.ResponseWriter, r *http.Request) { defer release() r.Body = http.MaxBytesReader(w, r.Body, gw.maxBodySize) useBuffer := r.ContentLength > 0 && r.ContentLength <= gw.maxBodySize - if err := gw.proxyMultipart(w, r, sttPipelineYAML, "media", "audio/ogg", useBuffer); err != nil { + if err := gw.proxyMultipart(w, r, "stt", sttPipelineYAML, "media", "audio/ogg", useBuffer); err != nil { log.Printf("stt error: %v", err) if !errors.Is(err, context.Canceled) { http.Error(w, "upstream error", http.StatusBadGateway) @@ -315,7 +320,7 @@ func (gw *gateway) handleTTS(w http.ResponseWriter, r *http.Request) { r.Body = io.NopCloser(bytes.NewReader(textBytes)) useBuffer := true // We've already buffered it - if err := gw.proxyMultipart(w, r, ttsPipelineYAML, "media", "text/plain", useBuffer); err != nil { + if err := gw.proxyMultipart(w, r, "tts", ttsPipelineYAML, "media", "text/plain", useBuffer); err != nil { log.Printf("tts error: %v", err) if !errors.Is(err, context.Canceled) { http.Error(w, "upstream error", http.StatusBadGateway) @@ -323,7 +328,7 @@ func (gw *gateway) handleTTS(w http.ResponseWriter, r *http.Request) { } } -func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, pipelineYAML, mediaField, mediaContentType string, bufferBody bool) error { +func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, endpoint, pipelineYAML, mediaField, mediaContentType string, bufferBody bool) error { ctx := r.Context() // Optionally buffer the request body for finite uploads (helps curl -T file). @@ -376,6 +381,10 @@ func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, pipeli req.Header.Set("Authorization", "Bearer "+gw.authToken) } + upstreamStart := time.Now() + defer func() { + upstreamDuration.WithLabelValues(endpoint).Observe(time.Since(upstreamStart).Seconds()) + }() resp, err := gw.client.Do(req) if err != nil { log.Printf("call skit failed: %v", err) diff --git a/examples/speech-gateway/cmd/gateway/metrics.go b/examples/speech-gateway/cmd/gateway/metrics.go new file mode 100644 index 000000000..c149c8132 --- /dev/null +++ b/examples/speech-gateway/cmd/gateway/metrics.go @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +package main + +import ( + "net/http" + "strconv" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +// Buckets span sub-100ms rejections up to multi-second STT/TTS synthesis. +var latencyBuckets = []float64{0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30} + +var ( + requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "gateway_requests_total", + Help: "Total speech-gateway requests by endpoint, method, and HTTP status code.", + }, []string{"endpoint", "method", "code"}) + + requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "gateway_request_duration_seconds", + Help: "Total handler latency in seconds by endpoint.", + Buckets: latencyBuckets, + }, []string{"endpoint"}) + + inflightRequests = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "gateway_inflight_requests", + Help: "In-flight speech-gateway requests by endpoint.", + }, []string{"endpoint"}) + + upstreamDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "gateway_upstream_duration_seconds", + Help: "Time spent calling the skit backend /api/v1/process by endpoint.", + Buckets: latencyBuckets, + }, []string{"endpoint"}) + + rejectedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "gateway_rejected_total", + Help: "Rejected requests by endpoint and reason.", + }, []string{"endpoint", "reason"}) +) + +// statusRecorder captures the response status code while preserving +// http.Flusher so proxied responses keep streaming incrementally. +type statusRecorder struct { + http.ResponseWriter + code int + wroteHeader bool +} + +func (s *statusRecorder) WriteHeader(code int) { + if !s.wroteHeader { + s.code = code + s.wroteHeader = true + } + s.ResponseWriter.WriteHeader(code) +} + +func (s *statusRecorder) Write(b []byte) (int, error) { + if !s.wroteHeader { + s.code = http.StatusOK + s.wroteHeader = true + } + return s.ResponseWriter.Write(b) +} + +func (s *statusRecorder) Flush() { + if f, ok := s.ResponseWriter.(http.Flusher); ok { + f.Flush() + } +} + +// instrument records request count, latency, and in-flight gauge per endpoint, +// and maps rejection status codes to gateway_rejected_total reasons. +func instrument(endpoint string, next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + inflightRequests.WithLabelValues(endpoint).Inc() + defer inflightRequests.WithLabelValues(endpoint).Dec() + + rec := &statusRecorder{ResponseWriter: w, code: http.StatusOK} + start := time.Now() + next(rec, r) + + requestDuration.WithLabelValues(endpoint).Observe(time.Since(start).Seconds()) + requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(rec.code)).Inc() + if reason := rejectionReason(rec.code); reason != "" { + rejectedTotal.WithLabelValues(endpoint, reason).Inc() + } + } +} + +func rejectionReason(code int) string { + switch code { + case http.StatusUnsupportedMediaType: + return "bad_content_type" + case http.StatusRequestEntityTooLarge: + return "too_large" + case http.StatusBadGateway: + return "upstream_error" + default: + return "" + } +} diff --git a/examples/speech-gateway/cmd/gateway/metrics_test.go b/examples/speech-gateway/cmd/gateway/metrics_test.go new file mode 100644 index 000000000..290aa5069 --- /dev/null +++ b/examples/speech-gateway/cmd/gateway/metrics_test.go @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +package main + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +func TestRejectionReason(t *testing.T) { + cases := map[int]string{ + http.StatusUnsupportedMediaType: "bad_content_type", + http.StatusRequestEntityTooLarge: "too_large", + http.StatusBadGateway: "upstream_error", + http.StatusOK: "", + http.StatusBadRequest: "", + } + for code, want := range cases { + if got := rejectionReason(code); got != want { + t.Errorf("rejectionReason(%d) = %q, want %q", code, got, want) + } + } +} + +func TestInstrumentRecordsRejection(t *testing.T) { + h := instrument("tts", func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "nope", http.StatusUnsupportedMediaType) + }) + + rec := httptest.NewRecorder() + h(rec, httptest.NewRequest(http.MethodPost, "/tts", nil)) + if rec.Code != http.StatusUnsupportedMediaType { + t.Fatalf("status = %d, want 415", rec.Code) + } + + body := scrapeMetrics(t) + for _, want := range []string{ + `gateway_requests_total{code="415",endpoint="tts",method="POST"}`, + `gateway_rejected_total{endpoint="tts",reason="bad_content_type"}`, + `gateway_request_duration_seconds_bucket{endpoint="tts"`, + } { + if !strings.Contains(body, want) { + t.Errorf("metrics output missing %q", want) + } + } +} + +func scrapeMetrics(t *testing.T) string { + t.Helper() + rec := httptest.NewRecorder() + promhttp.Handler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil)) + if rec.Code != http.StatusOK { + t.Fatalf("/metrics status = %d, want 200", rec.Code) + } + return rec.Body.String() +} diff --git a/examples/speech-gateway/go.mod b/examples/speech-gateway/go.mod index a9805a17a..861841153 100644 --- a/examples/speech-gateway/go.mod +++ b/examples/speech-gateway/go.mod @@ -1,3 +1,18 @@ module github.com/streamer45/streamkit/examples/speech-gateway go 1.24 + +require github.com/prometheus/client_golang v1.23.2 + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + golang.org/x/sys v0.35.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect +) diff --git a/examples/speech-gateway/go.sum b/examples/speech-gateway/go.sum new file mode 100644 index 000000000..d6b8ca98b --- /dev/null +++ b/examples/speech-gateway/go.sum @@ -0,0 +1,46 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 5ccf96a3ca0cb16a9e6b850245a1ce58f5c38a46 Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sat, 30 May 2026 16:24:18 +0000 Subject: [PATCH 2/6] feat(speech-gateway): forward X-StreamKit-Service to backend Lets the skit oneshot_pipeline metric split by tts/stt service. Signed-off-by: streamkit-devin --- examples/speech-gateway/cmd/gateway/main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/speech-gateway/cmd/gateway/main.go b/examples/speech-gateway/cmd/gateway/main.go index 557e1954e..2f8a4253d 100644 --- a/examples/speech-gateway/cmd/gateway/main.go +++ b/examples/speech-gateway/cmd/gateway/main.go @@ -377,6 +377,8 @@ func (gw *gateway) proxyMultipart(w http.ResponseWriter, r *http.Request, endpoi } req.Header.Set("Content-Type", mw.FormDataContentType()) req.Close = true + // Lets the backend split oneshot_pipeline metrics by {tts,stt,other}. + req.Header.Set("X-StreamKit-Service", endpoint) if gw.authToken != "" { req.Header.Set("Authorization", "Bearer "+gw.authToken) } From 458f023a2b424bbd48c06e24b3516dc8fa3904a1 Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sat, 30 May 2026 16:32:10 +0000 Subject: [PATCH 3/6] fix(speech-gateway): record aborted requests as 499 not 200 Handlers return without writing on context.Canceled; default the metric to 499 (client closed) unless a status was written, so cancellations are not counted as successes. Signed-off-by: streamkit-devin --- examples/speech-gateway/cmd/gateway/metrics.go | 12 ++++++++++-- .../speech-gateway/cmd/gateway/metrics_test.go | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/examples/speech-gateway/cmd/gateway/metrics.go b/examples/speech-gateway/cmd/gateway/metrics.go index c149c8132..734e0b3cc 100644 --- a/examples/speech-gateway/cmd/gateway/metrics.go +++ b/examples/speech-gateway/cmd/gateway/metrics.go @@ -16,6 +16,10 @@ import ( // Buckets span sub-100ms rejections up to multi-second STT/TTS synthesis. var latencyBuckets = []float64{0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30} +// codeClientClosed mirrors nginx's 499: the handler returned without writing a +// response, typically because the client disconnected mid-request. +const codeClientClosed = 499 + var ( requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "gateway_requests_total", @@ -86,9 +90,13 @@ func instrument(endpoint string, next http.HandlerFunc) http.HandlerFunc { start := time.Now() next(rec, r) + code := rec.code + if !rec.wroteHeader { + code = codeClientClosed + } requestDuration.WithLabelValues(endpoint).Observe(time.Since(start).Seconds()) - requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(rec.code)).Inc() - if reason := rejectionReason(rec.code); reason != "" { + requestsTotal.WithLabelValues(endpoint, r.Method, strconv.Itoa(code)).Inc() + if reason := rejectionReason(code); reason != "" { rejectedTotal.WithLabelValues(endpoint, reason).Inc() } } diff --git a/examples/speech-gateway/cmd/gateway/metrics_test.go b/examples/speech-gateway/cmd/gateway/metrics_test.go index 290aa5069..4564a8045 100644 --- a/examples/speech-gateway/cmd/gateway/metrics_test.go +++ b/examples/speech-gateway/cmd/gateway/metrics_test.go @@ -51,6 +51,20 @@ func TestInstrumentRecordsRejection(t *testing.T) { } } +func TestInstrumentUnwrittenResponseCountedAsClientClosed(t *testing.T) { + // Handler returns without writing, mirroring the context.Canceled path + // in handleSTT/handleTTS; it must not be counted as a 200. + h := instrument("stt", func(http.ResponseWriter, *http.Request) {}) + + rec := httptest.NewRecorder() + h(rec, httptest.NewRequest(http.MethodPost, "/stt", nil)) + + body := scrapeMetrics(t) + if !strings.Contains(body, `gateway_requests_total{code="499",endpoint="stt",method="POST"}`) { + t.Errorf("expected unwritten response to be counted as code=499; got:\n%s", body) + } +} + func scrapeMetrics(t *testing.T) string { t.Helper() rec := httptest.NewRecorder() From 9ea4f9a098334098925338d00c06d7ed875f24ca Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sun, 31 May 2026 08:10:00 +0000 Subject: [PATCH 4/6] docs: add local observability stack sample, guide, and skill - Add samples/observability/ compose stack (skit + Prometheus + Grafana, optional speech gateway) that auto-provisions both dashboards and ships a traffic generator, so the metrics/dashboards can be validated locally. - Document the plugin/oneshot/gateway metric families and a 'run the stack locally' section in the observability guide. - Add an observability-stack agent skill capturing the validation procedure and the demo-image gotchas. - Add a Dockerfile to the speech-gateway example. Signed-off-by: streamkit-devin --- .agents/skills/observability-stack/SKILL.md | 87 +++++++++++++++ .claude/skills/observability-stack | 1 + docs/src/content/docs/guides/observability.md | 38 +++++++ examples/speech-gateway/Dockerfile | 13 +++ samples/observability/README.md | 100 ++++++++++++++++++ samples/observability/docker-compose.yml | 96 +++++++++++++++++ samples/observability/generate-traffic.sh | 45 ++++++++ .../provisioning/dashboards/streamkit.yml | 9 ++ .../provisioning/datasources/prometheus.yml | 11 ++ .../observability/pipelines/stt-whisper.yml | 27 +++++ .../observability/pipelines/tts-kokoro.yml | 26 +++++ samples/observability/prometheus.yml | 12 +++ samples/observability/skit/entrypoint.sh | 31 ++++++ 13 files changed, 496 insertions(+) create mode 100644 .agents/skills/observability-stack/SKILL.md create mode 120000 .claude/skills/observability-stack create mode 100644 examples/speech-gateway/Dockerfile create mode 100644 samples/observability/README.md create mode 100644 samples/observability/docker-compose.yml create mode 100755 samples/observability/generate-traffic.sh create mode 100644 samples/observability/grafana/provisioning/dashboards/streamkit.yml create mode 100644 samples/observability/grafana/provisioning/datasources/prometheus.yml create mode 100644 samples/observability/pipelines/stt-whisper.yml create mode 100644 samples/observability/pipelines/tts-kokoro.yml create mode 100644 samples/observability/prometheus.yml create mode 100755 samples/observability/skit/entrypoint.sh diff --git a/.agents/skills/observability-stack/SKILL.md b/.agents/skills/observability-stack/SKILL.md new file mode 100644 index 000000000..f02a69b0c --- /dev/null +++ b/.agents/skills/observability-stack/SKILL.md @@ -0,0 +1,87 @@ +--- +name: observability-stack +description: >- + Spin up StreamKit's local observability stack (skit + Prometheus + Grafana, + optional speech gateway) and validate the Grafana dashboards end-to-end. Use + when testing metrics/dashboards, debugging empty dashboard panels, or + reproducing the speech-gateway monitoring setup locally. +license: MPL-2.0 +--- + +# Observability stack (local) + +`samples/observability/` is a `docker compose` stack that runs skit + Prometheus ++ Grafana (and an optional speech gateway), auto-provisioning both bundled +dashboards. Use it to validate metrics and dashboards without any cloud setup. + +## Run it + +```bash +cd samples/observability +docker compose up -d +./generate-traffic.sh # direct-to-skit TTS+STT +# optional gateway row: +docker compose --profile gateway up -d --build +./generate-traffic.sh --gateway +``` + +Grafana: (anonymous admin). Prometheus: +. skit: . + +## How metrics flow + +- **skit → Prometheus via OTLP push.** Prometheus runs with + `--web.enable-otlp-receiver`; skit's `SK_TELEMETRY__OTLP_ENDPOINT` points at + `…/api/v1/otlp/v1/metrics`. There is **no scrape job** for skit. +- **gateway → Prometheus via scrape** of the gateway's `/metrics`. + +## Validate dashboards (don't just eyeball) + +OTLP renames dotted metrics and appends unit suffixes, so verify the metric +names/labels the panels query actually exist before trusting a panel: + +```bash +# list all metric names Prometheus knows about +curl -s localhost:9090/api/v1/label/__name__/values | jq -r '.data[]' | sort +# run a panel's exact PromQL and count series (0 == panel will be "No data") +curl -s --data-urlencode 'query=' localhost:9090/api/v1/query \ + | jq '.data.result | length' +# inspect a metric's labels +curl -s 'localhost:9090/api/v1/series?match[]=' | jq +``` + +Key name/label facts: + +- Plugin metrics: `plugin_call_duration_seconds_*` (unit suffix present), + `plugin_calls_total`; labels `plugin_kind`, `op`. +- `oneshot_pipeline_duration_*` has **no** `_seconds` suffix (no unit set); + labels `status`, and `service` only when an `X-StreamKit-Service` header is + forwarded by a service-label-aware skit. +- Gateway: `gateway_requests_total{endpoint,code}`, + `gateway_request_duration_seconds`, `gateway_rejected_total{reason}` (only + appears after a 413/415/502 actually occurs). + +## Expected "No data" (not bugs) + +- Plugin failure panels (`plugin_errors_total` etc.) — counters don't exist + until a failure happens. +- Oneshot "by Service" panels — empty unless the skit build emits the `service` + label. +- Video / MoQ / codec panels — only populate when you run those pipelines. + +## Gotchas (most-common causes of empty dashboards) + +- **`latest-demo` is stale.** Pin a versioned `-demo` tag; `latest-demo` can + predate metrics like `plugin.call.duration`, leaving the Plugins row empty. +- **Demo-image plugin layout.** `-demo` images ship bare `.so` files but the + loader wants `plugins/native//` bundles; `skit/entrypoint.sh` reassembles + them. Symptom: "no plugins found" / "node kind not found in registry". +- **Model-name mismatch.** A pipeline's `model_path` must exist in the image's + `models/`. The stack's `pipelines/` use the names the `-demo` image ships. +- **Grafana datasource input.** Committed dashboards use `${DS_PROMETHEUS}`; + the `dashboard-prep` step rewrites it to the provisioned uid. In compose + command strings, escape it as `$${DS_PROMETHEUS}` so compose doesn't + interpolate it. +- **Local auth.** skit needs `SK_AUTH__MODE=disabled` + + `SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH=true` to start unauthenticated on a + non-loopback bind. Local only. diff --git a/.claude/skills/observability-stack b/.claude/skills/observability-stack new file mode 120000 index 000000000..288055401 --- /dev/null +++ b/.claude/skills/observability-stack @@ -0,0 +1 @@ +../../.agents/skills/observability-stack \ No newline at end of file diff --git a/docs/src/content/docs/guides/observability.md b/docs/src/content/docs/guides/observability.md index 52b44674c..0c6dddf3a 100644 --- a/docs/src/content/docs/guides/observability.md +++ b/docs/src/content/docs/guides/observability.md @@ -66,6 +66,44 @@ Import [`samples/grafana-dashboard.json`](https://github.com/streamer45/streamki ![Grafana Dashboard](/screenshots/grafana_dashboard.png) +### What's measured + +Beyond HTTP and engine/node throughput, a few metric families are especially +useful for speech and ML workloads: + +- **Plugin / ML inference** — native plugins emit per-call metrics labelled by + `plugin_kind` (e.g. `whisper`, `kokoro`) and `op`: `plugin_call_duration_seconds` + (histogram), `plugin_calls_total`, and `plugin_errors_total` / + `plugin_timeouts_total` / `plugin_panics_total`. This is where inference + latency and failures show up — usually the dominant cost of a speech pipeline. +- **Oneshot pipelines** — `oneshot_pipeline_duration` (histogram) is labelled by + `status` (`ok`/`error`). Because every oneshot request hits the same + `POST /api/v1/process` endpoint, splitting TTS vs STT requires a trusted + `service` label (sent via the `X-StreamKit-Service` header); without it all + oneshot traffic collapses into one series. +- **Speech gateway** — the [speech gateway example](https://github.com/streamer45/streamkit/tree/main/examples/speech-gateway) + exposes Prometheus metrics for the front door it puts in front of skit: + per-endpoint request rate/latency (`gateway_requests_total`, + `gateway_request_duration_seconds`), in-flight gauge, upstream latency, and + rejections by reason (`gateway_rejected_total`). + +### Run the full stack locally + +To see all of the above on the dashboards without any cloud setup, use the +[`samples/observability`](https://github.com/streamer45/streamkit/tree/main/samples/observability) +compose stack — it wires skit (OTLP push) + the gateway (scrape) into Prometheus +and auto-provisions both dashboards in Grafana: + +```bash +cd samples/observability +docker compose up -d +./generate-traffic.sh +# Grafana: http://localhost:3000 +``` + +See its README for the wiring details and known gotchas (demo-image tag/plugin +layout, model-name matching, the Prometheus OTLP receiver, and local auth). + ## Traces (OTLP) Tracing export is controlled by: diff --git a/examples/speech-gateway/Dockerfile b/examples/speech-gateway/Dockerfile new file mode 100644 index 000000000..8c86de183 --- /dev/null +++ b/examples/speech-gateway/Dockerfile @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +FROM golang:1.24-bookworm AS build +WORKDIR /src +COPY . . +RUN CGO_ENABLED=0 go build -o /gateway ./cmd/gateway + +FROM gcr.io/distroless/static-debian12 +COPY --from=build /gateway /gateway +EXPOSE 8080 +ENTRYPOINT ["/gateway"] diff --git a/samples/observability/README.md b/samples/observability/README.md new file mode 100644 index 000000000..0c2e663b9 --- /dev/null +++ b/samples/observability/README.md @@ -0,0 +1,100 @@ + + +# Local observability stack + +A `docker compose` stack that runs **skit + Prometheus + Grafana** (and an +optional **speech gateway**) so you can see StreamKit's metrics on the bundled +Grafana dashboards locally — no cloud, no manual import. + +## Quick start + +```bash +cd samples/observability +docker compose up -d # skit + Prometheus + Grafana +./generate-traffic.sh # drive ~20 TTS + STT requests through skit +``` + +Then open Grafana at (anonymous admin, no login). Two +dashboards are auto-provisioned: + +- **StreamKit Performance Dashboard** — the repo's main dashboard + ([`samples/grafana-dashboard.json`](../grafana-dashboard.json)), including the + **Plugins / ML inference** row. +- **StreamKit Speech Gateway Dashboard** — the gateway/oneshot dashboard + ([`examples/speech-gateway/grafana-dashboard.json`](../../examples/speech-gateway/grafana-dashboard.json)). + +| Service | URL | +| ---------- | ----------------------- | +| Grafana | | +| Prometheus | | +| skit API | | +| gateway | (gateway profile only) | + +## How metrics get to Prometheus + +Two different paths, both visible on the dashboards: + +- **skit → Prometheus (OTLP push).** skit exports OTLP metrics to Prometheus' + native OTLP receiver, which is enabled with `--web.enable-otlp-receiver`. + Configured via `SK_TELEMETRY__OTLP_ENDPOINT` pointing at + `http://prometheus:9090/api/v1/otlp/v1/metrics`. This feeds the HTTP, engine, + oneshot, and **plugin** metrics. +- **gateway → Prometheus (scrape).** The speech gateway exposes a classic + `/metrics` endpoint that Prometheus scrapes (see `prometheus.yml`). This feeds + the **Speech Gateway** row. + +## Speech Gateway row + +The gateway is behind a compose profile because it requires the gateway +**metrics** instrumentation: + +```bash +docker compose --profile gateway up -d --build +./generate-traffic.sh --gateway # route traffic through the gateway +``` + +Notes: + +- The gateway's `/metrics` endpoint and the `gateway_*` metrics require the + metrics-instrumented gateway. The Speech Gateway dashboard row stays empty + until those metrics are present and the gateway has served traffic. +- The gateway's default STT pipeline targets a Whisper model that must exist on + the skit it talks to. The bundled `-demo` image ships `ggml-tiny-q5_1.bin`; if + the gateway points at a different model, STT through the gateway will fail + while TTS still works. The direct-to-skit traffic path (the default + `generate-traffic.sh`) avoids this by shipping its own pipelines under + `pipelines/`. + +## Known gotchas + +These are the sharp edges worth knowing when wiring this up yourself: + +- **Pin a versioned `-demo` tag.** `latest-demo` can lag behind released + versions and predate metrics like `plugin.call.duration`, which leaves the + Plugins / ML inference row empty. This stack pins `v0.5.0-demo`. +- **Demo image plugin layout.** Current `-demo` images ship native plugins as + bare `.so` files under `plugins/native/`, but the loader expects directory + bundles (`plugins/native//` with a `plugin.yml` + the `.so`). `skit serve` + otherwise logs "no plugins found" and pipelines fail with "node kind not + found". `skit/entrypoint.sh` reassembles the expected layout at startup from + the in-repo manifests (mounted at `/repo-manifests`). +- **Model names must match.** Pipelines reference model files by path; the file + must actually be present in the image/`models/` dir. The pipelines under + `pipelines/` use the model names the `-demo` image actually ships. +- **Local auth override.** skit refuses to start unauthenticated on a + non-loopback bind unless you opt in. This stack sets + `SK_AUTH__MODE=disabled` + `SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH=true`. + **Local testing only** — never do this on an exposed instance. +- **Grafana dashboard datasource.** The committed dashboards use a + `${DS_PROMETHEUS}` datasource input. The `dashboard-prep` step rewrites it to + the provisioned datasource uid so the dashboards load without a manual import. + +## Cleanup + +```bash +docker compose --profile gateway down -v +``` diff --git a/samples/observability/docker-compose.yml b/samples/observability/docker-compose.yml new file mode 100644 index 000000000..305057bc9 --- /dev/null +++ b/samples/observability/docker-compose.yml @@ -0,0 +1,96 @@ +# Local observability stack for StreamKit: skit + Prometheus + Grafana, with an +# optional speech gateway. See README.md for the walkthrough and known gotchas. +# +# Usage: +# docker compose up -d # skit + Prometheus + Grafana +# docker compose --profile gateway up -d # also build & run the speech gateway +# +# Grafana: http://localhost:3000 (anonymous admin, no login) +# Prometheus: http://localhost:9090 +# skit API: http://localhost:4545 +# gateway: http://localhost:8080 (gateway profile only) + +services: + skit: + image: ghcr.io/streamer45/streamkit:v0.5.0-demo + # Pinned to a versioned -demo tag on purpose: `latest-demo` can lag behind + # and predate metrics like plugin.call.duration, leaving dashboard rows empty. + entrypoint: ["/entrypoint.sh"] + environment: + SK_AUTH__MODE: disabled + SK_PERMISSIONS__ALLOW_INSECURE_NO_AUTH: "true" + SK_PLUGINS__DIRECTORY: /opt/streamkit/np + SK_TELEMETRY__ENABLE: "true" + SK_TELEMETRY__OTLP_ENDPOINT: http://prometheus:9090/api/v1/otlp/v1/metrics + volumes: + - ./skit/entrypoint.sh:/entrypoint.sh:ro + - ../../plugins/native:/repo-manifests:ro + ports: + - "4545:4545" + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:4545/healthz"] + interval: 5s + timeout: 3s + retries: 20 + + prometheus: + image: prom/prometheus:v3.1.0 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --web.enable-otlp-receiver + - --storage.tsdb.path=/prometheus + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + ports: + - "9090:9090" + + dashboard-prep: + image: alpine:3.21 + # Copies the in-repo dashboards into Grafana's provisioning dir, resolving + # the ${DS_PROMETHEUS} template input to the provisioned datasource uid so + # the dashboards load without manual import. + command: + - sh + - -c + - | + set -e + for f in /in/*.json; do + sed 's/$${DS_PROMETHEUS}/prometheus/g' "$$f" > "/out/$$(basename "$$f")" + done + echo "prepared dashboards:"; ls -1 /out + volumes: + - ../../samples/grafana-dashboard.json:/in/streamkit.json:ro + - ../../examples/speech-gateway/grafana-dashboard.json:/in/speech-gateway.json:ro + - grafana-dashboards:/out + + grafana: + image: grafana/grafana:11.4.0 + environment: + GF_AUTH_ANONYMOUS_ENABLED: "true" + GF_AUTH_ANONYMOUS_ORG_ROLE: Admin + GF_AUTH_DISABLE_LOGIN_FORM: "true" + GF_SECURITY_ADMIN_PASSWORD: admin + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - grafana-dashboards:/var/lib/grafana/dashboards:ro + ports: + - "3000:3000" + depends_on: + - prometheus + - dashboard-prep + + gateway: + profiles: ["gateway"] + build: + context: ../../examples/speech-gateway + environment: + GATEWAY_LISTEN: ":8080" + SKIT_URL: http://skit:4545 + ports: + - "8080:8080" + depends_on: + skit: + condition: service_healthy + +volumes: + grafana-dashboards: diff --git a/samples/observability/generate-traffic.sh b/samples/observability/generate-traffic.sh new file mode 100755 index 000000000..87bb74f50 --- /dev/null +++ b/samples/observability/generate-traffic.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +# Drives TTS + STT traffic so the dashboards have data. By default it calls +# skit's oneshot /api/v1/process directly (no gateway required). Pass --gateway +# to route through the speech gateway instead (requires the `gateway` profile), +# which also populates the Speech Gateway dashboard row. +set -euo pipefail + +ROUNDS="${ROUNDS:-20}" +SKIT_URL="${SKIT_URL:-http://localhost:4545}" +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +HERE="$(cd "$(dirname "$0")" && pwd)" +MODE="direct" +[ "${1:-}" = "--gateway" ] && MODE="gateway" + +tmp="$(mktemp -d)" +trap 'rm -rf "$tmp"' EXIT + +echo "mode=$MODE rounds=$ROUNDS" +for i in $(seq 1 "$ROUNDS"); do + text="StreamKit observability sample, round $i: the quick brown fox." + if [ "$MODE" = "gateway" ]; then + curl -fsS -o "$tmp/a.ogg" -d "$text" "$GATEWAY_URL/tts" + curl -fsS -o /dev/null --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt" || true + else + printf '%s' "$text" > "$tmp/in.txt" + # X-StreamKit-Service lets a service-label-aware skit (see PR #545) split + # oneshot metrics by {tts,stt}; older builds simply ignore the header. + curl -fsS -o "$tmp/a.ogg" \ + -H 'X-StreamKit-Service: tts' \ + -F "config=<$HERE/pipelines/tts-kokoro.yml" \ + -F "media=@$tmp/in.txt;type=text/plain;filename=media" \ + "$SKIT_URL/api/v1/process" + curl -fsS -o /dev/null \ + -H 'X-StreamKit-Service: stt' \ + -F "config=<$HERE/pipelines/stt-whisper.yml" \ + -F "media=@$tmp/a.ogg;type=audio/ogg;filename=media" \ + "$SKIT_URL/api/v1/process" || true + fi + printf '.' +done +echo " done" diff --git a/samples/observability/grafana/provisioning/dashboards/streamkit.yml b/samples/observability/grafana/provisioning/dashboards/streamkit.yml new file mode 100644 index 000000000..721b3cba6 --- /dev/null +++ b/samples/observability/grafana/provisioning/dashboards/streamkit.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +providers: + - name: streamkit + type: file + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: false diff --git a/samples/observability/grafana/provisioning/datasources/prometheus.yml b/samples/observability/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 000000000..c70b848dd --- /dev/null +++ b/samples/observability/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + jsonData: + timeInterval: 5s diff --git a/samples/observability/pipelines/stt-whisper.yml b/samples/observability/pipelines/stt-whisper.yml new file mode 100644 index 000000000..37f6d1d26 --- /dev/null +++ b/samples/observability/pipelines/stt-whisper.yml @@ -0,0 +1,27 @@ +name: stt +description: Speech-to-Text (Whisper) for the observability sample +mode: oneshot +steps: + - kind: streamkit::http_input + - kind: containers::ogg::demuxer + - kind: audio::opus::decoder + - kind: audio::resampler + params: + chunk_frames: 960 + output_frame_size: 960 + target_sample_rate: 16000 + - kind: plugin::native::whisper + params: + model_path: models/ggml-tiny-q5_1.bin + language: en + vad_model_path: models/silero_vad.onnx + vad_threshold: 0.5 + min_silence_duration_ms: 700 + max_segment_duration_secs: 30.0 + - kind: core::json_serialize + params: + pretty: false + newline_delimited: true + - kind: streamkit::http_output + params: + content_type: application/json diff --git a/samples/observability/pipelines/tts-kokoro.yml b/samples/observability/pipelines/tts-kokoro.yml new file mode 100644 index 000000000..cf6435006 --- /dev/null +++ b/samples/observability/pipelines/tts-kokoro.yml @@ -0,0 +1,26 @@ +name: tts +description: Text-to-Speech (Kokoro) for the observability sample +mode: oneshot +steps: + - kind: streamkit::http_input + - kind: core::text_chunker + params: + min_length: 10 + - kind: plugin::native::kokoro + params: + model_dir: "models/kokoro-multi-lang-v1_1" + speaker_id: 0 + speed: 1.0 + num_threads: 4 + - kind: audio::resampler + params: + chunk_frames: 960 + output_frame_size: 960 + target_sample_rate: 48000 + - kind: audio::opus::encoder + - kind: containers::webm::muxer + params: + channels: 1 + sample_rate: 48000 + streaming_mode: live + - kind: streamkit::http_output diff --git a/samples/observability/prometheus.yml b/samples/observability/prometheus.yml new file mode 100644 index 000000000..5296e6f4d --- /dev/null +++ b/samples/observability/prometheus.yml @@ -0,0 +1,12 @@ +global: + scrape_interval: 5s + evaluation_interval: 5s + +# skit pushes metrics via OTLP to Prometheus' native OTLP receiver +# (enabled with --web.enable-otlp-receiver in docker-compose.yml), so skit +# does not need a scrape_config here. The speech gateway exposes a classic +# Prometheus /metrics endpoint and is scraped below. +scrape_configs: + - job_name: speech-gateway + static_configs: + - targets: ['gateway:8080'] diff --git a/samples/observability/skit/entrypoint.sh b/samples/observability/skit/entrypoint.sh new file mode 100755 index 000000000..7f58ce5a6 --- /dev/null +++ b/samples/observability/skit/entrypoint.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +# The -demo images currently ship native plugins as bare `.so` files under +# plugins/native/, but the loader expects directory bundles +# (plugins/native// with a plugin.yml + the .so). Without this, `skit serve` +# logs "no plugins found" and TTS/STT pipelines fail with "node kind not found". +# We assemble the expected layout from the repo manifests (mounted at +# /repo-manifests) plus the .so files baked into the image, then start the +# server. Tracked upstream; remove once the demo image ships bundles directly. +set -e + +SRC=/opt/streamkit/plugins/native +DST=/opt/streamkit/np/native +mkdir -p "$DST" + +for manifest in /repo-manifests/*/plugin.yml; do + [ -f "$manifest" ] || continue + id=$(basename "$(dirname "$manifest")") + so=$(awk '/^entrypoint:/{print $2}' "$manifest") + if [ -n "$so" ] && [ -f "$SRC/$so" ]; then + mkdir -p "$DST/$id" + cp "$manifest" "$DST/$id/plugin.yml" + cp "$SRC/$so" "$DST/$id/$so" + echo "assembled plugin bundle: $id ($so)" + fi +done + +exec skit serve From 732b42574e32dec265c12741e97630b7dc250ab8 Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sun, 31 May 2026 08:17:24 +0000 Subject: [PATCH 5/6] fix(observability): TTS sample must mux Ogg so STT round-trip works The TTS pipeline muxed WebM while generate-traffic.sh posts the result to the STT pipeline as audio/ogg (demuxed by containers::ogg::demuxer), so STT traffic was silently dropped. Mux Ogg/Opus to match, as the gateway's own TTS pipeline does. Signed-off-by: streamkit-devin --- samples/observability/pipelines/tts-kokoro.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/samples/observability/pipelines/tts-kokoro.yml b/samples/observability/pipelines/tts-kokoro.yml index cf6435006..d5f9fb188 100644 --- a/samples/observability/pipelines/tts-kokoro.yml +++ b/samples/observability/pipelines/tts-kokoro.yml @@ -18,9 +18,11 @@ steps: output_frame_size: 960 target_sample_rate: 48000 - kind: audio::opus::encoder - - kind: containers::webm::muxer + - kind: containers::ogg::muxer params: channels: 1 - sample_rate: 48000 - streaming_mode: live + codec: opus + chunk_size: 32768 - kind: streamkit::http_output + params: + content_type: audio/ogg From 0b6a9b0c3c797ae72af7adb604d675c413f57820 Mon Sep 17 00:00:00 2001 From: streamkit-devin Date: Sun, 31 May 2026 08:22:39 +0000 Subject: [PATCH 6/6] chore(observability): surface gateway STT model mismatch once, not per round Signed-off-by: streamkit-devin --- samples/observability/generate-traffic.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/samples/observability/generate-traffic.sh b/samples/observability/generate-traffic.sh index 87bb74f50..d253d88ea 100755 --- a/samples/observability/generate-traffic.sh +++ b/samples/observability/generate-traffic.sh @@ -24,7 +24,13 @@ for i in $(seq 1 "$ROUNDS"); do text="StreamKit observability sample, round $i: the quick brown fox." if [ "$MODE" = "gateway" ]; then curl -fsS -o "$tmp/a.ogg" -d "$text" "$GATEWAY_URL/tts" - curl -fsS -o /dev/null --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt" || true + # The gateway's built-in STT pipeline targets a Whisper model the -demo + # image may not ship, so STT can return 5xx; surface it once, not per round. + code=$(curl -s -o /dev/null -w '%{http_code}' --data-binary @"$tmp/a.ogg" -H 'Content-Type: audio/ogg' "$GATEWAY_URL/stt") + case "$code" in + 2*) ;; + *) [ -n "${stt_warned:-}" ] || { printf '\nnote: gateway STT -> HTTP %s; its built-in pipeline needs a Whisper model the demo image may not ship (see README / #553). TTS still populates the gateway row.\n' "$code"; stt_warned=1; } ;; + esac else printf '%s' "$text" > "$tmp/in.txt" # X-StreamKit-Service lets a service-label-aware skit (see PR #545) split