From 51db52f934307bab1f90b64577c59f28be2ee3b4 Mon Sep 17 00:00:00 2001 From: Jan Date: Tue, 7 Apr 2026 12:00:44 +0200 Subject: [PATCH] docs(ingest): align phase-1 status and decoder fallback semantics --- README.md | 696 ++++++----- docs/API.md | 843 ++++++------- docs/audio-ingest-implementation-plan.md | 1097 +++++++++++++++++ docs/audio-ingest-rework.md | 267 ++++ docs/config.sample.json | 27 + internal/config/config_test.go | 8 + internal/control/control.go | 4 +- internal/ingest/adapters/icecast/source.go | 2 + .../ingest/adapters/icecast/source_test.go | 29 + 9 files changed, 2215 insertions(+), 758 deletions(-) create mode 100644 docs/audio-ingest-implementation-plan.md create mode 100644 docs/audio-ingest-rework.md diff --git a/README.md b/README.md index 9fac785..f03528e 100644 --- a/README.md +++ b/README.md @@ -1,340 +1,356 @@ -# fm-rds-tx - -Go-based FM stereo transmitter with RDS. Supports ADALM-Pluto (PlutoSDR) and SoapySDR-compatible TX devices. - -## Status - -**Current status:** `v0.9.0` — runtime hardening milestone - -What is already in place: -- complete DSP chain: audio -> pre-emphasis -> stereo encoding -> RDS -> MPX -> limiter -> FM modulation -- real hardware TX paths for PlutoSDR / SoapySDR backends -- continuous TX engine with runtime telemetry -- dry-run, offline generation, and simulated TX modes -- HTTP control plane with live config patching and runtime/status endpoints -- browser UI on `/` -- live audio ingestion via stdin or HTTP stream input - -Current engineering focus: -- merge/release stabilization after runtime hardening -- deferred hardware-in-the-loop / RF validation work -- deferred device-aware capability / calibration work -- deferred signal self-monitoring work - -For the active runtime-hardening track, see: -- `docs/pro-runtime-hardening-workboard.md` - -## Signal path - -```text -Audio Source -> PreEmphasis(50us/75us/off) -> StereoEncoder(19k + 38k DSB-SC) --> RDS(57k BPSK) -> MPX Combiner -> Limiter -> FM 
Modulator(+/-75kHz) --> optional split-rate FM upsampling -> SDR backend -> RF output -``` - -For deeper DSP details, see: -- `docs/DSP-CHAIN.md` - -## Prerequisites - -### Go -- Go version from `go.mod` (currently Go 1.22) - -### Native SDR dependencies -Depending on backend, native libraries are required: - -- **SoapySDR backend** - - build with `-tags soapy` - - requires SoapySDR native library (`SoapySDR.dll` / `libSoapySDR.so` / `libSoapySDR.dylib`) - - on Windows, PothosSDR is the expected setup - -- **Pluto backend** - - uses native `libiio` - - Windows expects `libiio.dll` - - Linux build/runtime expects `pkg-config` + `libiio` - -### Hardware / legal -- validate RF output, deviation, filtering, and power with proper measurement equipment -- use only within applicable legal and regulatory constraints - -## Quick start - -## Build - -```powershell -# Build CLI tools without hardware-specific build tags: -go build ./cmd/fmrtx -go build ./cmd/offline - -# Build fmrtx with SoapySDR support: -go build -tags soapy ./cmd/fmrtx -``` - -## Quick verification - -```powershell -# Print effective config -go run ./cmd/fmrtx -print-config - -# Run tests -go test ./... 
- -# Basic dry-run summary -go run ./cmd/fmrtx --dry-run --dry-output build/dryrun/frame.json -``` - -For additional build/test commands, see: -- `docs/README.md` - -## Common usage flows - -### 1) List available SDR devices - -```powershell -.\fmrtx.exe --list-devices -``` - -### 2) Dry-run / config verification - -```powershell -.\fmrtx.exe --dry-run --dry-output build/dryrun/frame.json - -# Write dry-run JSON to stdout -.\fmrtx.exe --dry-run --dry-output - -``` - -### 3) Offline IQ/composite generation - -```powershell -go run ./cmd/offline -duration 2s -output build/offline/composite.iqf32 - -# Optional output rate override -go run ./cmd/offline -duration 500ms -output build/offline/composite.iqf32 -output-rate 228000 -``` - -### 4) Simulated transmit path - -```powershell -go run ./cmd/fmrtx --simulate-tx --simulate-output build/sim/simulated-soapy.iqf32 --simulate-duration 250ms -``` - -### 5) Real TX with config file - -```powershell -# Start TX service with manual start over HTTP -.\fmrtx.exe --tx --config docs/config.plutosdr.json - -# Start and begin transmitting immediately -.\fmrtx.exe --tx --tx-auto-start --config docs/config.plutosdr.json -``` - -### 6) Live audio via stdin - -```powershell -ffmpeg -i "http://svabi.ch:8443/stream" -f s16le -ar 44100 -ac 2 - | .\fmrtx.exe --tx --tx-auto-start --audio-stdin --config docs/config.plutosdr.json -``` - -### 7) Custom audio input rate - -```powershell -ffmpeg -i source.wav -f s16le -ar 48000 -ac 2 - | .\fmrtx.exe --tx --tx-auto-start --audio-stdin --audio-rate 48000 --config docs/config.plutosdr.json -``` - -### 8) HTTP audio ingest - -Start the control plane with `--audio-http` to accept raw PCM pushes on `/audio/stream` and feed them into the live encoder: - -Set `Content-Type` to `application/octet-stream` (or `audio/L16`) when posting audio data: - -```powershell -ffmpeg -i music.mp3 -f s16le -ar 44100 -ac 2 - | curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- 
http://localhost:8088/audio/stream -``` - -## CLI overview - -## `fmrtx` -Important runtime modes and flags include: -- `--tx` -- `--tx-auto-start` -- `--dry-run` -- `--dry-output ` -- `--simulate-tx` -- `--simulate-output ` -- `--simulate-duration ` -- `--config ` -- `--print-config` -- `--list-devices` -- `--audio-stdin` -- `--audio-rate ` -- `--audio-http` - -## `offline` -Useful flags include: -- `-duration ` -- `-output ` -- `-output-rate ` - -If the README is too high-level for the exact CLI surface, check: -- `cmd/fmrtx/main.go` -- `cmd/offline/main.go` - -## HTTP control plane - -Base URL: `http://{listenAddress}` (default typically `127.0.0.1:8088`) - -Security note: -- keep the control plane bound locally unless you intentionally place it behind a trusted and hardened access layer - -### Main endpoints - -```text -GET / browser UI -GET /healthz health check -GET /status current config/status snapshot -GET /runtime live engine / driver / audio telemetry -GET /config full config -POST /config patch config / live updates -GET /dry-run synthetic frame summary -POST /tx/start start transmission -POST /tx/stop stop transmission -POST /audio/stream push raw S16LE stereo PCM into live stream buffer (Content-Type: application/octet-stream or audio/L16 required) -``` - -### What the control plane covers -- TX start / stop -- runtime status and driver telemetry -- config inspection -- live patching of selected parameters -- dry-run inspection -- browser-accessible control UI -- optional HTTP audio ingest (enable with `--audio-http`) - -### Live config notes -`POST /config` supports live updates for selected fields such as: -- frequency -- stereo enable/disable -- pilot / RDS injection levels -- RDS enable/disable -- limiter settings -- PS / RadioText - -Some parameters are saved but not live-applied and require restart. 
- -For the full API contract, examples, live-patch semantics, and `/audio/stream` details, see: -- `docs/API.md` - -## Configuration - -Sample configs: -- `docs/config.sample.json` -- `docs/config.plutosdr.json` -- `docs/config.orangepi-pluto-soapy.json` - -Important config areas include: -- `fm.*` -- `rds.*` -- `audio.*` -- `backend.*` -- `control.*` - -Examples of relevant fields you may want to inspect: -- `fm.outputDrive` -- `fm.mpxGain` -- `fm.bs412Enabled` -- `fm.bs412ThresholdDBr` -- `fm.fmModulationEnabled` -- `backend.kind` -- `backend.driver` -- `backend.deviceArgs` -- `backend.uri` -- `backend.deviceSampleRateHz` -- `backend.outputPath` -- `control.listenAddress` - -For deeper config/API behavior, refer to: -- `internal/config/config.go` -- `docs/API.md` -- `docs/config.sample.json` - -## Development and testing - -Useful commands: - -```powershell -go test ./... -go run ./cmd/fmrtx -print-config -go run ./cmd/fmrtx -config docs/config.sample.json -go run ./cmd/fmrtx --dry-run --dry-output build/dryrun/frame.json -go run ./cmd/fmrtx --simulate-tx --simulate-output build/sim/simulated-soapy.iqf32 --simulate-duration 250ms -go run ./cmd/offline -duration 500ms -output build/offline/composite.iqf32 -``` - -See also: -- `docs/README.md` - -## PlutoSDR / backend notes - -- PlutoSDR commonly runs with a device-side sample rate above composite rate, so split-rate mode may be used automatically -- SoapySDR backend is suitable for Soapy-compatible TX hardware -- backend/device settings are selected through config rather than hardcoded paths -- runtime telemetry should be used to inspect effective TX state during operation - -## Repository layout - -```text -cmd/ - fmrtx/ main CLI - offline/ offline generator -internal/ - app/ TX engine + runtime state - audio/ audio input, resampling, tone generation, stream buffering - config/ config schema and validation - control/ HTTP control plane + browser UI - dryrun/ dry-run JSON summaries - dsp/ DSP primitives - mpx/ MPX 
combiner - offline/ full offline composite generation - output/ output/backend abstractions - platform/ backend abstractions and device/runtime stats - platform/soapysdr/ CGO SoapySDR binding - platform/plutosdr/ Pluto/libiio backend code - rds/ RDS encoder - stereo/ stereo encoder -docs/ - API.md - DSP-CHAIN.md - README.md - config.sample.json - config.plutosdr.json - config.orangepi-pluto-soapy.json - pro-runtime-hardening-workboard.md -scripts/ -examples/ -``` - -## Planning / workboard - -For the current runtime-hardening / professionalization track, see: -- `docs/pro-runtime-hardening-workboard.md` - -This is the living workboard for: -- status tracking -- confirmed findings -- open technical decisions -- verification notes -- implementation progress - -## Release / project docs - -Additional project docs: -- `CHANGELOG.md` -- `RELEASE.md` -- `docs/README.md` -- `docs/API.md` -- `docs/DSP-CHAIN.md` -- `docs/NOTES.md` - -## Legal note - -This project is intended only for lawful use within relevant license and regulatory constraints. -RF output, deviation, filtering, and transmitted power must be validated with proper measurement equipment. +# fm-rds-tx + +Go-based FM stereo transmitter with RDS. Supports ADALM-Pluto (PlutoSDR) and SoapySDR-compatible TX devices. 
+ +## Status + +**Current status:** `v0.9.0` — runtime hardening milestone + +What is already in place: +- complete DSP chain: audio -> pre-emphasis -> stereo encoding -> RDS -> MPX -> limiter -> FM modulation +- real hardware TX paths for PlutoSDR / SoapySDR backends +- continuous TX engine with runtime telemetry +- dry-run, offline generation, and simulated TX modes +- HTTP control plane with live config patching and runtime/status endpoints +- browser UI on `/` +- ingest runtime in front of TX stream sink, plus shared source/runtime stats +- ingest source factory for `stdin`, `http-raw`, and `icecast` +- Icecast source adapter with reconnect and decoder selection (`auto`/`native`/`ffmpeg`) +- decoder layer with explicit ffmpeg fallback path + +Current engineering focus: +- merge/release stabilization after runtime hardening +- deferred hardware-in-the-loop / RF validation work +- deferred device-aware capability / calibration work +- deferred signal self-monitoring work +- finish native Icecast decoder wiring (`mp3`/`oggvorbis`/`aac` are placeholders; ffmpeg fallback is the currently functional decode path) + +For the active runtime-hardening track, see: +- `docs/pro-runtime-hardening-workboard.md` + +## Signal path + +```text +Audio Source -> PreEmphasis(50us/75us/off) -> StereoEncoder(19k + 38k DSB-SC) +-> RDS(57k BPSK) -> MPX Combiner -> Limiter -> FM Modulator(+/-75kHz) +-> optional split-rate FM upsampling -> SDR backend -> RF output +``` + +For deeper DSP details, see: +- `docs/DSP-CHAIN.md` + +## Prerequisites + +### Go +- Go version from `go.mod` (currently Go 1.22) + +### Native SDR dependencies +Depending on backend, native libraries are required: + +- **SoapySDR backend** + - build with `-tags soapy` + - requires SoapySDR native library (`SoapySDR.dll` / `libSoapySDR.so` / `libSoapySDR.dylib`) + - on Windows, PothosSDR is the expected setup + +- **Pluto backend** + - uses native `libiio` + - Windows expects `libiio.dll` + - Linux build/runtime expects 
`pkg-config` + `libiio` + +### Hardware / legal +- validate RF output, deviation, filtering, and power with proper measurement equipment +- use only within applicable legal and regulatory constraints + +## Quick start + +## Build + +```powershell +# Build CLI tools without hardware-specific build tags: +go build ./cmd/fmrtx +go build ./cmd/offline + +# Build fmrtx with SoapySDR support: +go build -tags soapy ./cmd/fmrtx +``` + +## Quick verification + +```powershell +# Print effective config +go run ./cmd/fmrtx -print-config + +# Run tests +go test ./... + +# Basic dry-run summary +go run ./cmd/fmrtx --dry-run --dry-output build/dryrun/frame.json +``` + +For additional build/test commands, see: +- `docs/README.md` + +## Common usage flows + +### 1) List available SDR devices + +```powershell +.\fmrtx.exe --list-devices +``` + +### 2) Dry-run / config verification + +```powershell +.\fmrtx.exe --dry-run --dry-output build/dryrun/frame.json + +# Write dry-run JSON to stdout +.\fmrtx.exe --dry-run --dry-output - +``` + +### 3) Offline IQ/composite generation + +```powershell +go run ./cmd/offline -duration 2s -output build/offline/composite.iqf32 + +# Optional output rate override +go run ./cmd/offline -duration 500ms -output build/offline/composite.iqf32 -output-rate 228000 +``` + +### 4) Simulated transmit path + +```powershell +go run ./cmd/fmrtx --simulate-tx --simulate-output build/sim/simulated-soapy.iqf32 --simulate-duration 250ms +``` + +### 5) Real TX with config file + +```powershell +# Start TX service with manual start over HTTP +.\fmrtx.exe --tx --config docs/config.plutosdr.json + +# Start and begin transmitting immediately +.\fmrtx.exe --tx --tx-auto-start --config docs/config.plutosdr.json +``` + +### 6) Live audio via stdin + +```powershell +ffmpeg -i "http://svabi.ch:8443/stream" -f s16le -ar 44100 -ac 2 - | .\fmrtx.exe --tx --tx-auto-start --audio-stdin --config docs/config.plutosdr.json +``` + +### 7) Custom audio input rate + +```powershell 
+ffmpeg -i source.wav -f s16le -ar 48000 -ac 2 - | .\fmrtx.exe --tx --tx-auto-start --audio-stdin --audio-rate 48000 --config docs/config.plutosdr.json +``` + +### 8) HTTP audio ingest + +Start the control plane with `--audio-http` to accept raw PCM pushes on `/audio/stream` and feed them into the live encoder. + +Set `Content-Type` to `application/octet-stream` (or `audio/L16`) when posting audio data: + +```powershell +ffmpeg -i music.mp3 -f s16le -ar 44100 -ac 2 - | curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- http://localhost:8088/audio/stream +``` + +### 9) Icecast ingest via config + +Use `ingest.kind = "icecast"` and set `ingest.icecast.url` in config. + +Decoder semantics in Phase 1: +- `ingest.icecast.decoder = "auto"`: try the native decoder selected by content type; fall back to ffmpeg on unsupported paths +- `ingest.icecast.decoder = "native"`: native only, no fallback +- `ingest.icecast.decoder = "ffmpeg"` (or `fallback`): ffmpeg only + +Current implementation note: native codec packages exist but are placeholders; in practice, decoding currently goes through the ffmpeg fallback. 
+ +## CLI overview + +## `fmrtx` +Important runtime modes and flags include: +- `--tx` +- `--tx-auto-start` +- `--dry-run` +- `--dry-output PATH` +- `--simulate-tx` +- `--simulate-output PATH` +- `--simulate-duration DURATION` +- `--config PATH` +- `--print-config` +- `--list-devices` +- `--audio-stdin` +- `--audio-rate HZ` +- `--audio-http` + +## `offline` +Useful flags include: +- `-duration DURATION` +- `-output PATH` +- `-output-rate HZ` + +If the README is too high-level for the exact CLI surface, check: +- `cmd/fmrtx/main.go` +- `cmd/offline/main.go` + +## HTTP control plane + +Base URL: `http://{listenAddress}` (default: `127.0.0.1:8088`) + +Security note: +- keep the control plane bound locally unless you intentionally place it behind a trusted and hardened access layer + +### Main endpoints + +```text +GET / browser UI +GET /healthz health check +GET /status current config/status snapshot +GET /runtime live engine / driver / audio telemetry +GET /config full config +POST /config patch config / live updates +GET /dry-run synthetic frame summary +POST /tx/start start transmission +POST /tx/stop stop transmission +POST /audio/stream push raw S16LE stereo PCM into live stream buffer (Content-Type: application/octet-stream or audio/L16 required) +``` + +### What the control plane covers +- TX start / stop +- runtime status and driver telemetry +- config inspection +- live patching of selected parameters +- dry-run inspection +- browser-accessible control UI +- optional HTTP audio ingest (enable with `--audio-http`) + +### Live config notes +`POST /config` supports live updates for selected fields such as: +- frequency +- stereo enable/disable +- pilot / RDS injection levels +- RDS enable/disable +- limiter settings +- PS / RadioText + +Some parameters are saved but not live-applied and require restart. 
+ +For the full API contract, examples, live-patch semantics, and `/audio/stream` details, see: +- `docs/API.md` + +## Configuration + +Sample configs: +- `docs/config.sample.json` +- `docs/config.plutosdr.json` +- `docs/config.orangepi-pluto-soapy.json` + +Important config areas include: +- `fm.*` +- `rds.*` +- `audio.*` +- `backend.*` +- `control.*` +- `ingest.*` + +Examples of relevant fields you may want to inspect: +- `fm.outputDrive` +- `fm.mpxGain` +- `fm.bs412Enabled` +- `fm.bs412ThresholdDBr` +- `fm.fmModulationEnabled` +- `backend.kind` +- `backend.driver` +- `backend.deviceArgs` +- `backend.uri` +- `backend.deviceSampleRateHz` +- `backend.outputPath` +- `control.listenAddress` + +For deeper config/API behavior, refer to: +- `internal/config/config.go` +- `docs/API.md` +- `docs/config.sample.json` + +## Development and testing + +Useful commands: + +```powershell +go test ./... +go run ./cmd/fmrtx -print-config +go run ./cmd/fmrtx -config docs/config.sample.json +go run ./cmd/fmrtx --dry-run --dry-output build/dryrun/frame.json +go run ./cmd/fmrtx --simulate-tx --simulate-output build/sim/simulated-soapy.iqf32 --simulate-duration 250ms +go run ./cmd/offline -duration 500ms -output build/offline/composite.iqf32 +``` + +See also: +- `docs/README.md` + +## PlutoSDR / backend notes + +- PlutoSDR commonly runs with a device-side sample rate above composite rate, so split-rate mode may be used automatically +- SoapySDR backend is suitable for Soapy-compatible TX hardware +- backend/device settings are selected through config rather than hardcoded paths +- runtime telemetry should be used to inspect effective TX state during operation + +## Repository layout + +```text +cmd/ + fmrtx/ main CLI + offline/ offline generator +internal/ + app/ TX engine + runtime state + audio/ audio input, resampling, tone generation, stream buffering + config/ config schema and validation + control/ HTTP control plane + browser UI + dryrun/ dry-run JSON summaries + dsp/ DSP 
primitives + mpx/ MPX combiner + offline/ full offline composite generation + output/ output/backend abstractions + platform/ backend abstractions and device/runtime stats + platform/soapysdr/ CGO SoapySDR binding + platform/plutosdr/ Pluto/libiio backend code + rds/ RDS encoder + stereo/ stereo encoder +docs/ + API.md + DSP-CHAIN.md + README.md + config.sample.json + config.plutosdr.json + config.orangepi-pluto-soapy.json + pro-runtime-hardening-workboard.md +scripts/ +examples/ +``` + +## Planning / workboard + +For the current runtime-hardening / professionalization track, see: +- `docs/pro-runtime-hardening-workboard.md` + +This is the living workboard for: +- status tracking +- confirmed findings +- open technical decisions +- verification notes +- implementation progress + +## Release / project docs + +Additional project docs: +- `CHANGELOG.md` +- `RELEASE.md` +- `docs/README.md` +- `docs/API.md` +- `docs/DSP-CHAIN.md` +- `docs/NOTES.md` + +## Legal note + +This project is intended only for lawful use within relevant license and regulatory constraints. +RF output, deviation, filtering, and transmitted power must be validated with proper measurement equipment. diff --git a/docs/API.md b/docs/API.md index 58d3ac1..fa9a6d8 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1,416 +1,427 @@ -# fm-rds-tx HTTP Control API - -Base URL: `http://{listenAddress}` (default `127.0.0.1:8088`) - ---- - -## Endpoints - -### `GET /healthz` - -Health check. - -**Response:** -```json -{"ok": true} -``` - -This endpoint is a simple liveness signal — it does not include runtime-state data or audit counters. Use it for readiness/liveness probes. - - ---- - -### `GET /status` - -Current transmitter status (read-only snapshot). Runtime indicator, alert, and queue stats from the running TX controller are mirrored here for quick health checks. 
- -**Response:** -```json -{ - "service": "fm-rds-tx", - "backend": "pluto", - "frequencyMHz": 100.0, - "stereoEnabled": true, - "rdsEnabled": true, - "preEmphasisTauUS": 50, - "limiterEnabled": true, - "fmModulationEnabled": true, - "runtimeIndicator": "normal", - "runtimeAlert": "", - "queue": { - "capacity": 3, - "depth": 1, - "fillLevel": 0.33, - "health": "low" - } -} -``` - -`runtimeIndicator` is derived from the engine queue health plus any late buffers observed in the last 5 seconds and can be "normal", "degraded", or "queueCritical". - -`runtimeState` mirrors the same runtime-state machine string that `/runtime` exposes as `engine.state` when a TX controller is active, so quick health checks reuse the same terminology. - -`runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal", but late-buffer alerts expire after a few seconds once cycle times settle so the signal doesn't stay stuck on degraded. The cumulative `lateBuffers` counter returned by `/runtime` still shows how many late cycles have occurred since start for post-mortem diagnosis. - - ---- - -### `GET /runtime` - -Live engine and driver telemetry. Only populated when TX is active. 
- -**Response:** -```json -{ - "engine": { - "state": "running", - "runtimeStateDurationSeconds": 12.4, - "appliedFrequencyMHz": 100.0, - "chunksProduced": 12345, - "totalSamples": 1408950000, - "underruns": 0, - "lastError": "", - "uptimeSeconds": 3614.2, - "faultCount": 2, - "lastFault": { - "time": "2026-04-06T00:00:00Z", - "reason": "queueCritical", - "severity": "faulted", - "message": "queue health critical for 5 checks" - }, - "faultHistory": [ - { - "time": "2026-04-06T00:00:00Z", - "reason": "queueCritical", - "severity": "faulted", - "message": "queue health critical for 5 checks" - } - ], - "transitionHistory": [ - { - "time": "2026-04-06T00:00:00Z", - "from": "running", - "to": "degraded", - "severity": "warn" - } - ] - }, - "driver": { - "txEnabled": true, - "streamActive": true, - "framesWritten": 12345, - "samplesWritten": 1408950000, - "underruns": 0, - "underrunStreak": 0, - "maxUnderrunStreak": 0, - "effectiveSampleRateHz": 2280000 - }, - "controlAudit": { - "methodNotAllowed": 0, - "unsupportedMediaType": 0, - "bodyTooLarge": 0, - "unexpectedBody": 0 - } -} -``` -`engine.state` spiegelt jetzt die Runtime-State-Maschine wider (idle, arming, prebuffering, running, degraded, muted, faulted, stopping) und bietet eine erste beobachtbare Basis für Fault-Transitions. - -`runtimeStateDurationSeconds` sagt, wie viele Sekunden die Engine bereits im aktuellen Runtime-Zustand verweilt. So erkennt man schnell, ob `muted`/`degraded` zu lange dauern oder ob ein Übergang gerade frisch begonnen hat. - -`transitionHistory` liefert die jüngsten Übergänge (from/to, severity, timestamp) damit API und UI die Runtime History synchronisieren können. - -`engine.appliedFrequencyMHz` meldet die zuletzt tatsächlich getunte Frequenz auf der Hardware, sodass man sie mit dem gewünschten `/config`-Wert vergleichen und ausstehende Live-Updates sofort entdecken kann. 
- -`driver.underrunStreak` reports how many consecutive reads returned silence, and `driver.maxUnderrunStreak` captures the longest such run since the engine started. Together they help differentiate short glitches from persistent underrun storms and can be plotted alongside queue health sparkline telemetry. - -`lastFault.reason` kann jetzt auch `writeTimeout` lauten, wenn der Treiber Schreibaufrufe wiederholt verweigert oder blockiert. Die Control-Plane hebt solche Driver-Faults hervor, damit man Blockaden im Writer-Pfad ohne Log-Search sieht. - -`controlAudit` mirrors the control plane's HTTP reject counters (405/415/413/400). Whenever the HTTP server rejects a request (method not allowed, unsupported media type, body too large, or unexpected body), the respective counter increments — this lets runtime telemetry spot abusive clients without polluting the runtime state payload. - - ---- - -### `POST /runtime/fault/reset` - -Manually acknowledge a `faulted` runtime state so the supervisor can re-enter the recovery path (the engine moves back to `degraded` once the reset succeeds). - -**Response:** -```json -{"ok": true} -``` - -**Errors:** -- `405 Method Not Allowed` if the request is not a POST -- `503 Service Unavailable` when no TX controller is attached (`--tx` mode not active) -- `409 Conflict` when the engine is not currently faulted or the reset was rejected (e.g. still throttled) - ---- - -### `GET /config` - -Full current configuration (all fields, including non-patchable). - -**Response:** Complete `Config` JSON object. - ---- - -### `POST /config` - -**Live parameter update.** Changes are applied to the running TX engine immediately — no restart required. Only include fields you want to change (PATCH semantics). - -The control snapshot (GET /config) only reflects new values once they pass validation and, if the TX engine is running, after the live update succeeded. That keeps the API from reporting desired values that were rejected or still pending. 
- -**Request body:** JSON with any subset of patchable fields. - -**Content-Type:** `application/json` (charset parameters allowed). Requests without it are rejected with 415 Unsupported Media Type. - -**Response:** -```json -{"ok": true, "live": true} -``` - -`"live": true` = changes were forwarded to the running engine. -`"live": false` = engine not active, changes saved for next start. - -#### Patchable fields — DSP (applied within ~50ms) - -| Field | Type | Range | Description | -|---|---|---|---| -| `frequencyMHz` | float | 65–110 | TX center frequency. Tunes hardware LO live. | -| `outputDrive` | float | 0–10 | Composite output level multiplier (empfohlen 1..4). | -| `stereoEnabled` | bool | | Enable/disable stereo (pilot + 38kHz subcarrier). | -| `pilotLevel` | float | 0–0.2 | 19 kHz pilot injection level. | -| `rdsInjection` | float | 0–0.15 | 57 kHz RDS subcarrier injection level. | -| `rdsEnabled` | bool | | Enable/disable RDS subcarrier. | -| `limiterEnabled` | bool | | Enable/disable MPX peak limiter. | -| `limiterCeiling` | float | 0–2 | Limiter ceiling (max composite amplitude). | - -#### Patchable fields — RDS text (applied within ~88ms) - -| Field | Type | Max length | Description | -|---|---|---|---| -| `ps` | string | 8 chars | Program Service name (station name on receiver display). | -| `radioText` | string | 64 chars | RadioText message (scrolling text on receiver). | - -When `radioText` is updated, the RDS A/B flag toggles automatically per spec, signaling receivers to refresh their display. - -#### Patchable fields — other (saved, not live-applied) - -| Field | Type | Description | -|---|---|---| -| `toneLeftHz` | float | Left tone frequency (test generator). | -| `toneRightHz` | float | Right tone frequency (test generator). | -| `toneAmplitude` | float | Test tone amplitude (0–1). | -| `preEmphasisTauUS` | float | Pre-emphasis time constant. 
**Requires restart.** | - -#### Examples - -```bash -# Tune to 99.5 MHz -curl -X POST localhost:8088/config -d '{"frequencyMHz": 99.5}' - -# Switch to mono -curl -X POST localhost:8088/config -d '{"stereoEnabled": false}' - -# Update now-playing text -curl -X POST localhost:8088/config \ - -d '{"ps": "MYRADIO", "radioText": "Artist - Song Title"}' - -# Reduce power + disable limiter -curl -X POST localhost:8088/config \ - -d '{"outputDrive": 0.8, "limiterEnabled": false}' - -# Full update -curl -X POST localhost:8088/config -d '{ - "frequencyMHz": 101.3, - "outputDrive": 2.2, - "stereoEnabled": true, - "pilotLevel": 0.041, - "rdsInjection": 0.021, - "rdsEnabled": true, - "limiterEnabled": true, - "limiterCeiling": 1.0, - "ps": "PIRATE", - "radioText": "Broadcasting from the attic" -}' -``` - -#### Error handling - -Invalid values return `400 Bad Request` with a descriptive message: -```bash -curl -X POST localhost:8088/config -d '{"frequencyMHz": 200}' -# → 400: frequencyMHz out of range (65-110) -``` - ---- - -### `POST /tx/start` - -Start transmission. Requires `--tx` mode with hardware. - -**Response:** -```json -{"ok": true, "action": "started"} -``` - -**Errors:** -- `405` if not POST -- `503` if no TX controller (not in `--tx` mode) -- `409` if already running - ---- - -### `POST /tx/stop` - -Stop transmission. - -**Response:** -```json -{"ok": true, "action": "stopped"} -``` - ---- - -### `GET /dry-run` - -Generate a synthetic frame summary without hardware. Useful for config verification. - -**Response:** `FrameSummary` JSON with mode, rates, source info, preview samples. 
- ---- - -## Live update architecture - -All live updates are **lock-free** in the DSP path: - -| What | Mechanism | Latency | -|---|---|---| -| DSP params | `atomic.Pointer[LiveParams]` loaded once per chunk | ≤ 50ms | -| RDS text | `atomic.Value` in encoder, read at group boundary | ≤ 88ms | -| TX frequency | `atomic.Pointer` in engine, `driver.Tune()` between chunks | ≤ 50ms | - -No mutex, no channel, no allocation in the real-time path. The HTTP goroutine writes atomics, the DSP goroutine reads them. - -## Parameters that require restart - -These cannot be hot-reloaded (they affect DSP pipeline structure): - -- `compositeRateHz` — changes sample rate of entire DSP chain -- `deviceSampleRateHz` — changes hardware rate / upsampler ratio -- `maxDeviationHz` — changes FM modulator scaling -- `preEmphasisTauUS` — changes filter coefficients -- `rds.pi` / `rds.pty` — rarely change, baked into encoder init -- `audio.inputPath` — audio source selection -- `backend.kind` / `backend.device` — hardware selection - ---- - -### `POST /audio/stream` - -Push raw audio data into the live stream buffer. Format: **S16LE stereo PCM** at the configured `--audio-rate` (default 44100 Hz). - -Requires `--audio-stdin`, `--audio-http`, or another configured stream source to feed the buffer. - -**Request:** Binary body, `application/octet-stream`, raw S16LE stereo PCM bytes. Set `Content-Type` to `application/octet-stream` or `audio/L16`; other media types are rejected. Requests larger than 512 MiB are rejected with `413 Request Entity Too Large`. 
- -**Response:** -```json -{ - "ok": true, - "frames": 4096, - "stats": { - "available": 12000, - "capacity": 131072, - "buffered": 0.09, - "bufferedDurationSeconds": 0.27, - "highWatermark": 15000, - "highWatermarkDurationSeconds": 0.34, - "written": 890000, - "underruns": 0, - "overflows": 0 - } -} -``` - -**Example:** -```bash -# Push a file -ffmpeg -i song.mp3 -f s16le -ar 44100 -ac 2 - | \ - curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- http://pluto:8088/audio/stream -``` - -**Errors:** -- `405` if not POST -- `415` if Content-Type is missing or unsupported (must be `application/octet-stream` or `audio/L16`) -- `413` if the upload body exceeds the 512 MiB limit -- `503` if no audio stream configured - ---- - -## Audio Streaming - -### Stdin pipe (primary method) - -Pipe any audio source through ffmpeg into the transmitter: - -```bash -# Internet radio stream -ffmpeg -i "http://stream.example.com/radio.mp3" -f s16le -ar 44100 -ac 2 - | \ - fmrtx --tx --tx-auto-start --audio-stdin --config config.json - -# Local music file -ffmpeg -i music.flac -f s16le -ar 44100 -ac 2 - | \ - fmrtx --tx --tx-auto-start --audio-stdin - -# Playlist (ffmpeg concat) -ffmpeg -f concat -i playlist.txt -f s16le -ar 44100 -ac 2 - | \ - fmrtx --tx --tx-auto-start --audio-stdin - -# PulseAudio / ALSA capture (Linux) -parecord --format=s16le --rate=44100 --channels=2 - | \ - fmrtx --tx --tx-auto-start --audio-stdin - -# Custom sample rate (e.g. 48kHz source) -ffmpeg -i source.wav -f s16le -ar 48000 -ac 2 - | \ - fmrtx --tx --tx-auto-start --audio-stdin --audio-rate 48000 -``` - -### HTTP audio push - -Push audio from a remote machine via the HTTP API. Run the server with `--audio-http` (and typically `--tx`/`--tx-auto-start`) so the `/audio/stream` endpoint is available. 
- -```bash -# From another machine on the network -ffmpeg -i music.mp3 -f s16le -ar 44100 -ac 2 - | \ - curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- http://pluto-host:8088/audio/stream -``` - -### Audio buffer - -The stream uses a lock-free ring buffer (default: 2 seconds at input rate). Buffer stats are available in `GET /runtime` under `audioStream`: - -```json -{ - "audioStream": { - "available": 12000, - "capacity": 131072, - "buffered": 0.09, - "bufferedDurationSeconds": 0.27, - "highWatermark": 15000, - "highWatermarkDurationSeconds": 0.34, - "written": 890000, - "underruns": 0, - "overflows": 0 - } -} -``` - -- **underruns**: DSP consumed faster than audio arrived (silence inserted) -- **overflows**: Audio arrived faster than DSP consumed (data dropped) -- **buffered**: Fill ratio (0.0 = empty, 1.0 = full) -- **bufferedDurationSeconds**: Approximate seconds of audio queued in the buffer (`available` frames divided by the sample rate) -- **highWatermark**: Highest observed buffer occupancy (frames) since the buffer was created -- **highWatermarkDurationSeconds**: Equivalent peak time (`highWatermark` frames divided by the sample rate) - -When no audio is streaming, the transmitter falls back to the configured tone generator or silence. +# fm-rds-tx HTTP Control API + +Base URL: `http://{listenAddress}` (default `127.0.0.1:8088`) + +--- + +## Endpoints + +### `GET /healthz` + +Health check. + +**Response:** +```json +{"ok": true} +``` + +This endpoint is a simple liveness signal — it does not include runtime-state data or audit counters. Use it for readiness/liveness probes. + + +--- + +### `GET /status` + +Current transmitter status (read-only snapshot). Runtime indicator, alert, and queue stats from the running TX controller are mirrored here for quick health checks. 
+ +**Response:** +```json +{ + "service": "fm-rds-tx", + "backend": "pluto", + "frequencyMHz": 100.0, + "stereoEnabled": true, + "rdsEnabled": true, + "preEmphasisTauUS": 50, + "limiterEnabled": true, + "fmModulationEnabled": true, + "runtimeIndicator": "normal", + "runtimeAlert": "", + "queue": { + "capacity": 3, + "depth": 1, + "fillLevel": 0.33, + "health": "low" + } +} +``` + +`runtimeIndicator` is derived from the engine queue health plus any late buffers observed in the last 5 seconds and can be "normal", "degraded", or "queueCritical". + +`runtimeState` mirrors the same runtime-state machine string that `/runtime` exposes as `engine.state` when a TX controller is active, so quick health checks reuse the same terminology. + +`runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal", but late-buffer alerts expire after a few seconds once cycle times settle so the signal doesn't stay stuck on degraded. The cumulative `lateBuffers` counter returned by `/runtime` still shows how many late cycles have occurred since start for post-mortem diagnosis. + + +--- + +### `GET /runtime` + +Live engine and driver telemetry. When ingest runtime is configured, this endpoint also exposes shared ingest/source stats under `ingest`. 
+ +**Response:** +```json +{ + "engine": { + "state": "running", + "runtimeStateDurationSeconds": 12.4, + "appliedFrequencyMHz": 100.0, + "chunksProduced": 12345, + "totalSamples": 1408950000, + "underruns": 0, + "lastError": "", + "uptimeSeconds": 3614.2, + "faultCount": 2, + "lastFault": { + "time": "2026-04-06T00:00:00Z", + "reason": "queueCritical", + "severity": "faulted", + "message": "queue health critical for 5 checks" + }, + "faultHistory": [ + { + "time": "2026-04-06T00:00:00Z", + "reason": "queueCritical", + "severity": "faulted", + "message": "queue health critical for 5 checks" + } + ], + "transitionHistory": [ + { + "time": "2026-04-06T00:00:00Z", + "from": "running", + "to": "degraded", + "severity": "warn" + } + ] + }, + "driver": { + "txEnabled": true, + "streamActive": true, + "framesWritten": 12345, + "samplesWritten": 1408950000, + "underruns": 0, + "underrunStreak": 0, + "maxUnderrunStreak": 0, + "effectiveSampleRateHz": 2280000 + }, + "controlAudit": { + "methodNotAllowed": 0, + "unsupportedMediaType": 0, + "bodyTooLarge": 0, + "unexpectedBody": 0 + }, + "ingest": { + "active": { + "id": "icecast-main", + "kind": "icecast", + "family": "streaming", + "transport": "http", + "codec": "auto", + "detail": "http://example.invalid/stream" + }, + "source": { + "state": "running", + "connected": true, + "chunksIn": 123, + "samplesIn": 251904 + }, + "runtime": { + "state": "running", + "droppedFrames": 0, + "convertErrors": 0, + "writeBlocked": false + } + } +} +``` +`engine.state` now mirrors the runtime state machine (idle, arming, prebuffering, running, degraded, muted, faulted, stopping) and provides a first observable basis for fault transitions. + +`runtimeStateDurationSeconds` reports how many seconds the engine has spent in the current runtime state. This makes it easy to see whether `muted`/`degraded` is lasting too long or whether a transition has only just begun. 
+ +`transitionHistory` lists the most recent transitions (from/to, severity, timestamp) so that the API and UI can keep their runtime history in sync. + +`engine.appliedFrequencyMHz` reports the frequency most recently tuned on the hardware, so it can be compared with the desired `/config` value to spot pending live updates immediately. + +`driver.underrunStreak` reports how many consecutive reads returned silence, and `driver.maxUnderrunStreak` captures the longest such run since the engine started. Together they help differentiate short glitches from persistent underrun storms and can be plotted alongside queue health sparkline telemetry. + +`lastFault.reason` can now also be `writeTimeout` when the driver repeatedly rejects or blocks write calls. The control plane highlights such driver faults so that stalls in the writer path are visible without searching logs. + +`controlAudit` mirrors the control plane's HTTP reject counters (405/415/413/400). Whenever the HTTP server rejects a request (method not allowed, unsupported media type, body too large, or unexpected body), the respective counter increments — this lets runtime telemetry spot abusive clients without polluting the runtime state payload. + + +--- + +### `POST /runtime/fault/reset` + +Manually acknowledge a `faulted` runtime state so the supervisor can re-enter the recovery path (the engine moves back to `degraded` once the reset succeeds). + +**Response:** +```json +{"ok": true} +``` + +**Errors:** +- `405 Method Not Allowed` if the request is not a POST +- `503 Service Unavailable` when no TX controller is attached (`--tx` mode not active) +- `409 Conflict` when the engine is not currently faulted or the reset was rejected (e.g. still throttled) + +--- + +### `GET /config` + +Full current configuration (all fields, including non-patchable). + +**Response:** Complete `Config` JSON object. 
+ +--- + +### `POST /config` + +**Live parameter update.** Changes are applied to the running TX engine immediately — no restart required. Only include fields you want to change (PATCH semantics). + +The control snapshot (GET /config) only reflects new values once they pass validation and, if the TX engine is running, after the live update succeeded. That keeps the API from reporting desired values that were rejected or still pending. + +**Request body:** JSON with any subset of patchable fields. + +**Content-Type:** `application/json` (charset parameters allowed). Requests without it are rejected with 415 Unsupported Media Type. + +**Response:** +```json +{"ok": true, "live": true} +``` + +`"live": true` = changes were forwarded to the running engine. +`"live": false` = engine not active, changes saved for next start. + +#### Patchable fields — DSP (applied within ~50ms) + +| Field | Type | Range | Description | +|---|---|---|---| +| `frequencyMHz` | float | 65–110 | TX center frequency. Tunes hardware LO live. | +| `outputDrive` | float | 0–10 | Composite output level multiplier (recommended 1..4). | +| `stereoEnabled` | bool | | Enable/disable stereo (pilot + 38kHz subcarrier). | +| `pilotLevel` | float | 0–0.2 | 19 kHz pilot injection level. | +| `rdsInjection` | float | 0–0.15 | 57 kHz RDS subcarrier injection level. | +| `rdsEnabled` | bool | | Enable/disable RDS subcarrier. | +| `limiterEnabled` | bool | | Enable/disable MPX peak limiter. | +| `limiterCeiling` | float | 0–2 | Limiter ceiling (max composite amplitude). | + +#### Patchable fields — RDS text (applied within ~88ms) + +| Field | Type | Max length | Description | +|---|---|---|---| +| `ps` | string | 8 chars | Program Service name (station name on receiver display). | +| `radioText` | string | 64 chars | RadioText message (scrolling text on receiver). | + +When `radioText` is updated, the RDS A/B flag toggles automatically per spec, signaling receivers to refresh their display. 
+ +#### Patchable fields — other (saved, not live-applied) + +| Field | Type | Description | +|---|---|---| +| `toneLeftHz` | float | Left tone frequency (test generator). | +| `toneRightHz` | float | Right tone frequency (test generator). | +| `toneAmplitude` | float | Test tone amplitude (0–1). | +| `preEmphasisTauUS` | float | Pre-emphasis time constant. **Requires restart.** | + +#### Examples + +```bash +# Tune to 99.5 MHz +curl -X POST -H "Content-Type: application/json" localhost:8088/config -d '{"frequencyMHz": 99.5}' + +# Switch to mono +curl -X POST -H "Content-Type: application/json" localhost:8088/config -d '{"stereoEnabled": false}' + +# Update now-playing text +curl -X POST -H "Content-Type: application/json" localhost:8088/config \ + -d '{"ps": "MYRADIO", "radioText": "Artist - Song Title"}' + +# Reduce power + disable limiter +curl -X POST -H "Content-Type: application/json" localhost:8088/config \ + -d '{"outputDrive": 0.8, "limiterEnabled": false}' + +# Full update +curl -X POST -H "Content-Type: application/json" localhost:8088/config -d '{ + "frequencyMHz": 101.3, + "outputDrive": 2.2, + "stereoEnabled": true, + "pilotLevel": 0.041, + "rdsInjection": 0.021, + "rdsEnabled": true, + "limiterEnabled": true, + "limiterCeiling": 1.0, + "ps": "PIRATE", + "radioText": "Broadcasting from the attic" +}' +``` + +#### Error handling + +Invalid values return `400 Bad Request` with a descriptive message: +```bash +curl -X POST -H "Content-Type: application/json" localhost:8088/config -d '{"frequencyMHz": 200}' +# → 400: frequencyMHz out of range (65-110) +``` + +--- + +### `POST /tx/start` + +Start transmission. Requires `--tx` mode with hardware. + +**Response:** +```json +{"ok": true, "action": "started"} +``` + +**Errors:** +- `405` if not POST +- `503` if no TX controller (not in `--tx` mode) +- `409` if already running + +--- + +### `POST /tx/stop` + +Stop transmission. + +**Response:** +```json +{"ok": true, "action": "stopped"} +``` + +--- + +### `GET /dry-run` + +Generate a synthetic frame summary without hardware. Useful for config verification. + +**Response:** `FrameSummary` JSON with mode, rates, source info, preview samples. 
+ +--- + +## Live update architecture + +All live updates are **lock-free** in the DSP path: + +| What | Mechanism | Latency | +|---|---|---| +| DSP params | `atomic.Pointer[LiveParams]` loaded once per chunk | ≤ 50ms | +| RDS text | `atomic.Value` in encoder, read at group boundary | ≤ 88ms | +| TX frequency | `atomic.Pointer` in engine, `driver.Tune()` between chunks | ≤ 50ms | + +No mutex, no channel, no allocation in the real-time path. The HTTP goroutine writes atomics, the DSP goroutine reads them. + +## Parameters that require restart + +These cannot be hot-reloaded (they affect DSP pipeline structure): + +- `compositeRateHz` — changes sample rate of entire DSP chain +- `deviceSampleRateHz` — changes hardware rate / upsampler ratio +- `maxDeviationHz` — changes FM modulator scaling +- `preEmphasisTauUS` — changes filter coefficients +- `rds.pi` / `rds.pty` — rarely change, baked into encoder init +- `audio.inputPath` — audio source selection +- `backend.kind` / `backend.device` — hardware selection + +--- + +### `POST /audio/stream` + +Push raw audio data into the ingest `http-raw` source. Format: **S16LE PCM** (`ingest.httpRaw.format`), currently validated as `s16le`, with channels/sample-rate from ingest config. + +Requires HTTP ingest wiring (typically `--audio-http`, which maps ingest kind to `http-raw`). + +**Request:** Binary body, `application/octet-stream`, raw S16LE stereo PCM bytes. Set `Content-Type` to `application/octet-stream` or `audio/L16`; other media types are rejected. Requests larger than 512 MiB are rejected with `413 Request Entity Too Large`. 
+ +**Response:** +```json +{ + "ok": true, + "frames": 4096 +} +``` + +**Example:** +```bash +# Push a file +ffmpeg -i song.mp3 -f s16le -ar 44100 -ac 2 - | \ + curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- http://pluto:8088/audio/stream +``` + +**Errors:** +- `405` if not POST +- `415` if Content-Type is missing or unsupported (must be `application/octet-stream` or `audio/L16`) +- `413` if the upload body exceeds the 512 MiB limit +- `503` if HTTP raw ingest is not configured + +--- + +## Audio Streaming + +### Stdin pipe (primary method) + +Pipe any audio source through ffmpeg into the transmitter: + +```bash +# Internet radio stream +ffmpeg -i "http://stream.example.com/radio.mp3" -f s16le -ar 44100 -ac 2 - | \ + fmrtx --tx --tx-auto-start --audio-stdin --config config.json + +# Local music file +ffmpeg -i music.flac -f s16le -ar 44100 -ac 2 - | \ + fmrtx --tx --tx-auto-start --audio-stdin + +# Playlist (ffmpeg concat) +ffmpeg -f concat -i playlist.txt -f s16le -ar 44100 -ac 2 - | \ + fmrtx --tx --tx-auto-start --audio-stdin + +# PulseAudio / ALSA capture (Linux) +parecord --format=s16le --rate=44100 --channels=2 - | \ + fmrtx --tx --tx-auto-start --audio-stdin + +# Custom sample rate (e.g. 48kHz source) +ffmpeg -i source.wav -f s16le -ar 48000 -ac 2 - | \ + fmrtx --tx --tx-auto-start --audio-stdin --audio-rate 48000 +``` + +### HTTP audio push + +Push audio from a remote machine via the HTTP API. Run the server with `--audio-http` (and typically `--tx`/`--tx-auto-start`) so the `/audio/stream` endpoint is available. + +```bash +# From another machine on the network +ffmpeg -i music.mp3 -f s16le -ar 44100 -ac 2 - | \ + curl -X POST -H "Content-Type: application/octet-stream" --data-binary @- http://pluto-host:8088/audio/stream +``` + +### Audio buffer + +The stream uses a lock-free ring buffer (default: 2 seconds at input rate). 
Buffer stats are available in `GET /runtime` under `audioStream`: + +```json +{ + "audioStream": { + "available": 12000, + "capacity": 131072, + "buffered": 0.09, + "bufferedDurationSeconds": 0.27, + "highWatermark": 15000, + "highWatermarkDurationSeconds": 0.34, + "written": 890000, + "underruns": 0, + "overflows": 0 + } +} +``` + +- **underruns**: DSP consumed faster than audio arrived (silence inserted) +- **overflows**: Audio arrived faster than DSP consumed (data dropped) +- **buffered**: Fill ratio (0.0 = empty, 1.0 = full) +- **bufferedDurationSeconds**: Approximate seconds of audio queued in the buffer (`available` frames divided by the sample rate) +- **highWatermark**: Highest observed buffer occupancy (frames) since the buffer was created +- **highWatermarkDurationSeconds**: Equivalent peak time (`highWatermark` frames divided by the sample rate) + +When no audio is streaming, the transmitter falls back to the configured tone generator or silence. diff --git a/docs/audio-ingest-implementation-plan.md b/docs/audio-ingest-implementation-plan.md new file mode 100644 index 0000000..fe5a19c --- /dev/null +++ b/docs/audio-ingest-implementation-plan.md @@ -0,0 +1,1097 @@ +# Audio Ingest Implementation Plan + +Status: Phase-1 fixup snapshot (2026-04-07) +Owner: Jan +Scope: `fm-rds-tx` +Related: `docs/audio-ingest-rework.md` + +## Goal + +Build a first-class audio ingest subsystem that supports multiple source families without pushing transport-specific logic into the FM TX engine or DSP path. 
+ +This plan starts with a pragmatic integration strategy: + +- keep the existing TX/DSP pipeline stable +- introduce a new `internal/ingest` runtime in front of it +- preserve `audio.StreamSource` as the immediate TX-facing sink for now +- bring **Icecast ingest into Phase 1**, alongside stdin/raw HTTP ingest +- treat **native decoding as a first-class goal from the start**, not a late add-on + +The key architectural principle is: + +> Source-family specifics live in source adapters. Shared buffering, health, lifecycle, conversion, and handoff to TX live in a common ingest runtime. + +## Actual Phase-1 status (2026-04-07) + +Implemented: +- `internal/ingest` runtime in front of `audio.StreamSource` +- ingest source factory and config mapping for `stdin`, `http-raw`, `icecast` +- stdin and HTTP raw adapters feeding shared runtime +- runtime and source stats exposed via `/runtime` as `ingest.*` +- Icecast source adapter with reconnect/backoff and decoder preference modes (`auto`, `native`, `ffmpeg`) +- decoder registry and explicit ffmpeg fallback decoder implementation + +Still open on purpose: +- native `mp3`, `oggvorbis`, `aac` decoder packages are placeholders (`ErrUnsupported`) +- real decode path for Icecast is currently ffmpeg fallback +- no AoIP/SRT ingest integration into shared runtime yet +- no multi-source orchestration/failover policy yet + +--- + +## Non-goals for the first implementation wave + +The first wave should **not** attempt to solve everything at once. + +Out of scope initially: + +- full multi-source orchestration with seamless hot failover +- exhaustive native decoding support for every compressed format and edge case +- replacing the existing `offline.Generator` source contract +- redesigning the TX runtime state machine +- broad UI redesign +- a universal media graph framework + +We want a clean, incremental path, not a big-bang rewrite. 
+ +--- + +## Current state of the codebase + +The repository already has most of the TX-side hooks needed for a proper ingest subsystem: + +- `cmd/fmrtx/main.go` + - creates `audio.StreamSource` + - wires it into the engine via `engine.SetStreamSource(...)` + - starts stdin and `/audio/stream` ingest paths directly +- `internal/app/engine.go` + - accepts a stream source via `SetStreamSource(...)` + - wraps it in `audio.NewStreamResampler(...)` + - injects it upstream of DSP via `generator.SetExternalSource(...)` +- `internal/audio/stream.go` + - provides a TX-facing SPSC ring buffer + - provides a simple `StreamResampler` + - tracks underruns, overflows, buffering, high watermark +- `internal/offline/generator.go` + - already cleanly accepts an external audio source +- `aoiprxkit/` + - already contains useful RTP/AES67/SAP/SRT receive-side primitives and stats + +This means the right move is **not** to redesign the FM core, but to formalize the missing ingest layer in front of the existing TX path. + +--- + +## Target architecture + +## Layers + +### 1. Source adapters + +Each adapter owns family-specific behavior, for example: + +- process control for ffmpeg-based adapters +- reconnect loops for Icecast +- RTP depacketization and jitter buffering for AoIP +- protocol-specific metadata and health signals + +Examples: + +- stdin PCM +- HTTP raw PCM +- Icecast stream +- RTP/AES67 +- SRT +- future ffmpeg-backed generic URL/file ingest + +### 2. Decoder layer + +A dedicated decoder layer sits between transport/session adapters and the shared ingest runtime. 
+ +Responsibilities: + +- decode compressed audio streams into normalized PCM chunks +- keep codec-specific logic out of Icecast and other source adapters +- allow multiple decoder implementations behind a common interface +- prefer native Go decoders where they are stable and good enough +- allow an ffmpeg-backed fallback only as an implementation detail, not as the architecture + +Examples: + +- MP3 +- Ogg/Vorbis +- AAC/ADTS where practical +- later: Opus or other codecs as needed + +Initial decoder priority should be: + +1. MP3 +2. Ogg/Vorbis +3. AAC/ADTS +4. Opus later if a concrete source requirement justifies it + +### 3. Shared ingest runtime + +A common ingest runtime sits between decoders/source adapters and TX. + +Responsibilities: + +- source lifecycle +- prebuffering policy +- normalized source state +- family-neutral telemetry +- format conversion into TX-facing audio frames +- writing into the existing `audio.StreamSource` +- later: failover/orchestration + +### 4. Existing TX path + +The TX side stays mostly unchanged: + +- `audio.StreamSource` +- `audio.StreamResampler` +- `Engine.SetStreamSource(...)` +- `offline.Generator.SetExternalSource(...)` +- FM/DSP chain + +The TX engine should not know whether input came from stdin, Icecast, SRT, RTP, or something else. + +--- + +## Why Icecast is in Phase 1 + +Icecast should be introduced early, not postponed. + +Reasons: + +- it exercises a real long-running network stream rather than one-shot raw pushes +- it forces lifecycle design immediately: connecting, connected, stalled, reconnecting, failed +- it forces buffering and liveness behavior to be designed properly +- it prevents the ingest layer from being accidentally overfit to only raw PCM push workflows +- it reflects an important real-world ingest path for FM rebroadcast/transcoding scenarios +- it forces the project to define a real decoder boundary early + +Early Icecast support **should aim for native decoding where practical**. 
+ +Initial Icecast strategy should therefore be: + +- separate transport/runtime concerns from decoding concerns +- define a decoder interface from the beginning +- prefer native Go decoders for common formats where mature libraries exist +- keep an ffmpeg-backed decoder only as fallback or temporary compatibility path +- keep the ingest runtime and source adapter interfaces clean enough that decoder implementation can evolve without redesigning the whole ingest subsystem + +--- + +## Phase plan + +## Phase 1: create the ingest runtime and ship first adapters + +### Deliverables + +- new `internal/ingest` package +- a decoder abstraction as part of the ingest subsystem +- a shared ingest runtime in front of `audio.StreamSource` +- adapters for: + - stdin PCM + - raw HTTP PCM + - Icecast stream +- decoder boundary with preference/fallback policy in place +- explicit Phase-1 codec prioritization: MP3 first, Ogg/Vorbis second, AAC/ADTS third +- runtime and source stats exposed in control API +- command/config plumbing for selecting an ingest source + +### Phase 1 boundary + +At the end of Phase 1: + +- TX still consumes through `audio.StreamSource` +- DSP path is unchanged +- source families are no longer wired directly into `cmd/fmrtx/main.go` +- Icecast works with reconnect + observable runtime state +- decoder selection/fallback behavior is explicit and test-covered +- native decoder implementations remain a follow-up item + +--- + +## Phase 2: integrate structured network audio families + +### Deliverables + +- adapters backed by `aoiprxkit` +- RTP/AES67 ingest +- SRT ingest +- shared source stats mapped into ingest runtime stats + +### Notes + +- family-specific jitter/packet handling stays inside adapter/family code +- TX side continues to see normalized stereo frames only + +--- + +## Phase 3: source selection, fallback, and richer policy + +### Deliverables + +- primary/fallback source model +- failure policy +- source switching policy +- improved operator 
telemetry +- optional source prioritization and warm standby + +This phase should only start once single-source ingest is stable. + +--- + +## New package structure + +Proposed initial layout: + +```text +internal/ + ingest/ + types.go + source.go + runtime.go + convert.go + stats.go + factory.go + decoder/ + decoder.go + mp3/ + decoder.go + aac/ + decoder.go + oggvorbis/ + decoder.go + fallback/ + ffmpeg.go + adapters/ + stdinpcm/ + source.go + httpraw/ + source.go + icecast/ + source.go + reconnect.go +``` + +Later additions: + +```text +internal/ingest/adapters/ + aoip/ + srt/ + ffmpeg/ +``` + +Notes: + +- codec-specific logic should live under `internal/ingest/decoder/` +- ffmpeg, if retained at all, should live under an explicit fallback package +- keep source-family code out of `internal/app` and `internal/offline` + +--- + +## Core interfaces + +These are design targets, not fixed signatures. + +## Normalized ingest-side frame model + +The ingest layer needs a family-neutral PCM representation before converting into the TX-facing `audio.Frame` stream. 
+ +Proposed shape: + +```go +type PCMChunk struct { + Samples []int32 + Channels int + SampleRateHz int + Sequence uint64 + Timestamp time.Time + SourceID string + Discontinuity bool +} +``` + +Rationale: + +- expressive enough for RTP/AES67/SRT/decoded Icecast output +- allows transport metadata to be preserved long enough for runtime logic and stats +- avoids forcing all adapters into the same byte-stream assumption + +Future extension points if needed: + +- `Codec string` +- `ClockDomain string` +- `BitDepth int` +- `PTS time.Duration` + +--- + +## Source descriptor + +```go +type SourceDescriptor struct { + ID string + Kind string + Family string + Transport string + Codec string + Channels int + SampleRateHz int + Detail string +} +``` + +Examples: + +- `Kind=stdin-pcm`, `Family=raw`, `Transport=stdin` +- `Kind=http-raw`, `Family=raw`, `Transport=http` +- `Kind=icecast`, `Family=streaming`, `Transport=http` +- later `Kind=aes67`, `Family=aoip`, `Transport=rtp` + +--- + +## Source interface + +Two patterns are reasonable: + +- channel-based delivery +- sink/callback-based delivery + +For the first implementation, channel-based is usually easier to reason about. + +```go +type Source interface { + Descriptor() SourceDescriptor + Start(ctx context.Context) error + Stop() error + Chunks() <-chan PCMChunk + Errors() <-chan error + Stats() SourceStats +} +``` + +Alternative callback model is acceptable if it reduces allocations or simplifies integration. 
+ +Important constraint: + +- the source adapter owns family-specific I/O +- the ingest runtime owns shared buffering/handoff policy + +--- + +## Shared source stats + +```go +type SourceStats struct { + State string + Connected bool + LastChunkAt time.Time + ChunksIn uint64 + SamplesIn uint64 + BufferedSeconds float64 + Overflows uint64 + Underruns uint64 + Reconnects uint64 + Discontinuities uint64 + TransportLoss uint64 + Reorders uint64 + JitterDepth int + LastError string +} +``` + +Not every source will populate every field. + +That is okay. + +The common runtime should expose a stable superset and leave unsupported fields at zero/default. + +--- + +## Shared ingest runtime + +## Responsibilities + +The runtime is the main missing abstraction in the current codebase. + +Responsibilities: + +- own exactly one active source in Phase 1 +- start/stop the source cleanly +- receive normalized `PCMChunk`s +- convert them into TX-facing stereo frames +- write them into `audio.StreamSource` +- enforce prebuffering policy where relevant +- expose common ingest state and health +- detect stalls/reconnects/discontinuities + +## Non-responsibilities + +The runtime should **not**: + +- parse RTP +- manage ffmpeg stderr parsing for generic protocol details +- implement protocol-specific jitter buffering directly +- manipulate FM/DSP runtime states directly + +It reports ingest health; TX remains responsible for TX health. + +--- + +## TX-facing sink strategy + +For now, keep this path: + +- ingest runtime writes into `audio.StreamSource` +- `Engine.SetStreamSource(...)` remains unchanged +- `audio.StreamResampler` remains the final rate adaptation step into composite/DSP rate + +This minimizes risk. + +It also keeps future refactors optional instead of mandatory. + +--- + +## Conversion policy + +A shared conversion layer is required between `PCMChunk` and `audio.StreamSource`. 
+ +## Initial policy + +- accept mono or stereo only in Phase 1 if that keeps implementation smaller +- mono input is duplicated to stereo +- stereo input is mapped directly L/R +- channels > 2 are rejected initially unless a simple, explicit downmix policy is added +- normalize to the existing `audio.Sample` range `[-1, +1]` +- clipping should be explicit and measured, not silent and invisible + +## Why a dedicated conversion layer matters + +Without it, each source adapter will start doing its own ad hoc format mapping. +That is exactly what the new ingest subsystem is supposed to prevent. + +--- + +## Icecast adapter design + +## Scope for the first Icecast implementation + +The first version needs to support a robust operator-visible ingest path **and** establish the decoder boundary correctly. + +It does not need to support every codec/container combination from day one, but it should not assume ffmpeg as the architectural default. + +## Recommended structure + +### Transport/lifecycle layer + +Responsibilities: + +- connect to Icecast URL over HTTP +- validate response +- track connection state +- reconnect with backoff +- observe stalls / EOF / disconnects +- surface metadata and errors + +Implementation guidance: + +- prefer a Go library or a thin wrapper around the standard Go HTTP client for Icecast transport/session handling +- do not hand-roll unnecessary low-level protocol machinery when existing libraries or the standard client already cover it well +- keep transport/session concerns isolated from decoder logic and ingest runtime logic + +### Decode layer + +Preferred initial option: + +- use native Go decoders for the first targeted formats where mature libraries exist +- decode compressed stream data into PCM chunks behind a decoder interface +- prioritize MP3 first and Ogg/Vorbis second because they are likely to give the best early return for Icecast support +- evaluate AAC/ADTS next once the decoder boundary and streaming behavior are stable + 
+Fallback option: + +- keep an ffmpeg-backed decoder implementation available only as fallback/compatibility path + +This keeps the first release practical while preserving architecture. + +The key is to avoid letting “ffmpeg exists” collapse the whole ingest abstraction. + +Meaning: + +- Icecast adapter uses a transport/session client layer plus a decoder interface +- transport/session handling should preferably come from a Go library or a thin wrapper around the standard HTTP client +- decoder choice can be native Go or fallback ffmpeg +- Icecast remains an adapter in `internal/ingest/adapters/icecast` +- runtime still sees a normal source + +## Expected Icecast states + +At minimum: + +- `idle` +- `connecting` +- `buffering` +- `running` +- `stalled` +- `reconnecting` +- `failed` +- `stopped` + +These should be visible via runtime stats and eventually UI. + +--- + +## stdin PCM adapter + +Purpose: + +- preserve current CLI-based piping workflows +- move direct ingest logic out of `cmd/fmrtx/main.go` + +Responsibilities: + +- read S16LE stereo PCM from stdin +- emit `PCMChunk`s or equivalent normalized blocks +- expose simple source stats + +This adapter should be intentionally boring. + +--- + +## raw HTTP PCM adapter + +Purpose: + +- preserve current `/audio/stream` functionality +- move it behind the shared ingest runtime instead of writing directly to `audio.StreamSource` + +There are two reasonable implementation paths: + +### Option A: keep `/audio/stream` as a push endpoint owned by control server + +- control server accepts request body +- forwards PCM blocks into an ingest-owned writer/sink +- ingest runtime still owns buffering/health + +### Option B: implement an explicit push source abstraction + +- source adapter exposes a writable sink +- control plane writes into that sink + +For Phase 1, Option A is probably the fastest path. 
+ +But the important part is: + +- control server should no longer push directly into TX buffer +- it should push into the ingest subsystem + +--- + +## Runtime stats model + +Add a top-level ingest section to `/runtime`. + +Proposed shape: + +```json +{ + "ingest": { + "active": { + "id": "icecast-main", + "kind": "icecast", + "state": "running", + "sampleRateHz": 44100, + "channels": 2, + "bufferedSeconds": 1.4, + "reconnects": 1, + "lastError": "" + }, + "runtime": { + "state": "running", + "prebuffering": false, + "lastChunkAt": "...", + "droppedFrames": 0, + "convertErrors": 0, + "writeBlocked": false + } + } +} +``` + +This should sit alongside: + +- driver stats +- engine stats +- audio stream stats +- control audit stats + +Initially, `audioStream` may remain exposed for debugging, but `ingest` should become the operator-facing abstraction. + +--- + +## Config shape evolution + +Do not overload existing `audio.*` forever. + +The current `audio` config primarily models file/tone/test input assumptions. + +Introduce a new config subtree for ingest. + +## Proposed shape + +```json +{ + "ingest": { + "kind": "icecast", + "prebufferMs": 1500, + "stallTimeoutMs": 3000, + "reconnect": { + "enabled": true, + "initialBackoffMs": 1000, + "maxBackoffMs": 15000 + }, + "stdin": { + "sampleRateHz": 44100, + "channels": 2, + "format": "s16le" + }, + "httpRaw": { + "sampleRateHz": 44100, + "channels": 2, + "format": "s16le" + }, + "icecast": { + "url": "http://...", + "decoder": "ffmpeg" + } + } +} +``` + +Notes: + +- keep current flags working initially for backward compatibility +- map them internally into the new ingest config +- do not force config migration immediately + +--- + +## CLI evolution + +Current flags: + +- `--audio-stdin` +- `--audio-rate` +- `--audio-http` + +These can stay temporarily, but should become compatibility shims. 
+ +Possible future direction: + +- `--ingest stdin` +- `--ingest http-raw` +- `--ingest icecast` +- `--icecast-url ...` + +The exact CLI can wait, but internal structure should already assume a source factory. + +--- + +## File-by-file implementation plan + +## 1. Add new ingest package skeleton + +Create: + +- `internal/ingest/types.go` +- `internal/ingest/source.go` +- `internal/ingest/runtime.go` +- `internal/ingest/convert.go` +- `internal/ingest/stats.go` +- `internal/ingest/factory.go` + +### Acceptance + +- package compiles +- no behavior change yet + +--- + +## 2. Implement stdin adapter + +Create: + +- `internal/ingest/adapters/stdinpcm/source.go` + +Responsibilities: + +- read stdin PCM +- emit normalized chunks +- report basic stats + +### Acceptance + +- reproduces current `--audio-stdin` behavior through ingest runtime +- TX still works unchanged downstream + +--- + +## 3. Implement shared ingest runtime with `audio.StreamSource` sink + +Runtime should: + +- own source start/stop +- convert PCM chunks to `audio.Frame`s +- write into `audio.StreamSource` +- track runtime state and counters + +### Acceptance + +- stdin path works end-to-end +- engine remains unchanged except wiring +- `/runtime` can expose ingest stats + +--- + +## 4. Rewire `cmd/fmrtx/main.go` + +Replace direct source-specific logic with: + +- source selection +- ingest runtime creation +- runtime start/stop +- existing engine wiring + +### Important + +Remove direct writes like: + +- stdin goroutine writing directly into `audio.StreamSource` +- HTTP handler writing directly into `audio.StreamSource` + +They should now pass through ingest runtime abstractions. + +### Acceptance + +- codepath is cleaner +- source-family logic no longer lives in main + +--- + +## 5. Rework raw HTTP ingest to target ingest runtime + +Modify control layer so `/audio/stream` targets ingest subsystem rather than TX ring directly. 
+ +Likely affected file: + +- `internal/control/control.go` + +### Acceptance + +- `/audio/stream` still works +- stats reflect ingest runtime, not just raw ring buffer + +--- + +## 6. Implement decoder layer and Icecast adapter + +Create: + +- `internal/ingest/decoder/decoder.go` +- `internal/ingest/decoder/mp3/decoder.go` +- `internal/ingest/decoder/aac/decoder.go` +- `internal/ingest/decoder/oggvorbis/decoder.go` +- optional fallback: `internal/ingest/decoder/fallback/ffmpeg.go` +- `internal/ingest/adapters/icecast/source.go` +- `internal/ingest/adapters/icecast/reconnect.go` + +### Responsibilities + +- decoder interface turns compressed audio into PCM chunks +- native decoder implementations cover the initial target formats where stable libraries exist +- Icecast adapter handles HTTP connect/reconnect/lifecycle +- Icecast transport/session handling should use a Go library or a thin wrapper around the standard HTTP client where appropriate +- Icecast adapter selects and drives a decoder +- emit PCM chunks +- expose state transitions and errors + +### Acceptance + +- long-running Icecast ingest works +- native decoding is used for the initial supported formats +- disconnect/reconnect is observable and recovers automatically +- fallback path is explicit, not architectural default +- TX path remains stable + +--- + +## 7. Add ingest stats to control API + +Likely affected files: + +- `internal/control/control.go` +- possibly UI if runtime page surfaces ingest info + +### Acceptance + +- `/runtime` shows ingest state +- operator can tell whether source is connecting/running/stalled/reconnecting + +--- + +## 8. 
Introduce ingest config structure + +Likely affected file: + +- `internal/config/config.go` + +### Strategy + +- add new config subtree without breaking old flags immediately +- map legacy flag combinations into new config internally + +### Acceptance + +- existing flows still work +- new ingest configs can select Icecast cleanly + +--- + +## Testing plan + +## Unit tests + +### `internal/ingest/convert.go` + +Test: + +- mono to stereo duplication +- stereo pass-through +- unsupported channel counts +- clipping/normalization behavior +- chunk boundary correctness + +### stdin adapter + +Test: + +- reads PCM correctly +- emits expected sample counts +- EOF handling + +### ingest runtime + +Test: + +- source start/stop lifecycle +- writes converted frames into sink +- prebuffer behavior +- stall detection +- source error propagation + +### Icecast adapter + +Use test HTTP server where possible. + +Test: + +- connect success +- reconnect after disconnect +- state transitions +- decoder failure handling +- backoff behavior + +--- + +## Integration tests + +### TX path with ingest runtime + +Test: + +- ingest runtime feeding `audio.StreamSource` +- engine consumes without regression +- runtime stats remain coherent + +### `/audio/stream` + +Test: + +- POST still works +- control path now targets ingest layer + +### Icecast smoke test + +Even if partly gated or environment-specific, define a repeatable smoke path. + +--- + +## Operational telemetry requirements + +At minimum, operators should be able to answer these questions: + +- what source is active? +- what family is it? +- is it connected? +- how much audio is buffered? +- when did we last receive audio? +- are we reconnecting? +- what was the last ingest error? +- are stalls/discontinuities happening? + +If those are not visible, ingest debugging will be painful. 
+ +--- + +## Risks and mitigations + +## Risk 1: pushing too much complexity into Phase 1 + +Mitigation: + +- keep one active source only +- preserve `audio.StreamSource` +- avoid failover until the single-source path is stable + +## Risk 2: decode strategy pollutes architecture + +Mitigation: + +- isolate codec logic behind a decoder interface +- prefer native Go decoders for the initial supported formats +- if ffmpeg is retained, keep it in an explicit fallback decoder package +- do not let decode mechanism define runtime abstractions + +## Risk 3: duplicated buffering causing latency confusion + +Mitigation: + +- document each buffering layer clearly +- expose ingest buffered seconds separately from TX ring stats +- keep prebuffer policy explicit + +## Risk 4: unclear ownership of resampling + +Mitigation: + +- keep transport/family decode at native source rate +- keep final TX-facing adaptation centralized near current `StreamResampler` +- do not add ad hoc resamplers in every adapter unless protocol-specific needs require it + +## Risk 5: channel/format sprawl too early + +Mitigation: + +- define a strict Phase 1 acceptance matrix +- only support the combinations we actually test + +--- + +## Recommended Phase 1 acceptance matrix + +### stdin PCM + +- format: S16LE +- channels: 2 +- sample rates: 44100, 48000 + +### raw HTTP PCM + +- format: S16LE +- channels: 2 +- sample rates: 44100, 48000 + +### Icecast + +- one known-good stream path +- reconnect behavior verified +- native decoding works for at least MP3 in Phase 1 +- ideally native decoding also works for Ogg/Vorbis in Phase 1 +- AAC/ADTS can enter Phase 1 only if the chosen decoder and stream behavior are solid enough +- decoded output normalized into stereo frames + +Optional but useful: + +- mono handling for at least one ingest path + +--- + +## Suggested implementation order + +1. add ingest package skeleton +2. implement conversion helpers +3. implement stdin adapter +4. 
implement ingest runtime writing into `audio.StreamSource` +5. rewire `cmd/fmrtx/main.go` to use runtime for stdin +6. route `/audio/stream` into ingest runtime +7. expose ingest stats in `/runtime` +8. implement decoder layer with native codec support for initial target formats, in this order: + - MP3 + - Ogg/Vorbis + - AAC/ADTS if stable enough +9. implement Icecast adapter with reconnect + decoder selection +10. add ingest config subtree and compatibility mapping +11. polish tests, docs, and operator-facing runtime fields + +This order gives a narrow vertical slice early, then extends it. + +--- + +## Concrete code touch points + +### New files + +- `internal/ingest/types.go` +- `internal/ingest/source.go` +- `internal/ingest/runtime.go` +- `internal/ingest/convert.go` +- `internal/ingest/stats.go` +- `internal/ingest/factory.go` +- `internal/ingest/decoder/decoder.go` +- `internal/ingest/decoder/mp3/decoder.go` +- `internal/ingest/decoder/aac/decoder.go` +- `internal/ingest/decoder/oggvorbis/decoder.go` +- optional fallback: `internal/ingest/decoder/fallback/ffmpeg.go` +- `internal/ingest/adapters/stdinpcm/source.go` +- `internal/ingest/adapters/icecast/source.go` +- `internal/ingest/adapters/icecast/reconnect.go` + +### Existing files likely to change + +- `cmd/fmrtx/main.go` +- `internal/control/control.go` +- `internal/config/config.go` +- possibly `internal/app/engine.go` only for wiring or runtime exposure, not architectural overhaul + +### Existing files that should stay mostly untouched + +- `internal/offline/generator.go` +- most DSP files +- output/backend implementations + +--- + +## Final design stance + +The new ingest subsystem should be treated as a first-class runtime boundary, not as a pile of helper functions. + +The repository already has the correct TX-side seam: + +- external source +- stream buffer +- final resampler +- engine/DSP separation + +So the implementation should respect that and formalize the missing upstream ingest layer. 
+ +The most important practical decisions in this plan are: + +- **Icecast enters in Phase 1** +- **native decoding is a first-class target from the start** +- fallback decoding is allowed only as an explicit compatibility path, provided the architecture stays clean + +That gives us a realistic ingest design early without destabilizing the FM core. diff --git a/docs/audio-ingest-rework.md b/docs/audio-ingest-rework.md new file mode 100644 index 0000000..6e4e6f5 --- /dev/null +++ b/docs/audio-ingest-rework.md @@ -0,0 +1,267 @@ +# Audio Ingest Rework + +## Hinweis zum Stand (2026-04-07) +Dieses Dokument beschreibt das Zielbild. Der aktuelle Ist-Stand in Phase 1 (gemäß `docs/audio-ingest-implementation-plan.md`, nicht Phase 1 der Umsetzungsreihenfolge weiter unten) ist: +- shared ingest runtime + unified source factory sind implementiert +- `stdin`, `http-raw`, `icecast` Adapter sind implementiert +- Icecast Decoder-Layer + ffmpeg fallback sind implementiert +- native Decoder `mp3` / `oggvorbis` / `aac` sind noch Platzhalter +- funktionaler Decode-Pfad heute: ffmpeg fallback + +## Ziel +`fm-rds-tx` soll mittelfristig mehrere Audio-Ingest-Pfade sauber unterstützen, ohne den bestehenden `ffmpeg`-Pfad kaputt zu machen. + +Die strategische Richtung ist daher **nicht** „ffmpeg sofort ersetzen“, sondern: + +- bestehenden `ffmpeg`-Pfad als universellen Fallback behalten +- native Ingest-Familien daneben aufbauen +- alle Pfade auf eine gemeinsame interne PCM-/Audio-Source-Abstraktion führen +- neue native Pfade schrittweise produktionsreif machen + +## Leitprinzipien +1. **Kein Big-Bang-Rewrite** – Bestehendes bleibt lauffähig. +2. **Native Pfade zuerst dort, wo sie klaren Mehrwert bringen**. +3. **Go-Libraries bevorzugen** – Decoder/Protocol-Handling einkaufen statt neu erfinden. +4. **Ein gemeinsames Ingest-Modell** – unabhängig von Quelle oder Protokoll. +5. **Control Plane / Runtime / Telemetrie von Decoder-Details trennen**. + +## Zielbild: drei Ingest-Familien + +### 1. FFmpeg Family +Bestehender universeller Adapter.
+ +**Rolle:** +- Fallback +- Legacy-Kompatibilität +- exotische oder seltene Formate +- schneller pragmatischer Pfad für Quellen, die nativ noch nicht unterstützt werden + +**Wichtig:** +- bleibt vorerst erhalten +- wird nicht „rausoptimiert“, sondern architektonisch nachrangig behandelt +- sollte in der Runtime als eigener Ingest-Typ sichtbar sein + +### 2. AoIP Family +Für professionelle / broadcast-nahe Audioquellen. + +**Ziel-Protokolle / Modi:** +- RTP multicast +- AES67-lite +- SDP +- SAP +- später: NMOS IS-04 / IS-05 +- später: SRT framed PCM + +**Basis:** +- `aoiprxkit` + +**Rolle:** +- deterministische LAN-Audiozuführung +- Broadcast-/AoIP-Umgebungen +- spätere professionelle Discovery/Activation + +### 3. Streaming Family +Für klassische Internet-/HTTP-/Radio-Streamingquellen. + +**Ziel-Protokolle / Modi:** +- HTTP audio streams +- Icecast / Shoutcast +- ICY metadata +- MP3 +- AAC / HE-AAC (je nach verfügbarer Lib) +- später ggf. Opus + +**Rolle:** +- Webradio / Online-Streams +- Metadatenübernahme +- native Alternative zu `ffmpeg` für die häufigsten Streaming-Fälle + +**Wichtig:** +Diese Familie sollte **nicht** in `aoiprxkit` gepresst werden. AoIP und Streaming sind konzeptionell verschieden genug, dass getrennte Package-Bereiche sinnvoll sind. + +## Gemeinsame interne Abstraktion +Alle Ingest-Familien sollen in dieselbe interne PCM-Einspeisung münden. + +### Ziel +Unabhängig davon, ob Samples von: +- `ffmpeg` +- RTP/AES67 +- Icecast/MP3 +- SRT framed PCM + +kommen, soll der Rest der Sende-/RDS-/Runtime-Logik immer dieselbe Audioquelle sehen. + +### Grobe Zielverantwortung +Eine Quelle soll idealerweise liefern können: +- PCM-Samples +- Sample-Rate +- Kanalzahl +- Source-Label / Source-Type +- Laufzeitstatus / Health +- Basisstatistiken +- optional Metadaten (z. B. ICY title) + +### Wichtige Designregel +**Decoder/Protocol-Layer** und **Sender-Runtime** nicht vermischen.
+ +Das Ingest-System soll: +- Audio empfangen / decodieren / normieren +- Health / Stats liefern +- Audio in die bestehende Audio-Pipeline schieben + +Die Sender-Runtime soll: +- Quellen starten/stoppen +- aktive Quelle verwalten +- Fehler/Fallback/Status darstellen +- UI/Control-Plane bedienen + +## Einordnung von `aoiprxkit` + +## Was `aoiprxkit` heute schon gut abdeckt +- RTP multicast RX +- L24-Decoding +- Jitter/Reorder +- statische SDP-Auswertung +- SAP-Listener +- Stream-Finder per SDP `s=` Name +- Basis-Stats +- Live-Metering +- NMOS-/SRT-Grundgerüst + +## Was `aoiprxkit` heute noch nicht vollständig als Produkt ist +- keine voll integrierte `fm-rds-tx`-Runtime-Anbindung +- SRT-Pfad eher Scaffold als fertig produktionsreif +- NMOS eher vorbereitend als vollständig integriert +- noch kein gemeinsames Source-Management mit anderen Ingest-Familien + +## Konsequenz +`aoiprxkit` ist **integrationswürdig**, aber aktuell noch eher ein Modul/Baukasten als direktes Hauptsystem. + +## Empfohlene Package-/Modul-Richtung in `fm-rds-tx` +Dies ist ein Zielbild, kein harter Sofort-Umbau. + +### Kandidaten +- `internal/audioingest` + - gemeinsame Interfaces / gemeinsame Typen / gemeinsame Runtime-Adapter +- `internal/audioingest/ffmpeg` + - bestehender ffmpeg-basierter Pfad +- `internal/audioingest/aoip` + - Adapter zwischen `aoiprxkit` und `fm-rds-tx` +- `internal/audioingest/streaming` + - HTTP/Icecast/Shoutcast/ICY + Decoder-Libs + +Optional später: +- `internal/audioingest/shared` + - Resampling, channel mapping, sample normalization, metadata structs + +## Konfigurationszielbild +Die Runtime sollte einen expliziten Ingest-Typ kennen. + +Beispielhaft: + +```yaml +input: + kind: ffmpeg | aoip-rtp | aoip-sap | aoip-srt | stream-http +``` + +Später können pro Familie Unterstrukturen folgen. 
+ +Beispielhaft: + +```yaml +input: + kind: aoip-rtp + aoip: + multicastGroup: 239.69.0.1 + port: 5004 + payloadType: 97 + sampleRateHz: 48000 + channels: 2 +``` + +oder + +```yaml +input: + kind: stream-http + streaming: + url: https://example.org/live.mp3 + icyMeta: true +``` + +## Runtime-Zielbild +Die Runtime sollte Quellen einheitlich behandeln können: +- initialisieren +- starten +- stoppen +- Status abfragen +- Health/Stats lesen +- Audio in denselben bestehenden Ringbuffer / Audio-Input-Pfad drücken + +## Telemetrie / UI +Die Control Plane sollte mittelfristig ingest-bezogen sichtbar machen: +- aktiver Ingest-Typ +- Source-Label +- Transport / Codec / Sample-Rate / Channels +- Fehlerzustand +- Puffer-/Jitter-/Underrun-relevante Daten +- optional Metadata (z. B. StreamTitle) + +Wichtig ist hier eine Trennung zwischen: +- **Audio ingest health** +- **TX/runtime health** + +## Empfohlene Umsetzungsreihenfolge + +### Phase 1 – Architektur sauberziehen +- gemeinsames Ingest-Zielbild festziehen +- bestehende Audio-Input-Andockpunkte in `fm-rds-tx` dokumentieren +- entscheiden, welche internen Interfaces nötig sind + +### Phase 2 – AoIP MVP +- `aoiprxkit` nicht blind verschieben, sondern zuerst als Adapter anbinden +- erster nativer Ingest-Modus: statischer RTP/AES67-lite Pfad +- PCM-Frames in bestehende Audio-Pipeline einspeisen +- Runtime-/Health-/Status sichtbar machen + +### Phase 3 – SDP / SAP Discovery +- statische SDP-Unterstützung +- optional SAP Listener + Session-Auswahl +- Discovery klar von Audio-Transport trennen + +### Phase 4 – Streaming MVP +- neuer nativer HTTP/Icecast/Shoutcast-Pfad +- bewährte Go-Libs für Decoder und ICY nutzen +- erstes Ziel: häufige Webradio-Fälle ohne `ffmpeg` + +### Phase 5 – Vereinheitlichung / Telemetrie +- gemeinsame Ingest-Stats +- gemeinsame Statusmodelle +- UI/Control-Plane-Integration +- Quellwechsel / Fehlermeldungen / Health States + +### Phase 6 – Erweiterte Pfade +- SRT sauber produktionsfähig machen +- NMOS 
weiter integrieren +- später ggf. Opus / weitere Streaming-Codecs + +## Was explizit vermieden werden soll +- `ffmpeg` sofort herausreissen +- AoIP und Web-Streaming in denselben unscharfen Package-Topf werfen +- Decoder / Demux / Protocol-Layer unnötig selbst neu bauen +- Discovery-Logik eng mit der PCM-Pipeline verheiraten +- UI bauen, bevor Runtime-Modelle sauber stehen + +## Erste konkrete Bauschritte ab jetzt +1. bestehenden Audio-Input-Pfad in `fm-rds-tx` analysieren +2. kleinste gemeinsame Ingest-Abstraktion definieren +3. `aoiprxkit`-RTP als ersten nativen Adapter integrieren +4. danach Streaming-Familie planen und anbinden + +## Kurzfazit +`ffmpeg` bleibt vorerst als nützlicher Universalpfad erhalten. +Die Zukunft liegt aber in zwei nativen Familien: +- **AoIP** für professionelle/broadcast-nahe Zuführung +- **Streaming** für HTTP/Icecast/Shoutcast/ICY + Standardcodecs + +Beide sollen sauber über eine gemeinsame interne Audio-Ingest-Schicht in `fm-rds-tx` zusammenlaufen. diff --git a/docs/config.sample.json b/docs/config.sample.json index 1bb4e71..700304d 100644 --- a/docs/config.sample.json +++ b/docs/config.sample.json @@ -34,5 +34,32 @@ }, "control": { "listenAddress": "127.0.0.1:8088" + }, + "runtime": { + "frameQueueCapacity": 3 + }, + "ingest": { + "kind": "none", + "prebufferMs": 1500, + "stallTimeoutMs": 3000, + "reconnect": { + "enabled": true, + "initialBackoffMs": 1000, + "maxBackoffMs": 15000 + }, + "stdin": { + "sampleRateHz": 44100, + "channels": 2, + "format": "s16le" + }, + "httpRaw": { + "sampleRateHz": 44100, + "channels": 2, + "format": "s16le" + }, + "icecast": { + "url": "", + "decoder": "auto" + } } } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 7236eff..031fbcb 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -160,6 +160,14 @@ func TestValidateRejectsUnsupportedIcecastDecoder(t *testing.T) { } } +func TestValidateAcceptsIcecastDecoderFallbackAlias(t *testing.T) 
{ + cfg := Default() + cfg.Ingest.Icecast.Decoder = "fallback" + if err := cfg.Validate(); err != nil { + t.Fatalf("expected fallback alias to be accepted: %v", err) + } +} + func TestValidateRejectsReconnectWithMissingBackoff(t *testing.T) { cfg := Default() cfg.Ingest.Reconnect.Enabled = true diff --git a/internal/control/control.go b/internal/control/control.go index 1b93a05..006c726 100644 --- a/internal/control/control.go +++ b/internal/control/control.go @@ -336,8 +336,8 @@ func (s *Server) handleRuntimeFaultReset(w http.ResponseWriter, r *http.Request) _ = json.NewEncoder(w).Encode(map[string]any{"ok": true}) } -// handleAudioStream accepts raw S16LE stereo PCM via HTTP POST and pushes -// it into the live audio ring buffer. Use with: +// handleAudioStream accepts raw S16LE PCM via HTTP POST and pushes +// it into the configured ingest http-raw source. Use with: // // curl -X POST --data-binary @- http://host:8088/audio/stream < audio.raw // ffmpeg ... -f s16le -ar 44100 -ac 2 - | curl -X POST --data-binary @- http://host:8088/audio/stream diff --git a/internal/ingest/adapters/icecast/source.go b/internal/ingest/adapters/icecast/source.go index 93c01f0..0106eff 100644 --- a/internal/ingest/adapters/icecast/source.go +++ b/internal/ingest/adapters/icecast/source.go @@ -241,6 +241,8 @@ func (s *Source) decodeWithPreference(ctx context.Context, stream io.Reader, met } return native.DecodeStream(ctx, stream, meta, s.emitChunk) case "auto": + // Phase-1 policy: try native decoder first, then fall back to ffmpeg + // only when native selection/decode reports "unsupported". 
native, err := s.decReg.SelectByContentType(meta.ContentType) if err == nil { if err := native.DecodeStream(ctx, stream, meta, s.emitChunk); err == nil { diff --git a/internal/ingest/adapters/icecast/source_test.go b/internal/ingest/adapters/icecast/source_test.go index 3786d90..ce7798a 100644 --- a/internal/ingest/adapters/icecast/source_test.go +++ b/internal/ingest/adapters/icecast/source_test.go @@ -105,3 +105,32 @@ func TestDecodeWithPreferenceFFmpegOnly(t *testing.T) { t.Fatalf("fallback called %d times", fallback.called) } } + +func TestDecodeWithPreferenceAutoUnsupportedContentTypeFallsBack(t *testing.T) { + fallback := &testDecoder{name: "ffmpeg"} + reg := decoder.NewRegistry() + reg.Register("ffmpeg", func() decoder.Decoder { return fallback }) + + src := New("ice-test", "http://example", nil, ReconnectConfig{}, + WithDecoderRegistry(reg), + WithDecoderPreference("auto"), + ) + + err := src.decodeWithPreference(context.Background(), bytes.NewReader(nil), decoder.StreamMeta{ + ContentType: "application/octet-stream", + SourceID: "ice-test", + }) + if err != nil { + t.Fatalf("decode: %v", err) + } + if fallback.called != 1 { + t.Fatalf("fallback called %d times", fallback.called) + } +} + +func TestWithDecoderPreferenceFallbackAliasNormalizesToFFmpeg(t *testing.T) { + src := New("ice-test", "http://example", nil, ReconnectConfig{}, WithDecoderPreference("fallback")) + if got := src.Descriptor().Codec; got != "ffmpeg" { + t.Fatalf("codec=%s want ffmpeg", got) + } +}