diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..648887f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,299 @@ +# AGENTS.md + +This file is the repo-level working guide for humans, coding agents, and LLMs. +Read it before making changes. + +--- + +## 1. Purpose of this file + +Use this file as the canonical "how to work in this repo" guide. +It is intentionally practical and operational. + +Use it to answer questions like: +- Where should changes go? +- What must not be committed? +- How should builds/tests be run? +- Which docs are canonical? +- How should debugging work be documented? +- How should agents behave when touching this repo? + +--- + +## 2. Repo intent + +`sd r-wideband-suite` is a Go-based SDR analysis and streaming system with: +- live spectrum/waterfall UI +- signal detection/classification +- extraction / demodulation / recording +- GPU-assisted paths +- streaming audio paths +- extensive telemetry/debugging support + +This repo has gone through active streaming-path and audio-click debugging. +Do not assume older comments, notes, or experimental code paths are still authoritative. +Prefer current code, current docs in `docs/`, and current branch state over historical assumptions. + +--- + +## 3. 
Canonical documentation + +### Keep as primary references +- `README.md` + - high-level project overview + - build/run basics + - feature summary +- `ROADMAP.md` + - longer-lived architectural direction +- `docs/known-issues.md` + - curated open engineering issues +- `docs/telemetry-api.md` + - telemetry endpoint documentation +- `docs/telemetry-debug-runbook.md` + - telemetry/debug operating guide +- `docs/audio-click-debug-notes-2026-03-24.md` + - historical incident record and final resolution notes for the audio-click investigation + +### Treat as historical / contextual docs +Anything in `docs/` that reads like an incident log, deep debug note, or one-off investigation should be treated as supporting context, not automatic source of truth. + +### Do not create multiple competing issue lists +If new open problems are found: +- update `docs/known-issues.md` +- keep raw reviewer/ad-hoc reports out of the main repo flow unless they are converted into curated docs + +--- + +## 4. Branching and workflow rules + +### Current working model +- Use focused branches for real feature/fix work. +- Do not keep long-lived junk/debug branches alive once the useful work has been transferred. +- Prefer short-lived cleanup branches for docs/config cleanup. + +### Branch hygiene +- Do not pile unrelated work onto one branch if it can be split cleanly. +- Keep bugfixes, config cleanup, and large refactors logically separable when possible. +- Before deleting an old branch, ensure all useful work is already present in the active branch or merged into the main line. + +### Mainline policy +- Do not merge to `master` blindly. +- Before merge, prefer at least a short sanity pass on: + - live playback + - recording + - WFM / WFM_STEREO / at least one non-WFM mode if relevant + - restart behavior if the change affects runtime state + +--- + +## 5. 
Commit policy + +### Commit what matters +Good commits are: +- real code fixes +- clear docs improvements +- deliberate config-default changes +- cleanup that reduces confusion + +### Do not commit accidental noise +Do **not** commit unless explicitly intended: +- local debug dumps +- ad-hoc telemetry exports +- generated WAV debug windows +- temporary patch files +- throwaway reviewer JSON snapshots +- local-only runtime artifacts + +### Prefer small, readable commit scopes +Examples of good separate commit scopes: +- code fix +- config default cleanup +- doc cleanup +- known-issues update + +--- + +## 6. Files and paths that need extra care + +### Config files +- `config.yaml` +- `config.autosave.yaml` + +Rules: +- These can drift during debugging. +- Do not commit config changes accidentally. +- Only commit them when the intent is to change repo defaults. +- Keep in mind that `config.autosave.yaml` can override expected runtime behavior after restart. + +### Debug / dump artifacts +Examples: +- `debug/` +- `tele-*.json` +- ad-hoc patch/report scratch files +- generated WAV capture windows + +Rules: +- Treat these as local investigation material unless intentionally promoted into docs. +- Do not leave them hanging around as tracked repo clutter. + +### Root docs +The repo root should stay relatively clean. +Keep only genuinely canonical top-level docs there. +One-off investigation output belongs in `docs/` or should be deleted. + +--- + +## 7. Build and test rules + +### General rule +Prefer the repo's own scripts and established workflow over ad-hoc raw build commands. + +### Important operational rule +Before coding/build/test sessions on this repo: +- stop the browser UI +- stop `sdrd.exe` + +This avoids file locks, stale runtime state, and misleading live-test behavior. + +### Build preference +Use the project scripts where applicable, especially for the real app flows. 
+Examples already used during this project include: +- `build-sdrplay.ps1` +- `start-sdr.ps1` + +Do **not** default to random raw `go build` commands for full workflow validation unless the goal is a narrow compile-only sanity check. + +### GPU / native-path caution +If working on GPU/native streaming code: +- do not assume the CPU oracle path is currently trustworthy unless you have just validated it +- do not assume old README notes inside subdirectories are current +- check the current code and current docs first + +--- + +## 8. Debugging rules + +### Telemetry-first, but disciplined +Telemetry is available and useful. +However: +- heavy telemetry can distort runtime behavior +- debug config can accidentally persist via autosave +- not every one-off probe belongs in permanent code + +### When debugging +Prefer this order: +1. existing telemetry and current docs +2. focused additional instrumentation +3. short-lived dumps / captures +4. cleanup afterward + +### If you add debugging support +Ask: +- Is this reusable for future incidents? +- Should it live in `docs/known-issues.md` or a runbook? +- Is it temporary and should be removed after use? + +### If a reviewer provides a raw report +Do not blindly keep raw snapshots as canonical repo docs. +Instead: +- extract the durable findings +- update `docs/known-issues.md` +- keep only the cleaned/curated version in the main repo flow + +--- + +## 9. Documentation rules + +### Prefer curated docs over raw dumps +Good: +- `docs/known-issues.md` +- runbooks +- architectural notes +- incident summaries with clear final status + +Bad: +- random JSON reviewer dumps as primary docs +- duplicate issue lists +- stale TODO/STATE files that nobody maintains + +### If a doc becomes stale +Choose one: +- update it +- move it into `docs/` as historical context +- delete it + +Do not keep stale docs in prominent locations if they compete with current truth. + +--- + +## 10. 
Known lessons from recent work + +These are important enough to keep visible: + +### Audio-click investigation lessons +- The final click bug was not a single simple DSP bug. +- Real causes included: + - shared-buffer mutation / aliasing + - extractor reset churn from unstable config hashing + - streaming-path batch rejection / fallback behavior +- Secondary contributing issues existed in discriminator bridging and WFM mono/plain-path filtering. + +### Practical repo lessons +- Silent fallback paths are dangerous; keep important fallthrough/fallback visibility. +- Shared IQ buffers should be treated very carefully. +- Debug artifacts should not become permanent repo clutter. +- Curated issue tracking in Git is better than keeping raw review snapshots around. + +--- + +## 11. Agent behavior expectations + +If you are an AI coding agent / LLM working in this repo: + +### Do +- read this file first +- prefer current code and current docs over old assumptions +- keep changes scoped and explainable +- separate config cleanup from code fixes when possible +- leave the repo cleaner than you found it +- promote durable findings into curated docs + +### Do not +- commit local debug noise by default +- create duplicate status/todo/issue files without a strong reason +- assume experimental comments or old subdirectory READMEs are still correct +- leave raw reviewer output as the only source of truth +- hide fallback behavior or silently ignore critical path failures + +--- + +## 12. Recommended doc update pattern after meaningful work + +When a meaningful fix or investigation lands: +1. update code +2. update any relevant canonical docs +3. update `docs/known-issues.md` if open issues changed +4. remove or archive temporary debug artifacts +5. keep the repo root and branch state clean + +--- + +## 13. Minimal pre-commit checklist + +Before committing, quickly check: +- Am I committing only intended files? +- Are config changes intentional? 
+- Am I accidentally committing dumps/logs/debug exports? +- Should any reviewer findings be moved into `docs/known-issues.md`? +- Did I leave stale temporary files behind? + +--- + +## 14. If unsure + +If a file looks ambiguous: +- canonical + actively maintained -> keep/update +- historical but useful -> move or keep in `docs/` +- stale and confusing -> delete + +Clarity beats nostalgia. diff --git a/README.md b/README.md index a2625ec..d872a1f 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,32 @@ go build -tags sdrplay ./cmd/sdrd - `GET /api/signals` -> current live signals - `GET /api/events?limit=&since=` -> recent events +### Debug Telemetry +- `GET /api/debug/telemetry/live` -> current telemetry snapshot (counters, gauges, distributions, recent events, collector status/config) +- `GET /api/debug/telemetry/history` -> historical metric samples with filtering by time/name/prefix/tags +- `GET /api/debug/telemetry/events` -> telemetry event/anomaly history with filtering by time/name/prefix/level/tags +- `GET /api/debug/telemetry/config` -> current collector config plus `debug.telemetry` runtime config +- `POST /api/debug/telemetry/config` -> update telemetry settings at runtime and persist them to autosave config + +Telemetry query params (`history` / `events`) include: +- `since`, `until` -> unix seconds, unix milliseconds, or RFC3339 timestamps +- `limit` +- `name`, `prefix` +- `signal_id`, `session_id`, `stage`, `trace_id`, `component` +- `tag_=` for arbitrary tag filters +- `include_persisted=true|false` (default `true`) +- `level` on the events endpoint + +Telemetry config lives under `debug.telemetry`: +- `enabled`, `heavy_enabled`, `heavy_sample_every` +- `metric_sample_every`, `metric_history_max`, `event_history_max` +- `retention_seconds` +- `persist_enabled`, `persist_dir`, `rotate_mb`, `keep_files` + +See also: +- `docs/telemetry-api.md` for the full telemetry API reference +- `docs/telemetry-debug-runbook.md` for the short operational debug 
flow + ### Recordings - `GET /api/recordings` - `GET /api/recordings/:id` (meta.json) diff --git a/STATE.md b/STATE.md deleted file mode 100644 index f3487ef..0000000 --- a/STATE.md +++ /dev/null @@ -1,184 +0,0 @@ -# SDR Wideband Suite - Current State - -This file is the practical handoff / resume state for future work. -Use it together with `ROADMAP.md`. - -- `ROADMAP.md` = long-term architecture and phase roadmap -- `STATE.md` = current repo state, working conventions, and next recommended entry point - -## Current Milestone State - -- **Phase 1 complete** -- **Phase 2 complete** -- **Phase 3 complete** -- **Phase 4 complete** - -Current project state should be treated as: -- Phase 1 = architecture foundation landed -- Phase 2 = multi-resolution surveillance semantics landed -- Phase 3 = conservative runtime prioritization/admission/rebalance landed -- Phase 4 = monitor-window operating model landed - -Do not reopen these phases unless there is a concrete bug, mismatch, or regression. 
- ---- - -## Most Recent Relevant Commits - -These are the most important recent milestone commits that define the current state: - -### Phase 4 monitor-window operating model -- `efe137b` Add monitor window goals for multi-span gating -- `ac64d6b` Add monitor window matches and stats -- `d7e457d` Expose monitor window summaries in runtime debug -- `c520423` Add monitor window priority bias -- `838c941` Add window-based record/decode actions -- `962cf06` Add window zone biases for record/decode actions -- `402a772` Consolidate monitor window summary in debug outputs -- `8545b62` Add per-window outcome summaries for admission pressure -- `65b9845` test: cover overlapping monitor windows -- `efe3215` docs: capture Phase-4 monitor-window status - -### Phase 3 runtime intelligence milestone -- `4ebd51d` Add priority tiers and admission classes to pipeline -- `18b179b` Expose admission metadata in debug output and tests -- `ba9adca` Add budget preference and pressure modeling -- `7a75367` Expose arbitration pressure summary -- `592fa03` pipeline: deepen hold/displacement semantics -- `30a5d11` pipeline: apply intent holds and family tier floors -- `1f5d4ab` pipeline: add intent and family priority tests -- `822829c` Add conservative budget rebalance layer -- `da5fa22` Update Phase-3 Wave 3E status - -### Documentation / stable defaults -- `fd718d5` docs: finalize phase milestones and ukf test config - -If resuming after a long pause, inspect the current `git log` around these commits first. 
- ---- - -## Current Important Files / Subsystems - -### Long-term guidance -- `ROADMAP.md` - durable roadmap across phases -- `STATE.md` - practical resume/handoff state -- `PLAN.md` - project plan / narrative (may be less pristine than ROADMAP.md) -- `README.md` - user-facing/current feature status - -### Config / runtime surface -- `config.yaml` - current committed default config -- `config.autosave.yaml` - local autosave; intentionally not tracked in git -- `internal/config/config.go` -- `internal/runtime/runtime.go` - -### Phase 3 core runtime intelligence -- `internal/pipeline/arbiter.go` -- `internal/pipeline/arbitration.go` -- `internal/pipeline/arbitration_state.go` -- `internal/pipeline/priority.go` -- `internal/pipeline/budget.go` -- `internal/pipeline/pressure.go` -- `internal/pipeline/rebalance.go` -- `internal/pipeline/decision_queue.go` - -### Phase 2 surveillance/evidence model -- `internal/pipeline/types.go` -- `internal/pipeline/evidence.go` -- `internal/pipeline/candidate_fusion.go` -- `internal/pipeline/scheduler.go` -- `cmd/sdrd/pipeline_runtime.go` - -### Phase 4 monitor-window model -- `internal/pipeline/monitor_rules.go` -- `cmd/sdrd/window_summary.go` -- `cmd/sdrd/level_summary.go` -- `cmd/sdrd/http_handlers.go` -- `cmd/sdrd/decision_compact.go` -- `cmd/sdrd/dsp_loop.go` - ---- - -## Current Default Operator / Test Posture - -The repo was intentionally switched to an FM/UKW-friendly default test posture. 
- -### Current committed config defaults -- band: `87.5-108.0 MHz` -- center: `99.5 MHz` -- sample rate: `2.048 MHz` -- FFT: `4096` -- profile: `wideband-balanced` -- intent: `broadcast-monitoring` -- priorities include `wfm`, `rds`, `broadcast`, `digital` - -### Important config note -- `config.yaml` is committed and intended as the stable default reference -- `config.autosave.yaml` is **not** git-tracked and may diverge locally -- if behavior seems odd, compare the active runtime config against `config.yaml` - ---- - -## Working Conventions That Matter - -### Codex invocation on Windows -Preferred stable flow: -1. write prompt to `codex_prompt.txt` -2. create/use `run_codex.ps1` containing: - - read prompt file - - pipe to `codex exec --yolo` -3. run with PTY/background from the repo root -4. remove `codex_prompt.txt` and `run_codex.ps1` after the run - -This was adopted specifically to avoid PowerShell quoting failures. - -### Expectations for coding runs -- before every commit: `go test ./...` and `go build ./cmd/sdrd` -- commit in coherent blocks with clear messages -- push after successful validation -- avoid reopening already-closed phase work without a concrete reason - ---- - -## Known Practical Caveats - -- `PLAN.md` has had encoding/character issues in some reads; treat `ROADMAP.md` + `STATE.md` as the cleaner authoritative continuity docs. -- README is generally useful, but `ROADMAP.md`/`STATE.md` are better for architectural continuity. -- `config.autosave.yaml` can become misleading because it is local/autosaved and not tracked. - ---- - -## Recommended Next Entry Point - -If resuming technical work after this checkpoint: - -### Start with **Phase 5** -Do **not** reopen Phase 1-4 unless there is a concrete bug or regression. 
- -### Recommended Phase 5 direction -Move from monitor windows inside a single capture span toward richer span / operating orchestration: -- span / zone groups -- span-aware resource allocation -- stronger profile-driven operating modes -- retune / scan / dwell semantics where needed - -### Avoid jumping ahead prematurely to -- full adaptive QoS engine (Phase 6) -- major GPU/performance re-architecture (Phase 7) -- heavy UX/product polish (Phase 8) - -Those should build on Phase 5, not bypass it. - ---- - -## Resume Checklist For A Future Agent - -1. Read `ROADMAP.md` -2. Read `STATE.md` -3. Check current `git log` near the commits listed above -4. Inspect `config.yaml` -5. Confirm current repo state with: - - `go test ./...` - - `go build ./cmd/sdrd` -6. Then start Phase 5 planning from the actual repo state - -If these steps still match the repo, continuation should be seamless enough even after a hard context reset. diff --git a/TODO.md b/TODO.md deleted file mode 100644 index eb3a9ca..0000000 --- a/TODO.md +++ /dev/null @@ -1,23 +0,0 @@ -# TODO — SDR Visual Suite - -## UI -- [ ] RDS RadioText (RT) Anzeige hinzufügen: - - Overlay: 1 Zeile, sanfter Fade bei Updates, Ellipsis bei Überlänge, optional kleines „RT“-Badge. - - Detail-Panel: 2 Zeilen Auto-Wrap; bei Überlänge Ellipsis + Expand (Modal/Zone) für Volltext. - - Update-Logik: RT nur bei stabilem Text (z. B. 2–3 identische Blöcke), optional „RT · HH:MM“ Timestamp. - -## Band Settings Profiles (v1.2) -- [ ] Backend: built-in Profile-Struktur + embedded JSON (6 Profile) -- [ ] Backend: Apply-Helper (shared mit /api/config) inkl. 
source/dsp/save -- [ ] Backend: Merge-Patch mit Feld-Präsenz (nur explizite Felder anwenden) -- [ ] Backend: DisallowUnknownFields + Config-Validierung → 400 -- [ ] Backend: Endpoints GET /api/profiles, POST /api/profiles/apply, POST /api/profiles/undo, GET /api/profiles/suggest -- [ ] Backend: Undo-Snapshot (1 Level) + Active Profile ID (Runtime-State) -- [ ] Optional: Active Profile ID über Neustart persistieren (falls gewünscht) -- [ ] UI: Dropdown + Split-Apply (full/dsp_only) + Undo + Active-Badge -- [ ] UI: Suggest-Toast bei center_hz Wechsel, Dismiss-Schutz (>5 MHz) -- [ ] UX: Loading-Indicator während Profilwechsel (1–3s Reset) -- [ ] Tests: Patch-Semantik, dsp_only (center_hz/gain_db bleiben), Unknown Fields, Suggest-Match - -## Notes -- Ab jetzt hier die Todo-Liste führen. diff --git a/build-gpudemod-dll.ps1 b/build-gpudemod-dll.ps1 index da19b08..4e095c2 100644 --- a/build-gpudemod-dll.ps1 +++ b/build-gpudemod-dll.ps1 @@ -16,12 +16,25 @@ if (!(Test-Path $outDir)) { New-Item -ItemType Directory -Path $outDir | Out-Nul Remove-Item $dll,$lib,$exp -Force -ErrorAction SilentlyContinue -$cmd = @" -call "$vcvars" && "$nvcc" -shared "$src" -o "$dll" -cudart=hybrid -Xcompiler "/MD" -arch=sm_75 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90 +$bat = Join-Path $env:TEMP 'build-gpudemod-dll.bat' +$batContent = @" +@echo off +call "$vcvars" +if errorlevel 1 exit /b %errorlevel% +"$nvcc" -shared "$src" -o "$dll" -cudart=hybrid -Xcompiler "/MD" -arch=sm_75 ^ + -gencode arch=compute_75,code=sm_75 ^ + -gencode arch=compute_80,code=sm_80 ^ + -gencode arch=compute_86,code=sm_86 ^ + -gencode arch=compute_89,code=sm_89 ^ + -gencode arch=compute_90,code=sm_90 +exit /b %errorlevel% "@ +Set-Content -Path $bat -Value $batContent -Encoding ASCII Write-Host 'Building gpudemod CUDA DLL...' 
-ForegroundColor Cyan -cmd.exe /c $cmd -if ($LASTEXITCODE -ne 0) { throw 'gpudemod DLL build failed' } +cmd.exe /c ""$bat"" +$exitCode = $LASTEXITCODE +Remove-Item $bat -Force -ErrorAction SilentlyContinue +if ($exitCode -ne 0) { throw 'gpudemod DLL build failed' } Write-Host "Built: $dll" -ForegroundColor Green diff --git a/build-sdrplay.ps1 b/build-sdrplay.ps1 index 5f5e2bb..89c5507 100644 --- a/build-sdrplay.ps1 +++ b/build-sdrplay.ps1 @@ -21,10 +21,13 @@ if (Test-Path $sdrplayBin) { $env:PATH = "$sdrplayBin;" + $env:PATH } # CUDA runtime / cuFFT $cudaInc = 'C:\CUDA\include' $cudaBin = 'C:\CUDA\bin' +$cudaBinX64 = 'C:\CUDA\bin\x64' if (-not (Test-Path $cudaInc)) { $cudaInc = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\include' } if (-not (Test-Path $cudaBin)) { $cudaBin = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\bin' } +if (-not (Test-Path $cudaBinX64)) { $cudaBinX64 = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\bin\x64' } $cudaMingw = Join-Path $PSScriptRoot 'cuda-mingw' if (Test-Path $cudaInc) { $env:CGO_CFLAGS = "$env:CGO_CFLAGS -I$cudaInc" } +if (Test-Path $cudaBinX64) { $env:PATH = "$cudaBinX64;" + $env:PATH } if (Test-Path $cudaBin) { $env:PATH = "$cudaBin;" + $env:PATH } if (Test-Path $cudaMingw) { $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaMingw -lcudart64_13 -lcufft64_12 -lkernel32" } @@ -68,8 +71,11 @@ if ($dllSrc) { } $cudartCandidates = @( + (Join-Path $cudaBinX64 'cudart64_13.dll'), (Join-Path $cudaBin 'cudart64_13.dll'), + 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin\x64\cudart64_13.dll', 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin\cudart64_13.dll', + 'C:\CUDA\bin\x64\cudart64_13.dll', 'C:\CUDA\bin\cudart64_13.dll' ) $cudartSrc = $cudartCandidates | Where-Object { $_ -and (Test-Path $_) } | Select-Object -First 1 diff --git a/cmd/sdrd/dsp_loop.go b/cmd/sdrd/dsp_loop.go index f8149a8..be6395e 100644 --- a/cmd/sdrd/dsp_loop.go +++ b/cmd/sdrd/dsp_loop.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "fmt" "log" "os" 
"runtime/debug" @@ -16,15 +17,16 @@ import ( "sdr-wideband-suite/internal/logging" "sdr-wideband-suite/internal/pipeline" "sdr-wideband-suite/internal/recorder" + "sdr-wideband-suite/internal/telemetry" ) -func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, eventMu *sync.RWMutex, updates <-chan dspUpdate, gpuState *gpuStatus, rec *recorder.Manager, sigSnap *signalSnapshot, extractMgr *extractionManager, phaseSnap *phaseSnapshot) { +func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, eventMu *sync.RWMutex, updates <-chan dspUpdate, gpuState *gpuStatus, rec *recorder.Manager, sigSnap *signalSnapshot, extractMgr *extractionManager, phaseSnap *phaseSnapshot, coll *telemetry.Collector) { defer func() { if r := recover(); r != nil { log.Printf("FATAL: runDSP goroutine panic: %v\n%s", r, debug.Stack()) } }() - rt := newDSPRuntime(cfg, det, window, gpuState) + rt := newDSPRuntime(cfg, det, window, gpuState, coll) ticker := time.NewTicker(cfg.FrameInterval()) defer ticker.Stop() logTicker := time.NewTicker(5 * time.Second) @@ -33,6 +35,9 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * dcBlocker := dsp.NewDCBlocker(0.995) state := &phaseState{} var frameID uint64 + prevDisplayed := map[int64]detector.Signal{} + lastSourceDrops := uint64(0) + lastSourceResets := uint64(0) for { select { case <-ctx.Done(): @@ -40,11 +45,28 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * case <-logTicker.C: st := srcMgr.Stats() log.Printf("stats: buf=%d drop=%d reset=%d last=%dms", st.BufferSamples, st.Dropped, st.Resets, st.LastSampleAgoMs) + if coll != nil { + coll.SetGauge("source.buffer_samples", float64(st.BufferSamples), nil) + coll.SetGauge("source.last_sample_ago_ms", float64(st.LastSampleAgoMs), nil) + if st.Dropped > 
lastSourceDrops { + coll.IncCounter("source.drop.count", float64(st.Dropped-lastSourceDrops), nil) + } + if st.Resets > lastSourceResets { + coll.IncCounter("source.reset.count", float64(st.Resets-lastSourceResets), nil) + coll.Event("source_reset", "warn", "source reset observed", nil, map[string]any{"resets": st.Resets}) + } + lastSourceDrops = st.Dropped + lastSourceResets = st.Resets + } case upd := <-updates: rt.applyUpdate(upd, srcMgr, rec, gpuState) dcBlocker.Reset() ticker.Reset(rt.cfg.FrameInterval()) + if coll != nil { + coll.IncCounter("dsp.update.apply", 1, nil) + } case <-ticker.C: + frameStart := time.Now() frameID++ art, err := rt.captureSpectrum(srcMgr, rec, dcBlocker, gpuState) if err != nil { @@ -61,8 +83,19 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * rt.gotSamples = true } logging.Debug("trace", "capture_done", "trace", frameID, "allIQ", len(art.allIQ), "detailIQ", len(art.detailIQ)) + if coll != nil { + coll.Observe("stage.capture.duration_ms", float64(time.Since(frameStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID))) + } + survStart := time.Now() state.surveillance = rt.buildSurveillanceResult(art) + if coll != nil { + coll.Observe("stage.surveillance.duration_ms", float64(time.Since(survStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID))) + } + refineStart := time.Now() state.refinement = rt.runRefinement(art, state.surveillance, extractMgr, rec) + if coll != nil { + coll.Observe("stage.refinement.duration_ms", float64(time.Since(refineStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID))) + } finished := state.surveillance.Finished thresholds := state.surveillance.Thresholds noiseFloor := state.surveillance.NoiseFloor @@ -75,11 +108,44 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * streamSignals = stableSignals } if rec != nil && 
len(art.allIQ) > 0 { + if art.streamDropped { + rt.streamOverlap = &streamIQOverlap{} + for k := range rt.streamPhaseState { + rt.streamPhaseState[k].phase = 0 + } + resetStreamingOracleRunner() + rec.ResetStreams() + logging.Warn("gap", "iq_dropped", "msg", "buffer bloat caused extraction drop; overlap reset") + if coll != nil { + coll.IncCounter("capture.stream_reset", 1, nil) + coll.Event("iq_dropped", "warn", "stream overlap reset after dropped IQ", nil, map[string]any{"frame_id": frameID}) + } + } if rt.cfg.Recorder.DebugLiveAudio { log.Printf("LIVEAUDIO DSP: detailIQ=%d displaySignals=%d streamSignals=%d stableSignals=%d allIQ=%d", len(art.detailIQ), len(displaySignals), len(streamSignals), len(stableSignals), len(art.allIQ)) } aqCfg := extractionConfig{firTaps: rt.cfg.Recorder.ExtractionTaps, bwMult: rt.cfg.Recorder.ExtractionBwMult} - streamSnips, streamRates := extractForStreaming(extractMgr, art.allIQ, rt.cfg.SampleRate, rt.cfg.CenterHz, streamSignals, rt.streamPhaseState, rt.streamOverlap, aqCfg) + extractStart := time.Now() + streamSnips, streamRates := extractForStreaming(extractMgr, art.allIQ, rt.cfg.SampleRate, rt.cfg.CenterHz, streamSignals, rt.streamPhaseState, rt.streamOverlap, aqCfg, rt.telemetry) + if coll != nil { + coll.Observe("stage.extract_stream.duration_ms", float64(time.Since(extractStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID))) + coll.SetGauge("stage.extract_stream.signals", float64(len(streamSignals)), nil) + if coll.ShouldSampleHeavy() { + for i := range streamSnips { + if i >= len(streamSignals) { + break + } + tags := telemetry.TagsFromPairs( + "signal_id", fmt.Sprintf("%d", streamSignals[i].ID), + "stage", "extract_stream", + ) + coll.SetGauge("iq.stage.extract.length", float64(len(streamSnips[i])), tags) + if len(streamSnips[i]) > 0 { + observeIQStats(coll, "extract_stream", streamSnips[i], tags) + } + } + } + } nonEmpty := 0 minLen := 0 maxLen := 0 @@ -127,10 +193,18 @@ func 
runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * log.Printf("LIVEAUDIO DSP: feedItems=%d", len(items)) } if len(items) > 0 { + feedStart := time.Now() rec.FeedSnippets(items, frameID) + if coll != nil { + coll.Observe("stage.feed_enqueue.duration_ms", float64(time.Since(feedStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID))) + coll.SetGauge("stage.feed.items", float64(len(items)), nil) + } logging.Debug("trace", "feed", "trace", frameID, "items", len(items), "signals", len(streamSignals), "allIQ", len(art.allIQ)) } else { logging.Warn("gap", "feed_empty", "signals", len(streamSignals), "trace", frameID) + if coll != nil { + coll.IncCounter("stage.feed.empty", 1, nil) + } } } rt.maintenance(displaySignals, rec) @@ -156,6 +230,27 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * if sigSnap != nil { sigSnap.set(displaySignals) } + if coll != nil { + coll.SetGauge("signals.display.count", float64(len(displaySignals)), nil) + current := make(map[int64]detector.Signal, len(displaySignals)) + for _, s := range displaySignals { + current[s.ID] = s + if _, ok := prevDisplayed[s.ID]; !ok { + coll.Event("signal_create", "info", "signal entered display set", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", s.ID)), map[string]any{ + "center_hz": s.CenterHz, + "bw_hz": s.BWHz, + }) + } + } + for id, prev := range prevDisplayed { + if _, ok := current[id]; !ok { + coll.Event("signal_remove", "info", "signal left display set", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", id)), map[string]any{ + "center_hz": prev.CenterHz, + }) + } + } + prevDisplayed = current + } eventMu.Lock() for _, ev := range finished { _ = enc.Encode(ev) @@ -244,6 +339,9 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det * debugInfo.Refinement = refinementDebug } h.broadcast(SpectrumFrame{Timestamp: art.now.UnixMilli(), CenterHz: rt.cfg.CenterHz, SampleHz: 
rt.cfg.SampleRate, FFTSize: rt.cfg.FFTSize, Spectrum: art.surveillanceSpectrum, Signals: displaySignals, Debug: debugInfo}) + if coll != nil { + coll.Observe("dsp.frame.duration_ms", float64(time.Since(frameStart).Microseconds())/1000.0, nil) + } } } } diff --git a/cmd/sdrd/helpers.go b/cmd/sdrd/helpers.go index 76524e9..905af0a 100644 --- a/cmd/sdrd/helpers.go +++ b/cmd/sdrd/helpers.go @@ -1,10 +1,13 @@ package main import ( + "fmt" "log" "math" + "os" "sort" "strconv" + "strings" "time" "sdr-wideband-suite/internal/config" @@ -12,6 +15,7 @@ import ( "sdr-wideband-suite/internal/detector" "sdr-wideband-suite/internal/dsp" "sdr-wideband-suite/internal/logging" + "sdr-wideband-suite/internal/telemetry" ) func mustParseDuration(raw string, fallback time.Duration) time.Duration { @@ -227,15 +231,30 @@ type extractionConfig struct { const streamOverlapLen = 512 // must be >= FIR tap count with margin const ( - wfmStreamOutRate = 500000 + wfmStreamOutRate = 512000 wfmStreamMinBW = 250000 ) +var forceCPUStreamExtract = func() bool { + raw := strings.TrimSpace(os.Getenv("SDR_FORCE_CPU_STREAM_EXTRACT")) + if raw == "" { + return false + } + v, err := strconv.ParseBool(raw) + if err != nil { + return false + } + return v +}() + // extractForStreaming performs GPU-accelerated extraction with: // - Per-signal phase-continuous FreqShift (via PhaseStart in ExtractJob) // - IQ overlap prepended to allIQ so FIR kernel has real data in halo // // Returns extracted snippets with overlap trimmed, and updates phase state. +// extractForStreaming is the current legacy production path. +// It still relies on overlap-prepend + trim semantics and is intentionally +// kept separate from the new streaming refactor/oracle path under development. 
func extractForStreaming( extractMgr *extractionManager, allIQ []complex64, @@ -245,7 +264,57 @@ func extractForStreaming( phaseState map[int64]*streamExtractState, overlap *streamIQOverlap, aqCfg extractionConfig, + coll *telemetry.Collector, ) ([][]complex64, []int) { + if useStreamingProductionPath { + out, rates, err := extractForStreamingProduction(extractMgr, allIQ, sampleRate, centerHz, signals, aqCfg, coll) + if err == nil { + logging.Debug("extract", "path_active", "path", "streaming_production", "signals", len(signals), "allIQ", len(allIQ)) + if coll != nil { + coll.IncCounter("extract.path.streaming_production", 1, nil) + } + return out, rates + } + // CRITICAL: the streaming production path failed — log WHY before falling through + log.Printf("EXTRACT PATH FALLTHROUGH: streaming production failed: %v — using legacy overlap+trim", err) + logging.Warn("extract", "streaming_production_fallthrough", + "err", err.Error(), + "signals", len(signals), + "allIQ", len(allIQ), + "sampleRate", sampleRate, + ) + if coll != nil { + coll.IncCounter("extract.path.streaming_production_failed", 1, nil) + coll.Event("extraction_path_fallthrough", "warn", + "streaming production path failed, using legacy overlap+trim", nil, + map[string]any{ + "error": err.Error(), + "signals": len(signals), + "allIQ_len": len(allIQ), + "sampleRate": sampleRate, + }) + } + } + if useStreamingOraclePath { + out, rates, err := extractForStreamingOracle(allIQ, sampleRate, centerHz, signals, aqCfg, coll) + if err == nil { + logging.Debug("extract", "path_active", "path", "streaming_oracle", "signals", len(signals)) + if coll != nil { + coll.IncCounter("extract.path.streaming_oracle", 1, nil) + } + return out, rates + } + log.Printf("EXTRACT PATH FALLTHROUGH: streaming oracle failed: %v", err) + logging.Warn("extract", "streaming_oracle_fallthrough", "err", err.Error()) + if coll != nil { + coll.IncCounter("extract.path.streaming_oracle_failed", 1, nil) + } + } + // If we reach here, the legacy 
overlap+trim path is running + logging.Warn("extract", "path_active", "path", "legacy_overlap_trim", "signals", len(signals), "allIQ", len(allIQ)) + if coll != nil { + coll.IncCounter("extract.path.legacy_overlap_trim", 1, nil) + } out := make([][]complex64, len(signals)) rates := make([]int, len(signals)) if len(allIQ) == 0 || sampleRate <= 0 || len(signals) == 0 { @@ -286,6 +355,18 @@ func extractForStreaming( bwMult = 1.0 } + if coll != nil { + coll.SetGauge("iq.extract.input.length", float64(len(allIQ)), nil) + coll.SetGauge("iq.extract.input.overlap_length", float64(overlapLen), nil) + headMean, tailMean, boundaryScore, _ := boundaryMetrics(overlap.tail, allIQ, 32) + coll.SetGauge("iq.extract.input.head_mean_mag", headMean, nil) + coll.SetGauge("iq.extract.input.prev_tail_mean_mag", tailMean, nil) + coll.Observe("iq.extract.input.discontinuity_score", boundaryScore, nil) + } + + rawBoundary := make(map[int64]boundaryProbeState, len(signals)) + trimmedBoundary := make(map[int64]boundaryProbeState, len(signals)) + // Build jobs with per-signal phase jobs := make([]gpudemod.ExtractJob, len(signals)) for i, sig := range signals { @@ -323,11 +404,45 @@ func extractForStreaming( OutRate: jobOutRate, PhaseStart: gpuPhaseStart, } + if coll != nil { + tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sig.ID), "path", "gpu") + inputHead := probeHead(gpuIQ, 16, 1e-6) + coll.SetGauge("iq.extract.input_head.zero_count", float64(inputHead.zeroCount), tags) + coll.SetGauge("iq.extract.input_head.first_nonzero_index", float64(inputHead.firstNonZeroIndex), tags) + coll.SetGauge("iq.extract.input_head.max_step", inputHead.maxStep, tags) + coll.Event("extract_input_head_probe", "info", "extractor input head probe", tags, map[string]any{ + "mags": inputHead.mags, + "zero_count": inputHead.zeroCount, + "first_nonzero_index": inputHead.firstNonZeroIndex, + "head_max_step": inputHead.maxStep, + "center_offset_hz": jobs[i].OffsetHz, + "bandwidth_hz": bw, + "out_rate": 
jobOutRate, + "trim_samples": (overlapLen + int(math.Max(1, math.Round(float64(sampleRate)/float64(jobOutRate)))) - 1) / int(math.Max(1, math.Round(float64(sampleRate)/float64(jobOutRate)))), + }) + } } - // Try GPU BatchRunner with phase - runner := extractMgr.get(len(gpuIQ), sampleRate) + // Try GPU BatchRunner with phase unless CPU-only debug is forced. + var runner *gpudemod.BatchRunner + if forceCPUStreamExtract { + logging.Warn("boundary", "force_cpu_stream_extract", "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "signals", len(signals)) + } else { + runner = extractMgr.get(len(gpuIQ), sampleRate) + } if runner != nil { + if coll != nil && len(gpuIQ) > 0 { + inputProbe := probeHead(gpuIQ, 16, 1e-6) + coll.Event("gpu_kernel_input_head_probe", "info", "gpu kernel input head probe", nil, map[string]any{ + "mags": inputProbe.mags, + "zero_count": inputProbe.zeroCount, + "first_nonzero_index": inputProbe.firstNonZeroIndex, + "head_max_step": inputProbe.maxStep, + "gpuIQ_len": len(gpuIQ), + "sample_rate": sampleRate, + "signals": len(signals), + }) + } results, err := runner.ShiftFilterDecimateBatchWithPhase(gpuIQ, jobs) if err == nil && len(results) == len(signals) { for i, res := range results { @@ -356,9 +471,95 @@ func extractForStreaming( // Trim overlap from output iq := res.IQ + rawLen := len(iq) if trimSamples > 0 && trimSamples < len(iq) { iq = iq[trimSamples:] } + if i == 0 { + logging.Debug("boundary", "extract_trim", "path", "gpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(iq), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID) + logExtractorHeadComparison(signals[i].ID, "gpu", overlapLen, res.IQ, trimSamples, iq) + } + if coll != nil { + tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", signals[i].ID), "path", "gpu") + kernelProbe := probeHead(res.IQ, 16, 1e-6) + coll.Event("gpu_kernel_output_head_probe", "info", "gpu kernel output head probe", tags, 
map[string]any{ + "mags": kernelProbe.mags, + "zero_count": kernelProbe.zeroCount, + "first_nonzero_index": kernelProbe.firstNonZeroIndex, + "head_max_step": kernelProbe.maxStep, + "raw_len": rawLen, + "out_rate": outRate, + "trim_samples": trimSamples, + }) + stats := computeIQHeadStats(iq, 64) + coll.SetGauge("iq.extract.output.length", float64(len(iq)), tags) + coll.Observe("iq.extract.output.head_mean_mag", stats.meanMag, tags) + coll.Observe("iq.extract.output.head_min_mag", stats.minMag, tags) + coll.Observe("iq.extract.output.head_max_step", stats.maxStep, tags) + coll.Observe("iq.extract.output.head_p95_step", stats.p95Step, tags) + coll.Observe("iq.extract.output.head_tail_ratio", stats.headTail, tags) + coll.SetGauge("iq.extract.output.head_low_magnitude_count", float64(stats.lowMag), tags) + coll.SetGauge("iq.extract.raw.length", float64(rawLen), tags) + coll.SetGauge("iq.extract.trim.trim_samples", float64(trimSamples), tags) + if rawLen > 0 { + coll.SetGauge("iq.extract.raw.head_mag", math.Hypot(float64(real(res.IQ[0])), float64(imag(res.IQ[0]))), tags) + coll.SetGauge("iq.extract.raw.tail_mag", math.Hypot(float64(real(res.IQ[rawLen-1])), float64(imag(res.IQ[rawLen-1]))), tags) + rawHead := probeHead(res.IQ, 16, 1e-6) + coll.SetGauge("iq.extract.raw.head_zero_count", float64(rawHead.zeroCount), tags) + coll.SetGauge("iq.extract.raw.first_nonzero_index", float64(rawHead.firstNonZeroIndex), tags) + coll.SetGauge("iq.extract.raw.head_max_step", rawHead.maxStep, tags) + coll.Event("extract_raw_head_probe", "info", "raw extractor head probe", tags, map[string]any{ + "mags": rawHead.mags, + "zero_count": rawHead.zeroCount, + "first_nonzero_index": rawHead.firstNonZeroIndex, + "head_max_step": rawHead.maxStep, + "trim_samples": trimSamples, + }) + } + if len(iq) > 0 { + coll.SetGauge("iq.extract.trimmed.head_mag", math.Hypot(float64(real(iq[0])), float64(imag(iq[0]))), tags) + coll.SetGauge("iq.extract.trimmed.tail_mag", 
math.Hypot(float64(real(iq[len(iq)-1])), float64(imag(iq[len(iq)-1]))), tags) + trimmedHead := probeHead(iq, 16, 1e-6) + coll.SetGauge("iq.extract.trimmed.head_zero_count", float64(trimmedHead.zeroCount), tags) + coll.SetGauge("iq.extract.trimmed.first_nonzero_index", float64(trimmedHead.firstNonZeroIndex), tags) + coll.SetGauge("iq.extract.trimmed.head_max_step", trimmedHead.maxStep, tags) + coll.Event("extract_trimmed_head_probe", "info", "trimmed extractor head probe", tags, map[string]any{ + "mags": trimmedHead.mags, + "zero_count": trimmedHead.zeroCount, + "first_nonzero_index": trimmedHead.firstNonZeroIndex, + "head_max_step": trimmedHead.maxStep, + "trim_samples": trimSamples, + }) + } + if rb := rawBoundary[signals[i].ID]; rb.set && rawLen > 0 { + prevMag := math.Hypot(float64(real(rb.last)), float64(imag(rb.last))) + currMag := math.Hypot(float64(real(res.IQ[0])), float64(imag(res.IQ[0]))) + coll.SetGauge("iq.extract.raw.boundary.prev_tail_mag", prevMag, tags) + coll.SetGauge("iq.extract.raw.boundary.curr_head_mag", currMag, tags) + coll.Event("extract_raw_boundary", "info", "raw extractor boundary", tags, map[string]any{ + "delta_mag": math.Abs(currMag - prevMag), + "trim_samples": trimSamples, + "raw_len": rawLen, + }) + } + if tb := trimmedBoundary[signals[i].ID]; tb.set && len(iq) > 0 { + prevMag := math.Hypot(float64(real(tb.last)), float64(imag(tb.last))) + currMag := math.Hypot(float64(real(iq[0])), float64(imag(iq[0]))) + coll.SetGauge("iq.extract.trimmed.boundary.prev_tail_mag", prevMag, tags) + coll.SetGauge("iq.extract.trimmed.boundary.curr_head_mag", currMag, tags) + coll.Event("extract_trimmed_boundary", "info", "trimmed extractor boundary", tags, map[string]any{ + "delta_mag": math.Abs(currMag - prevMag), + "trim_samples": trimSamples, + "out_len": len(iq), + }) + } + } + if rawLen > 0 { + rawBoundary[signals[i].ID] = boundaryProbeState{last: res.IQ[rawLen-1], set: true} + } + if len(iq) > 0 { + trimmedBoundary[signals[i].ID] = 
boundaryProbeState{last: iq[len(iq)-1], set: true} + } out[i] = iq rates[i] = res.Rate } @@ -424,10 +625,240 @@ func extractForStreaming( if i == 0 { logging.Debug("extract", "cpu_result", "outRate", outRate, "decim", decim, "trim", trimSamples) } + rawIQ := decimated + rawLen := len(rawIQ) if trimSamples > 0 && trimSamples < len(decimated) { decimated = decimated[trimSamples:] } + if i == 0 { + logging.Debug("boundary", "extract_trim", "path", "cpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(decimated), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID) + logExtractorHeadComparison(signals[i].ID, "cpu", overlapLen, decimated, trimSamples, decimated) + } + if coll != nil { + tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", signals[i].ID), "path", "cpu") + stats := computeIQHeadStats(decimated, 64) + coll.SetGauge("iq.extract.output.length", float64(len(decimated)), tags) + coll.Observe("iq.extract.output.head_mean_mag", stats.meanMag, tags) + coll.Observe("iq.extract.output.head_min_mag", stats.minMag, tags) + coll.Observe("iq.extract.output.head_max_step", stats.maxStep, tags) + coll.Observe("iq.extract.output.head_p95_step", stats.p95Step, tags) + coll.Observe("iq.extract.output.head_tail_ratio", stats.headTail, tags) + coll.SetGauge("iq.extract.output.head_low_magnitude_count", float64(stats.lowMag), tags) + coll.SetGauge("iq.extract.raw.length", float64(rawLen), tags) + coll.SetGauge("iq.extract.trim.trim_samples", float64(trimSamples), tags) + if rb := rawBoundary[signals[i].ID]; rb.set && rawLen > 0 { + observeBoundarySample(coll, "iq.extract.raw.boundary", tags, rb.last, rawIQ[0]) + } + if tb := trimmedBoundary[signals[i].ID]; tb.set && len(decimated) > 0 { + observeBoundarySample(coll, "iq.extract.trimmed.boundary", tags, tb.last, decimated[0]) + } + } + if rawLen > 0 { + rawBoundary[signals[i].ID] = boundaryProbeState{last: rawIQ[rawLen-1], set: true} + } + if 
len(decimated) > 0 { + trimmedBoundary[signals[i].ID] = boundaryProbeState{last: decimated[len(decimated)-1], set: true} + } out[i] = decimated } return out, rates } + +type iqHeadStats struct { + length int + minMag float64 + maxMag float64 + meanMag float64 + lowMag int + maxStep float64 + maxStepIdx int + p95Step float64 + headTail float64 + headMinIdx int + stepSamples []float64 +} + +type boundaryProbeState struct { + last complex64 + set bool +} + +type headProbe struct { + zeroCount int + firstNonZeroIndex int + maxStep float64 + mags []float64 +} + +func probeHead(samples []complex64, n int, zeroThreshold float64) headProbe { + if n <= 0 || len(samples) == 0 { + return headProbe{firstNonZeroIndex: -1} + } + if len(samples) < n { + n = len(samples) + } + if zeroThreshold <= 0 { + zeroThreshold = 1e-6 + } + out := headProbe{firstNonZeroIndex: -1, mags: make([]float64, 0, n)} + for i := 0; i < n; i++ { + v := samples[i] + mag := math.Hypot(float64(real(v)), float64(imag(v))) + out.mags = append(out.mags, mag) + if mag <= zeroThreshold { + out.zeroCount++ + } else if out.firstNonZeroIndex < 0 { + out.firstNonZeroIndex = i + } + if i > 0 { + p := samples[i-1] + num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) + den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) + step := math.Abs(math.Atan2(num, den)) + if step > out.maxStep { + out.maxStep = step + } + } + } + return out +} + +func observeBoundarySample(coll *telemetry.Collector, metricPrefix string, tags map[string]string, prev complex64, curr complex64) { + prevMag := math.Hypot(float64(real(prev)), float64(imag(prev))) + currMag := math.Hypot(float64(real(curr)), float64(imag(curr))) + deltaMag := math.Abs(currMag - prevMag) + num := float64(real(prev))*float64(imag(curr)) - float64(imag(prev))*float64(real(curr)) + den := float64(real(prev))*float64(real(curr)) + float64(imag(prev))*float64(imag(curr)) + deltaPhase := math.Abs(math.Atan2(num, den)) + d2 
:= float64(real(curr-prev))*float64(real(curr-prev)) + float64(imag(curr-prev))*float64(imag(curr-prev)) + coll.Observe(metricPrefix+".delta_mag", deltaMag, tags) + coll.Observe(metricPrefix+".delta_phase", deltaPhase, tags) + coll.Observe(metricPrefix+".d2", d2, tags) + coll.Observe(metricPrefix+".discontinuity_score", deltaMag+deltaPhase, tags) +} + +func computeIQHeadStats(iq []complex64, headLen int) iqHeadStats { + stats := iqHeadStats{minMag: math.MaxFloat64, headMinIdx: -1, maxStepIdx: -1} + if len(iq) == 0 { + stats.minMag = 0 + return stats + } + n := len(iq) + if headLen > 0 && headLen < n { + n = headLen + } + stats.length = n + stats.stepSamples = make([]float64, 0, max(0, n-1)) + sumMag := 0.0 + headSum := 0.0 + tailSum := 0.0 + tailCount := 0 + for i := 0; i < n; i++ { + v := iq[i] + mag := math.Hypot(float64(real(v)), float64(imag(v))) + if mag < stats.minMag { + stats.minMag = mag + stats.headMinIdx = i + } + if mag > stats.maxMag { + stats.maxMag = mag + } + sumMag += mag + if mag < 0.05 { + stats.lowMag++ + } + if i < min(16, n) { + headSum += mag + } + if i >= max(0, n-16) { + tailSum += mag + tailCount++ + } + if i > 0 { + p := iq[i-1] + num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) + den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) + step := math.Abs(math.Atan2(num, den)) + if step > stats.maxStep { + stats.maxStep = step + stats.maxStepIdx = i - 1 + } + stats.stepSamples = append(stats.stepSamples, step) + } + } + stats.meanMag = sumMag / float64(n) + if len(stats.stepSamples) > 0 { + sorted := append([]float64(nil), stats.stepSamples...) 
+ sort.Float64s(sorted) + idx := int(float64(len(sorted)-1) * 0.95) + stats.p95Step = sorted[idx] + } else { + stats.p95Step = stats.maxStep + } + if headSum > 0 && tailCount > 0 { + headMean := headSum / float64(min(16, n)) + tailMean := tailSum / float64(tailCount) + if tailMean > 0 { + stats.headTail = headMean / tailMean + } + } + return stats +} + +func observeIQStats(coll *telemetry.Collector, stage string, iq []complex64, tags telemetry.Tags) { + if coll == nil || len(iq) == 0 { + return + } + stats := computeIQHeadStats(iq, len(iq)) + stageTags := telemetry.TagsWith(tags, "stage", stage) + coll.Observe("iq.magnitude.min", stats.minMag, stageTags) + coll.Observe("iq.magnitude.max", stats.maxMag, stageTags) + coll.Observe("iq.magnitude.mean", stats.meanMag, stageTags) + coll.Observe("iq.phase_step.max", stats.maxStep, stageTags) + coll.Observe("iq.phase_step.p95", stats.p95Step, stageTags) + coll.Observe("iq.low_magnitude.count", float64(stats.lowMag), stageTags) + coll.SetGauge("iq.length", float64(stats.length), stageTags) +} + +func logExtractorHeadComparison(signalID int64, path string, overlapLen int, raw []complex64, trimSamples int, out []complex64) { + rawStats := computeIQHeadStats(raw, 96) + trimmedStats := computeIQHeadStats(out, 96) + logging.Debug("boundary", "extract_head_compare", + "signal", signalID, + "path", path, + "raw_len", len(raw), + "trim", trimSamples, + "out_len", len(out), + "overlap_len", overlapLen, + "raw_min_mag", rawStats.minMag, + "raw_min_idx", rawStats.headMinIdx, + "raw_max_step", rawStats.maxStep, + "raw_max_step_idx", rawStats.maxStepIdx, + "raw_head_tail", rawStats.headTail, + "trimmed_min_mag", trimmedStats.minMag, + "trimmed_min_idx", trimmedStats.headMinIdx, + "trimmed_max_step", trimmedStats.maxStep, + "trimmed_max_step_idx", trimmedStats.maxStepIdx, + "trimmed_head_tail", trimmedStats.headTail, + ) + for _, off := range []int{2, 4, 8, 16} { + if len(out) <= off+8 { + continue + } + offStats := 
computeIQHeadStats(out[off:], 96) + logging.Debug("boundary", "extract_head_offset_compare", + "signal", signalID, + "path", path, + "offset", off, + "base_min_mag", trimmedStats.minMag, + "base_min_idx", trimmedStats.headMinIdx, + "base_max_step", trimmedStats.maxStep, + "base_max_step_idx", trimmedStats.maxStepIdx, + "offset_min_mag", offStats.minMag, + "offset_min_idx", offStats.headMinIdx, + "offset_max_step", offStats.maxStep, + "offset_max_step_idx", offStats.maxStepIdx, + "offset_head_tail", offStats.headTail, + ) + } +} diff --git a/cmd/sdrd/http_handlers.go b/cmd/sdrd/http_handlers.go index 14c0846..a633fde 100644 --- a/cmd/sdrd/http_handlers.go +++ b/cmd/sdrd/http_handlers.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "errors" "log" "net/http" "os" @@ -19,9 +20,10 @@ import ( "sdr-wideband-suite/internal/pipeline" "sdr-wideband-suite/internal/recorder" "sdr-wideband-suite/internal/runtime" + "sdr-wideband-suite/internal/telemetry" ) -func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot) { +func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot, telem *telemetry.Collector) { mux.HandleFunc("/api/config", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") switch r.Method { @@ -378,16 +380,196 @@ func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime w.Header().Set("Content-Type", "audio/wav") _, _ = w.Write(data) }) + mux.HandleFunc("/api/debug/telemetry/live", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") 
+ if telem == nil { + _ = json.NewEncoder(w).Encode(map[string]any{"enabled": false, "error": "telemetry unavailable"}) + return + } + _ = json.NewEncoder(w).Encode(telem.LiveSnapshot()) + }) + mux.HandleFunc("/api/debug/telemetry/history", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if telem == nil { + http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable) + return + } + query, err := telemetryQueryFromRequest(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + items, err := telem.QueryMetrics(query) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{"items": items, "count": len(items)}) + }) + mux.HandleFunc("/api/debug/telemetry/events", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if telem == nil { + http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable) + return + } + query, err := telemetryQueryFromRequest(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + items, err := telem.QueryEvents(query) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{"items": items, "count": len(items)}) + }) + mux.HandleFunc("/api/debug/telemetry/config", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if telem == nil { + http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable) + return + } + switch r.Method { + case http.MethodGet: + _ = json.NewEncoder(w).Encode(map[string]any{ + "collector": telem.Config(), + "config": cfgManager.Snapshot().Debug.Telemetry, + }) + case http.MethodPost: + var update struct { + Enabled *bool `json:"enabled"` + HeavyEnabled *bool `json:"heavy_enabled"` + HeavySampleEvery *int 
`json:"heavy_sample_every"` + MetricSampleEvery *int `json:"metric_sample_every"` + MetricHistoryMax *int `json:"metric_history_max"` + EventHistoryMax *int `json:"event_history_max"` + RetentionSeconds *int `json:"retention_seconds"` + PersistEnabled *bool `json:"persist_enabled"` + PersistDir *string `json:"persist_dir"` + RotateMB *int `json:"rotate_mb"` + KeepFiles *int `json:"keep_files"` + } + if err := json.NewDecoder(r.Body).Decode(&update); err != nil { + http.Error(w, "invalid json", http.StatusBadRequest) + return + } + next := cfgManager.Snapshot() + cur := next.Debug.Telemetry + if update.Enabled != nil { + cur.Enabled = *update.Enabled + } + if update.HeavyEnabled != nil { + cur.HeavyEnabled = *update.HeavyEnabled + } + if update.HeavySampleEvery != nil { + cur.HeavySampleEvery = *update.HeavySampleEvery + } + if update.MetricSampleEvery != nil { + cur.MetricSampleEvery = *update.MetricSampleEvery + } + if update.MetricHistoryMax != nil { + cur.MetricHistoryMax = *update.MetricHistoryMax + } + if update.EventHistoryMax != nil { + cur.EventHistoryMax = *update.EventHistoryMax + } + if update.RetentionSeconds != nil { + cur.RetentionSeconds = *update.RetentionSeconds + } + if update.PersistEnabled != nil { + cur.PersistEnabled = *update.PersistEnabled + } + if update.PersistDir != nil && *update.PersistDir != "" { + cur.PersistDir = *update.PersistDir + } + if update.RotateMB != nil { + cur.RotateMB = *update.RotateMB + } + if update.KeepFiles != nil { + cur.KeepFiles = *update.KeepFiles + } + next.Debug.Telemetry = cur + cfgManager.Replace(next) + if err := config.Save(cfgPath, next); err != nil { + log.Printf("telemetry config save failed: %v", err) + } + err := telem.Configure(telemetry.Config{ + Enabled: cur.Enabled, + HeavyEnabled: cur.HeavyEnabled, + HeavySampleEvery: cur.HeavySampleEvery, + MetricSampleEvery: cur.MetricSampleEvery, + MetricHistoryMax: cur.MetricHistoryMax, + EventHistoryMax: cur.EventHistoryMax, + Retention: 
time.Duration(cur.RetentionSeconds) * time.Second, + PersistEnabled: cur.PersistEnabled, + PersistDir: cur.PersistDir, + RotateMB: cur.RotateMB, + KeepFiles: cur.KeepFiles, + }) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + _ = json.NewEncoder(w).Encode(map[string]any{"ok": true, "collector": telem.Config(), "config": cur}) + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } + }) } -func newHTTPServer(addr string, webRoot string, h *hub, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot) *http.Server { +func newHTTPServer(addr string, webRoot string, h *hub, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot, telem *telemetry.Collector) *http.Server { mux := http.NewServeMux() registerWSHandlers(mux, h, recMgr) - registerAPIHandlers(mux, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap) + registerAPIHandlers(mux, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap, telem) mux.Handle("/", http.FileServer(http.Dir(webRoot))) return &http.Server{Addr: addr, Handler: mux} } +func telemetryQueryFromRequest(r *http.Request) (telemetry.Query, error) { + q := r.URL.Query() + var out telemetry.Query + var err error + if out.From, err = telemetry.ParseTimeQuery(q.Get("since")); err != nil { + return out, errors.New("invalid since") + } + if out.To, err = telemetry.ParseTimeQuery(q.Get("until")); err != nil { + return out, errors.New("invalid until") + } + if v := q.Get("limit"); v != "" { + if parsed, parseErr := strconv.Atoi(v); parseErr == nil { + out.Limit = parsed + } + } + out.Name = q.Get("name") + 
out.NamePrefix = q.Get("prefix") + out.Level = q.Get("level") + out.IncludePersisted = true + if v := q.Get("include_persisted"); v != "" { + if b, parseErr := strconv.ParseBool(v); parseErr == nil { + out.IncludePersisted = b + } + } + tags := telemetry.Tags{} + for key, vals := range q { + if len(vals) == 0 { + continue + } + if strings.HasPrefix(key, "tag_") { + tags[strings.TrimPrefix(key, "tag_")] = vals[0] + } + } + for _, key := range []string{"signal_id", "session_id", "stage", "trace_id", "component"} { + if v := q.Get(key); v != "" { + tags[key] = v + } + } + if len(tags) > 0 { + out.Tags = tags + } + return out, nil +} + func shutdownServer(server *http.Server) { ctxTimeout, cancelTimeout := context.WithTimeout(context.Background(), 5*time.Second) defer cancelTimeout() diff --git a/cmd/sdrd/legacy_extract.go b/cmd/sdrd/legacy_extract.go new file mode 100644 index 0000000..52590cc --- /dev/null +++ b/cmd/sdrd/legacy_extract.go @@ -0,0 +1,6 @@ +package main + +// NOTE: Legacy extractor logic still lives in helpers.go for now. +// This file is intentionally reserved for the later explicit move once the +// production-path rewrite is far enough along that the split can be done in one +// safe pass instead of a risky mechanical half-step. 
diff --git a/cmd/sdrd/main.go b/cmd/sdrd/main.go index 77a9814..361775d 100644 --- a/cmd/sdrd/main.go +++ b/cmd/sdrd/main.go @@ -23,6 +23,7 @@ import ( "sdr-wideband-suite/internal/runtime" "sdr-wideband-suite/internal/sdr" "sdr-wideband-suite/internal/sdrplay" + "sdr-wideband-suite/internal/telemetry" ) func main() { @@ -51,6 +52,25 @@ func main() { cfgManager := runtime.New(cfg) gpuState := &gpuStatus{Available: gpufft.Available()} + telemetryCfg := telemetry.Config{ + Enabled: cfg.Debug.Telemetry.Enabled, + HeavyEnabled: cfg.Debug.Telemetry.HeavyEnabled, + HeavySampleEvery: cfg.Debug.Telemetry.HeavySampleEvery, + MetricSampleEvery: cfg.Debug.Telemetry.MetricSampleEvery, + MetricHistoryMax: cfg.Debug.Telemetry.MetricHistoryMax, + EventHistoryMax: cfg.Debug.Telemetry.EventHistoryMax, + Retention: time.Duration(cfg.Debug.Telemetry.RetentionSeconds) * time.Second, + PersistEnabled: cfg.Debug.Telemetry.PersistEnabled, + PersistDir: cfg.Debug.Telemetry.PersistDir, + RotateMB: cfg.Debug.Telemetry.RotateMB, + KeepFiles: cfg.Debug.Telemetry.KeepFiles, + } + telemetryCollector, err := telemetry.New(telemetryCfg) + if err != nil { + log.Fatalf("telemetry init failed: %v", err) + } + defer telemetryCollector.Close() + telemetryCollector.SetStatus("build", "sdrd") newSource := func(cfg config.Config) (sdr.Source, error) { if mockFlag { @@ -74,7 +94,7 @@ func main() { if err != nil { log.Fatalf("sdrplay init failed: %v (try --mock or build with -tags sdrplay)", err) } - srcMgr := newSourceManager(src, newSource) + srcMgr := newSourceManagerWithTelemetry(src, newSource, telemetryCollector) if err := srcMgr.Start(); err != nil { log.Fatalf("source start: %v", err) } @@ -118,7 +138,7 @@ func main() { DeemphasisUs: cfg.Recorder.DeemphasisUs, ExtractionTaps: cfg.Recorder.ExtractionTaps, ExtractionBwMult: cfg.Recorder.ExtractionBwMult, - }, cfg.CenterHz, decodeMap) + }, cfg.CenterHz, decodeMap, telemetryCollector) defer recMgr.Close() sigSnap := &signalSnapshot{} @@ -126,9 +146,9 
@@ func main() { defer extractMgr.reset() phaseSnap := &phaseSnapshot{} - go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, eventMu, dspUpdates, gpuState, recMgr, sigSnap, extractMgr, phaseSnap) + go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, eventMu, dspUpdates, gpuState, recMgr, sigSnap, extractMgr, phaseSnap, telemetryCollector) - server := newHTTPServer(cfg.WebAddr, cfg.WebRoot, h, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap) + server := newHTTPServer(cfg.WebAddr, cfg.WebRoot, h, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap, telemetryCollector) go func() { log.Printf("web listening on %s", cfg.WebAddr) if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { diff --git a/cmd/sdrd/pipeline_runtime.go b/cmd/sdrd/pipeline_runtime.go index d2cec7f..5b37088 100644 --- a/cmd/sdrd/pipeline_runtime.go +++ b/cmd/sdrd/pipeline_runtime.go @@ -3,6 +3,8 @@ package main import ( "fmt" "math" + "os" + "strconv" "strings" "sync" "sync/atomic" @@ -19,6 +21,7 @@ import ( "sdr-wideband-suite/internal/pipeline" "sdr-wideband-suite/internal/rds" "sdr-wideband-suite/internal/recorder" + "sdr-wideband-suite/internal/telemetry" ) type rdsState struct { @@ -29,6 +32,18 @@ type rdsState struct { mu sync.Mutex } +var forceFixedStreamReadSamples = func() int { + raw := strings.TrimSpace(os.Getenv("SDR_FORCE_FIXED_STREAM_READ_SAMPLES")) + if raw == "" { + return 0 + } + v, err := strconv.Atoi(raw) + if err != nil || v <= 0 { + return 0 + } + return v +}() + type dspRuntime struct { cfg config.Config det *detector.Detector @@ -52,10 +67,13 @@ type dspRuntime struct { arbiter *pipeline.Arbiter arbitration pipeline.ArbitrationState gotSamples bool + telemetry *telemetry.Collector + lastAllIQTail []complex64 } type spectrumArtifacts struct { allIQ []complex64 + streamDropped bool surveillanceIQ []complex64 detailIQ []complex64 surveillanceSpectrum []float64 @@ -94,7 +112,7 
@@ type surveillancePlan struct { const derivedIDBlock = int64(1_000_000_000) -func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64, gpuState *gpuStatus) *dspRuntime { +func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64, gpuState *gpuStatus, coll *telemetry.Collector) *dspRuntime { detailFFT := cfg.Refinement.DetailFFTSize if detailFFT <= 0 { detailFFT = cfg.FFTSize @@ -119,6 +137,7 @@ func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64, streamPhaseState: map[int64]*streamExtractState{}, streamOverlap: &streamIQOverlap{}, arbiter: pipeline.NewArbiter(), + telemetry: coll, } if rt.useGPU && gpuState != nil { snap := gpuState.snapshot() @@ -216,6 +235,15 @@ func (rt *dspRuntime) applyUpdate(upd dspUpdate, srcMgr *sourceManager, rec *rec gpuState.set(false, nil) } } + if rt.telemetry != nil { + rt.telemetry.Event("dsp_config_update", "info", "dsp runtime configuration updated", nil, map[string]any{ + "fft_size": rt.cfg.FFTSize, + "sample_rate": rt.cfg.SampleRate, + "use_gpu_fft": rt.cfg.UseGPUFFT, + "detail_fft": rt.detailFFT, + "surv_strategy": rt.cfg.Surveillance.Strategy, + }) + } } func (rt *dspRuntime) spectrumFromIQ(iq []complex64, gpuState *gpuStatus) []float64 { @@ -334,26 +362,112 @@ func (rt *dspRuntime) decimateSurveillanceIQ(iq []complex64, factor int) []compl return dsp.Decimate(filtered, factor) } +func meanMagComplex(samples []complex64) float64 { + if len(samples) == 0 { + return 0 + } + var sum float64 + for _, v := range samples { + sum += math.Hypot(float64(real(v)), float64(imag(v))) + } + return sum / float64(len(samples)) +} + +func phaseStepAbs(a, b complex64) float64 { + num := float64(real(a))*float64(imag(b)) - float64(imag(a))*float64(real(b)) + den := float64(real(a))*float64(real(b)) + float64(imag(a))*float64(imag(b)) + return math.Abs(math.Atan2(num, den)) +} + +func boundaryMetrics(prevTail []complex64, curr []complex64, window int) (float64, float64, 
float64, int) { + if len(curr) == 0 { + return 0, 0, 0, 0 + } + if window <= 0 { + window = 16 + } + headN := window + if len(curr) < headN { + headN = len(curr) + } + headMean := meanMagComplex(curr[:headN]) + if len(prevTail) == 0 { + return headMean, 0, 0, headN + } + tailN := window + if len(prevTail) < tailN { + tailN = len(prevTail) + } + tailMean := meanMagComplex(prevTail[len(prevTail)-tailN:]) + deltaMag := math.Abs(headMean - tailMean) + phaseJump := phaseStepAbs(prevTail[len(prevTail)-1], curr[0]) + score := deltaMag + phaseJump + return headMean, tailMean, score, headN +} + +func tailWindowComplex(src []complex64, n int) []complex64 { + if n <= 0 || len(src) == 0 { + return nil + } + if len(src) <= n { + out := make([]complex64, len(src)) + copy(out, src) + return out + } + out := make([]complex64, n) + copy(out, src[len(src)-n:]) + return out +} + func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manager, dcBlocker *dsp.DCBlocker, gpuState *gpuStatus) (*spectrumArtifacts, error) { + start := time.Now() required := rt.cfg.FFTSize if rt.detailFFT > required { required = rt.detailFFT } available := required st := srcMgr.Stats() - if st.BufferSamples > required { + if rt.telemetry != nil { + rt.telemetry.SetGauge("source.buffer_samples", float64(st.BufferSamples), nil) + rt.telemetry.SetGauge("source.last_sample_ago_ms", float64(st.LastSampleAgoMs), nil) + rt.telemetry.SetGauge("source.dropped", float64(st.Dropped), nil) + rt.telemetry.SetGauge("source.resets", float64(st.Resets), nil) + } + if forceFixedStreamReadSamples > 0 { + available = forceFixedStreamReadSamples + if available < required { + available = required + } + available = (available / required) * required + if available < required { + available = required + } + logging.Warn("boundary", "fixed_stream_read_samples", "configured", forceFixedStreamReadSamples, "effective", available, "required", required) + } else if st.BufferSamples > required { available = 
(st.BufferSamples / required) * required if available < required { available = required } } logging.Debug("capture", "read_iq", "required", required, "available", available, "buf", st.BufferSamples, "reset", st.Resets, "drop", st.Dropped) + readStart := time.Now() allIQ, err := srcMgr.ReadIQ(available) if err != nil { + if rt.telemetry != nil { + rt.telemetry.IncCounter("capture.read.error", 1, nil) + } return nil, err } + if rt.telemetry != nil { + rt.telemetry.Observe("capture.read.duration_ms", float64(time.Since(readStart).Microseconds())/1000.0, nil) + rt.telemetry.Observe("capture.read.samples", float64(len(allIQ)), nil) + } if rec != nil { + ingestStart := time.Now() rec.Ingest(time.Now(), allIQ) + if rt.telemetry != nil { + rt.telemetry.Observe("capture.ingest.duration_ms", float64(time.Since(ingestStart).Microseconds())/1000.0, nil) + } } // Cap allIQ for downstream extraction to prevent buffer bloat. // Without this cap, buffer accumulation during processing stalls causes @@ -366,8 +480,17 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag maxStreamSamples = required } maxStreamSamples = (maxStreamSamples / required) * required + streamDropped := false if len(allIQ) > maxStreamSamples { allIQ = allIQ[len(allIQ)-maxStreamSamples:] + streamDropped = true + if rt.telemetry != nil { + rt.telemetry.IncCounter("capture.stream_drop.count", 1, nil) + rt.telemetry.Event("iq_dropped", "warn", "capture IQ dropped before extraction", nil, map[string]any{ + "max_stream_samples": maxStreamSamples, + "required": required, + }) + } } logging.Debug("capture", "iq_len", "len", len(allIQ), "surv_fft", rt.cfg.FFTSize, "detail_fft", rt.detailFFT) survIQ := allIQ @@ -380,14 +503,60 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag } if rt.dcEnabled { dcBlocker.Apply(allIQ) + if rt.telemetry != nil { + rt.telemetry.IncCounter("dsp.dc_block.apply", 1, nil) + } } if rt.iqEnabled { + // IQBalance must NOT modify 
allIQ in-place: allIQ goes to the extraction + // pipeline and any in-place modification creates a phase/amplitude + // discontinuity at the survIQ boundary (len-FFTSize) that the polyphase + // extractor then sees as paired click artifacts in the FM discriminator. + detailIsSurv := sameIQBuffer(detailIQ, survIQ) + survIQ = append([]complex64(nil), survIQ...) dsp.IQBalance(survIQ) - if !sameIQBuffer(detailIQ, survIQ) { + if detailIsSurv { + detailIQ = survIQ + } else { detailIQ = append([]complex64(nil), detailIQ...) dsp.IQBalance(detailIQ) } } + if rt.telemetry != nil { + rt.telemetry.SetGauge("iq.stage.all.length", float64(len(allIQ)), nil) + rt.telemetry.SetGauge("iq.stage.surveillance.length", float64(len(survIQ)), nil) + rt.telemetry.SetGauge("iq.stage.detail.length", float64(len(detailIQ)), nil) + rt.telemetry.Observe("capture.total.duration_ms", float64(time.Since(start).Microseconds())/1000.0, nil) + + headMean, tailMean, boundaryScore, boundaryWindow := boundaryMetrics(rt.lastAllIQTail, allIQ, 32) + rt.telemetry.SetGauge("iq.boundary.all.head_mean_mag", headMean, nil) + rt.telemetry.SetGauge("iq.boundary.all.prev_tail_mean_mag", tailMean, nil) + rt.telemetry.Observe("iq.boundary.all.discontinuity_score", boundaryScore, nil) + if len(rt.lastAllIQTail) > 0 && len(allIQ) > 0 { + deltaMag := math.Abs(math.Hypot(float64(real(allIQ[0])), float64(imag(allIQ[0]))) - math.Hypot(float64(real(rt.lastAllIQTail[len(rt.lastAllIQTail)-1])), float64(imag(rt.lastAllIQTail[len(rt.lastAllIQTail)-1])))) + phaseJump := phaseStepAbs(rt.lastAllIQTail[len(rt.lastAllIQTail)-1], allIQ[0]) + rt.telemetry.Observe("iq.boundary.all.delta_mag", deltaMag, nil) + rt.telemetry.Observe("iq.boundary.all.delta_phase", phaseJump, nil) + if rt.telemetry.ShouldSampleHeavy() { + rt.telemetry.Event("alliq_boundary", "info", "allIQ boundary snapshot", nil, map[string]any{ + "window": boundaryWindow, + "head_mean_mag": headMean, + "prev_tail_mean_mag": tailMean, + "delta_mag": deltaMag, + 
"delta_phase": phaseJump, + "discontinuity_score": boundaryScore, + "alliq_len": len(allIQ), + "stream_dropped": streamDropped, + }) + } + } + if rt.telemetry.ShouldSampleHeavy() { + observeIQStats(rt.telemetry, "capture_all", allIQ, nil) + observeIQStats(rt.telemetry, "capture_surveillance", survIQ, nil) + observeIQStats(rt.telemetry, "capture_detail", detailIQ, nil) + } + } + rt.lastAllIQTail = tailWindowComplex(allIQ, 32) survSpectrum := rt.spectrumFromIQ(survIQ, gpuState) sanitizeSpectrum(survSpectrum) detailSpectrum := survSpectrum @@ -430,8 +599,13 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag } now := time.Now() finished, detected := rt.det.Process(now, survSpectrum, rt.cfg.CenterHz) + if rt.telemetry != nil { + rt.telemetry.SetGauge("signals.detected.count", float64(len(detected)), nil) + rt.telemetry.SetGauge("signals.finished.count", float64(len(finished)), nil) + } return &spectrumArtifacts{ allIQ: allIQ, + streamDropped: streamDropped, surveillanceIQ: survIQ, detailIQ: detailIQ, surveillanceSpectrum: survSpectrum, diff --git a/cmd/sdrd/pipeline_runtime_test.go b/cmd/sdrd/pipeline_runtime_test.go index 99e2654..54d4ac9 100644 --- a/cmd/sdrd/pipeline_runtime_test.go +++ b/cmd/sdrd/pipeline_runtime_test.go @@ -13,7 +13,7 @@ func TestNewDSPRuntime(t *testing.T) { cfg := config.Default() det := detector.New(cfg.Detector, cfg.SampleRate, cfg.FFTSize) window := fftutil.Hann(cfg.FFTSize) - rt := newDSPRuntime(cfg, det, window, &gpuStatus{}) + rt := newDSPRuntime(cfg, det, window, &gpuStatus{}, nil) if rt == nil { t.Fatalf("runtime is nil") } @@ -47,7 +47,7 @@ func TestSurveillanceLevelsRespectStrategy(t *testing.T) { cfg := config.Default() det := detector.New(cfg.Detector, cfg.SampleRate, cfg.FFTSize) window := fftutil.Hann(cfg.FFTSize) - rt := newDSPRuntime(cfg, det, window, &gpuStatus{}) + rt := newDSPRuntime(cfg, det, window, &gpuStatus{}, nil) policy := pipeline.Policy{SurveillanceStrategy: "single-resolution"} plan := 
rt.buildSurveillancePlan(policy) if len(plan.Levels) != 1 { diff --git a/cmd/sdrd/source_manager.go b/cmd/sdrd/source_manager.go index 606e6e8..4f58a54 100644 --- a/cmd/sdrd/source_manager.go +++ b/cmd/sdrd/source_manager.go @@ -1,11 +1,16 @@ package main import ( + "fmt" + "time" + "sdr-wideband-suite/internal/config" "sdr-wideband-suite/internal/sdr" + "sdr-wideband-suite/internal/telemetry" ) func (m *sourceManager) Restart(cfg config.Config) error { + start := time.Now() m.mu.Lock() defer m.mu.Unlock() old := m.src @@ -14,15 +19,27 @@ func (m *sourceManager) Restart(cfg config.Config) error { if err != nil { _ = old.Start() m.src = old + if m.telemetry != nil { + m.telemetry.IncCounter("source.restart.error", 1, nil) + m.telemetry.Event("source_restart_failed", "warn", "source restart failed", nil, map[string]any{"error": err.Error()}) + } return err } if err := next.Start(); err != nil { _ = next.Stop() _ = old.Start() m.src = old + if m.telemetry != nil { + m.telemetry.IncCounter("source.restart.error", 1, nil) + m.telemetry.Event("source_restart_failed", "warn", "source restart failed", nil, map[string]any{"error": err.Error()}) + } return err } m.src = next + if m.telemetry != nil { + m.telemetry.IncCounter("source.restart.count", 1, nil) + m.telemetry.Observe("source.restart.duration_ms", float64(time.Since(start).Milliseconds()), nil) + } return nil } @@ -44,7 +61,11 @@ func (m *sourceManager) Flush() { } func newSourceManager(src sdr.Source, newSource func(cfg config.Config) (sdr.Source, error)) *sourceManager { - return &sourceManager{src: src, newSource: newSource} + return newSourceManagerWithTelemetry(src, newSource, nil) +} + +func newSourceManagerWithTelemetry(src sdr.Source, newSource func(cfg config.Config) (sdr.Source, error), coll *telemetry.Collector) *sourceManager { + return &sourceManager{src: src, newSource: newSource, telemetry: coll} } func (m *sourceManager) Start() error { @@ -60,9 +81,27 @@ func (m *sourceManager) Stop() error { } 
func (m *sourceManager) ReadIQ(n int) ([]complex64, error) { + waitStart := time.Now() m.mu.RLock() + wait := time.Since(waitStart) defer m.mu.RUnlock() - return m.src.ReadIQ(n) + if m.telemetry != nil { + m.telemetry.Observe("source.lock_wait_ms", float64(wait.Microseconds())/1000.0, telemetry.TagsFromPairs("lock", "read")) + if wait > 2*time.Millisecond { + m.telemetry.IncCounter("source.lock_contention.count", 1, telemetry.TagsFromPairs("lock", "read")) + } + } + readStart := time.Now() + out, err := m.src.ReadIQ(n) + if m.telemetry != nil { + tags := telemetry.TagsFromPairs("requested", fmt.Sprintf("%d", n)) + m.telemetry.Observe("source.read.duration_ms", float64(time.Since(readStart).Microseconds())/1000.0, tags) + m.telemetry.SetGauge("source.read.samples", float64(len(out)), nil) + if err != nil { + m.telemetry.IncCounter("source.read.error", 1, nil) + } + } + return out, err } func (m *sourceManager) ApplyConfig(cfg config.Config) error { diff --git a/cmd/sdrd/streaming_compare.go b/cmd/sdrd/streaming_compare.go new file mode 100644 index 0000000..dda334b --- /dev/null +++ b/cmd/sdrd/streaming_compare.go @@ -0,0 +1,45 @@ +package main + +import ( + "fmt" + + "sdr-wideband-suite/internal/demod/gpudemod" + "sdr-wideband-suite/internal/telemetry" +) + +func observeStreamingComparison(coll *telemetry.Collector, oracle gpudemod.StreamingExtractResult, prod gpudemod.StreamingExtractResult) { + if coll == nil { + return + } + metrics, stats := gpudemod.CompareOracleAndGPUHostOracle(oracle, prod) + tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", oracle.SignalID), "path", "streaming_compare") + coll.SetGauge("streaming.compare.n_out", float64(metrics.NOut), tags) + coll.SetGauge("streaming.compare.phase_count", float64(metrics.PhaseCount), tags) + coll.SetGauge("streaming.compare.history_len", float64(metrics.HistoryLen), tags) + coll.Observe("streaming.compare.ref_max_abs_err", metrics.RefMaxAbsErr, tags) + 
coll.Observe("streaming.compare.ref_rms_err", metrics.RefRMSErr, tags) + coll.SetGauge("streaming.compare.compare_count", float64(stats.Count), tags) + coll.SetGauge("streaming.compare.oracle_rate", float64(oracle.Rate), tags) + coll.SetGauge("streaming.compare.production_rate", float64(prod.Rate), tags) + coll.SetGauge("streaming.compare.oracle_output_len", float64(len(oracle.IQ)), tags) + coll.SetGauge("streaming.compare.production_output_len", float64(len(prod.IQ)), tags) + if len(oracle.IQ) > 0 { + oracleStats := computeIQHeadStats(oracle.IQ, 64) + coll.Observe("streaming.compare.oracle_head_mean_mag", oracleStats.meanMag, tags) + coll.Observe("streaming.compare.oracle_head_max_step", oracleStats.maxStep, tags) + } + if len(prod.IQ) > 0 { + prodStats := computeIQHeadStats(prod.IQ, 64) + coll.Observe("streaming.compare.production_head_mean_mag", prodStats.meanMag, tags) + coll.Observe("streaming.compare.production_head_max_step", prodStats.maxStep, tags) + } + coll.Event("streaming_compare_snapshot", "info", "streaming comparison snapshot", tags, map[string]any{ + "oracle_rate": oracle.Rate, + "production_rate": prod.Rate, + "oracle_output_len": len(oracle.IQ), + "production_output_len": len(prod.IQ), + "ref_max_abs_err": metrics.RefMaxAbsErr, + "ref_rms_err": metrics.RefRMSErr, + "compare_count": stats.Count, + }) +} diff --git a/cmd/sdrd/streaming_monitoring.go b/cmd/sdrd/streaming_monitoring.go new file mode 100644 index 0000000..f334a15 --- /dev/null +++ b/cmd/sdrd/streaming_monitoring.go @@ -0,0 +1,27 @@ +package main + +import ( + "fmt" + + "sdr-wideband-suite/internal/demod/gpudemod" + "sdr-wideband-suite/internal/telemetry" +) + +func observeStreamingResult(coll *telemetry.Collector, prefix string, res gpudemod.StreamingExtractResult) { + if coll == nil { + return + } + tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", res.SignalID), "path", prefix) + coll.SetGauge(prefix+".n_out", float64(res.NOut), tags) + 
coll.SetGauge(prefix+".phase_count", float64(res.PhaseCount), tags) + coll.SetGauge(prefix+".history_len", float64(res.HistoryLen), tags) + coll.SetGauge(prefix+".rate", float64(res.Rate), tags) + coll.SetGauge(prefix+".output_len", float64(len(res.IQ)), tags) + if len(res.IQ) > 0 { + stats := computeIQHeadStats(res.IQ, 64) + coll.Observe(prefix+".head_mean_mag", stats.meanMag, tags) + coll.Observe(prefix+".head_max_step", stats.maxStep, tags) + coll.Observe(prefix+".head_p95_step", stats.p95Step, tags) + coll.SetGauge(prefix+".head_low_magnitude_count", float64(stats.lowMag), tags) + } +} diff --git a/cmd/sdrd/streaming_production.go b/cmd/sdrd/streaming_production.go new file mode 100644 index 0000000..6198993 --- /dev/null +++ b/cmd/sdrd/streaming_production.go @@ -0,0 +1,50 @@ +package main + +import ( + "fmt" + + "sdr-wideband-suite/internal/demod/gpudemod" + "sdr-wideband-suite/internal/detector" + "sdr-wideband-suite/internal/telemetry" +) + +func extractForStreamingProduction( + extractMgr *extractionManager, + allIQ []complex64, + sampleRate int, + centerHz float64, + signals []detector.Signal, + aqCfg extractionConfig, + coll *telemetry.Collector, +) ([][]complex64, []int, error) { + out := make([][]complex64, len(signals)) + rates := make([]int, len(signals)) + jobs, err := buildStreamingJobs(sampleRate, centerHz, signals, aqCfg) + if err != nil { + return nil, nil, err + } + runner := extractMgr.get(len(allIQ), sampleRate) + if runner == nil { + return nil, nil, fmt.Errorf("streaming production path unavailable: no batch runner") + } + results, err := runner.StreamingExtractGPU(allIQ, jobs) + if err != nil { + return nil, nil, err + } + var oracleResults []gpudemod.StreamingExtractResult + if useStreamingOraclePath { + if streamingOracleRunner == nil || streamingOracleRunner.SampleRate != sampleRate { + streamingOracleRunner = gpudemod.NewCPUOracleRunner(sampleRate) + } + oracleResults, _ = streamingOracleRunner.StreamingExtract(allIQ, jobs) + } + for 
i, res := range results { + out[i] = res.IQ + rates[i] = res.Rate + observeStreamingResult(coll, "streaming.production", res) + if i < len(oracleResults) { + observeStreamingComparison(coll, oracleResults[i], res) + } + } + return out, rates, nil +} diff --git a/cmd/sdrd/streaming_refactor.go b/cmd/sdrd/streaming_refactor.go new file mode 100644 index 0000000..9ad2260 --- /dev/null +++ b/cmd/sdrd/streaming_refactor.go @@ -0,0 +1,137 @@ +package main + +import ( + "math" + + "sdr-wideband-suite/internal/demod/gpudemod" + "sdr-wideband-suite/internal/detector" + "sdr-wideband-suite/internal/telemetry" +) + +const useStreamingOraclePath = false // temporarily disable oracle during bring-up to isolate production-path runtime behavior +const useStreamingProductionPath = true // route top-level extraction through the new production path during bring-up/validation + +var streamingOracleRunner *gpudemod.CPUOracleRunner + +func buildStreamingJobs(sampleRate int, centerHz float64, signals []detector.Signal, aqCfg extractionConfig) ([]gpudemod.StreamingExtractJob, error) { + jobs := make([]gpudemod.StreamingExtractJob, len(signals)) + bwMult := aqCfg.bwMult + if bwMult <= 0 { + bwMult = 1.0 + } + firTaps := aqCfg.firTaps + if firTaps <= 0 { + firTaps = 101 + } + for i, sig := range signals { + bw := sig.BWHz * bwMult + sigMHz := sig.CenterHz / 1e6 + isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || + (sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO")) + var outRate int + if isWFM { + outRate = wfmStreamOutRate + if bw < wfmStreamMinBW { + bw = wfmStreamMinBW + } + } else { + // Non-WFM target: must be an exact integer divisor of sampleRate. + // The old hardcoded 200000 fails for common SDR rates (e.g. 4096000/200000=20.48). + // Find the nearest valid rate >= 128000 (enough for NFM/AM/SSB). 
+ outRate = nearestExactDecimationRate(sampleRate, 200000, 128000) + if bw < 20000 { + bw = 20000 + } + } + if _, err := gpudemod.ExactIntegerDecimation(sampleRate, outRate); err != nil { + return nil, err + } + offset := sig.CenterHz - centerHz + jobs[i] = gpudemod.StreamingExtractJob{ + SignalID: sig.ID, + OffsetHz: offset, + Bandwidth: bw, + OutRate: outRate, + NumTaps: firTaps, + ConfigHash: gpudemod.StreamingConfigHash(sig.ID, offset, bw, outRate, firTaps, sampleRate), + } + } + return jobs, nil +} + +func resetStreamingOracleRunner() { + if streamingOracleRunner != nil { + streamingOracleRunner.ResetAllStates() + } +} + +func extractForStreamingOracle( + allIQ []complex64, + sampleRate int, + centerHz float64, + signals []detector.Signal, + aqCfg extractionConfig, + coll *telemetry.Collector, +) ([][]complex64, []int, error) { + out := make([][]complex64, len(signals)) + rates := make([]int, len(signals)) + jobs, err := buildStreamingJobs(sampleRate, centerHz, signals, aqCfg) + if err != nil { + return nil, nil, err + } + if streamingOracleRunner == nil || streamingOracleRunner.SampleRate != sampleRate { + streamingOracleRunner = gpudemod.NewCPUOracleRunner(sampleRate) + } + results, err := streamingOracleRunner.StreamingExtract(allIQ, jobs) + if err != nil { + return nil, nil, err + } + for i, res := range results { + out[i] = res.IQ + rates[i] = res.Rate + observeStreamingResult(coll, "streaming.oracle", res) + } + return out, rates, nil +} + +func phaseIncForOffset(sampleRate int, offsetHz float64) float64 { + return -2.0 * math.Pi * offsetHz / float64(sampleRate) +} + +// nearestExactDecimationRate finds the output rate closest to targetRate +// (but not below minRate) that is an exact integer divisor of sampleRate. +// This avoids the ExactIntegerDecimation check failing for rates like +// 4096000/200000=20.48 which silently killed the entire streaming batch. 
+func nearestExactDecimationRate(sampleRate int, targetRate int, minRate int) int { + if sampleRate <= 0 || targetRate <= 0 { + return targetRate + } + if sampleRate%targetRate == 0 { + return targetRate // already exact + } + // Try decimation factors near the target + targetDecim := sampleRate / targetRate // floor + bestRate := 0 + bestDist := sampleRate // impossibly large + for d := max(1, targetDecim-2); d <= targetDecim+2; d++ { + rate := sampleRate / d + if rate < minRate { + continue + } + if sampleRate%rate != 0 { + continue // not exact (shouldn't happen since rate = sampleRate/d, but guard) + } + dist := targetRate - rate + if dist < 0 { + dist = -dist + } + if dist < bestDist { + bestDist = dist + bestRate = rate + } + } + if bestRate > 0 { + return bestRate + } + return targetRate // fallback — will fail ExactIntegerDecimation and surface the error +} diff --git a/cmd/sdrd/types.go b/cmd/sdrd/types.go index c96e5c6..0e36748 100644 --- a/cmd/sdrd/types.go +++ b/cmd/sdrd/types.go @@ -11,6 +11,7 @@ import ( "sdr-wideband-suite/internal/detector" "sdr-wideband-suite/internal/pipeline" "sdr-wideband-suite/internal/sdr" + "sdr-wideband-suite/internal/telemetry" ) type SpectrumDebug struct { @@ -110,6 +111,7 @@ type sourceManager struct { mu sync.RWMutex src sdr.Source newSource func(cfg config.Config) (sdr.Source, error) + telemetry *telemetry.Collector } type extractionManager struct { diff --git a/config.autosave.yaml b/config.autosave.yaml new file mode 100644 index 0000000..03dbd1c --- /dev/null +++ b/config.autosave.yaml @@ -0,0 +1,343 @@ +bands: + - name: uk-fm-broadcast + start_hz: 8.75e+07 + end_hz: 1.08e+08 +center_hz: 1.02e+08 +sample_rate: 4096000 +fft_size: 512 +gain_db: 32 +tuner_bw_khz: 5000 +use_gpu_fft: true +classifier_mode: combined +agc: true +dc_block: true +iq_balance: true +pipeline: + mode: wideband-balanced + profile: wideband-balanced + goals: + intent: broadcast-monitoring + monitor_start_hz: 8.8e+07 + monitor_end_hz: 1.08e+08 + 
monitor_span_hz: 2e+07 + monitor_windows: + - label: "" + zone: focus + start_hz: 8.75e+07 + end_hz: 1.08e+08 + center_hz: 0 + span_hz: 0 + priority: 1.25 + auto_record: false + auto_decode: false + - label: "" + zone: decode + start_hz: 8.75e+07 + end_hz: 1.08e+08 + center_hz: 0 + span_hz: 0 + priority: 1.35 + auto_record: false + auto_decode: false + signal_priorities: + - wfm + - rds + - broadcast + auto_record_classes: + - WFM + - WFM_STEREO + auto_decode_classes: + - WFM + - WFM_STEREO + - RDS +surveillance: + analysis_fft_size: 512 + frame_rate: 12 + strategy: multi-resolution + display_bins: 2048 + display_fps: 12 + derived_detection: auto +refinement: + enabled: true + max_concurrent: 24 + detail_fft_size: 4096 + min_candidate_snr_db: -3 + min_span_hz: 60000 + max_span_hz: 250000 + auto_span: true +resources: + prefer_gpu: true + max_refinement_jobs: 24 + max_recording_streams: 32 + max_decode_jobs: 16 + decision_hold_ms: 2500 +profiles: + - name: legacy + description: Current single-band pipeline behavior + pipeline: + mode: legacy + profile: legacy + goals: + intent: general-monitoring + monitor_start_hz: 0 + monitor_end_hz: 0 + monitor_span_hz: 0 + monitor_windows: [] + signal_priorities: [] + auto_record_classes: [] + auto_decode_classes: [] + surveillance: + analysis_fft_size: 2048 + frame_rate: 15 + strategy: single-resolution + display_bins: 2048 + display_fps: 15 + derived_detection: auto + refinement: + enabled: true + max_concurrent: 8 + detail_fft_size: 2048 + min_candidate_snr_db: 0 + min_span_hz: 0 + max_span_hz: 0 + auto_span: true + resources: + prefer_gpu: false + max_refinement_jobs: 8 + max_recording_streams: 16 + max_decode_jobs: 16 + decision_hold_ms: 2000 + - name: wideband-balanced + description: Baseline multi-resolution wideband surveillance + pipeline: + mode: wideband-balanced + profile: wideband-balanced + goals: + intent: broadcast-monitoring + monitor_start_hz: 0 + monitor_end_hz: 0 + monitor_span_hz: 0 + monitor_windows: [] + 
signal_priorities: + - wfm + - rds + - broadcast + auto_record_classes: + - WFM + - WFM_STEREO + auto_decode_classes: + - WFM + - WFM_STEREO + - RDS + surveillance: + analysis_fft_size: 4096 + frame_rate: 12 + strategy: multi-resolution + display_bins: 2048 + display_fps: 12 + derived_detection: auto + refinement: + enabled: true + max_concurrent: 24 + detail_fft_size: 4096 + min_candidate_snr_db: -3 + min_span_hz: 60000 + max_span_hz: 250000 + auto_span: true + resources: + prefer_gpu: true + max_refinement_jobs: 24 + max_recording_streams: 32 + max_decode_jobs: 16 + decision_hold_ms: 2500 + - name: wideband-aggressive + description: Higher surveillance/refinement budgets for dense wideband monitoring + pipeline: + mode: wideband-aggressive + profile: wideband-aggressive + goals: + intent: high-density-wideband-surveillance + monitor_start_hz: 0 + monitor_end_hz: 0 + monitor_span_hz: 0 + monitor_windows: [] + signal_priorities: + - wfm + - rds + - broadcast + - digital + auto_record_classes: [] + auto_decode_classes: [] + surveillance: + analysis_fft_size: 8192 + frame_rate: 10 + strategy: multi-resolution + display_bins: 4096 + display_fps: 10 + derived_detection: auto + refinement: + enabled: true + max_concurrent: 32 + detail_fft_size: 8192 + min_candidate_snr_db: -3 + min_span_hz: 50000 + max_span_hz: 280000 + auto_span: true + resources: + prefer_gpu: true + max_refinement_jobs: 32 + max_recording_streams: 40 + max_decode_jobs: 24 + decision_hold_ms: 2500 + - name: archive + description: Record-first monitoring profile + pipeline: + mode: archive + profile: archive + goals: + intent: archive-and-triage + monitor_start_hz: 0 + monitor_end_hz: 0 + monitor_span_hz: 0 + monitor_windows: [] + signal_priorities: + - wfm + - broadcast + - digital + auto_record_classes: [] + auto_decode_classes: [] + surveillance: + analysis_fft_size: 4096 + frame_rate: 12 + strategy: single-resolution + display_bins: 2048 + display_fps: 12 + derived_detection: auto + refinement: + 
enabled: true + max_concurrent: 16 + detail_fft_size: 4096 + min_candidate_snr_db: -2 + min_span_hz: 50000 + max_span_hz: 250000 + auto_span: true + resources: + prefer_gpu: true + max_refinement_jobs: 16 + max_recording_streams: 40 + max_decode_jobs: 16 + decision_hold_ms: 3000 + - name: digital-hunting + description: Digital-first refinement and decode focus + pipeline: + mode: digital-hunting + profile: digital-hunting + goals: + intent: digital-surveillance + monitor_start_hz: 0 + monitor_end_hz: 0 + monitor_span_hz: 0 + monitor_windows: [] + signal_priorities: + - rds + - digital + - wfm + auto_record_classes: [] + auto_decode_classes: [] + surveillance: + analysis_fft_size: 4096 + frame_rate: 12 + strategy: multi-resolution + display_bins: 2048 + display_fps: 12 + derived_detection: auto + refinement: + enabled: true + max_concurrent: 20 + detail_fft_size: 4096 + min_candidate_snr_db: -2 + min_span_hz: 50000 + max_span_hz: 200000 + auto_span: true + resources: + prefer_gpu: true + max_refinement_jobs: 20 + max_recording_streams: 20 + max_decode_jobs: 24 + decision_hold_ms: 2500 +detector: + threshold_db: -60 + min_duration_ms: 500 + hold_ms: 1500 + ema_alpha: 0.025 + hysteresis_db: 10 + min_stable_frames: 4 + gap_tolerance_ms: 2000 + cfar_mode: GOSCA + cfar_guard_hz: 200000 + cfar_train_hz: 100000 + cfar_guard_cells: 3 + cfar_train_cells: 24 + cfar_rank: 36 + cfar_scale_db: 23 + cfar_wrap_around: true + edge_margin_db: 6 + max_signal_bw_hz: 260000 + merge_gap_hz: 20000 + class_history_size: 10 + class_switch_ratio: 0.6 +recorder: + enabled: false + min_snr_db: 0 + min_duration: 500ms + max_duration: 300s + preroll_ms: 500 + record_iq: false + record_audio: true + auto_demod: true + auto_decode: false + max_disk_mb: 0 + output_dir: data/recordings + class_filter: [] + ring_seconds: 12 + deemphasis_us: 50 + extraction_fir_taps: 101 + extraction_bw_mult: 1.35 + debug_live_audio: false +decoder: + ft8_cmd: C:/WSJT/wsjtx-2.7.0-rc6/bin/jt9.exe -8 {audio} + 
wspr_cmd: C:/WSJT/wsjtx-2.7.0-rc6/bin/wsprd.exe {audio} + dmr_cmd: tools/dsd-neo/bin/dsd-neo.exe -fs -i {audio} -s {sr} -o null + dstar_cmd: tools/dsd-neo/bin/dsd-neo.exe -fd -i {audio} -s {sr} -o null + fsk_cmd: tools/fsk/fsk_decoder --iq {iq} --sample-rate {sr} + psk_cmd: tools/psk/psk_decoder --iq {iq} --sample-rate {sr} +debug: + audio_dump_enabled: false + cpu_monitoring: false + telemetry: + enabled: true + heavy_enabled: false + heavy_sample_every: 12 + metric_sample_every: 8 + metric_history_max: 6000 + event_history_max: 1500 + retention_seconds: 900 + persist_enabled: false + persist_dir: debug/telemetry + rotate_mb: 16 + keep_files: 8 +logging: + level: error + categories: [] + rate_limit_ms: 1000 + stdout: true + stdout_color: true + file: logs/trace.log + file_level: error + time_format: "15:04:05" + disable_time: false +web_addr: :8080 +event_path: data/events.jsonl +frame_rate: 12 +waterfall_lines: 200 +web_root: web diff --git a/config.yaml b/config.yaml index 6d8f0de..53cdb0b 100644 --- a/config.yaml +++ b/config.yaml @@ -248,14 +248,29 @@ decoder: dstar_cmd: tools/dsd-neo/bin/dsd-neo.exe -fd -i {audio} -s {sr} -o null fsk_cmd: tools/fsk/fsk_decoder --iq {iq} --sample-rate {sr} psk_cmd: tools/psk/psk_decoder --iq {iq} --sample-rate {sr} +debug: + audio_dump_enabled: false + cpu_monitoring: false + telemetry: + enabled: true + heavy_enabled: false + heavy_sample_every: 12 + metric_sample_every: 8 + metric_history_max: 6000 + event_history_max: 1500 + retention_seconds: 900 + persist_enabled: true + persist_dir: debug/telemetry + rotate_mb: 16 + keep_files: 8 logging: - level: debug - categories: [capture, extract, demod, resample, drop, ws, boundary] - rate_limit_ms: 500 + level: error + categories: [] + rate_limit_ms: 1000 stdout: true stdout_color: true - file: logs/trace.log - file_level: debug + file: "" + file_level: error time_format: "15:04:05" disable_time: false web_addr: :8080 diff --git a/docs/audio-click-debug-notes-2026-03-24.md 
b/docs/audio-click-debug-notes-2026-03-24.md new file mode 100644 index 0000000..44a12ba --- /dev/null +++ b/docs/audio-click-debug-notes-2026-03-24.md @@ -0,0 +1,1077 @@ +# Audio Click Debug Notes — 2026-03-24 + +## Context + +This note captures the intermediate findings from the live/recording audio click investigation on `sdr-wideband-suite`. + +Goal: preserve the reasoning, experiments, false leads, and current best understanding so future work does not restart from scratch. + +--- + +## High-level outcome so far + +**SOLVED** — the persistent audio clicking issue is now resolved. + +Final result: +- live listening test confirmed the clicks are gone +- the final fix set consists of three independent root-cause fixes plus two secondary fixes +- the CUDA DLL did **not** need a rebuild for the final fix + +This document now serves as the investigation log plus final resolution record. + +--- + +## What was tested + +### 1. Session/context recovery +- Reconstructed prior debugging context from reset-session backup files. +- Confirmed the relevant investigation was the persistent audio clicking bug in live audio / recordings. + +### 2. Codebase deep-read +Reviewed in detail: +- `cmd/sdrd/dsp_loop.go` +- `cmd/sdrd/pipeline_runtime.go` +- `cmd/sdrd/helpers.go` +- `internal/recorder/streamer.go` +- `internal/recorder/demod_live.go` +- `internal/dsp/fir.go` +- `internal/dsp/fir_stateful.go` +- `internal/dsp/resample.go` +- `internal/demod/fm.go` +- `internal/demod/gpudemod/*` +- `web/app.js` + +Main conclusion from static reading: the pipeline contains several stateful continuity mechanisms, so clicks are likely to emerge at boundaries or from phase/timing inconsistencies rather than from one obvious isolated bug. + +### 3. AM vs FM tests +Observed by ear: +- AM clicks too. +- Therefore this is **not** an FM-only issue. +- That shifted focus away from purely FM-specific explanations and toward shared-path / continuity / transport / demod-adjacent causes. + +### 4. 
Recording vs live path comparison +Observed by ear: +- Recordings click too. +- Therefore browser/WebSocket/live playback is **not** the sole cause. +- The root problem exists in the server-side audio pipeline before browser playback. + +### 5. Boundary instrumentation added +Temporary diagnostics were added to inspect: +- extract trimming +- snippet lengths +- demod path lengths +- boundary click / intra-click detector +- IQ continuity at various stages + +### 6. Discriminator-overlap hypothesis +A test switch temporarily disabled the extra 1-sample discriminator overlap prepend in `streamer.go`. + +Result: +- This extra overlap **was** a real problem. +- It caused the downstream decimation phase to flip between blocks. +- Removing it cleaned up the boundary model and was the correct change. + +However: +- Removing it did **not** eliminate the audible clicks. +- Therefore it was a real bug, but **not the main remaining root cause**. + +### 7. GPU vs CPU extraction test +Forced CPU-only stream extraction. + +Result: +- CPU-only made things dramatically worse in real time. +- Large `feed_gap` values appeared. +- Huge backlogs built up. +- Therefore CPU-only is not a solution, and the GPU path is not the sole main problem. + +### 8. Fixed read-size test +Forced a constant extraction read size (`389120`) instead of variable read sizing based on backlog. + +Result: +- `allIQ`, `gpuIQ_len`, `raw_len`, and `out_len` became very stable. +- This reduced pipeline variability and made logs much cleaner. +- Subjectively, audio may have become slightly better, but clicks remained. +- Therefore variable block sizing is likely a contributing factor, but not the full explanation. + +### 9. Multi-stage audio dump test +Added optional debug dumping for: +- demod audio (`*-demod.wav`) +- final audio after resampler (`*-final.wav`) + +Observed by ear: +- Clicks are present in **both** dump types. +- Therefore the click is already present by the time demodulated audio exists. 
+- Resampler/final audio path is not the primary origin. + +### 10. CPU monitoring +A process-level CSV monitor was added and used. + +Result: +- Overall process CPU usage was modest (not near full machine saturation). +- This does **not** support “overall CPU is pegged” as the main explanation. +- Caveat: this does not fully exclude a hot thread or scheduler issue, but gross total CPU overload is not the main story. + +--- + +## What we now know with reasonable confidence + +### A. The issue is not primarily caused by: +- Browser playback +- WebSocket transport +- Final PCM fanout only +- Resampler alone +- CPU-only vs GPU-only as the core dichotomy +- The old extra discriminator overlap prepend (that was a bug, but not the remaining dominant one) +- Purely variable block sizes alone +- Gross whole-process CPU saturation + +### B. The issue is server-side and exists before final playback +Because: +- recordings click +- demod dump clicks +- final dump clicks + +### C. The issue is present by the demodulated audio stage +This is one of the strongest current findings. + +### D. The WFM/FM-demod-adjacent path remains highly suspicious +Current best area of suspicion: +- decimated IQ may still contain subtle corruption/instability not fully captured by current metrics +- OR the FM discriminator (`fmDiscrim`) is producing pathological output from otherwise “boundary-clean-looking” IQ + +--- + +## Important runtime/pathology observations + +### 1. Backlog amplification is real +Several debug runs showed severe buffer growth and drops: +- large `buf=` values +- growing `drop=` counts +- repeated `audio_gap` + +This means some debug configurations can easily become self-distorting and produce additional artifacts that are not representative of the original bug. + +### 2. 
Too much debug output causes self-inflicted load +At one point: +- rate limiter was disabled (`rate_limit_ms: 0`) +- aggressive boundary logging was enabled +- many short WAV files were generated + +This clearly increased overhead and likely polluted some runs. + +### 3. Many short WAVs were a bad debug design +That was replaced with a design intended to write one continuous window file instead of many micro-files. + +### 4. Total process CPU saturation does not appear to be the main cause +A process-level CSV monitor was collected and showed only modest total CPU utilisation during the relevant tests. +This does **not** support a simple “the machine is pegged” explanation. +A hot thread / scheduling issue is still theoretically possible, but gross overall CPU overload is not the main signal. + +--- + +## Current debug state in repo + +### Branch +All current work is on: +- `debug/audio-clicks` + +### Commits so far +- `94c132d` — `debug: instrument audio click investigation` +- `ffbc45d` — `debug: add advanced boundary metering` + +### Current config/logging state +The active debug logging was trimmed down to: +- `demod` +- `discrim` +- `gap` +- `boundary` + +Rate limit is currently back to a nonzero value to avoid self-induced spam. + +### Dump/CPU debug state +A `debug:` config section was added with: +- `audio_dump_enabled: false` +- `cpu_monitoring: false` + +Meaning: +- heavy WAV dumping is now OFF by default +- CPU monitoring is conceptually OFF by default (script still exists, but must be explicitly used) + +--- + +## Most important code changes/findings to remember + +### 1. Removed the extra discriminator overlap prepend in `streamer.go` +This was a correct fix. + +Reason: +- it introduced a blockwise extra IQ sample +- this shifted decimation phase between blocks +- it created real boundary artifacts + +This should **not** be reintroduced casually. + +### 2. 
Fixed read-size test exists and is useful for investigation +A temporary mechanism exists to force stable extraction block sizes. +This is useful diagnostically because it removes one source of pipeline variability. + +**IMPORTANT DECISION / DO NOT LOSE:** +- The fixed read-size path currently lives behind the environment variable `SDR_FORCE_FIXED_STREAM_READ_SAMPLES`. +- The tested value `389120` clearly helps by making `allIQ`, `gpuIQ_len`, `raw_len`, and `out_len` much more stable and by reducing one major source of pipeline variability. +- Current plan: **once the remaining click root cause is solved, promote this behavior into the normal code path instead of leaving it as an env-var-only debug switch.** +- In other words: treat fixed read sizing as a likely permanent stabilization improvement, but do not bake it in blindly until the click investigation is complete. + +### 3. FM discriminator metering exists +`internal/demod/fm.go` now emits targeted discriminator stats under `discrim` logging, including: +- min/max IQ magnitude +- maximum absolute phase step +- count of large phase steps + +This was useful to establish that large discriminator steps correlate with low IQ magnitude, but discriminator logging was later disabled from the active category list to reduce log spam. + +### 4. Strong `dec`-IQ findings before demod +Additional metering in `streamer.go` showed: +- repeated `dec_iq_head_dip` +- repeated low magnitude near `min_idx ~= 25` +- repeated early large local phase step near `max_step_idx ~= 24` +- repeated `demod_boundary` and audible clicks shortly afterward + +This is the strongest currently known mechanism in the chain. + +### 5. Group delay observation +For the current pre-demod FIR: +- taps = `101` +- FIR group delay = `(101 - 1) / 2 = 50` input samples +- with `decim1 = 2`, this projects to about `25` output samples + +This matches the repeatedly observed problematic `dec` indices (~24-25) remarkably well. 
+That strongly suggests the audible issue is connected to the FIR/decimation settling region at the beginning of the `dec` block. + +### 6. Pre-FIR vs post-FIR comparison +A dedicated pre-FIR probe was added on `fullSnip` (the input to the pre-demod FIR) and compared against the existing `dec`-side probes. + +Observed pattern: +- pre-FIR head probe usually looked relatively normal +- no equally strong or equally reproducible hot spot appeared there +- after FIR + decimation, the problematic dip/step repeatedly appeared near `dec` indices ~24-25 + +Interpretation: +- the strongest currently observed defect is **not already present in the same form before the FIR** +- it is much more likely to emerge in the FIR/decimation section (or its settling behavior) than in the raw pre-FIR input + +### 7. Head-trim test results +A debug head-trim on `dec` was tested. +Subjective result: +- `trim=32` sounded best among the tested values (`16/32/48/64`) +- but it did **not** remove the clicks entirely + +Interpretation: +- the early `dec` settling region is a real contributor +- but it is probably not the only contributor, or trimming alone is not the final correct fix + +### 8. Current architectural conclusion +The likely clean fix is **not** to keep trimming samples away. +The FIR/decimation section is still suspicious, but later tests showed it is likely not the sole origin. 
+ +Important nuance: +- the currently suspicious FIR + decimation section is already running in **Go/CPU** (`processSnippet`), not in CUDA +- therefore the next correctness fix should be developed and validated in Go first + +Later update: +- a stateful decimating FIR / polyphase-style replacement was implemented in Go and tested +- it was architecturally cleaner than the old separated FIR->decimate handoff +- but it did **not** remove the recurring hot spot / clicks +- therefore the old handoff was not the whole root cause, even if the newer path is still cleaner + +--- + +## Best current hypothesis + +The remaining audible clicks are most likely generated **at or immediately before FM demodulation**. + +Most plausible interpretations: +1. The decimated IQ stream still contains subtle corruption/instability not fully captured by the earliest boundary metrics. +2. The FM discriminator is reacting violently to short abnormal IQ behavior inside blocks, not just at block boundaries. +3. The problematic region is likely a **very specific early decimated-IQ settling zone**, not broad corruption across the whole block. + +At this point, the most valuable next data is low-overhead IQ telemetry right before demod, plus carefully controlled demod-vs-final audio comparison. + +### Stronger updated working theory (later findings, same day) + +After discriminator-focused metering and targeted `dec`-IQ probes, the strongest current theory is: + +> A reproducible early defect in the `dec` IQ block appears around sample index **24-25**, where IQ magnitude dips sharply and the effective FM phase step becomes abnormally large. This then shows up as `demod_boundary` and audible clicks. 
+ +Crucially: +- this issue appears in `demod.wav`, so it exists before the final resampler/playback path +- it is **not** spread uniformly across the whole `dec` block +- it repeatedly appears near the same index +- trimming the first ~32 samples subjectively reduces the click, but does not eliminate it entirely + +This strongly suggests a **settling/transient zone at the beginning of the decimated IQ block**. + +Later refinements to this theory: +- pre-FIR probing originally looked cleaner than post-FIR probing, which made FIR/decimation look like the main culprit +- however, a temporary FIR bypass showed the clicks were still present, only somewhat quieter / less aggressive +- this indicates the pre-demod FIR likely amplifies or sharpens an upstream issue, but is not the sole origin +- a cleaner stateful decimating FIR implementation also failed to eliminate the recurring hot spot, further weakening the idea that the old FIR->decimate handoff alone caused the bug + +--- + +## Recommended next steps + +1. Run with reduced logging only and keep heavy dump features OFF unless explicitly needed. +2. Continue investigating the extractor path and its immediate surroundings (`extractForStreaming`, signal parameter source, offset/BW stability, overlap/trim behavior). +3. Treat FIR/decimation as a possible amplifier/focuser of the issue, but not the only suspect. +4. When testing fixes, prefer low-overhead, theory-driven experiments over broad logging/dump spam. +5. Only re-enable audio dump windows selectively and briefly. + +### Debug TODO / operational reminders + +- The current telemetry collector is **not** using a true ring buffer for metric/event history. +- Internally it keeps append-only history slices (`metricsHistory`, `events`) and periodically trims them by copying tail slices. +- Under heavy per-block telemetry this can add enough mutex/copy overhead to make the live stream start stuttering after a short run. 
+- Therefore: keep telemetry sampling conservative during live reproduction runs; do **not** leave full heavy telemetry enabled longer than needed. +- Follow-up engineering task: replace or redesign telemetry history storage to use a proper low-overhead ring-buffer style structure (or equivalent bounded lock-light design) if live telemetry is to remain a standard debugging tool. + +--- + +## 2026-03-25 update — extractor-focused live telemetry findings + +### Where the investigation moved + +The investigation was deliberately refocused away from browser/feed/demod-only suspicions and toward: +- shared upstream IQ cadence / block boundaries +- extractor input/output continuity +- raw vs trimmed extractor-head behaviour + +This was driven by two observations: +1. all signals still click +2. the newly added live telemetry made it possible to inspect the shared path while the system was running + +### Telemetry infrastructure / config notes + +Two config files matter for debug telemetry defaults: +- `config.yaml` +- `config.autosave.yaml` + +The autosave file can overwrite intended telemetry defaults after restart, so both must be updated together. + +Current conservative live-debug defaults that worked better: +- `heavy_enabled: false` +- `heavy_sample_every: 12` +- `metric_sample_every: 8` +- `metric_history_max: 6000` +- `event_history_max: 1500` + +Important operational lesson: +- runtime `POST /api/debug/telemetry/config` changes only affect the current `sdrd` process +- after restart, the process reloads config defaults again +- if autosave still contains older values (for example `heavy_enabled: true` or very large history limits), the debug run can accidentally become self-distorting again + +### Telemetry endpoints + +The live debug work used these HTTP endpoints on the `sdrd` web server (typically `http://127.0.0.1:8080`): + +#### `GET /api/debug/telemetry/config` +Returns the current effective telemetry configuration. 
+Useful for verifying:
- whether heavy telemetry is enabled
- history sizes
- persistence settings
- sample rates actually active in the running process

Typical fields:
- `enabled`
- `heavy_enabled`
- `heavy_sample_every`
- `metric_sample_every`
- `metric_history_max`
- `event_history_max`
- `retention_seconds`
- `persist_enabled`
- `persist_dir`

#### `POST /api/debug/telemetry/config`
Applies runtime telemetry config changes to the current process.
Used during investigation to temporarily adjust telemetry load (for example, re-enabling heavy probes only at conservative sampling rates) without editing config files.

Example body used during investigation:
```json
{
  "heavy_enabled": true,
  "heavy_sample_every": 12,
  "metric_sample_every": 8
}
```

#### `GET /api/debug/telemetry/live`
Returns the current live metric snapshot (gauges/counters/distributions).
Useful for:
- quick sanity checks
- verifying that a metric family exists
- confirming whether a new metric name is actually being emitted

#### `GET /api/debug/telemetry/history?prefix=&limit=`
Returns stored metric history entries filtered by metric-name prefix.
This is the main endpoint for time-series debugging during live runs.

Useful examples:
- `prefix=stage.`
- `prefix=source.`
- `prefix=iq.boundary.all`
- `prefix=iq.extract.input`
- `prefix=iq.extract.output`
- `prefix=iq.extract.raw.`
- `prefix=iq.extract.trimmed.`
- `prefix=iq.pre_demod`
- `prefix=audio.demod`

#### `GET /api/debug/telemetry/events?limit=`
Returns recent structured telemetry events.
Used heavily once compact per-block event probes were added, because events were often easier to inspect reliably than sparsely sampled distribution histories.
+ +This ended up being especially useful for: +- raw extractor head probes +- trimmed extractor head probes +- extractor input head probes +- GPU kernel input/output head probes +- boundary snapshots + +### Important telemetry families added/used + +#### Shared-path / global boundary metrics +- `iq.boundary.all.head_mean_mag` +- `iq.boundary.all.prev_tail_mean_mag` +- `iq.boundary.all.delta_mag` +- `iq.boundary.all.delta_phase` +- `iq.boundary.all.discontinuity_score` + +Purpose: +- detect whether the shared `allIQ` block boundary was already obviously broken before signal-specific extraction + +#### Extractor input/output metrics +- `iq.extract.input.length` +- `iq.extract.input.overlap_length` +- `iq.extract.input.head_mean_mag` +- `iq.extract.input.prev_tail_mean_mag` +- `iq.extract.input.discontinuity_score` +- `iq.extract.output.length` +- `iq.extract.output.head_mean_mag` +- `iq.extract.output.head_min_mag` +- `iq.extract.output.head_max_step` +- `iq.extract.output.head_p95_step` +- `iq.extract.output.head_tail_ratio` +- `iq.extract.output.head_low_magnitude_count` +- `iq.extract.output.boundary.delta_mag` +- `iq.extract.output.boundary.delta_phase` +- `iq.extract.output.boundary.d2` +- `iq.extract.output.boundary.discontinuity_score` + +Purpose: +- isolate whether the final per-signal extractor output itself was discontinuous across blocks + +#### Raw vs trimmed extractor-head telemetry +- `iq.extract.raw.length` +- `iq.extract.raw.head_mag` +- `iq.extract.raw.tail_mag` +- `iq.extract.raw.head_zero_count` +- `iq.extract.raw.first_nonzero_index` +- `iq.extract.raw.head_max_step` +- `iq.extract.trim.trim_samples` +- `iq.extract.trimmed.head_mag` +- `iq.extract.trimmed.tail_mag` +- `iq.extract.trimmed.head_zero_count` +- `iq.extract.trimmed.first_nonzero_index` +- `iq.extract.trimmed.head_max_step` +- event `extract_raw_head_probe` +- event `extract_trimmed_head_probe` + +Purpose: +- answer the key question: is the corruption already present in the raw 
extractor output head, or created by trimming/overlap logic afterward? + +#### Additional extractor input / GPU-kernel probe telemetry +- `iq.extract.input_head.zero_count` +- `iq.extract.input_head.first_nonzero_index` +- `iq.extract.input_head.max_step` +- event `extract_input_head_probe` +- event `gpu_kernel_input_head_probe` +- event `gpu_kernel_output_head_probe` + +Purpose: +- split the remaining uncertainty between: + - signal-specific input already being bad + - GPU extractor kernel/start semantics producing the bad raw head + - later output assembly after the kernel + +#### Pre-demod / audio-stage metrics +- `iq.pre_demod.head_mean_mag` +- `iq.pre_demod.head_min_mag` +- `iq.pre_demod.head_max_step` +- `iq.pre_demod.head_p95_step` +- `iq.pre_demod.head_low_magnitude_count` +- `audio.demod.head_mean_abs` +- `audio.demod.tail_mean_abs` +- `audio.demod.edge_delta_abs` +- existing `audio.demod_boundary.*` + +Purpose: +- verify where artifacts become visible/audible downstream + +### What the 2026-03-25 telemetry actually showed + +#### 1. Feed / enqueue remained relatively uninteresting +`stage.feed_enqueue.duration_ms` was usually effectively zero. + +Representative values during live runs: +- mostly `0` +- occasional small spikes such as `0.5 ms` and `5.8 ms` + +Interpretation: +- feed enqueue is not the main source of clicks + +#### 2. Extract-stream time was usually modest +`stage.extract_stream.duration_ms` was usually small and stable compared with the main loop. + +Representative values: +- often `1–5 ms` +- occasional spikes such as `10.7 ms` and `18.9 ms` + +Interpretation: +- extraction is not free, but runtime cost alone does not explain the clicks + +#### 3. 
Shared capture / source cadence still fluctuated heavily +Representative live values: +- `dsp.frame.duration_ms`: often around `90–100 ms`, but also `110–150 ms`, with one observed spike around `212.6 ms` +- `source.read.duration_ms`: roughly `80–90 ms` often, but also about `60 ms`, `47 ms`, `19 ms`, and even `0.677 ms` +- `source.buffer_samples`: ranged from very small to very large bursts, including examples like `512`, `4608`, `94720`, `179200`, `304544` +- a `source_reset` event was seen and `source.resets=1` + +Interpretation: +- shared upstream cadence is clearly unstable enough to remain suspicious +- but this alone did not localize the final click mechanism + +#### 4. Pre-demod stage showed repeated hard phase anomalies even when energy looked healthy +Representative live values for normal non-vanishing signals: +- `iq.pre_demod.head_mean_mag` around `0.25–0.31` +- `iq.pre_demod.head_low_magnitude_count = 0` +- `iq.pre_demod.head_max_step` repeatedly high, including roughly: + - `1.5` + - `2.0` + - `2.4` + - `2.8` + - `3.08` + +Interpretation: +- not primarily an amplitude collapse +- rather a strong phase/continuity defect reaching the pre-demod stage + +#### 5. Audio stage still showed real block-edge artifacts +Representative values: +- `audio.demod.edge_delta_abs` repeatedly around `0.4–0.8` +- outliers up to roughly `1.21` and `1.26` +- `audio.demod_boundary.count` continued to fire repeatedly + +Interpretation: +- demod is where the problem becomes audible, but the root cause still appeared to be earlier/shared + +### Key extractor findings from the new telemetry + +#### A. 
Per-signal extractor output boundary is genuinely broken +For a representative strong signal (`signal_id=2`), `iq.extract.output.boundary.delta_phase` repeatedly showed very large jumps such as: +- `2.60` +- `3.06` +- `2.14` +- `2.71` +- `3.09` +- `2.92` +- `2.63` +- `2.78` + +Also observed for `iq.extract.output.boundary.discontinuity_score`: +- `2.86` +- `3.08` +- `2.92` +- `2.52` +- `2.40` +- `2.85` + +Later runs using `d2` made the discontinuity even easier to see. Representative `iq.extract.output.boundary.d2` values for the same strong signal included: +- `0.347` +- `0.303` +- `0.362` +- `0.359` +- `0.382` +- `0.344` +- `0.337` +- `0.206` + +At the same time, `iq.extract.output.boundary.delta_mag` was often comparatively small (examples around `0.0003–0.0038`). + +Interpretation: +- the main boundary defect is not primarily amplitude mismatch +- it is much more consistent with complex/phase discontinuity across output blocks + +#### B. The raw extractor head is systematically bad on all signals +The new `extract_raw_head_probe` events were the strongest finding of the day. + +Representative repeated pattern for strong signals (`signal_id=1` and `signal_id=2`): +- `first_nonzero_index = 1` +- `zero_count = 1` +- first magnitude sample exactly `0` +- then a short ramp: e.g. 
for `signal_id=2` + - `0` + - `0.000388` + - `0.002316` + - `0.004152` + - `0.019126` + - `0.011418` + - `0.124034` + - `0.257569` + - `0.317579` +- `head_max_step` often near π, e.g.: + - `3.141592653589793` + - `3.088773696463606` + - `3.0106854446936318` + - `2.9794833659932527` + +The same qualitative pattern appeared for weaker signals too: +- raw head starts at `0` +- a brief near-zero ramp follows +- only after several samples does the magnitude look like a normal extracted band + +Interpretation: +- the raw extractor output head is already damaged / settling / invalid before trimming +- this strongly supports an upstream/shared-start-condition problem rather than a trim-created artifact + +#### C. The trimmed extractor head usually looks sane +Representative repeated pattern for the same signals after `trim_samples = 64`: +- `first_nonzero_index = 0` +- `zero_count = 0` +- magnitudes look immediately plausible and stable +- `head_max_step` is dramatically lower than raw, often around `0.15–0.9` for strong channels + +Example trimmed head magnitudes for `signal_id=2`: +- `0.299350` +- `0.300954` +- `0.298032` +- `0.298738` +- `0.312258` +- `0.296932` +- `0.239010` +- `0.266881` +- `0.313193` + +Example trimmed head magnitudes for `signal_id=1`: +- `0.277400` +- `0.275994` +- `0.273718` +- `0.272846` +- `0.277842` +- `0.278398` +- `0.268829` +- `0.273790` +- `0.279031` + +Interpretation: +- trimming is removing a genuinely bad raw head region +- trimming is therefore **not** the main origin of the problem +- it acts more like cleanup of an already bad upstream/raw start region + +### Input-vs-raw-vs-trimmed extractor result (important refinement) + +A later, more targeted telemetry pass added a direct probe on the signal-specific extractor input head (`extract_input_head_probe`) and compared it against the raw and trimmed extractor output heads. + +This materially refined the earlier conclusion. 
+

#### Input-head result
Representative values from `iq.extract.input_head.*`:
- `iq.extract.input_head.zero_count = 0`
- `iq.extract.input_head.first_nonzero_index = 0`

Interpretation:
- the signal-specific input head going into the GPU extractor is **not** starting with a zero sample
- the head is not arriving already dead/null at the immediate input probe point

#### Raw-head result
Representative values from `iq.extract.raw.*`:
- `iq.extract.raw.head_mag = 0`
- `iq.extract.raw.head_zero_count = 1`
- `iq.extract.raw.head_max_step` frequently around `2.4–3.14`

These values repeated for strong channels such as `signal_id=2`, and similarly across other signals.

Interpretation:
- the first raw output sample is repeatedly exactly zero
- therefore the visibly bad raw head is being created **after** the probed input head and **before/during raw extractor output generation**

#### Trimmed-head result
Representative values from `iq.extract.trimmed.*`:
- `iq.extract.trimmed.head_zero_count = 0`
- `iq.extract.trimmed.head_mag` often looked healthy immediately after trimming, for example:
  - signal 1: about `0.275–0.300`
  - signal 2: about `0.311`
- `iq.extract.trimmed.head_max_step` was much lower than raw for strong channels, often around:
  - `0.11`
  - `0.14`
  - `0.19`
  - `0.30`
  - `0.75`

Interpretation:
- trimming cleans up the visibly bad raw head region
- trimming still does **not** explain the deeper output-boundary continuity issue

### Further refinement after direct extractor-input and GPU-kernel probes

A final telemetry round added:
- `extract_input_head_probe`
- `gpu_kernel_input_head_probe`
- `gpu_kernel_output_head_probe`

These probes further sharpened the likely fault location.
+ +#### Signal-specific extractor input head looked sane +Representative values: +- `iq.extract.input_head.zero_count = 0` +- `iq.extract.input_head.first_nonzero_index = 0` + +Interpretation: +- at the observed signal-specific input probe point, the GPU extractor is **not** receiving a dead/null head + +#### Raw GPU output head remained systematically broken +Representative repeated values: +- `iq.extract.raw.head_mag = 0` +- `iq.extract.raw.head_zero_count = 1` +- `iq.extract.raw.head_max_step` repeatedly around: + - `3.141592653589793` + - `3.122847934305907` + - `3.101915352902961` + - `3.080672178550904` + - `3.062425574273907` + - `2.9785041567778427` + - `2.7508533785793476` + +Representative repeated examples from strong channels: +- signal 2: `head_mag = 0`, `head_zero_count = 1` +- signal 3: `head_mag = 0`, `head_zero_count = 1` +- signal 1/4 showed the same qualitative head-zero pattern as well + +Interpretation: +- the raw extractor output head is still repeatedly born broken +- the problem is therefore after the currently probed input head and before/during raw output creation + +#### Trimmed head still looked healthier +Representative values: +- `iq.extract.trimmed.head_zero_count = 0` +- signal 1 `iq.extract.trimmed.head_mag` repeatedly around: + - `0.2868` + - `0.2907` + - `0.3036` + - `0.3116` + - `0.2838` + - `0.2760` +- signal 2 examples: + - `0.3461` + - `0.3182` + +Representative `iq.extract.trimmed.head_max_step` values for strong channels were much lower than raw, often around: +- `0.11` +- `0.13` +- `0.21` +- `0.30` +- `0.44` +- `0.69` +- `0.86` + +Interpretation: +- trimming still removes the most visibly broken head region +- but trimming does not explain the deeper output-boundary continuity issue + +### Refined strongest current conclusion after the full 2026-03-25 telemetry pass + +The strongest current reading is now: + +> The click root cause is very likely **not** that the signal-specific extractor input already starts dead/null. 
Instead, the bad raw head appears to be introduced **inside the GPU extractor path itself** (or at its immediate start/output semantics) before final trimming. + +More specifically: +- signal-specific extractor input head looks non-zero and sane at the probe point +- raw GPU output head still repeatedly starts with an exact zero sample and a short bad settling region +- the trimmed head usually looks healthier +- yet the final extractor output still exhibits significant complex boundary discontinuity from block to block + +This now points away from a simple "shared global input head is already zero" theory and toward one of these narrower causes: +1. GPU extractor kernel start semantics / warmup / first-output handling +2. phase-start or alignment handling at extractor block start +3. raw GPU output assembly semantics within the extractor path + +### What should not be forgotten from this stage + +- The overlap-prepend bug was real and worth fixing, but was not sufficient. +- The fixed read-size path (`SDR_FORCE_FIXED_STREAM_READ_SAMPLES=389120`) remains useful and likely worth promoting later, but it is not the root-cause fix. +- The telemetry system itself can perturb runs if overused; conservative sampling matters. +- `config.autosave.yaml` must be kept in sync with `config.yaml` or telemetry defaults can silently revert after restart. +- The most promising root-cause area is now the shared upstream/extractor-start boundary path, not downstream playback. 
+ +### 2026-03-25 refactor work status (post-reviewer instruction) + +After the reviewer guidance, work pivoted away from symptomatic patching and onto the required two-track architecture change: + +#### Track 1 — CPU/oracle path repair (in progress) +The following was added to start building a trustworthy streaming oracle: +- `internal/demod/gpudemod/streaming_types.go` +- `internal/demod/gpudemod/cpu_oracle.go` +- `internal/demod/gpudemod/cpu_oracle_test.go` +- `internal/demod/gpudemod/streaming_oracle_extract.go` +- `internal/demod/gpudemod/polyphase.go` +- `internal/demod/gpudemod/polyphase_test.go` + +What exists now: +- explicit `StreamingExtractJob` / `StreamingExtractResult` +- explicit `CPUOracleState` +- exact integer decimation enforcement (`ExactIntegerDecimation`) +- monolithic-vs-chunked CPU oracle test +- explicit polyphase tap layout (`phase-major`) +- CPU oracle direct-vs-polyphase equivalence test +- persistent CPU oracle runner state keyed by signal ID +- config-hash reset behavior +- cleanup of disappeared signals from oracle state + +Important limitation: +- this is **not finished production validation yet** +- the CPU oracle path is being built toward the reviewer’s required semantics, but it is not yet the final signed-off oracle for GPU validation + +#### Track 2 — GPU path architecture refactor (in progress) +The following was added to begin the new stateful GPU architecture: +- `internal/demod/gpudemod/stream_state.go` +- `internal/demod/gpudemod/streaming_gpu_stub.go` +- `docs/gpu-streaming-refactor-plan-2026-03-25.md` +- `cmd/sdrd/streaming_refactor.go` + +What exists now: +- explicit `ExtractStreamState` +- batch-runner-owned per-signal state map +- config-hash reset behavior for GPU-side stream state +- exact integer decimation enforcement in relevant batch path +- base taps and polyphase taps initialized into GPU-side stream state +- explicit future production entry point: `StreamingExtractGPU(...)` +- explicit separation between 
current legacy extractor path and the new streaming/oracle path +- persistent oracle-runner lifecycle hooks, including reset on stream-drop events + +Important limitation: +- the new GPU production path is **not implemented yet** +- the legacy overlap+trim production path still exists and is still the current active path +- the new GPU entry point currently exists as an explicit architectural boundary and state owner, not as the finished stateful polyphase kernel path + +#### Tests currently passing during refactor +Repeatedly verified during the refactor work: +- `go test ./internal/demod/gpudemod/...` +- `go test ./cmd/sdrd/...` + +#### Incremental progress reached so far inside the refactor + +Additional progress after the initial refactor scaffolding: +- the CPU oracle runner now uses the explicit polyphase oracle path (`CPUOracleExtractPolyphase`) instead of only carrying polyphase tap data passively +- the CPU oracle now has a direct-vs-polyphase equivalence test +- the GPU-side stream state now initializes both `BaseTaps` and `PolyphaseTaps` +- the GPU side now has an explicit future production entry point `StreamingExtractGPU(...)` +- the GPU streaming stub now advances `NCOPhase` over NEW samples only +- the GPU streaming stub now advances `PhaseCount` modulo exact integer decimation +- the GPU streaming stub now builds and persists `ShiftedHistory` from already frequency-shifted NEW samples +- the new streaming/oracle path is explicitly separated from the current legacy overlap+trim production path + +Important current limitation: +- `StreamingExtractGPU(...)` still intentionally returns a not-implemented error rather than pretending to be the finished production path +- this is deliberate, to avoid hidden quick-fix semantics or silent goalpost shifts + +Additional note on the latest step: +- the GPU streaming stub now also reports an estimated output-count schedule (`NOut`) derived from NEW sample consumption plus carried `PhaseCount` +- this still does 
**not** make it a production path; it only means the stub now models output cadence semantics more honestly +- the new CPU/oracle path is also now exposing additional runtime telemetry such as `streaming.oracle.rate` and `streaming.oracle.output_len`, so the reference path becomes easier to inspect as it matures +- a reusable complex-slice comparison helper now exists (`CompareComplexSlices`) to support later oracle-vs-GPU equivalence work without improvising comparison logic at the last minute +- a dedicated `TestCPUOracleMonolithicVsChunkedPolyphase` now verifies chunked-vs-monolithic self-consistency for the polyphase oracle path specifically +- explicit reset tests now exist for both CPU oracle state and GPU streaming state, so config-change reset semantics are no longer only implicit in code review +- a dedicated `ExtractDebugMetrics` structure now exists as a future comparison/telemetry contract for reviewer-required state/error/boundary metrics +- the first mapper from oracle results into that debug-metric structure now exists, so the comparison contract is beginning to attach to real refactor code rather than staying purely conceptual +- the same minimal debug-metric mapping now also exists for GPU-stub results, so both sides of the future GPU-vs-oracle comparison now have an initial common reporting shape +- a first comparison-pipeline helper now exists to turn oracle-vs-GPU-stub results into shared `CompareStats` / `ExtractDebugMetrics` output, even though the GPU path is still intentionally incomplete +- that comparison helper is now also covered by a dedicated unit test, so even the scaffolding around future GPU-vs-oracle validation is being locked down incrementally +- GPU-side stream-state initialization is now also unit-tested (`Decim`, `BaseTaps`, `PolyphaseTaps`, `ShiftedHistory` capacity), so the new state ownership layer is no longer just trusted by inspection +- the GPU streaming stub now also has a dedicated test proving that it advances 
persistent state while still explicitly failing as a not-yet-implemented production path +- at this point, enough scaffolding exists that the next sensible step is to build the broader validation/test harness in one larger pass before continuing the actual production-path rewrite +- that harness pass has now happened: deterministic IQ/tone fixtures, harness config/state builders, chunked polyphase oracle runners, and additional validation tests now exist, so the next step is back to the actual production-path rewrite +- the first non-stub NEW-samples-only production-like path now exists as `StreamingExtractGPUHostOracle(...)`: it is still host-side, but it executes the new streaming/stateful semantics and therefore serves as a concrete bridge between pure test infrastructure and the eventual real GPU production path +- that host-side production-like path is now directly compared against the CPU oracle in tests and currently matches within tight tolerance, which is an important confidence step before any real CUDA-path replacement +- the canonical new production entry point `StreamingExtractGPU(...)` is now structurally wired so that the host-side production-like implementation can sit behind the same API later, without forcing a premature switch today +- a top-level `cmd/sdrd` production path hook now exists as well (`extractForStreamingProduction` plus `useStreamingProductionPath=false`), so the new architecture is no longer isolated to internal packages only +- the new production path now also emits first-class output/heading telemetry (`rate`, `output_len`, `head_mean_mag`, `head_max_step`) in addition to pure state counters, which will make activation/debugging easier later +- a top-level comparison observation hook now also exists in `cmd/sdrd`, so oracle-vs-production metrics no longer have to remain buried inside internal package helpers +- after the broader monitoring/comparison consolidation pass, the next agreed work mode is to continue in larger clusters 
rather than micro-steps: (1) wire the new production semantics more deeply, (2) isolate the legacy path more sharply, (3) keep preparing the eventual real GPU production path behind the same architecture +- after the first larger cluster, the next explicit target is to complete Cluster B: make the host-oracle bridge sit more naturally behind the new production execution architecture, rather than leaving production-path semantics spread across loosely connected files +- after Cluster B, the remaining GPU rewrite work is now best split into two explicit parts: `C1 = prepare` and `C2 = definitive implementation`, so the project can keep momentum without pretending that the final CUDA/stateful production path is already done +- Cluster B is now effectively complete: CPU oracle runner, host-oracle production-like path, and top-level production comparison all share the same host streaming core, and that common core is directly tested against the polyphase oracle +- Cluster C1 is now also complete: the new GPU production layer has an explicit invocation contract, execution-result contract, state handoff/build/apply stages, and a host-side execution strategy already running behind the same model + +### Current refactor status before C2 + +At this point the project has: +- a corrected streaming/oracle architecture direction +- a shared host-side streaming core used by both the CPU oracle runner and the host-side production-like bridge +- explicit production-path hooks in `cmd/sdrd` +- comparison and monitoring scaffolding above and below the execution layer +- a prepared GPU execution contract (`StreamingGPUInvocation` / `StreamingGPUExecutionResult`) + +What it does **not** have yet: +- a real native CUDA streaming/polyphase execution entry point with history-in/history-out and phase-count in/out semantics +- a real CUDA-backed implementation behind `StreamingExtractGPUExec(...)` +- completed GPU-vs-oracle validation on the final native execution path + +### C2 plan + 
+#### C2-A — native CUDA / bridge entry preparation +Goal: +- introduce the real native entry shape for stateful streaming/polyphase execution + +Status note before starting C2-A: +- C2 is **not** honestly complete yet because the native CUDA side still only exposes the old separate freq-shift/FIR/decimate pieces. +- Therefore C2-A must begin by creating the real native entry shape rather than continuing to stack more Go-only abstractions on top of the old kernels. + +Required outcomes: +- explicit native/CUDA function signature for streaming execution +- bridge bindings for history in/out, phase count in/out, new samples in, outputs out +- Go-side wrapper ready to call the new native path through the prepared invocation/result model + +#### C2-B — definitive execution implementation hookup +Goal: +- put a real native CUDA-backed execution strategy behind `StreamingExtractGPUExec(...)` + +Status note after C2-A: +- the native entry shape now exists in CUDA, the Windows bridge can resolve it, and the Go execution layer can route into a native-prepared strategy. +- what is still missing for C2-B is the actual stateful execution body behind that new native entrypoint. +- therefore C2-B now means exactly one serious thing: replace the current placeholder body of the new native entrypoint with real stateful streaming/polyphase execution semantics, rather than adding more scaffolding around it. +- C2-B is now materially done: the new native entrypoint no longer returns only placeholder state, and the Go native execution path now uploads inputs/history/taps, runs the new native function, and reads back outputs plus updated state. +- when the new exact-integer streaming decimation rules were turned on, an immediate runtime integration issue appeared: previous WFM extraction defaults expected `outRate=500000`, but the live sample rate was `4096000`, which is not exactly divisible. 
The correct fix is to align streaming defaults with the new integer-decimation model instead of trying to preserve the old rounded ratio behavior.
+- the concrete immediate adjustment made for this was: `wfmStreamOutRate = 512000` (instead of `500000`), because `4096000` is exactly divisible by `512000` (`4096000 / 512000 = 8`), which is consistent with the new streaming architecture’s no-rounding rule.
+
+Required outcomes:
+- `StreamingExtractGPUExec(...)` can execute a real native stateful path
+- host-oracle bridge remains available only as a comparison/support path, not as the disguised production implementation
+- state apply/backflow goes through the already prepared invocation/result contract
+
+#### C2-C — final validation and serious completion gate
+Goal:
+- validate the real CUDA-backed path against the corrected oracle and make the completion criterion explicit
+
+Required outcomes:
+- GPU-vs-oracle comparison active on the real native path
+- test coverage and runtime comparison hooks in place
+- after C2-C, the CUDA story must be treated as complete, correct, and serious — not half-switched or pseudo-finished
+
+#### Why the refactor is intentionally incremental
+The reviewer explicitly required:
+- no start-index-only production patch
+- no continued reliance on overlap+trim as final continuity model
+- no silent decimation rounding
+- no GPU sign-off without a corrected CPU oracle
+
+Because of that, the work is being done in ordered layers:
+1. define streaming types and state
+2. build the CPU oracle with exact streaming semantics
+3. establish shared polyphase/tap semantics
+4. prepare GPU-side persistent state ownership
+5. only then replace the actual production GPU execution path
+
+This means the repo now contains partially completed new architecture pieces that are deliberate stepping stones, not abandoned half-fixes. 
+ +### Reviewer package artifacts created for second-opinion review + +To support external/secondary review of the GPU extractor path, a focused reviewer package was created in the project root: +- `reviewer-gpu-extractor-package/` +- `reviewer-gpu-extractor-package.zip` +- `reviewer-gpu-extractor-package.json` + +The package intentionally contains: +- relevant GPU extractor / kernel code +- surrounding host-path code needed for context +- current debug notes +- a reviewer brief +- a short reviewer prompt +- relevant config files used during live telemetry work + +The JSON variant is uncompressed and stores all included package files as a single JSON document with repeated entries of: +- `path` +- `content` + +This was created specifically so the same reviewer payload can be consumed by tools or APIs that prefer a single structured text file instead of a ZIP archive. + +--- + +## Final resolution — 2026-03-25 + +Status: **SOLVED** + +The final fix set that resolved the audible clicks consisted of **three root-cause fixes** and **two secondary fixes**: + +### Root causes fixed + +1. **IQBalance in-place corruption of shared `allIQ` tail** + - File: `cmd/sdrd/pipeline_runtime.go` + - The surveillance slice (`survIQ`) was an alias of the tail of `allIQ`. + - `dsp.IQBalance(survIQ)` therefore modified the shared `allIQ` buffer in-place. + - The same `allIQ` buffer was then passed into the streaming extractor, creating a discontinuity where the IQ-balanced tail met unbalanced samples. + - Fix: copy `survIQ` before applying IQBalance so extraction sees an unmodified `allIQ` buffer. + +2. **`StreamingConfigHash` forced full extractor state reset every frame** + - File: `internal/demod/gpudemod/streaming_types.go` + - Floating-point jitter in smoothed center frequency caused `offsetHz` / `bandwidth` hash churn. + - That reset extractor history, NCO phase, and decimation phase every frame. 
+ - Fix: hash only structural parameters (`signalID`, `outRate`, `numTaps`, `sampleRate`). + +3. **Non-WFM exact-decimation failure killed the entire streaming batch** + - File: `cmd/sdrd/streaming_refactor.go` + - Hardcoded `200000` output rate was not an exact divisor of `4096000`, so one non-WFM signal could reject the whole batch and silently force fallback to legacy extraction. + - Fix: use nearest exact integer-divisor output rate and keep fallthrough logging visible. + +### Secondary issues fixed + +1. **FM discriminator block-boundary gap** + - File: `internal/recorder/streamer.go` + - The cross-boundary phase step between consecutive IQ blocks was missing. + - Fix: carry the last IQ sample into the next discriminator block. + +2. **Missing 15 kHz lowpass on WFM mono/plain paths** + - File: `internal/recorder/streamer.go` + - Mono fallback / plain WFM paths sent raw discriminator output (pilot/subcarrier/RDS energy) directly into the resampler. + - Fix: add a stateful 15 kHz LPF before resampling on those paths. + +### Final verification summary + +- Before major fixes: + - persistent loud clicking on all signals/modes + - `intra_click_rate` about `110/sec` + - extractor/audio boundary telemetry showed large discontinuities +- After config-hash fix: + - hard clicks disappeared + - large discontinuities dropped sharply + - fine click noise still remained +- After the final `IQBalance` aliasing fix: + - operator listening test confirmed clicks were eliminated + +### Files involved in the final fix set + +- `cmd/sdrd/helpers.go` +- `cmd/sdrd/streaming_refactor.go` +- `cmd/sdrd/pipeline_runtime.go` +- `internal/demod/gpudemod/streaming_types.go` +- `internal/demod/gpudemod/stream_state.go` +- `internal/recorder/streamer.go` + +### Important architectural note + +The CUDA streaming polyphase kernel itself was **not** the root cause. 
+The actual bugs were in the Go-side orchestration around path selection, extractor reset semantics, and mutation of the shared IQ buffer before extraction. + +## Meta note + +This investigation disproved several plausible explanations before landing the final answer. +That mattered, because the eventual root cause was not a single simple DSP bug but a combination of path fallthrough, state-reset churn, and shared-buffer mutation. diff --git a/docs/gpu-streaming-refactor-plan-2026-03-25.md b/docs/gpu-streaming-refactor-plan-2026-03-25.md new file mode 100644 index 0000000..a381078 --- /dev/null +++ b/docs/gpu-streaming-refactor-plan-2026-03-25.md @@ -0,0 +1,48 @@ +# GPU Streaming Refactor Plan (2026-03-25) + +## Goal +Replace the current overlap+trim GPU extractor model with a true stateful per-signal streaming architecture, and build a corrected CPU oracle/reference path for validation. + +## Non-negotiables +- No production start-index-only patch. +- No production overlap-prepend + trim continuity model. +- Exact integer decimation only in the new streaming production path. +- Persistent per-signal state must include NCO phase, FIR history, and decimator phase/residue. +- GPU validation must compare against a corrected CPU oracle, not the legacy CPU fallback. + +## Work order +1. Introduce explicit stateful streaming types in `gpudemod`. +2. Add a clean CPU oracle implementation and monolithic-vs-chunked tests. +3. Add per-signal state ownership in batch runner. +4. Implement new streaming extractor semantics in Go using NEW IQ samples only. +5. Replace legacy GPU-path assumptions (rounding decimation, overlap-prepend, trim-defined validity) in the new path. +6. Add production telemetry that proves state continuity (`phase_count`, `history_len`, `n_out`, reference error). +7. Keep legacy path isolated only for temporary comparison if needed. 
+ +## Initial files in scope +- `internal/demod/gpudemod/batch.go` +- `internal/demod/gpudemod/batch_runner.go` +- `internal/demod/gpudemod/batch_runner_windows.go` +- `internal/demod/gpudemod/kernels.cu` +- `internal/demod/gpudemod/native/exports.cu` +- `cmd/sdrd/helpers.go` + +## Immediate implementation strategy +### Phase 1 +- Create explicit streaming state structs in Go. +- Add CPU oracle/reference path with exact semantics and tests. +- Introduce exact integer-decimation checks. + +### Phase 2 +- Rework batch runner to own persistent per-signal state. +- Add config-hash-based resets. +- Stop modeling continuity via overlap tail in the new path. + +### Phase 3 +- Introduce a real streaming GPU entry path that consumes NEW shifted samples plus carried state. +- Move to a stateful polyphase decimator model. + +## Validation expectations +- CPU oracle monolithic == CPU oracle chunked within tolerance. +- GPU streaming output == CPU oracle chunked within tolerance. +- Former periodic block-boundary clicks gone in real-world testing. diff --git a/docs/known-issues.md b/docs/known-issues.md new file mode 100644 index 0000000..02860ac --- /dev/null +++ b/docs/known-issues.md @@ -0,0 +1,196 @@ +# Known Issues + +This file tracks durable open engineering issues that remain after the 2026-03-25 audio-click fix. + +Primary source: +- `docs/open-issues-report-2026-03-25.json` + +Status values used here: +- `open` +- `deferred` +- `info` + +--- + +## High Priority + +### OI-02 — `lastDiscrimIQ` missing from `dspStateSnapshot` +- Status: `open` +- Severity: High +- Category: state-continuity +- File: `internal/recorder/streamer.go` +- Summary: FM discriminator bridging state is not preserved across `captureDSPState()` / `restoreDSPState()`, so recording segment splits can lose the final IQ sample and create a micro-click at the segment boundary. +- Recommended fix: add `lastDiscrimIQ` and `lastDiscrimIQSet` to `dspStateSnapshot`. 
+- Source: `docs/open-issues-report-2026-03-25.json` (OI-02) + +### OI-03 — CPU oracle path not yet usable as validation baseline +- Status: `open` +- Severity: High +- Category: architecture +- File: `cmd/sdrd/streaming_refactor.go`, `internal/demod/gpudemod/cpu_oracle.go` +- Summary: the CPU oracle exists, but the production comparison/integration path is not trusted yet. That means GPU-path regressions still cannot be checked automatically with confidence. +- Recommended fix: repair oracle integration and restore GPU-vs-CPU validation flow. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-03) + +### OI-18 — planned C2-C validation gate never completed +- Status: `open` +- Severity: Info +- Category: architecture +- File: `docs/audio-click-debug-notes-2026-03-24.md` +- Summary: the final native streaming path works in practice, but the planned formal GPU-vs-oracle validation gate was never completed. +- Recommended fix: complete this together with OI-03. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-18) + +--- + +## Medium Priority + +### OI-14 — no regression test for `allIQ` immutability through spectrum/detection pipeline +- Status: `open` +- Severity: Low +- Category: test-coverage +- File: `cmd/sdrd/pipeline_runtime.go` +- Summary: the `IQBalance` aliasing bug showed that shared-buffer mutation can slip in undetected. There is still no test asserting that `allIQ` remains unchanged after capture/detection-side processing. +- Recommended fix: add an integration test that compares `allIQ` before and after the relevant pipeline stage. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-14) + +### OI-15 — very low test coverage for `processSnippet` audio pipeline +- Status: `open` +- Severity: Low +- Category: test-coverage +- File: `internal/recorder/streamer.go` +- Summary: the main live audio pipeline still lacks focused tests for boundary continuity, WFM mono/stereo behavior, resampling, and demod-path regressions. 
+- Recommended fix: add synthetic fixtures and continuity-oriented tests around repeated `processSnippet` calls. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-15) + +### OI-07 — taps are recalculated every frame +- Status: `open` +- Severity: Medium +- Category: correctness +- File: `internal/demod/gpudemod/stream_state.go` +- Summary: FIR/polyphase taps are recomputed every frame even when parameters do not change, which is unnecessary work and makes it easier for host/GPU tap state to drift apart. +- Recommended fix: only rebuild taps when tap-relevant inputs actually change. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-07) + +### OI-17 — bandwidth changes can change Go-side taps without GPU tap re-upload +- Status: `open` +- Severity: Low-Medium +- Category: correctness +- File: `internal/demod/gpudemod/streaming_gpu_native_prepare.go`, `internal/demod/gpudemod/stream_state.go` +- Summary: after the config-hash fix, a bandwidth change may rebuild taps on the Go side while the GPU still keeps older uploaded taps unless a reset happens. +- Recommended fix: add a separate tap-change detection/re-upload path without forcing full extractor reset. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-17) + +### OI-09 — streaming feature flags are compile-time constants +- Status: `open` +- Severity: Medium +- Category: architecture +- File: `cmd/sdrd/streaming_refactor.go`, `internal/demod/gpudemod/streaming_gpu_modes.go` +- Summary: switching between production/oracle/native-host modes still requires code changes and rebuilds, which makes field debugging and A/B validation harder than necessary. +- Recommended fix: expose these as config or environment-driven switches. 
+- Source: `docs/open-issues-report-2026-03-25.json` (OI-09) + +### OI-05 — feed channel is shallow and can drop frames under pressure +- Status: `open` +- Severity: Medium +- Category: reliability +- File: `internal/recorder/streamer.go` +- Summary: `feedCh` has a buffer of only 2. Under heavier processing or debug load, dropped feed messages can create audible gaps. +- Recommended fix: increase channel depth or redesign backpressure behavior. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-05) + +### OI-06 — legacy overlap/trim extractor path is now mostly legacy baggage +- Status: `deferred` +- Severity: Medium +- Category: dead-code +- File: `cmd/sdrd/helpers.go` +- Summary: the old overlap/trim path is now mainly fallback/legacy code and adds complexity plus old instrumentation noise. +- Recommended fix: isolate, simplify, or remove it once the production path and fallback strategy are formally settled. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-06) + +### OI-04 — telemetry history storage still uses append+copy trim +- Status: `deferred` +- Severity: Medium +- Category: telemetry +- File: `internal/telemetry/telemetry.go` +- Summary: heavy telemetry can still create avoidable allocation/copy pressure because history trimming is O(n) and happens under lock. +- Recommended fix: replace with a ring-buffer design. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-04) + +--- + +## Lower Priority / Nice-to-Have + +### OI-01 — `DCBlocker.Apply(allIQ)` still mutates extraction input in-place +- Status: `deferred` +- Severity: High +- Category: data-integrity +- File: `cmd/sdrd/pipeline_runtime.go` +- Summary: unlike the old `IQBalance` bug this does not create a boundary artifact, but it does mean live extraction and recorded/replayed data are not semantically identical. +- Recommended fix: clarify the contract or move to immutable/copy-based handling. 
+- Source: `docs/open-issues-report-2026-03-25.json` (OI-01) + +### OI-08 — WFM audio LPF could reject pilot more strongly +- Status: `deferred` +- Severity: Medium +- Category: audio-quality +- File: `internal/recorder/streamer.go` +- Summary: the current 15 kHz LPF is good enough functionally, but a steeper filter could further improve pilot suppression. +- Recommended fix: more taps or a dedicated pilot notch. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-08) + +### OI-10 — `demod.wav` debug dumps can clip and mislead analysis +- Status: `deferred` +- Severity: Medium +- Category: correctness +- File: `internal/recorder/streamer.go`, `internal/recorder/wavwriter.go` +- Summary: raw discriminator output can exceed the WAV writer's `[-1,+1]` clip range, so debug dumps can show artifacts that are not part of the real downstream audio path. +- Recommended fix: scale by `1/pi` before dumping or use float WAV output. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-10) + +### OI-11 — browser AudioContext resync still causes audible micro-gaps +- Status: `deferred` +- Severity: Low +- Category: reliability +- File: `web/app.js` +- Summary: underrun recovery is softened with a fade-in, but repeated resyncs still create audible stutter on the browser side. +- Recommended fix: prefer the AudioWorklet/ring-player path wherever possible. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-11) + +### OI-12 — tiny per-frame tail copy for boundary telemetry +- Status: `info` +- Severity: Low +- Category: performance +- File: `cmd/sdrd/pipeline_runtime.go` +- Summary: the last-32-sample copy is trivial and not urgent, but it is one more small allocation in a path that already has several. +- Recommended fix: none needed unless a broader allocation cleanup happens. 
+- Source: `docs/open-issues-report-2026-03-25.json` (OI-12) + +### OI-13 — temporary patch artifacts should not live in the repo long-term +- Status: `deferred` +- Severity: Low +- Category: dead-code +- File: `patches/*` +- Summary: reviewer/debug patch artifacts were useful during the investigation, but they should either be removed or archived under docs rather than kept as loose patch files. +- Recommended fix: delete or archive them once no longer needed. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-13) + +### OI-16 — `config.autosave.yaml` can re-enable unwanted debug telemetry after restart +- Status: `deferred` +- Severity: Low +- Category: config +- File: `config.autosave.yaml` +- Summary: autosave can silently restore debug-heavy telemetry settings after restart and distort future runs. +- Recommended fix: stop persisting debug telemetry knobs to autosave or explicitly ignore them. +- Source: `docs/open-issues-report-2026-03-25.json` (OI-16) + +--- + +## Suggested next execution order + +1. Fix OI-02 (`lastDiscrimIQ` snapshot/restore) +2. Repair OI-03 and close OI-18 (oracle + formal validation path) +3. Add OI-14 and OI-15 regression tests +4. Consolidate OI-07 and OI-17 (tap rebuild / tap upload logic) +5. Expose OI-09 feature flags via config or env +6. Revisit OI-05 / OI-06 / OI-04 when doing reliability/cleanup work diff --git a/docs/telemetry-api.md b/docs/telemetry-api.md new file mode 100644 index 0000000..9ac8672 --- /dev/null +++ b/docs/telemetry-api.md @@ -0,0 +1,711 @@ +# Telemetry API Reference + +This document describes the server-side telemetry collector, its runtime configuration, and the HTTP API exposed by `sdrd`. + +The telemetry system is intended for debugging and performance analysis of the SDR pipeline, especially around source cadence, extraction, DSP timing, boundary artifacts, queue pressure, and other runtime anomalies. + +## Goals + +The telemetry layer gives you three different views of runtime state: + +1. 
**Live snapshot**
+   - Current counters, gauges, distributions, recent events, and collector status.
+2. **Historical metrics**
+   - Timestamped metric samples that can be filtered by name, prefix, or tags.
+3. **Historical events**
+   - Structured anomalies / warnings / debug events with optional fields.
+
+It is designed to be lightweight in normal operation and more detailed when `heavy_enabled` is turned on.
+
+---
+
+## Base URLs
+
+All telemetry endpoints live under:
+
+- `/api/debug/telemetry/live`
+- `/api/debug/telemetry/history`
+- `/api/debug/telemetry/events`
+- `/api/debug/telemetry/config`
+
+Responses are JSON.
+
+---
+
+## Data model
+
+### Metric types
+
+Telemetry metrics are stored in three logical groups:
+
+- **counter**
+  - Accumulating values, usually incremented over time.
+- **gauge**
+  - Latest current value.
+- **distribution**
+  - Observed numeric samples with summary stats.
+
+A historical metric sample is returned as:
+
+```json
+{
+  "ts": "2026-03-25T12:00:00Z",
+  "name": "stage.extract_stream.duration_ms",
+  "type": "distribution",
+  "value": 4.83,
+  "tags": {
+    "stage": "extract_stream",
+    "signal_id": "1"
+  }
+}
+```
+
+### Events
+
+Telemetry events are structured anomaly/debug records:
+
+```json
+{
+  "id": 123,
+  "ts": "2026-03-25T12:00:02Z",
+  "name": "demod_boundary",
+  "level": "warn",
+  "message": "boundary discontinuity detected",
+  "tags": {
+    "signal_id": "1",
+    "stage": "demod"
+  },
+  "fields": {
+    "d2": 0.3358,
+    "index": 25
+  }
+}
+```
+
+### Tags
+
+Tags are string key/value metadata used for filtering and correlation.
+
+Common tag keys already supported by the HTTP layer:
+
+- `signal_id`
+- `session_id`
+- `stage`
+- `trace_id`
+- `component`
+
+You can also filter on arbitrary tags via `tag_<key>=<value>` query parameters.
+
+---
+
+## Endpoint: `GET /api/debug/telemetry/live`
+
+Returns a live snapshot of the in-memory collector state. 
+ +### Response shape + +```json +{ + "now": "2026-03-25T12:00:05Z", + "started_at": "2026-03-25T11:52:10Z", + "uptime_ms": 472500, + "config": { + "enabled": true, + "heavy_enabled": false, + "heavy_sample_every": 12, + "metric_sample_every": 2, + "metric_history_max": 12000, + "event_history_max": 4000, + "retention": 900000000000, + "persist_enabled": false, + "persist_dir": "debug/telemetry", + "rotate_mb": 16, + "keep_files": 8 + }, + "counters": [ + { + "name": "source.resets", + "value": 1, + "tags": { + "component": "source" + } + } + ], + "gauges": [ + { + "name": "source.buffer_samples", + "value": 304128, + "tags": { + "component": "source" + } + } + ], + "distributions": [ + { + "name": "dsp.frame.duration_ms", + "count": 96, + "min": 82.5, + "max": 212.4, + "mean": 104.8, + "last": 98.3, + "p95": 149.2, + "tags": { + "stage": "dsp" + } + } + ], + "recent_events": [], + "status": { + "source_state": "running" + } +} +``` + +### Notes + +- `counters`, `gauges`, and `distributions` are sorted by metric name. +- `recent_events` contains the most recent in-memory event slice. +- `status` is optional and contains arbitrary runtime status published by code using `SetStatus(...)`. +- If telemetry is unavailable, the server returns a small JSON object instead of a full snapshot. + +### Typical uses + +- Check whether telemetry is enabled. +- Look for timing hotspots in `*.duration_ms` distributions. +- Inspect current queue or source gauges. +- See recent anomaly events without querying history. + +--- + +## Endpoint: `GET /api/debug/telemetry/history` + +Returns historical metric samples from in-memory history and, optionally, persisted JSONL files. 
+
+### Response shape
+
+```json
+{
+  "items": [
+    {
+      "ts": "2026-03-25T12:00:01Z",
+      "name": "stage.extract_stream.duration_ms",
+      "type": "distribution",
+      "value": 5.2,
+      "tags": {
+        "stage": "extract_stream",
+        "signal_id": "2"
+      }
+    }
+  ],
+  "count": 1
+}
+```
+
+### Supported query parameters
+
+#### Time filters
+
+- `since`
+- `until`
+
+Accepted formats:
+
+- Unix seconds
+- Unix milliseconds
+- RFC3339
+- RFC3339Nano
+
+Examples:
+
+- `?since=1711368000`
+- `?since=1711368000123`
+- `?since=2026-03-25T12:00:00Z`
+
+#### Result shaping
+
+- `limit`
+  - Default normalization is 500.
+  - Values above 5000 are clamped down by the collector query layer.
+
+#### Name filters
+
+- `name=<exact-metric-name>`
+- `prefix=<metric-name-prefix>`
+
+Examples:
+
+- `?name=source.read.duration_ms`
+- `?prefix=stage.`
+- `?prefix=iq.extract.`
+
+#### Tag filters
+
+Special convenience query params map directly to tag filters:
+
+- `signal_id`
+- `session_id`
+- `stage`
+- `trace_id`
+- `component`
+
+Arbitrary tag filters:
+
+- `tag_<key>=<value>`
+
+Examples:
+
+- `?signal_id=1`
+- `?stage=extract_stream`
+- `?tag_path=gpu`
+- `?tag_zone=broadcast`
+
+#### Persistence control
+
+- `include_persisted=true|false`
+  - Default: `true`
+
+When enabled and persistence is active, the server reads matching data from rotated JSONL telemetry files in addition to in-memory history.
+
+### Notes
+
+- Results are sorted by timestamp ascending.
+- If `limit` is hit, the most recent matching items are retained.
+- Exact retention depends on both in-memory retention and persisted file availability.
+- A small set of boundary-related IQ metrics is force-stored regardless of the normal metric sample cadence.
+
+### Typical queries
+
+Get all stage timing since a specific start:
+
+```text
+/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=stage. 
+```
+
+Get extraction metrics for a single signal:
+
+```text
+/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=extract.&signal_id=2
+```
+
+Get source cadence metrics only from in-memory history:
+
+```text
+/api/debug/telemetry/history?prefix=source.&include_persisted=false
+```
+
+---
+
+## Endpoint: `GET /api/debug/telemetry/events`
+
+Returns historical telemetry events from memory and, optionally, persisted storage.
+
+### Response shape
+
+```json
+{
+  "items": [
+    {
+      "id": 991,
+      "ts": "2026-03-25T12:00:03Z",
+      "name": "source_reset",
+      "level": "warn",
+      "message": "source reader reset observed",
+      "tags": {
+        "component": "source"
+      },
+      "fields": {
+        "reason": "short_read"
+      }
+    }
+  ],
+  "count": 1
+}
+```
+
+### Supported query parameters
+
+All `history` filters are also supported here, plus:
+
+- `level=<level>`
+
+Examples:
+
+- `?since=2026-03-25T12:00:00Z&level=warn`
+- `?prefix=audio.&signal_id=1`
+- `?name=demod_boundary&signal_id=1`
+
+### Notes
+
+- Event matching supports `name`, `prefix`, `level`, time range, and tags.
+- Event `level` matching is case-insensitive.
+- Results are timestamp-sorted ascending.
+
+### Typical queries
+
+Get warnings during a reproduction run:
+
+```text
+/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&level=warn
+```
+
+Get boundary-related events for one signal:
+
+```text
+/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&signal_id=1&prefix=demod_
+```
+
+---
+
+## Endpoint: `GET /api/debug/telemetry/config`
+
+Returns both:
+
+1. the active collector configuration, and
+2. 
the current runtime config under `debug.telemetry` + +### Response shape + +```json +{ + "collector": { + "enabled": true, + "heavy_enabled": false, + "heavy_sample_every": 12, + "metric_sample_every": 2, + "metric_history_max": 12000, + "event_history_max": 4000, + "retention": 900000000000, + "persist_enabled": false, + "persist_dir": "debug/telemetry", + "rotate_mb": 16, + "keep_files": 8 + }, + "config": { + "enabled": true, + "heavy_enabled": false, + "heavy_sample_every": 12, + "metric_sample_every": 2, + "metric_history_max": 12000, + "event_history_max": 4000, + "retention_seconds": 900, + "persist_enabled": false, + "persist_dir": "debug/telemetry", + "rotate_mb": 16, + "keep_files": 8 + } +} +``` + +### Important distinction + +- `collector.retention` is a Go duration serialized in nanoseconds. +- `config.retention_seconds` is the config-facing field used by YAML and the POST update API. + +If you are writing tooling, prefer `config.retention_seconds` for human-facing config edits. + +--- + +## Endpoint: `POST /api/debug/telemetry/config` + +Updates telemetry settings at runtime and writes them back via the autosave config path. + +### Request body + +All fields are optional. Only provided fields are changed. 
+ +```json +{ + "enabled": true, + "heavy_enabled": true, + "heavy_sample_every": 8, + "metric_sample_every": 1, + "metric_history_max": 20000, + "event_history_max": 6000, + "retention_seconds": 1800, + "persist_enabled": true, + "persist_dir": "debug/telemetry", + "rotate_mb": 32, + "keep_files": 12 +} +``` + +### Response shape + +```json +{ + "ok": true, + "collector": { + "enabled": true, + "heavy_enabled": true, + "heavy_sample_every": 8, + "metric_sample_every": 1, + "metric_history_max": 20000, + "event_history_max": 6000, + "retention": 1800000000000, + "persist_enabled": true, + "persist_dir": "debug/telemetry", + "rotate_mb": 32, + "keep_files": 12 + }, + "config": { + "enabled": true, + "heavy_enabled": true, + "heavy_sample_every": 8, + "metric_sample_every": 1, + "metric_history_max": 20000, + "event_history_max": 6000, + "retention_seconds": 1800, + "persist_enabled": true, + "persist_dir": "debug/telemetry", + "rotate_mb": 32, + "keep_files": 12 + } +} +``` + +### Persistence behavior + +A POST updates: + +- the runtime manager snapshot/config +- the in-process collector config +- the autosave config file via `config.Save(...)` + +That means these updates are runtime-effective immediately and also survive restarts through autosave, unless manually reverted. + +### Error cases + +- Invalid JSON -> `400 Bad Request` +- Invalid collector reconfiguration -> `400 Bad Request` +- Telemetry unavailable -> `503 Service Unavailable` + +--- + +## Configuration fields (`debug.telemetry`) + +Telemetry config lives under: + +```yaml +debug: + telemetry: + enabled: true + heavy_enabled: false + heavy_sample_every: 12 + metric_sample_every: 2 + metric_history_max: 12000 + event_history_max: 4000 + retention_seconds: 900 + persist_enabled: false + persist_dir: debug/telemetry + rotate_mb: 16 + keep_files: 8 +``` + +### Field reference + +#### `enabled` +Master on/off switch for telemetry collection. 
+ +If false: +- metrics are not recorded +- events are not recorded +- live snapshot remains effectively empty/minimal + +#### `heavy_enabled` +Enables more expensive / more detailed telemetry paths that should not be left on permanently unless needed. + +Use this for deep extractor/IQ/boundary debugging. + +#### `heavy_sample_every` +Sampling cadence for heavy telemetry. + +- `1` means every eligible heavy sample +- higher numbers reduce cost by sampling less often + +#### `metric_sample_every` +Sampling cadence for normal historical metric point storage. + +Collector summaries still update live, but historical storage becomes less dense when this value is greater than 1. + +#### `metric_history_max` +Maximum number of in-memory historical metric samples retained. + +#### `event_history_max` +Maximum number of in-memory telemetry events retained. + +#### `retention_seconds` +Time-based in-memory retention window. + +Older in-memory metrics/events are trimmed once they fall outside this retention period. + +#### `persist_enabled` +When enabled, telemetry metrics/events are also appended to rotated JSONL files. + +#### `persist_dir` +Directory where rotated telemetry JSONL files are written. + +Default: + +- `debug/telemetry` + +#### `rotate_mb` +Approximate JSONL file rotation threshold in megabytes. + +#### `keep_files` +How many rotated telemetry files to retain in `persist_dir`. + +Older files beyond this count are pruned. + +--- + +## Collector behavior and caveats + +### In-memory vs persisted data + +The query endpoints can read from both: + +- current in-memory collector state/history +- persisted JSONL files + +This means a request may return data older than current in-memory retention if: + +- `persist_enabled=true`, and +- `include_persisted=true` + +### Sampling behavior + +Not every observation necessarily becomes a historical metric point. 
+ +The collector: + +- always updates live counters/gauges/distributions while enabled +- stores historical points according to `metric_sample_every` +- force-stores selected boundary IQ metrics even when sampling would normally skip them + +So the live snapshot and historical series density are intentionally different. + +### Distribution summaries + +Distribution values in the live snapshot include: + +- `count` +- `min` +- `max` +- `mean` +- `last` +- `p95` + +The p95 estimate is based on the collector's bounded rolling sample buffer, not an unbounded full-history quantile computation. + +### Config serialization detail + +The collector's `retention` field is a Go duration. In JSON this appears as an integer nanosecond count. + +This is expected. + +--- + +## Recommended workflows + +### Fast low-overhead runtime watch + +Use: + +- `enabled=true` +- `heavy_enabled=false` +- `persist_enabled=false` or `true` if you want an archive + +Then query: + +- `/api/debug/telemetry/live` +- `/api/debug/telemetry/history?prefix=stage.` +- `/api/debug/telemetry/events?level=warn` + +### 5-10 minute anomaly capture + +Suggested settings: + +- `enabled=true` +- `heavy_enabled=false` +- `persist_enabled=true` +- moderate `metric_sample_every` + +Then: + +1. note start time +2. reproduce workload +3. fetch live snapshot +4. inspect warning events +5. inspect `stage.*`, `streamer.*`, and `source.*` history + +### Deep extractor / boundary investigation + +Temporarily enable: + +- `heavy_enabled=true` +- `heavy_sample_every` > 1 unless you really need every sample +- `persist_enabled=true` + +Then inspect: + +- `iq.*` +- `extract.*` +- `audio.*` +- boundary/anomaly events for specific `signal_id` or `session_id` + +Turn heavy telemetry back off once done. 
+ +--- + +## Example requests + +### Fetch live snapshot + +```bash +curl http://localhost:8080/api/debug/telemetry/live +``` + +### Fetch stage timings from the last 10 minutes + +```bash +curl "http://localhost:8080/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=stage." +``` + +### Fetch source metrics for one signal + +```bash +curl "http://localhost:8080/api/debug/telemetry/history?prefix=source.&signal_id=1" +``` + +### Fetch warning events only + +```bash +curl "http://localhost:8080/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&level=warn" +``` + +### Fetch events with a custom tag filter + +```bash +curl "http://localhost:8080/api/debug/telemetry/events?tag_zone=broadcast" +``` + +### Enable persistence and heavy telemetry temporarily + +```bash +curl -X POST http://localhost:8080/api/debug/telemetry/config \ + -H "Content-Type: application/json" \ + -d '{ + "heavy_enabled": true, + "heavy_sample_every": 8, + "persist_enabled": true + }' +``` + +--- + +## Related docs + +- `README.md` - high-level project overview and endpoint summary +- `docs/telemetry-debug-runbook.md` - quick operational runbook for short debug sessions +- `internal/telemetry/telemetry.go` - collector implementation details +- `cmd/sdrd/http_handlers.go` - HTTP wiring for telemetry endpoints diff --git a/docs/telemetry-debug-runbook.md b/docs/telemetry-debug-runbook.md new file mode 100644 index 0000000..4b14c87 --- /dev/null +++ b/docs/telemetry-debug-runbook.md @@ -0,0 +1,100 @@ +# Debug Telemetry Runbook + +This project now includes structured server-side telemetry for the audio/DSP pipeline. + +## Endpoints + +- `GET /api/debug/telemetry/live` + - Current counters/gauges/distributions and recent events. +- `GET /api/debug/telemetry/history` + - Historical metric samples. 
+  - Query params:
+    - `since`, `until`: unix seconds/ms or RFC3339
+    - `limit`
+    - `name`, `prefix`
+    - `signal_id`, `session_id`, `stage`, `trace_id`, `component`
+    - `tag_<key>=<value>` for arbitrary tag filters
+    - `include_persisted=true|false`
+- `GET /api/debug/telemetry/events`
+  - Historical events/anomalies.
+  - Same filters as history plus `level`.
+- `GET /api/debug/telemetry/config`
+  - Active telemetry config from runtime + collector.
+- `POST /api/debug/telemetry/config`
+  - Runtime config update (also saved to autosave config).
+
+## Config knobs
+
+`debug.telemetry` in config:
+
+- `enabled`
+- `heavy_enabled`
+- `heavy_sample_every`
+- `metric_sample_every`
+- `metric_history_max`
+- `event_history_max`
+- `retention_seconds`
+- `persist_enabled`
+- `persist_dir`
+- `rotate_mb`
+- `keep_files`
+
+Persisted JSONL files rotate in `persist_dir` (default: `debug/telemetry`).
+
+## 5-10 minute debug flow
+
+1. Keep `enabled=true`, `heavy_enabled=false`, `persist_enabled=true`.
+2. Run workload for 5-10 minutes.
+3. Pull live state:
+   - `GET /api/debug/telemetry/live`
+4. Pull anomalies:
+   - `GET /api/debug/telemetry/events?since=<start>&level=warn`
+5. Pull pipeline timing and queue/backpressure:
+   - `GET /api/debug/telemetry/history?since=<start>&prefix=stage.`
+   - `GET /api/debug/telemetry/history?since=<start>&prefix=streamer.`
+6. If IQ boundary issues persist, temporarily set `heavy_enabled=true` (keep sampling coarse with `heavy_sample_every` > 1), rerun, then inspect `iq.*` metrics and `audio.*` anomalies by `signal_id`/`session_id`.
+
+## 2026-03-25 audio click incident — final resolved summary
+
+Status: **SOLVED**
+
+The March 2026 live-audio click investigation ultimately converged on a combination of three real root causes plus two secondary fixes:
+
+### Root causes
+
+1. 
**Shared `allIQ` corruption by `IQBalance` aliasing** + - `cmd/sdrd/pipeline_runtime.go` + - `survIQ` aliased the tail of `allIQ` + - `dsp.IQBalance(survIQ)` modified `allIQ` in-place + - extractor then saw a corrupted boundary inside the shared buffer + - final fix: copy `survIQ` before `IQBalance` + +2. **Per-frame extractor reset due to `StreamingConfigHash` jitter** + - `internal/demod/gpudemod/streaming_types.go` + - smoothed tuning values changed slightly every frame + - offset/bandwidth in the hash caused repeated state resets + - final fix: hash only structural parameters + +3. **Streaming path batch rejection for non-WFM exact-decimation mismatch** + - `cmd/sdrd/streaming_refactor.go` + - one non-WFM signal could reject the whole batch and silently force fallback to the legacy path + - final fix: choose nearest exact integer-divisor output rate and keep fallback logging visible + +### Secondary fixes + +- FM discriminator cross-block carry in `internal/recorder/streamer.go` +- WFM mono/plain-path 15 kHz audio lowpass in `internal/recorder/streamer.go` + +### Verification notes + +- major discontinuities dropped sharply after the config-hash fix +- remaining fine clicks were eliminated only after the `IQBalance` aliasing fix in `pipeline_runtime.go` +- final confirmation was by operator listening test, backed by prior telemetry and WAV analysis + +### Practical lesson + +When the same captured `allIQ` buffer feeds both: +- surveillance/detail analysis +- and extraction/streaming + +then surveillance-side DSP helpers must not mutate a shared sub-slice in-place unless that mutation is intentionally part of the extraction contract. 
diff --git a/internal/config/config.go b/internal/config/config.go index 1cd1fb7..66f0c9a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -96,6 +96,26 @@ type DecoderConfig struct { PSKCmd string `yaml:"psk_cmd" json:"psk_cmd"` } +type DebugConfig struct { + AudioDumpEnabled bool `yaml:"audio_dump_enabled" json:"audio_dump_enabled"` + CPUMonitoring bool `yaml:"cpu_monitoring" json:"cpu_monitoring"` + Telemetry TelemetryConfig `yaml:"telemetry" json:"telemetry"` +} + +type TelemetryConfig struct { + Enabled bool `yaml:"enabled" json:"enabled"` + HeavyEnabled bool `yaml:"heavy_enabled" json:"heavy_enabled"` + HeavySampleEvery int `yaml:"heavy_sample_every" json:"heavy_sample_every"` + MetricSampleEvery int `yaml:"metric_sample_every" json:"metric_sample_every"` + MetricHistoryMax int `yaml:"metric_history_max" json:"metric_history_max"` + EventHistoryMax int `yaml:"event_history_max" json:"event_history_max"` + RetentionSeconds int `yaml:"retention_seconds" json:"retention_seconds"` + PersistEnabled bool `yaml:"persist_enabled" json:"persist_enabled"` + PersistDir string `yaml:"persist_dir" json:"persist_dir"` + RotateMB int `yaml:"rotate_mb" json:"rotate_mb"` + KeepFiles int `yaml:"keep_files" json:"keep_files"` +} + type PipelineGoalConfig struct { Intent string `yaml:"intent" json:"intent"` MonitorStartHz float64 `yaml:"monitor_start_hz" json:"monitor_start_hz"` @@ -169,6 +189,7 @@ type Config struct { Detector DetectorConfig `yaml:"detector" json:"detector"` Recorder RecorderConfig `yaml:"recorder" json:"recorder"` Decoder DecoderConfig `yaml:"decoder" json:"decoder"` + Debug DebugConfig `yaml:"debug" json:"debug"` Logging LogConfig `yaml:"logging" json:"logging"` WebAddr string `yaml:"web_addr" json:"web_addr"` EventPath string `yaml:"event_path" json:"event_path"` @@ -421,6 +442,23 @@ func Default() Config { ExtractionBwMult: 1.2, }, Decoder: DecoderConfig{}, + Debug: DebugConfig{ + AudioDumpEnabled: false, + CPUMonitoring: false, + 
Telemetry: TelemetryConfig{ + Enabled: true, + HeavyEnabled: false, + HeavySampleEvery: 12, + MetricSampleEvery: 2, + MetricHistoryMax: 12000, + EventHistoryMax: 4000, + RetentionSeconds: 900, + PersistEnabled: false, + PersistDir: "debug/telemetry", + RotateMB: 16, + KeepFiles: 8, + }, + }, Logging: LogConfig{ Level: "informal", Categories: []string{}, @@ -664,6 +702,30 @@ func applyDefaults(cfg Config) Config { if cfg.Recorder.ExtractionBwMult <= 0 { cfg.Recorder.ExtractionBwMult = 1.2 } + if cfg.Debug.Telemetry.HeavySampleEvery <= 0 { + cfg.Debug.Telemetry.HeavySampleEvery = 12 + } + if cfg.Debug.Telemetry.MetricSampleEvery <= 0 { + cfg.Debug.Telemetry.MetricSampleEvery = 2 + } + if cfg.Debug.Telemetry.MetricHistoryMax <= 0 { + cfg.Debug.Telemetry.MetricHistoryMax = 12000 + } + if cfg.Debug.Telemetry.EventHistoryMax <= 0 { + cfg.Debug.Telemetry.EventHistoryMax = 4000 + } + if cfg.Debug.Telemetry.RetentionSeconds <= 0 { + cfg.Debug.Telemetry.RetentionSeconds = 900 + } + if cfg.Debug.Telemetry.PersistDir == "" { + cfg.Debug.Telemetry.PersistDir = "debug/telemetry" + } + if cfg.Debug.Telemetry.RotateMB <= 0 { + cfg.Debug.Telemetry.RotateMB = 16 + } + if cfg.Debug.Telemetry.KeepFiles <= 0 { + cfg.Debug.Telemetry.KeepFiles = 8 + } return cfg } diff --git a/internal/demod/fm.go b/internal/demod/fm.go index 6c1d153..7fe05dc 100644 --- a/internal/demod/fm.go +++ b/internal/demod/fm.go @@ -4,6 +4,7 @@ import ( "math" "sdr-wideband-suite/internal/dsp" + "sdr-wideband-suite/internal/logging" ) type NFM struct{} @@ -45,12 +46,45 @@ func fmDiscrim(iq []complex64) []float32 { return nil } out := make([]float32, len(iq)-1) + maxAbs := 0.0 + maxIdx := 0 + largeSteps := 0 + minMag := math.MaxFloat64 + maxMag := 0.0 for i := 1; i < len(iq); i++ { p := iq[i-1] c := iq[i] + pmag := math.Hypot(float64(real(p)), float64(imag(p))) + cmag := math.Hypot(float64(real(c)), float64(imag(c))) + if pmag < minMag { + minMag = pmag + } + if cmag < minMag { + minMag = cmag + } + if pmag > 
maxMag { + maxMag = pmag + } + if cmag > maxMag { + maxMag = cmag + } num := float64(real(p))*float64(imag(c)) - float64(imag(p))*float64(real(c)) den := float64(real(p))*float64(real(c)) + float64(imag(p))*float64(imag(c)) - out[i-1] = float32(math.Atan2(num, den)) + step := math.Atan2(num, den) + if a := math.Abs(step); a > maxAbs { + maxAbs = a + maxIdx = i - 1 + } + if math.Abs(step) > 1.5 { + largeSteps++ + } + out[i-1] = float32(step) + } + if logging.EnabledCategory("discrim") { + logging.Debug("discrim", "fm_meter", "iq_len", len(iq), "audio_len", len(out), "min_mag", minMag, "max_mag", maxMag, "max_abs_step", maxAbs, "max_idx", maxIdx, "large_steps", largeSteps) + if largeSteps > 0 { + logging.Warn("discrim", "fm_large_steps", "iq_len", len(iq), "large_steps", largeSteps, "max_abs_step", maxAbs, "max_idx", maxIdx, "min_mag", minMag, "max_mag", maxMag) + } } return out } diff --git a/internal/demod/gpudemod/README.md b/internal/demod/gpudemod/README.md deleted file mode 100644 index adcd2a6..0000000 --- a/internal/demod/gpudemod/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# gpudemod - -Phase 1 CUDA demod scaffolding. - -## Current state - -- Standard Go builds use `gpudemod_stub.go` (`!cufft`). -- `cufft` builds allocate GPU buffers and cross the CGO/CUDA launch boundary. -- If CUDA launch wrappers are not backed by compiled kernels yet, the code falls back to CPU DSP. -- The shifted IQ path is already wired so a successful GPU freq-shift result can be copied back and reused immediately. -- Build orchestration should now be considered OS-specific; see `docs/build-cuda.md`. - -## First real kernel - -`kernels.cu` contains the first candidate implementation: -- `gpud_freq_shift_kernel` - -This is **not compiled automatically yet** in the current environment because the machine currently lacks a CUDA compiler toolchain in PATH (`nvcc` not found). - -## Next machine-side step - -On a CUDA-capable dev machine with toolchain installed: - -1. 
Compile `kernels.cu` into an object file and archive it into a linkable library - - helper script: `tools/build-gpudemod-kernel.ps1` -2. On Jan's Windows machine, the working kernel-build path currently relies on `nvcc` + MSVC `cl.exe` in PATH -3. Link `gpudemod_kernels.lib` into the `cufft` build -3. Replace `gpud_launch_freq_shift(...)` stub body with the real kernel launch -4. Validate copied-back shifted IQ against `dsp.FreqShift` -5. Only then move the next stage (FM discriminator) onto the GPU - -## Why this is still useful - -The runtime/buffer/recorder/fallback structure is already in place, so once kernel compilation is available, real acceleration can be inserted without another architecture rewrite. diff --git a/internal/demod/gpudemod/batch.go b/internal/demod/gpudemod/batch.go index 6bbf9df..df6af46 100644 --- a/internal/demod/gpudemod/batch.go +++ b/internal/demod/gpudemod/batch.go @@ -6,7 +6,7 @@ type ExtractJob struct { OffsetHz float64 BW float64 OutRate int - PhaseStart float64 // FreqShift starting phase (0 for stateless, carry over for streaming) + PhaseStart float64 // legacy batch phase field; retained only while migrating to streaming extractor semantics } // ExtractResult holds the output of a batch extraction including the ending diff --git a/internal/demod/gpudemod/batch_runner.go b/internal/demod/gpudemod/batch_runner.go index 7441263..3933c1b 100644 --- a/internal/demod/gpudemod/batch_runner.go +++ b/internal/demod/gpudemod/batch_runner.go @@ -10,10 +10,12 @@ type batchSlot struct { } type BatchRunner struct { - eng *Engine - slots []batchSlot - slotBufs []slotBuffers + eng *Engine + slots []batchSlot + slotBufs []slotBuffers slotBufSize int // number of IQ samples the slot buffers were allocated for + streamState map[int64]*ExtractStreamState + nativeState map[int64]*nativeStreamingSignalState } func NewBatchRunner(maxSamples int, sampleRate int) (*BatchRunner, error) { @@ -21,7 +23,11 @@ func NewBatchRunner(maxSamples int, sampleRate 
int) (*BatchRunner, error) { if err != nil { return nil, err } - return &BatchRunner{eng: eng}, nil + return &BatchRunner{ + eng: eng, + streamState: make(map[int64]*ExtractStreamState), + nativeState: make(map[int64]*nativeStreamingSignalState), + }, nil } func (r *BatchRunner) Close() { @@ -29,9 +35,12 @@ func (r *BatchRunner) Close() { return } r.freeSlotBuffers() + r.freeAllNativeStreamingStates() r.eng.Close() r.eng = nil r.slots = nil + r.streamState = nil + r.nativeState = nil } func (r *BatchRunner) prepare(jobs []ExtractJob) { diff --git a/internal/demod/gpudemod/batch_runner_windows.go b/internal/demod/gpudemod/batch_runner_windows.go index c81467c..58836fd 100644 --- a/internal/demod/gpudemod/batch_runner_windows.go +++ b/internal/demod/gpudemod/batch_runner_windows.go @@ -160,9 +160,9 @@ func (r *BatchRunner) shiftFilterDecimateSlotParallel(iq []complex64, job Extrac if bridgeMemcpyH2D(buf.dTaps, unsafe.Pointer(&taps[0]), tapsBytes) != 0 { return 0, 0, errors.New("taps H2D failed") } - decim := int(math.Round(float64(e.sampleRate) / float64(job.OutRate))) - if decim < 1 { - decim = 1 + decim, err := ExactIntegerDecimation(e.sampleRate, job.OutRate) + if err != nil { + return 0, 0, err } nOut := n / decim if nOut <= 0 { diff --git a/internal/demod/gpudemod/build/gpudemod_kernels.lib b/internal/demod/gpudemod/build/gpudemod_kernels.lib deleted file mode 100644 index dccfca0..0000000 Binary files a/internal/demod/gpudemod/build/gpudemod_kernels.lib and /dev/null differ diff --git a/internal/demod/gpudemod/compare.go b/internal/demod/gpudemod/compare.go new file mode 100644 index 0000000..24ba29b --- /dev/null +++ b/internal/demod/gpudemod/compare.go @@ -0,0 +1,47 @@ +package gpudemod + +import "math/cmplx" + +type CompareStats struct { + MaxAbsErr float64 + RMSErr float64 + Count int +} + +func CompareComplexSlices(a []complex64, b []complex64) CompareStats { + n := len(a) + if len(b) < n { + n = len(b) + } + if n == 0 { + return CompareStats{} + } + var 
sumSq float64 + var maxAbs float64 + for i := 0; i < n; i++ { + err := cmplx.Abs(complex128(a[i] - b[i])) + if err > maxAbs { + maxAbs = err + } + sumSq += err * err + } + return CompareStats{ + MaxAbsErr: maxAbs, + RMSErr: mathSqrt(sumSq / float64(n)), + Count: n, + } +} + +func mathSqrt(v float64) float64 { + // tiny shim to keep the compare helper self-contained and easy to move + // without importing additional logic elsewhere + z := v + if z <= 0 { + return 0 + } + x := z + for i := 0; i < 12; i++ { + x = 0.5 * (x + z/x) + } + return x +} diff --git a/internal/demod/gpudemod/compare_gpu.go b/internal/demod/gpudemod/compare_gpu.go new file mode 100644 index 0000000..9232c3c --- /dev/null +++ b/internal/demod/gpudemod/compare_gpu.go @@ -0,0 +1,19 @@ +package gpudemod + +func BuildGPUStubDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics { + return ExtractDebugMetrics{ + SignalID: res.SignalID, + PhaseCount: res.PhaseCount, + HistoryLen: res.HistoryLen, + NOut: res.NOut, + } +} + +func BuildGPUHostOracleDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics { + return ExtractDebugMetrics{ + SignalID: res.SignalID, + PhaseCount: res.PhaseCount, + HistoryLen: res.HistoryLen, + NOut: res.NOut, + } +} diff --git a/internal/demod/gpudemod/compare_oracle.go b/internal/demod/gpudemod/compare_oracle.go new file mode 100644 index 0000000..ccf48e5 --- /dev/null +++ b/internal/demod/gpudemod/compare_oracle.go @@ -0,0 +1,10 @@ +package gpudemod + +func BuildOracleDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics { + return ExtractDebugMetrics{ + SignalID: res.SignalID, + PhaseCount: res.PhaseCount, + HistoryLen: res.HistoryLen, + NOut: res.NOut, + } +} diff --git a/internal/demod/gpudemod/compare_pipeline.go b/internal/demod/gpudemod/compare_pipeline.go new file mode 100644 index 0000000..5578fd9 --- /dev/null +++ b/internal/demod/gpudemod/compare_pipeline.go @@ -0,0 +1,27 @@ +package gpudemod + +func CompareOracleAndGPUStub(oracle 
StreamingExtractResult, gpu StreamingExtractResult) (ExtractDebugMetrics, CompareStats) { + stats := CompareComplexSlices(oracle.IQ, gpu.IQ) + metrics := ExtractDebugMetrics{ + SignalID: oracle.SignalID, + PhaseCount: gpu.PhaseCount, + HistoryLen: gpu.HistoryLen, + NOut: gpu.NOut, + RefMaxAbsErr: stats.MaxAbsErr, + RefRMSErr: stats.RMSErr, + } + return metrics, stats +} + +func CompareOracleAndGPUHostOracle(oracle StreamingExtractResult, gpu StreamingExtractResult) (ExtractDebugMetrics, CompareStats) { + stats := CompareComplexSlices(oracle.IQ, gpu.IQ) + metrics := ExtractDebugMetrics{ + SignalID: oracle.SignalID, + PhaseCount: gpu.PhaseCount, + HistoryLen: gpu.HistoryLen, + NOut: gpu.NOut, + RefMaxAbsErr: stats.MaxAbsErr, + RefRMSErr: stats.RMSErr, + } + return metrics, stats +} diff --git a/internal/demod/gpudemod/compare_pipeline_test.go b/internal/demod/gpudemod/compare_pipeline_test.go new file mode 100644 index 0000000..9337674 --- /dev/null +++ b/internal/demod/gpudemod/compare_pipeline_test.go @@ -0,0 +1,32 @@ +package gpudemod + +import "testing" + +func TestCompareOracleAndGPUStub(t *testing.T) { + oracle := StreamingExtractResult{ + SignalID: 1, + IQ: []complex64{1 + 1i, 2 + 2i}, + Rate: 200000, + NOut: 2, + PhaseCount: 0, + HistoryLen: 64, + } + gpu := StreamingExtractResult{ + SignalID: 1, + IQ: []complex64{1 + 1i, 2.1 + 2i}, + Rate: 200000, + NOut: 2, + PhaseCount: 3, + HistoryLen: 64, + } + metrics, stats := CompareOracleAndGPUStub(oracle, gpu) + if metrics.SignalID != 1 { + t.Fatalf("unexpected signal id: %d", metrics.SignalID) + } + if stats.Count != 2 { + t.Fatalf("unexpected compare count: %d", stats.Count) + } + if metrics.RefMaxAbsErr <= 0 { + t.Fatalf("expected positive max abs error") + } +} diff --git a/internal/demod/gpudemod/compare_state.go b/internal/demod/gpudemod/compare_state.go new file mode 100644 index 0000000..34e35d0 --- /dev/null +++ b/internal/demod/gpudemod/compare_state.go @@ -0,0 +1,12 @@ +package gpudemod + +type 
ExtractDebugMetrics struct { + SignalID int64 + PhaseCount int + HistoryLen int + NOut int + RefMaxAbsErr float64 + RefRMSErr float64 + BoundaryDelta float64 + BoundaryD2 float64 +} diff --git a/internal/demod/gpudemod/compare_test.go b/internal/demod/gpudemod/compare_test.go new file mode 100644 index 0000000..643c61e --- /dev/null +++ b/internal/demod/gpudemod/compare_test.go @@ -0,0 +1,18 @@ +package gpudemod + +import "testing" + +func TestCompareComplexSlices(t *testing.T) { + a := []complex64{1 + 1i, 2 + 2i, 3 + 3i} + b := []complex64{1 + 1i, 2.1 + 2i, 2.9 + 3.2i} + stats := CompareComplexSlices(a, b) + if stats.Count != 3 { + t.Fatalf("unexpected count: %d", stats.Count) + } + if stats.MaxAbsErr <= 0 { + t.Fatalf("expected positive max abs error") + } + if stats.RMSErr <= 0 { + t.Fatalf("expected positive rms error") + } +} diff --git a/internal/demod/gpudemod/cpu_oracle.go b/internal/demod/gpudemod/cpu_oracle.go new file mode 100644 index 0000000..d045072 --- /dev/null +++ b/internal/demod/gpudemod/cpu_oracle.go @@ -0,0 +1,170 @@ +package gpudemod + +import ( + "fmt" + "math" +) + +type CPUOracleState struct { + SignalID int64 + ConfigHash uint64 + NCOPhase float64 + Decim int + PhaseCount int + NumTaps int + ShiftedHistory []complex64 + BaseTaps []float32 + PolyphaseTaps []float32 +} + +func ResetCPUOracleStateIfConfigChanged(state *CPUOracleState, newHash uint64) { + if state == nil { + return + } + if state.ConfigHash != newHash { + state.ConfigHash = newHash + state.NCOPhase = 0 + state.PhaseCount = 0 + state.ShiftedHistory = state.ShiftedHistory[:0] + } +} + +func CPUOracleExtract(iqNew []complex64, state *CPUOracleState, phaseInc float64) []complex64 { + if state == nil || state.NumTaps <= 0 || state.Decim <= 0 || len(state.BaseTaps) < state.NumTaps { + return nil + } + out := make([]complex64, 0, len(iqNew)/maxInt(1, state.Decim)+2) + phase := state.NCOPhase + hist := append([]complex64(nil), state.ShiftedHistory...) 
+ + for _, x := range iqNew { + rot := complex64(complex(math.Cos(phase), math.Sin(phase))) + s := x * rot + hist = append(hist, s) + state.PhaseCount++ + + if state.PhaseCount == state.Decim { + var y complex64 + for k := 0; k < state.NumTaps; k++ { + idx := len(hist) - 1 - k + var sample complex64 + if idx >= 0 { + sample = hist[idx] + } + y += complex(state.BaseTaps[k], 0) * sample + } + out = append(out, y) + state.PhaseCount = 0 + } + + if len(hist) > state.NumTaps-1 { + hist = hist[len(hist)-(state.NumTaps-1):] + } + + phase += phaseInc + if phase >= math.Pi { + phase -= 2 * math.Pi + } else if phase < -math.Pi { + phase += 2 * math.Pi + } + } + + state.NCOPhase = phase + state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...) + return out +} + +// CPUOracleExtractPolyphase keeps the same streaming state semantics as CPUOracleExtract, +// but computes outputs using the explicit phase-major polyphase tap layout. +func CPUOracleExtractPolyphase(iqNew []complex64, state *CPUOracleState, phaseInc float64) []complex64 { + if state == nil || state.NumTaps <= 0 || state.Decim <= 0 || len(state.BaseTaps) < state.NumTaps { + return nil + } + if len(state.PolyphaseTaps) == 0 { + state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim) + } + phaseLen := PolyphasePhaseLen(len(state.BaseTaps), state.Decim) + out := make([]complex64, 0, len(iqNew)/maxInt(1, state.Decim)+2) + phase := state.NCOPhase + hist := append([]complex64(nil), state.ShiftedHistory...) 
+ + for _, x := range iqNew { + rot := complex64(complex(math.Cos(phase), math.Sin(phase))) + s := x * rot + hist = append(hist, s) + state.PhaseCount++ + + if state.PhaseCount == state.Decim { + var y complex64 + for p := 0; p < state.Decim; p++ { + for k := 0; k < phaseLen; k++ { + tap := state.PolyphaseTaps[p*phaseLen+k] + if tap == 0 { + continue + } + srcBack := p + k*state.Decim + idx := len(hist) - 1 - srcBack + if idx < 0 { + continue + } + y += complex(tap, 0) * hist[idx] + } + } + out = append(out, y) + state.PhaseCount = 0 + } + + if len(hist) > state.NumTaps-1 { + hist = hist[len(hist)-(state.NumTaps-1):] + } + + phase += phaseInc + if phase >= math.Pi { + phase -= 2 * math.Pi + } else if phase < -math.Pi { + phase += 2 * math.Pi + } + } + + state.NCOPhase = phase + state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...) + return out +} + +func RunChunkedCPUOracle(all []complex64, chunkSizes []int, mkState func() *CPUOracleState, phaseInc float64) []complex64 { + state := mkState() + out := make([]complex64, 0) + pos := 0 + for _, n := range chunkSizes { + if pos >= len(all) { + break + } + end := pos + n + if end > len(all) { + end = len(all) + } + out = append(out, CPUOracleExtract(all[pos:end], state, phaseInc)...) + pos = end + } + if pos < len(all) { + out = append(out, CPUOracleExtract(all[pos:], state, phaseInc)...) 
+ } + return out +} + +func ExactIntegerDecimation(sampleRate int, outRate int) (int, error) { + if sampleRate <= 0 || outRate <= 0 { + return 0, fmt.Errorf("invalid sampleRate/outRate: %d/%d", sampleRate, outRate) + } + if sampleRate%outRate != 0 { + return 0, fmt.Errorf("streaming polyphase extractor requires integer decimation: sampleRate=%d outRate=%d", sampleRate, outRate) + } + return sampleRate / outRate, nil +} + +func maxInt(a int, b int) int { + if a > b { + return a + } + return b +} diff --git a/internal/demod/gpudemod/cpu_oracle_test.go b/internal/demod/gpudemod/cpu_oracle_test.go new file mode 100644 index 0000000..762caeb --- /dev/null +++ b/internal/demod/gpudemod/cpu_oracle_test.go @@ -0,0 +1,89 @@ +package gpudemod + +import ( + "math" + "math/cmplx" + "testing" +) + +func makeDeterministicIQ(n int) []complex64 { + out := make([]complex64, n) + for i := 0; i < n; i++ { + a := 0.017 * float64(i) + b := 0.031 * float64(i) + out[i] = complex64(complex(math.Cos(a)+0.2*math.Cos(b), math.Sin(a)+0.15*math.Sin(b))) + } + return out +} + +func makeLowpassTaps(n int) []float32 { + out := make([]float32, n) + for i := range out { + out[i] = 1.0 / float32(n) + } + return out +} + +func requireComplexSlicesClose(t *testing.T, a []complex64, b []complex64, tol float64) { + t.Helper() + if len(a) != len(b) { + t.Fatalf("length mismatch: %d vs %d", len(a), len(b)) + } + for i := range a { + if cmplx.Abs(complex128(a[i]-b[i])) > tol { + t.Fatalf("slice mismatch at %d: %v vs %v (tol=%f)", i, a[i], b[i], tol) + } + } +} + +func TestCPUOracleMonolithicVsChunked(t *testing.T) { + iq := makeDeterministicIQ(200000) + mk := func() *CPUOracleState { + return &CPUOracleState{ + SignalID: 1, + ConfigHash: 123, + NCOPhase: 0, + Decim: 20, + PhaseCount: 0, + NumTaps: 65, + ShiftedHistory: make([]complex64, 0, 64), + BaseTaps: makeLowpassTaps(65), + } + } + phaseInc := 0.017 + monoState := mk() + mono := CPUOracleExtract(iq, monoState, phaseInc) + chunked := 
RunChunkedCPUOracle(iq, []int{4096, 5000, 8192, 27307}, mk, phaseInc) + requireComplexSlicesClose(t, mono, chunked, 1e-5) +} + +func TestExactIntegerDecimation(t *testing.T) { + if d, err := ExactIntegerDecimation(4000000, 200000); err != nil || d != 20 { + t.Fatalf("unexpected exact decim result: d=%d err=%v", d, err) + } + if _, err := ExactIntegerDecimation(4000000, 192000); err == nil { + t.Fatalf("expected non-integer decimation error") + } +} + +func TestCPUOracleDirectVsPolyphase(t *testing.T) { + iq := makeDeterministicIQ(50000) + mk := func() *CPUOracleState { + taps := makeLowpassTaps(65) + return &CPUOracleState{ + SignalID: 1, + ConfigHash: 123, + NCOPhase: 0, + Decim: 20, + PhaseCount: 0, + NumTaps: 65, + ShiftedHistory: make([]complex64, 0, 64), + BaseTaps: taps, + PolyphaseTaps: BuildPolyphaseTapsPhaseMajor(taps, 20), + } + } + phaseInc := 0.017 + direct := CPUOracleExtract(iq, mk(), phaseInc) + poly := CPUOracleExtractPolyphase(iq, mk(), phaseInc) + requireComplexSlicesClose(t, direct, poly, 1e-5) +} diff --git a/internal/demod/gpudemod/native/exports.cu b/internal/demod/gpudemod/native/exports.cu index 6081b57..d2bceae 100644 --- a/internal/demod/gpudemod/native/exports.cu +++ b/internal/demod/gpudemod/native/exports.cu @@ -11,6 +11,10 @@ typedef void* gpud_stream_handle; +static __forceinline__ int gpud_max_i(int a, int b) { + return a > b ? 
a : b; +} + GPUD_API int GPUD_CALL gpud_stream_create(gpud_stream_handle* out) { if (!out) return -1; cudaStream_t stream; @@ -320,3 +324,308 @@ GPUD_API int GPUD_CALL gpud_launch_ssb_product_cuda( gpud_ssb_product_kernel<<>>(in, out, n, phase_inc, phase_start); return (int)cudaGetLastError(); } + +__global__ void gpud_streaming_polyphase_accum_kernel( + const float2* __restrict__ history_state, + int history_len, + const float2* __restrict__ shifted_new, + int n_new, + const float* __restrict__ polyphase_taps, + int polyphase_len, + int decim, + int phase_len, + int start_idx, + int n_out, + float2* __restrict__ out +); + +__global__ void gpud_streaming_history_tail_kernel( + const float2* __restrict__ history_state, + int history_len, + const float2* __restrict__ shifted_new, + int n_new, + int keep, + float2* __restrict__ history_out +); + +static __forceinline__ double gpud_reduce_phase(double phase); + +// Transitional legacy entrypoint retained for bring-up and comparison. +// The production-native streaming path is gpud_launch_streaming_polyphase_stateful_cuda, +// which preserves per-signal carry state across NEW-samples-only chunks. +GPUD_API int GPUD_CALL gpud_launch_streaming_polyphase_prepare_cuda( + const float2* in_new, + int n_new, + const float2* history_in, + int history_len, + const float* polyphase_taps, + int polyphase_len, + int decim, + int num_taps, + int phase_count_in, + double phase_start, + double phase_inc, + float2* out, + int* n_out, + int* phase_count_out, + double* phase_end_out, + float2* history_out +) { + if (n_new < 0 || !polyphase_taps || polyphase_len <= 0 || decim <= 0 || num_taps <= 0) return -1; + const int phase_len = (num_taps + decim - 1) / decim; + if (polyphase_len < decim * phase_len) return -2; + + const int keep = num_taps > 1 ? 
num_taps - 1 : 0; + int clamped_history_len = history_len; + if (clamped_history_len < 0) clamped_history_len = 0; + if (clamped_history_len > keep) clamped_history_len = keep; + if (clamped_history_len > 0 && !history_in) return -5; + + float2* shifted = NULL; + cudaError_t err = cudaSuccess; + if (n_new > 0) { + if (!in_new) return -3; + err = cudaMalloc((void**)&shifted, (size_t)gpud_max_i(1, n_new) * sizeof(float2)); + if (err != cudaSuccess) return (int)err; + const int block = 256; + const int grid_shift = (n_new + block - 1) / block; + gpud_freq_shift_kernel<<<grid_shift, block>>>(in_new, shifted, n_new, phase_inc, phase_start); + err = cudaGetLastError(); + if (err != cudaSuccess) { + cudaFree(shifted); + return (int)err; + } + } + + int phase_count = phase_count_in; + if (phase_count < 0) phase_count = 0; + if (phase_count >= decim) phase_count %= decim; + const int total_phase = phase_count + n_new; + const int out_count = total_phase / decim; + if (out_count > 0) { + if (!out) { + cudaFree(shifted); + return -4; + } + const int block = 256; + const int grid = (out_count + block - 1) / block; + const int start_idx = decim - phase_count - 1; + gpud_streaming_polyphase_accum_kernel<<<grid, block>>>( + history_in, + clamped_history_len, + shifted, + n_new, + polyphase_taps, + polyphase_len, + decim, + phase_len, + start_idx, + out_count, + out + ); + err = cudaGetLastError(); + if (err != cudaSuccess) { + cudaFree(shifted); + return (int)err; + } + } + + if (history_out && keep > 0) { + const int new_history_len = clamped_history_len + n_new < keep ? 
clamped_history_len + n_new : keep; + if (new_history_len > 0) { + const int block = 256; + const int grid = (new_history_len + block - 1) / block; + gpud_streaming_history_tail_kernel<<<grid, block>>>( + history_in, + clamped_history_len, + shifted, + n_new, + new_history_len, + history_out + ); + err = cudaGetLastError(); + if (err != cudaSuccess) { + cudaFree(shifted); + return (int)err; + } + } + } + + if (n_out) *n_out = out_count; + if (phase_count_out) *phase_count_out = total_phase % decim; + if (phase_end_out) *phase_end_out = gpud_reduce_phase(phase_start + phase_inc * (double)n_new); + + if (shifted) cudaFree(shifted); + return 0; +} + +static __device__ __forceinline__ float2 gpud_stream_sample_at( + const float2* __restrict__ history_state, + int history_len, + const float2* __restrict__ shifted_new, + int n_new, + int idx +) { + if (idx < 0) return make_float2(0.0f, 0.0f); + if (idx < history_len) return history_state[idx]; + int shifted_idx = idx - history_len; + if (shifted_idx < 0 || shifted_idx >= n_new) return make_float2(0.0f, 0.0f); + return shifted_new[shifted_idx]; +} + +__global__ void gpud_streaming_polyphase_accum_kernel( + const float2* __restrict__ history_state, + int history_len, + const float2* __restrict__ shifted_new, + int n_new, + const float* __restrict__ polyphase_taps, + int polyphase_len, + int decim, + int phase_len, + int start_idx, + int n_out, + float2* __restrict__ out +) { + int out_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (out_idx >= n_out) return; + + int newest = history_len + start_idx + out_idx * decim; + float acc_r = 0.0f; + float acc_i = 0.0f; + for (int p = 0; p < decim; ++p) { + for (int k = 0; k < phase_len; ++k) { + int tap_idx = p * phase_len + k; + if (tap_idx >= polyphase_len) continue; + float tap = polyphase_taps[tap_idx]; + if (tap == 0.0f) continue; + int src_back = p + k * decim; + int src_idx = newest - src_back; + float2 sample = gpud_stream_sample_at(history_state, history_len, shifted_new, n_new, 
src_idx); + acc_r += sample.x * tap; + acc_i += sample.y * tap; + } + } + out[out_idx] = make_float2(acc_r, acc_i); +} + +__global__ void gpud_streaming_history_tail_kernel( + const float2* __restrict__ history_state, + int history_len, + const float2* __restrict__ shifted_new, + int n_new, + int keep, + float2* __restrict__ history_out +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= keep) return; + int combined_len = history_len + n_new; + int src_idx = combined_len - keep + idx; + history_out[idx] = gpud_stream_sample_at(history_state, history_len, shifted_new, n_new, src_idx); +} + +static __forceinline__ double gpud_reduce_phase(double phase) { + const double TWO_PI = 6.283185307179586; + return phase - rint(phase / TWO_PI) * TWO_PI; +} + +// Production-native candidate entrypoint for the stateful streaming extractor. +// Callers provide only NEW samples; overlap+trim is intentionally not part of this path. +GPUD_API int GPUD_CALL gpud_launch_streaming_polyphase_stateful_cuda( + const float2* in_new, + int n_new, + float2* shifted_new_tmp, + const float* polyphase_taps, + int polyphase_len, + int decim, + int num_taps, + float2* history_state, + float2* history_scratch, + int history_cap, + int* history_len_io, + int* phase_count_state, + double* phase_state, + double phase_inc, + float2* out, + int out_cap, + int* n_out +) { + if (!polyphase_taps || decim <= 0 || num_taps <= 0 || !history_len_io || !phase_count_state || !phase_state || !n_out) return -10; + if (n_new < 0 || out_cap < 0 || history_cap < 0) return -11; + const int phase_len = (num_taps + decim - 1) / decim; + if (polyphase_len < decim * phase_len) return -12; + + int history_len = *history_len_io; + if (history_len < 0) history_len = 0; + if (history_len > history_cap) history_len = history_cap; + + int phase_count = *phase_count_state; + if (phase_count < 0) phase_count = 0; + if (phase_count >= decim) phase_count %= decim; + + double phase_start = *phase_state; + if (n_new 
> 0) { + if (!in_new || !shifted_new_tmp) return -13; + const int block = 256; + const int grid = (n_new + block - 1) / block; + gpud_freq_shift_kernel<<<grid, block>>>(in_new, shifted_new_tmp, n_new, phase_inc, phase_start); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) return (int)err; + } + + const int total_phase = phase_count + n_new; + const int out_count = total_phase / decim; + if (out_count > out_cap) return -14; + + if (out_count > 0) { + if (!out) return -15; + const int block = 256; + const int grid = (out_count + block - 1) / block; + const int start_idx = decim - phase_count - 1; + gpud_streaming_polyphase_accum_kernel<<<grid, block>>>( + history_state, + history_len, + shifted_new_tmp, + n_new, + polyphase_taps, + polyphase_len, + decim, + phase_len, + start_idx, + out_count, + out + ); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) return (int)err; + } + + int new_history_len = history_len; + if (history_cap > 0) { + new_history_len = history_len + n_new; + if (new_history_len > history_cap) new_history_len = history_cap; + if (new_history_len > 0) { + if (!history_state || !history_scratch) return -16; + const int block = 256; + const int grid = (new_history_len + block - 1) / block; + gpud_streaming_history_tail_kernel<<<grid, block>>>( + history_state, + history_len, + shifted_new_tmp, + n_new, + new_history_len, + history_scratch + ); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) return (int)err; + err = cudaMemcpy(history_state, history_scratch, (size_t)new_history_len * sizeof(float2), cudaMemcpyDeviceToDevice); + if (err != cudaSuccess) return (int)err; + } + } else { + new_history_len = 0; + } + + *history_len_io = new_history_len; + *phase_count_state = total_phase % decim; + *phase_state = gpud_reduce_phase(phase_start + phase_inc * (double)n_new); + *n_out = out_count; + return 0; +} diff --git a/internal/demod/gpudemod/oracle_runner_test.go b/internal/demod/gpudemod/oracle_runner_test.go new file mode 100644 index 
0000000..e7d27bd --- /dev/null +++ b/internal/demod/gpudemod/oracle_runner_test.go @@ -0,0 +1,31 @@ +package gpudemod + +import "testing" + +func TestCPUOracleRunnerCleansUpDisappearedSignals(t *testing.T) { + r := NewCPUOracleRunner(4000000) + jobs1 := []StreamingExtractJob{ + {SignalID: 1, OffsetHz: 1000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 101}, + {SignalID: 2, OffsetHz: 2000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 102}, + } + _, err := r.StreamingExtract(makeDeterministicIQ(4096), jobs1) + if err != nil { + t.Fatalf("unexpected error on first extract: %v", err) + } + if len(r.States) != 2 { + t.Fatalf("expected 2 states, got %d", len(r.States)) + } + jobs2 := []StreamingExtractJob{ + {SignalID: 2, OffsetHz: 2000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 102}, + } + _, err = r.StreamingExtract(makeDeterministicIQ(2048), jobs2) + if err != nil { + t.Fatalf("unexpected error on second extract: %v", err) + } + if len(r.States) != 1 { + t.Fatalf("expected 1 state after cleanup, got %d", len(r.States)) + } + if _, ok := r.States[1]; ok { + t.Fatalf("expected signal 1 state to be cleaned up") + } +} diff --git a/internal/demod/gpudemod/oracle_validation_test.go b/internal/demod/gpudemod/oracle_validation_test.go new file mode 100644 index 0000000..7026dcb --- /dev/null +++ b/internal/demod/gpudemod/oracle_validation_test.go @@ -0,0 +1,45 @@ +package gpudemod + +import "testing" + +func TestCPUOracleMonolithicVsChunkedPolyphase(t *testing.T) { + iq := makeDeterministicIQ(120000) + mk := func() *CPUOracleState { + taps := makeLowpassTaps(65) + return &CPUOracleState{ + SignalID: 1, + ConfigHash: 999, + NCOPhase: 0, + Decim: 20, + PhaseCount: 0, + NumTaps: 65, + ShiftedHistory: make([]complex64, 0, 64), + BaseTaps: taps, + PolyphaseTaps: BuildPolyphaseTapsPhaseMajor(taps, 20), + } + } + phaseInc := 0.013 + mono := CPUOracleExtractPolyphase(iq, mk(), phaseInc) + chunked := func() []complex64 { + state := mk() 
+ out := make([]complex64, 0) + chunks := []int{4096, 3000, 8192, 7777, 12000} + pos := 0 + for _, n := range chunks { + if pos >= len(iq) { + break + } + end := pos + n + if end > len(iq) { + end = len(iq) + } + out = append(out, CPUOracleExtractPolyphase(iq[pos:end], state, phaseInc)...) + pos = end + } + if pos < len(iq) { + out = append(out, CPUOracleExtractPolyphase(iq[pos:], state, phaseInc)...) + } + return out + }() + requireComplexSlicesClose(t, mono, chunked, 1e-5) +} diff --git a/internal/demod/gpudemod/polyphase.go b/internal/demod/gpudemod/polyphase.go new file mode 100644 index 0000000..f92acd7 --- /dev/null +++ b/internal/demod/gpudemod/polyphase.go @@ -0,0 +1,28 @@ +package gpudemod + +// BuildPolyphaseTapsPhaseMajor builds a phase-major polyphase tap layout: +// tapsByPhase[p][k] = h[p + k*D] +// Flattened as: [phase0 taps..., phase1 taps..., ...] +func BuildPolyphaseTapsPhaseMajor(base []float32, decim int) []float32 { + if decim <= 0 || len(base) == 0 { + return nil + } + maxPhaseLen := (len(base) + decim - 1) / decim + out := make([]float32, decim*maxPhaseLen) + for p := 0; p < decim; p++ { + for k := 0; k < maxPhaseLen; k++ { + src := p + k*decim + if src < len(base) { + out[p*maxPhaseLen+k] = base[src] + } + } + } + return out +} + +func PolyphasePhaseLen(baseLen int, decim int) int { + if decim <= 0 || baseLen <= 0 { + return 0 + } + return (baseLen + decim - 1) / decim +} diff --git a/internal/demod/gpudemod/polyphase_test.go b/internal/demod/gpudemod/polyphase_test.go new file mode 100644 index 0000000..bd8ecb9 --- /dev/null +++ b/internal/demod/gpudemod/polyphase_test.go @@ -0,0 +1,22 @@ +package gpudemod + +import "testing" + +func TestBuildPolyphaseTapsPhaseMajor(t *testing.T) { + base := []float32{1, 2, 3, 4, 5, 6, 7} + got := BuildPolyphaseTapsPhaseMajor(base, 3) + // phase-major with phase len ceil(7/3)=3 + want := []float32{ + 1, 4, 7, + 2, 5, 0, + 3, 6, 0, + } + if len(got) != len(want) { + t.Fatalf("len mismatch: got %d want %d", 
len(got), len(want)) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("mismatch at %d: got %v want %v", i, got[i], want[i]) + } + } +} diff --git a/internal/demod/gpudemod/state_reset_test.go b/internal/demod/gpudemod/state_reset_test.go new file mode 100644 index 0000000..9345caa --- /dev/null +++ b/internal/demod/gpudemod/state_reset_test.go @@ -0,0 +1,57 @@ +package gpudemod + +import "testing" + +func TestResetCPUOracleStateIfConfigChanged(t *testing.T) { + state := &CPUOracleState{ + SignalID: 1, + ConfigHash: 111, + NCOPhase: 1.23, + Decim: 20, + PhaseCount: 7, + NumTaps: 65, + ShiftedHistory: []complex64{1 + 1i, 2 + 2i}, + } + ResetCPUOracleStateIfConfigChanged(state, 222) + if state.ConfigHash != 222 { + t.Fatalf("config hash not updated") + } + if state.NCOPhase != 0 { + t.Fatalf("expected phase reset") + } + if state.PhaseCount != 0 { + t.Fatalf("expected phase count reset") + } + if len(state.ShiftedHistory) != 0 { + t.Fatalf("expected shifted history reset") + } +} + +func TestResetExtractStreamState(t *testing.T) { + state := &ExtractStreamState{ + SignalID: 1, + ConfigHash: 111, + NCOPhase: 2.34, + Decim: 20, + PhaseCount: 9, + NumTaps: 65, + ShiftedHistory: []complex64{3 + 3i, 4 + 4i}, + Initialized: true, + } + ResetExtractStreamState(state, 333) + if state.ConfigHash != 333 { + t.Fatalf("config hash not updated") + } + if state.NCOPhase != 0 { + t.Fatalf("expected phase reset") + } + if state.PhaseCount != 0 { + t.Fatalf("expected phase count reset") + } + if len(state.ShiftedHistory) != 0 { + t.Fatalf("expected shifted history reset") + } + if state.Initialized { + t.Fatalf("expected initialized=false after reset") + } +} diff --git a/internal/demod/gpudemod/stream_state.go b/internal/demod/gpudemod/stream_state.go new file mode 100644 index 0000000..26bc5fd --- /dev/null +++ b/internal/demod/gpudemod/stream_state.go @@ -0,0 +1,70 @@ +package gpudemod + +import ( + "log" + + "sdr-wideband-suite/internal/dsp" +) + +func (r 
*BatchRunner) ResetSignalState(signalID int64) { + if r == nil || r.streamState == nil { + return + } + delete(r.streamState, signalID) + r.resetNativeStreamingState(signalID) +} + +func (r *BatchRunner) ResetAllSignalStates() { + if r == nil { + return + } + r.streamState = make(map[int64]*ExtractStreamState) + r.resetAllNativeStreamingStates() +} + +func (r *BatchRunner) getOrInitExtractState(job StreamingExtractJob, sampleRate int) (*ExtractStreamState, error) { + if r == nil { + return nil, ErrUnavailable + } + if r.streamState == nil { + r.streamState = make(map[int64]*ExtractStreamState) + } + decim, err := ExactIntegerDecimation(sampleRate, job.OutRate) + if err != nil { + return nil, err + } + state := r.streamState[job.SignalID] + if state == nil { + state = &ExtractStreamState{SignalID: job.SignalID} + r.streamState[job.SignalID] = state + } + if state.ConfigHash != job.ConfigHash { + if state.Initialized { + log.Printf("STREAMING STATE RESET: signal=%d oldHash=%d newHash=%d historyLen=%d", + job.SignalID, state.ConfigHash, job.ConfigHash, len(state.ShiftedHistory)) + } + ResetExtractStreamState(state, job.ConfigHash) + } + state.Decim = decim + state.NumTaps = job.NumTaps + if state.NumTaps <= 0 { + state.NumTaps = 101 + } + cutoff := job.Bandwidth / 2 + if cutoff < 200 { + cutoff = 200 + } + base := dsp.LowpassFIR(cutoff, sampleRate, state.NumTaps) + state.BaseTaps = make([]float32, len(base)) + for i, v := range base { + state.BaseTaps[i] = float32(v) + } + state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim) + if cap(state.ShiftedHistory) < maxInt(0, state.NumTaps-1) { + state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1)) + } else if state.ShiftedHistory == nil { + state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1)) + } + state.Initialized = true + return state, nil +} diff --git a/internal/demod/gpudemod/stream_state_test.go b/internal/demod/gpudemod/stream_state_test.go new file mode 
100644 index 0000000..b86c5f5 --- /dev/null +++ b/internal/demod/gpudemod/stream_state_test.go @@ -0,0 +1,31 @@ +package gpudemod + +import "testing" + +func TestGetOrInitExtractStateInitializesPolyphaseAndHistory(t *testing.T) { + r := &BatchRunner{streamState: make(map[int64]*ExtractStreamState)} + job := StreamingExtractJob{ + SignalID: 7, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 555, + } + state, err := r.getOrInitExtractState(job, 4000000) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if state.Decim != 20 { + t.Fatalf("unexpected decim: %d", state.Decim) + } + if len(state.BaseTaps) != 65 { + t.Fatalf("unexpected base taps len: %d", len(state.BaseTaps)) + } + if len(state.PolyphaseTaps) == 0 { + t.Fatalf("expected polyphase taps") + } + if cap(state.ShiftedHistory) < 64 { + t.Fatalf("expected shifted history capacity >= 64, got %d", cap(state.ShiftedHistory)) + } +} diff --git a/internal/demod/gpudemod/streaming_gpu_contract.go b/internal/demod/gpudemod/streaming_gpu_contract.go new file mode 100644 index 0000000..c978f22 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_contract.go @@ -0,0 +1,39 @@ +package gpudemod + +type StreamingGPUExecutionMode string + +const ( + StreamingGPUExecUnavailable StreamingGPUExecutionMode = "unavailable" + StreamingGPUExecHostOracle StreamingGPUExecutionMode = "host_oracle" + StreamingGPUExecCUDA StreamingGPUExecutionMode = "cuda" +) + +type StreamingGPUInvocation struct { + SignalID int64 + ConfigHash uint64 + OffsetHz float64 + OutRate int + Bandwidth float64 + SampleRate int + NumTaps int + Decim int + PhaseCountIn int + NCOPhaseIn float64 + HistoryLen int + BaseTaps []float32 + PolyphaseTaps []float32 + ShiftedHistory []complex64 + IQNew []complex64 +} + +type StreamingGPUExecutionResult struct { + SignalID int64 + Mode StreamingGPUExecutionMode + IQ []complex64 + Rate int + NOut int + PhaseCountOut int + NCOPhaseOut float64 + HistoryOut []complex64 + 
HistoryLenOut int +} diff --git a/internal/demod/gpudemod/streaming_gpu_exec.go b/internal/demod/gpudemod/streaming_gpu_exec.go new file mode 100644 index 0000000..23ec814 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_exec.go @@ -0,0 +1,29 @@ +package gpudemod + +// StreamingExtractGPUExec is the internal execution selector for the new +// production-path semantics. It intentionally keeps the public API stable while +// allowing the implementation to evolve from host-side oracle execution toward +// a real GPU polyphase path. +func (r *BatchRunner) StreamingExtractGPUExec(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) { + invocations, err := r.buildStreamingGPUInvocations(iqNew, jobs) + if err != nil { + return nil, err + } + if useGPUNativePreparedExecution { + execResults, err := r.executeStreamingGPUNativePrepared(invocations) + if err == nil { + return r.applyStreamingGPUExecutionResults(execResults), nil + } + if !useGPUHostOracleExecution { + return nil, err + } + } + if useGPUHostOracleExecution { + execResults, err := r.executeStreamingGPUHostOraclePrepared(invocations) + if err != nil { + return nil, err + } + return r.applyStreamingGPUExecutionResults(execResults), nil + } + return nil, ErrUnavailable +} diff --git a/internal/demod/gpudemod/streaming_gpu_exec_test.go b/internal/demod/gpudemod/streaming_gpu_exec_test.go new file mode 100644 index 0000000..9933cdf --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_exec_test.go @@ -0,0 +1,112 @@ +package gpudemod + +import "testing" + +func TestStreamingExtractGPUExecUsesSafeDefaultMode(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + res, err := r.StreamingExtractGPUExec(makeDeterministicIQ(2048), []StreamingExtractJob{job}) + if err != nil { + 
t.Fatalf("expected safe default execution path, got error: %v", err) + } + if len(res) != 1 { + t.Fatalf("expected 1 result, got %d", len(res)) + } + if res[0].Rate != job.OutRate { + t.Fatalf("expected output rate %d, got %d", job.OutRate, res[0].Rate) + } + if res[0].NOut <= 0 { + t.Fatalf("expected streaming output samples") + } +} + +func TestStreamingGPUExecMatchesCPUOracleAcrossChunkPatterns(t *testing.T) { + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + t.Run("DeterministicIQ", func(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + steps := makeStreamingValidationSteps( + makeDeterministicIQ(1500), + []int{0, 1, 2, 17, 63, 64, 65, 129, 511}, + []StreamingExtractJob{job}, + ) + runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9) + }) + t.Run("ToneNoiseIQ", func(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + steps := makeStreamingValidationSteps( + makeToneNoiseIQ(4096, 0.023), + []int{7, 20, 3, 63, 64, 65, 777}, + []StreamingExtractJob{job}, + ) + runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9) + }) +} + +func TestStreamingGPUExecLifecycleMatchesCPUOracle(t *testing.T) { + r := &BatchRunner{ + eng: &Engine{sampleRate: 4000000}, + streamState: make(map[int64]*ExtractStreamState), + nativeState: make(map[int64]*nativeStreamingSignalState), + } + baseA := StreamingExtractJob{ + SignalID: 11, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 1001, + } + baseB := StreamingExtractJob{ + SignalID: 22, + OffsetHz: -18750, + Bandwidth: 16000, + OutRate: 100000, + NumTaps: 33, + ConfigHash: 2002, + } + steps := []streamingValidationStep{ + { + name: "prime_both_signals", + iq: makeDeterministicIQ(512), + jobs: []StreamingExtractJob{baseA, baseB}, + }, + { + name: 
"config_reset_with_zero_new", + iq: nil, + jobs: []StreamingExtractJob{{SignalID: baseA.SignalID, OffsetHz: baseA.OffsetHz, Bandwidth: baseA.Bandwidth, OutRate: baseA.OutRate, NumTaps: baseA.NumTaps, ConfigHash: baseA.ConfigHash + 1}, baseB}, + }, + { + name: "signal_b_disappears", + iq: makeToneNoiseIQ(96, 0.041), + jobs: []StreamingExtractJob{baseA}, + }, + { + name: "signal_b_reappears_fresh", + iq: makeDeterministicIQ(160), + jobs: []StreamingExtractJob{baseA, baseB}, + }, + { + name: "small_history_boundary_chunk", + iq: makeToneNoiseIQ(65, 0.017), + jobs: []StreamingExtractJob{baseA, baseB}, + }, + } + runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9) + if _, ok := r.nativeState[baseB.SignalID]; ok { + t.Fatalf("expected safe host-oracle path to keep native state inactive while gate is off") + } +} diff --git a/internal/demod/gpudemod/streaming_gpu_host_exec.go b/internal/demod/gpudemod/streaming_gpu_host_exec.go new file mode 100644 index 0000000..02d5953 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_host_exec.go @@ -0,0 +1,30 @@ +package gpudemod + +func (r *BatchRunner) executeStreamingGPUHostOraclePrepared(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) { + results := make([]StreamingGPUExecutionResult, len(invocations)) + for i, inv := range invocations { + out, phase, phaseCount, hist := runStreamingPolyphaseHostCore( + inv.IQNew, + inv.SampleRate, + inv.OffsetHz, + inv.NCOPhaseIn, + inv.PhaseCountIn, + inv.NumTaps, + inv.Decim, + inv.ShiftedHistory, + inv.PolyphaseTaps, + ) + results[i] = StreamingGPUExecutionResult{ + SignalID: inv.SignalID, + Mode: StreamingGPUExecHostOracle, + IQ: out, + Rate: inv.OutRate, + NOut: len(out), + PhaseCountOut: phaseCount, + NCOPhaseOut: phase, + HistoryOut: hist, + HistoryLenOut: len(hist), + } + } + return results, nil +} diff --git a/internal/demod/gpudemod/streaming_gpu_host_oracle.go b/internal/demod/gpudemod/streaming_gpu_host_oracle.go new file mode 100644 
index 0000000..aa2825e --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_host_oracle.go @@ -0,0 +1,49 @@ +package gpudemod + +// StreamingExtractGPUHostOracle is a temporary host-side execution of the intended +// streaming semantics using GPU-owned stream state. It is not the final GPU +// production implementation, but it allows the new production entrypoint to move +// from pure stub semantics toward real NEW-samples-only streaming behavior +// without reintroducing overlap+trim. +func (r *BatchRunner) StreamingExtractGPUHostOracle(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) { + if r == nil || r.eng == nil { + return nil, ErrUnavailable + } + results := make([]StreamingExtractResult, len(jobs)) + active := make(map[int64]struct{}, len(jobs)) + for i, job := range jobs { + active[job.SignalID] = struct{}{} + state, err := r.getOrInitExtractState(job, r.eng.sampleRate) + if err != nil { + return nil, err + } + out, phase, phaseCount, hist := runStreamingPolyphaseHostCore( + iqNew, + r.eng.sampleRate, + job.OffsetHz, + state.NCOPhase, + state.PhaseCount, + state.NumTaps, + state.Decim, + state.ShiftedHistory, + state.PolyphaseTaps, + ) + state.NCOPhase = phase + state.PhaseCount = phaseCount + state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...) 
+ results[i] = StreamingExtractResult{ + SignalID: job.SignalID, + IQ: out, + Rate: job.OutRate, + NOut: len(out), + PhaseCount: state.PhaseCount, + HistoryLen: len(state.ShiftedHistory), + } + } + for signalID := range r.streamState { + if _, ok := active[signalID]; !ok { + delete(r.streamState, signalID) + } + } + return results, nil +} diff --git a/internal/demod/gpudemod/streaming_gpu_host_oracle_test.go b/internal/demod/gpudemod/streaming_gpu_host_oracle_test.go new file mode 100644 index 0000000..b889ba5 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_host_oracle_test.go @@ -0,0 +1,35 @@ +package gpudemod + +import "testing" + +func TestStreamingGPUHostOracleComparableToCPUOracle(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + iq := makeDeterministicIQ(16000) + gpuLike, err := r.StreamingExtractGPUHostOracle(iq, []StreamingExtractJob{job}) + if err != nil { + t.Fatalf("unexpected host-oracle error: %v", err) + } + oracleRunner := NewCPUOracleRunner(4000000) + oracle, err := oracleRunner.StreamingExtract(iq, []StreamingExtractJob{job}) + if err != nil { + t.Fatalf("unexpected oracle error: %v", err) + } + if len(gpuLike) != 1 || len(oracle) != 1 { + t.Fatalf("unexpected result lengths: gpuLike=%d oracle=%d", len(gpuLike), len(oracle)) + } + metrics, stats := CompareOracleAndGPUHostOracle(oracle[0], gpuLike[0]) + if stats.Count == 0 { + t.Fatalf("expected compare count > 0") + } + if metrics.RefMaxAbsErr > 1e-5 { + t.Fatalf("expected host-oracle path to match cpu oracle closely, got max abs err %f", metrics.RefMaxAbsErr) + } +} diff --git a/internal/demod/gpudemod/streaming_gpu_modes.go b/internal/demod/gpudemod/streaming_gpu_modes.go new file mode 100644 index 0000000..c5e858d --- /dev/null +++ 
b/internal/demod/gpudemod/streaming_gpu_modes.go @@ -0,0 +1,4 @@ +package gpudemod + +const useGPUHostOracleExecution = false +const useGPUNativePreparedExecution = true diff --git a/internal/demod/gpudemod/streaming_gpu_native_prepare.go b/internal/demod/gpudemod/streaming_gpu_native_prepare.go new file mode 100644 index 0000000..247998d --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_native_prepare.go @@ -0,0 +1,284 @@ +//go:build cufft && windows + +package gpudemod + +/* +#cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include" +#include +typedef struct { float x; float y; } gpud_float2; +*/ +import "C" + +import ( + "math" + "unsafe" +) + +func (r *BatchRunner) executeStreamingGPUNativePrepared(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) { + if r == nil || r.eng == nil { + return nil, ErrUnavailable + } + if r.nativeState == nil { + r.nativeState = make(map[int64]*nativeStreamingSignalState) + } + results := make([]StreamingGPUExecutionResult, len(invocations)) + for i, inv := range invocations { + state, err := r.getOrInitNativeStreamingState(inv) + if err != nil { + return nil, err + } + if len(inv.IQNew) > 0 { + if err := ensureNativeBuffer(&state.dInNew, &state.inNewCap, len(inv.IQNew), unsafe.Sizeof(C.gpud_float2{})); err != nil { + return nil, err + } + if bridgeMemcpyH2D(state.dInNew, unsafe.Pointer(&inv.IQNew[0]), uintptr(len(inv.IQNew))*unsafe.Sizeof(complex64(0))) != 0 { + return nil, ErrUnavailable + } + } + outCap := len(inv.IQNew)/maxInt(1, inv.Decim) + 2 + if outCap > 0 { + if err := ensureNativeBuffer(&state.dOut, &state.outCap, outCap, unsafe.Sizeof(C.gpud_float2{})); err != nil { + return nil, err + } + } + + phaseInc := -2.0 * math.Pi * inv.OffsetHz / float64(inv.SampleRate) + // The native export consumes phase carry as host scalars while sample/history + // buffers remain device-resident, so keep these counters in nativeState. 
+ var nOut C.int + historyLen := C.int(state.historyLen) + phaseCount := C.int(state.phaseCount) + phaseNCO := C.double(state.phaseNCO) + res := bridgeLaunchStreamingPolyphaseStateful( + (*C.gpud_float2)(state.dInNew), + len(inv.IQNew), + (*C.gpud_float2)(state.dShifted), + (*C.float)(state.dTaps), + state.tapsLen, + state.decim, + state.numTaps, + (*C.gpud_float2)(state.dHistory), + (*C.gpud_float2)(state.dHistoryScratch), + state.historyCap, + &historyLen, + &phaseCount, + &phaseNCO, + phaseInc, + (*C.gpud_float2)(state.dOut), + outCap, + &nOut, + ) + if res != 0 { + return nil, ErrUnavailable + } + state.historyLen = int(historyLen) + state.phaseCount = int(phaseCount) + state.phaseNCO = float64(phaseNCO) + + outHost := make([]complex64, int(nOut)) + if len(outHost) > 0 { + if bridgeMemcpyD2H(unsafe.Pointer(&outHost[0]), state.dOut, uintptr(len(outHost))*unsafe.Sizeof(complex64(0))) != 0 { + return nil, ErrUnavailable + } + } + histHost := make([]complex64, state.historyLen) + if state.historyLen > 0 { + if bridgeMemcpyD2H(unsafe.Pointer(&histHost[0]), state.dHistory, uintptr(state.historyLen)*unsafe.Sizeof(complex64(0))) != 0 { + return nil, ErrUnavailable + } + } + + results[i] = StreamingGPUExecutionResult{ + SignalID: inv.SignalID, + Mode: StreamingGPUExecCUDA, + IQ: outHost, + Rate: inv.OutRate, + NOut: len(outHost), + PhaseCountOut: state.phaseCount, + NCOPhaseOut: state.phaseNCO, + HistoryOut: histHost, + HistoryLenOut: len(histHost), + } + } + return results, nil +} + +func (r *BatchRunner) getOrInitNativeStreamingState(inv StreamingGPUInvocation) (*nativeStreamingSignalState, error) { + state := r.nativeState[inv.SignalID] + needReset := false + historyCap := maxInt(0, inv.NumTaps-1) + if state == nil { + state = &nativeStreamingSignalState{signalID: inv.SignalID} + r.nativeState[inv.SignalID] = state + needReset = true + } + if state.configHash != inv.ConfigHash { + needReset = true + } + if state.decim != inv.Decim || state.numTaps != inv.NumTaps || 
state.tapsLen != len(inv.PolyphaseTaps) { + needReset = true + } + if state.historyCap != historyCap { + needReset = true + } + if needReset { + releaseNativeStreamingSignalState(state) + } + if len(inv.PolyphaseTaps) == 0 { + return nil, ErrUnavailable + } + if state.dTaps == nil && len(inv.PolyphaseTaps) > 0 { + if bridgeCudaMalloc(&state.dTaps, uintptr(len(inv.PolyphaseTaps))*unsafe.Sizeof(C.float(0))) != 0 { + return nil, ErrUnavailable + } + if bridgeMemcpyH2D(state.dTaps, unsafe.Pointer(&inv.PolyphaseTaps[0]), uintptr(len(inv.PolyphaseTaps))*unsafe.Sizeof(float32(0))) != 0 { + return nil, ErrUnavailable + } + state.tapsLen = len(inv.PolyphaseTaps) + } + if state.dShifted == nil { + minCap := maxInt(1, len(inv.IQNew)) + if bridgeCudaMalloc(&state.dShifted, uintptr(minCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 { + return nil, ErrUnavailable + } + state.shiftedCap = minCap + } + if state.shiftedCap < len(inv.IQNew) { + if bridgeCudaFree(state.dShifted) != 0 { + return nil, ErrUnavailable + } + state.dShifted = nil + state.shiftedCap = 0 + if bridgeCudaMalloc(&state.dShifted, uintptr(len(inv.IQNew))*unsafe.Sizeof(C.gpud_float2{})) != 0 { + return nil, ErrUnavailable + } + state.shiftedCap = len(inv.IQNew) + } + if state.dHistory == nil && historyCap > 0 { + if bridgeCudaMalloc(&state.dHistory, uintptr(historyCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 { + return nil, ErrUnavailable + } + } + if state.dHistoryScratch == nil && historyCap > 0 { + if bridgeCudaMalloc(&state.dHistoryScratch, uintptr(historyCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 { + return nil, ErrUnavailable + } + state.historyScratchCap = historyCap + } + if needReset { + state.phaseCount = inv.PhaseCountIn + state.phaseNCO = inv.NCOPhaseIn + state.historyLen = minInt(len(inv.ShiftedHistory), historyCap) + if state.historyLen > 0 { + if bridgeMemcpyH2D(state.dHistory, unsafe.Pointer(&inv.ShiftedHistory[len(inv.ShiftedHistory)-state.historyLen]), uintptr(state.historyLen)*unsafe.Sizeof(complex64(0))) 
!= 0 { + return nil, ErrUnavailable + } + } + } + state.decim = inv.Decim + state.numTaps = inv.NumTaps + state.historyCap = historyCap + state.historyScratchCap = historyCap + state.configHash = inv.ConfigHash + return state, nil +} + +func ensureNativeBuffer(ptr *unsafe.Pointer, capRef *int, need int, elemSize uintptr) error { + if need <= 0 { + return nil + } + if *ptr != nil && *capRef >= need { + return nil + } + if *ptr != nil { + if bridgeCudaFree(*ptr) != 0 { + return ErrUnavailable + } + *ptr = nil + *capRef = 0 + } + if bridgeCudaMalloc(ptr, uintptr(need)*elemSize) != 0 { + return ErrUnavailable + } + *capRef = need + return nil +} + +func (r *BatchRunner) syncNativeStreamingStates(active map[int64]struct{}) { + if r == nil || r.nativeState == nil { + return + } + for id, state := range r.nativeState { + if _, ok := active[id]; ok { + continue + } + releaseNativeStreamingSignalState(state) + delete(r.nativeState, id) + } +} + +func (r *BatchRunner) resetNativeStreamingState(signalID int64) { + if r == nil || r.nativeState == nil { + return + } + if state := r.nativeState[signalID]; state != nil { + releaseNativeStreamingSignalState(state) + } + delete(r.nativeState, signalID) +} + +func (r *BatchRunner) resetAllNativeStreamingStates() { + if r == nil { + return + } + r.freeAllNativeStreamingStates() + r.nativeState = make(map[int64]*nativeStreamingSignalState) +} + +func (r *BatchRunner) freeAllNativeStreamingStates() { + if r == nil || r.nativeState == nil { + return + } + for id, state := range r.nativeState { + releaseNativeStreamingSignalState(state) + delete(r.nativeState, id) + } +} + +func releaseNativeStreamingSignalState(state *nativeStreamingSignalState) { + if state == nil { + return + } + for _, ptr := range []*unsafe.Pointer{ + &state.dInNew, + &state.dShifted, + &state.dOut, + &state.dTaps, + &state.dHistory, + &state.dHistoryScratch, + } { + if *ptr != nil { + _ = bridgeCudaFree(*ptr) + *ptr = nil + } + } + state.inNewCap = 0 + 
state.shiftedCap = 0 + state.outCap = 0 + state.tapsLen = 0 + state.historyCap = 0 + state.historyLen = 0 + state.historyScratchCap = 0 + state.phaseCount = 0 + state.phaseNCO = 0 + state.decim = 0 + state.numTaps = 0 + state.configHash = 0 +} + +func minInt(a int, b int) int { + if a < b { + return a + } + return b +} diff --git a/internal/demod/gpudemod/streaming_gpu_native_prepare_stub.go b/internal/demod/gpudemod/streaming_gpu_native_prepare_stub.go new file mode 100644 index 0000000..7f1e4c0 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_native_prepare_stub.go @@ -0,0 +1,44 @@ +//go:build !cufft || !windows + +package gpudemod + +func (r *BatchRunner) executeStreamingGPUNativePrepared(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) { + _ = invocations + return nil, ErrUnavailable +} + +func (r *BatchRunner) syncNativeStreamingStates(active map[int64]struct{}) { + _ = active + if r == nil { + return + } + if r.nativeState == nil { + r.nativeState = make(map[int64]*nativeStreamingSignalState) + } + for id := range r.nativeState { + if _, ok := active[id]; !ok { + delete(r.nativeState, id) + } + } +} + +func (r *BatchRunner) resetNativeStreamingState(signalID int64) { + if r == nil || r.nativeState == nil { + return + } + delete(r.nativeState, signalID) +} + +func (r *BatchRunner) resetAllNativeStreamingStates() { + if r == nil { + return + } + r.nativeState = make(map[int64]*nativeStreamingSignalState) +} + +func (r *BatchRunner) freeAllNativeStreamingStates() { + if r == nil { + return + } + r.nativeState = nil +} diff --git a/internal/demod/gpudemod/streaming_gpu_native_prepare_test.go b/internal/demod/gpudemod/streaming_gpu_native_prepare_test.go new file mode 100644 index 0000000..9312d65 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_native_prepare_test.go @@ -0,0 +1,206 @@ +//go:build cufft && windows + +package gpudemod + +import ( + "os" + "path/filepath" + "testing" +) + +func 
configureNativePreparedDLLPath(t *testing.T) { + t.Helper() + candidates := []string{ + filepath.Join("build", "gpudemod_kernels.dll"), + filepath.Join("internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"), + "gpudemod_kernels.dll", + } + for _, candidate := range candidates { + if _, err := os.Stat(candidate); err == nil { + abs, err := filepath.Abs(candidate) + if err != nil { + t.Fatalf("resolve native prepared DLL path: %v", err) + } + t.Setenv("GPUMOD_DLL", abs) + return + } + } +} + +func requireNativePreparedTestRunner(t *testing.T) *BatchRunner { + t.Helper() + configureNativePreparedDLLPath(t) + if err := ensureDLLLoaded(); err != nil { + t.Skipf("native prepared path unavailable: %v", err) + } + if !Available() { + t.Skip("native prepared path unavailable: cuda device not available") + } + r, err := NewBatchRunner(32768, 4000000) + if err != nil { + t.Skipf("native prepared path unavailable: %v", err) + } + t.Cleanup(r.Close) + return r +} + +func TestStreamingGPUNativePreparedMatchesCPUOracleAcrossChunkPatterns(t *testing.T) { + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + exec := func(r *BatchRunner, invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) { + return r.executeStreamingGPUNativePrepared(invocations) + } + t.Run("DeterministicIQ", func(t *testing.T) { + r := requireNativePreparedTestRunner(t) + steps := makeStreamingValidationSteps( + makeDeterministicIQ(8192), + []int{0, 1, 2, 17, 63, 64, 65, 129, 511, 2048}, + []StreamingExtractJob{job}, + ) + runPreparedSequenceAgainstOracle(t, r, exec, steps, 1e-4, 1e-8) + }) + t.Run("ToneNoiseIQ", func(t *testing.T) { + r := requireNativePreparedTestRunner(t) + steps := makeStreamingValidationSteps( + makeToneNoiseIQ(12288, 0.023), + []int{7, 20, 3, 63, 64, 65, 777, 2048, 4096}, + []StreamingExtractJob{job}, + ) + runPreparedSequenceAgainstOracle(t, r, exec, steps, 1e-4, 
1e-8) + }) +} + +func TestStreamingGPUNativePreparedLifecycleResetAndCapacity(t *testing.T) { + r := requireNativePreparedTestRunner(t) + exec := func(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) { + return r.executeStreamingGPUNativePrepared(invocations) + } + jobA := StreamingExtractJob{ + SignalID: 11, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 3001, + } + jobB := StreamingExtractJob{ + SignalID: 22, + OffsetHz: -18750, + Bandwidth: 16000, + OutRate: 100000, + NumTaps: 33, + ConfigHash: 4002, + } + + steps := []streamingValidationStep{ + { + name: "prime_both_signals", + iq: makeDeterministicIQ(256), + jobs: []StreamingExtractJob{jobA, jobB}, + }, + { + name: "grow_capacity", + iq: makeToneNoiseIQ(4096, 0.037), + jobs: []StreamingExtractJob{jobA, jobB}, + }, + { + name: "config_reset_zero_new", + iq: nil, + jobs: []StreamingExtractJob{{SignalID: jobA.SignalID, OffsetHz: jobA.OffsetHz, Bandwidth: jobA.Bandwidth, OutRate: jobA.OutRate, NumTaps: jobA.NumTaps, ConfigHash: jobA.ConfigHash + 1}, jobB}, + }, + { + name: "signal_b_disappears", + iq: makeDeterministicIQ(64), + jobs: []StreamingExtractJob{jobA}, + }, + { + name: "signal_b_reappears", + iq: makeToneNoiseIQ(96, 0.017), + jobs: []StreamingExtractJob{jobA, jobB}, + }, + { + name: "history_boundary", + iq: makeDeterministicIQ(65), + jobs: []StreamingExtractJob{jobA, jobB}, + }, + } + + oracle := NewCPUOracleRunner(r.eng.sampleRate) + var grownCap int + for idx, step := range steps { + invocations, err := r.buildStreamingGPUInvocations(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): build invocations failed: %v", idx, step.name, err) + } + got, err := exec(invocations) + if err != nil { + t.Fatalf("step %d (%s): native prepared exec failed: %v", idx, step.name, err) + } + want, err := oracle.StreamingExtract(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): oracle failed: %v", idx, step.name, err) + } + if 
len(got) != len(want) { + t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", idx, step.name, len(got), len(want)) + } + applied := r.applyStreamingGPUExecutionResults(got) + for i, job := range step.jobs { + oracleState := oracle.States[job.SignalID] + requirePreparedExecutionResultMatchesOracle(t, got[i], want[i], oracleState, 1e-4, 1e-8) + requireStreamingExtractResultMatchesOracle(t, applied[i], want[i]) + requireExtractStateMatchesOracle(t, r.streamState[job.SignalID], oracleState, 1e-8, 1e-4) + + state := r.nativeState[job.SignalID] + if state == nil { + t.Fatalf("step %d (%s): missing native state for signal %d", idx, step.name, job.SignalID) + } + if state.configHash != job.ConfigHash { + t.Fatalf("step %d (%s): native config hash mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.configHash, job.ConfigHash) + } + if state.decim != oracleState.Decim { + t.Fatalf("step %d (%s): native decim mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.decim, oracleState.Decim) + } + if state.numTaps != oracleState.NumTaps { + t.Fatalf("step %d (%s): native num taps mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.numTaps, oracleState.NumTaps) + } + if state.historyCap != maxInt(0, oracleState.NumTaps-1) { + t.Fatalf("step %d (%s): native history cap mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.historyCap, maxInt(0, oracleState.NumTaps-1)) + } + if state.historyLen != len(oracleState.ShiftedHistory) { + t.Fatalf("step %d (%s): native history len mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.historyLen, len(oracleState.ShiftedHistory)) + } + if len(step.iq) > 0 && state.shiftedCap < len(step.iq) { + t.Fatalf("step %d (%s): native shifted capacity too small for signal %d: got=%d need>=%d", idx, step.name, job.SignalID, state.shiftedCap, len(step.iq)) + } + if state.outCap < got[i].NOut { + t.Fatalf("step %d (%s): native 
out capacity too small for signal %d: got=%d need>=%d", idx, step.name, job.SignalID, state.outCap, got[i].NOut) + } + if job.SignalID == jobA.SignalID && state.shiftedCap > grownCap { + grownCap = state.shiftedCap + } + } + if step.name == "grow_capacity" && grownCap < len(step.iq) { + t.Fatalf("expected capacity growth for signal %d, got=%d want>=%d", jobA.SignalID, grownCap, len(step.iq)) + } + if step.name == "config_reset_zero_new" { + state := r.nativeState[jobA.SignalID] + if state == nil { + t.Fatalf("missing native state for signal %d after config reset", jobA.SignalID) + } + if state.historyLen != 0 { + t.Fatalf("expected cleared native history after config reset, got=%d", state.historyLen) + } + } + if step.name == "signal_b_disappears" { + if _, ok := r.nativeState[jobB.SignalID]; ok { + t.Fatalf("expected native state for signal %d to be removed on disappearance", jobB.SignalID) + } + } + } +} diff --git a/internal/demod/gpudemod/streaming_gpu_native_state.go b/internal/demod/gpudemod/streaming_gpu_native_state.go new file mode 100644 index 0000000..e1b6460 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_native_state.go @@ -0,0 +1,28 @@ +package gpudemod + +import "unsafe" + +type nativeStreamingSignalState struct { + signalID int64 + + configHash uint64 + decim int + numTaps int + + dInNew unsafe.Pointer + dShifted unsafe.Pointer + dOut unsafe.Pointer + dTaps unsafe.Pointer + dHistory unsafe.Pointer + dHistoryScratch unsafe.Pointer + + inNewCap int + shiftedCap int + outCap int + tapsLen int + historyCap int + historyLen int + historyScratchCap int + phaseCount int + phaseNCO float64 +} diff --git a/internal/demod/gpudemod/streaming_gpu_prepare.go b/internal/demod/gpudemod/streaming_gpu_prepare.go new file mode 100644 index 0000000..8e8a957 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_prepare.go @@ -0,0 +1,61 @@ +package gpudemod + +func (r *BatchRunner) buildStreamingGPUInvocations(iqNew []complex64, jobs []StreamingExtractJob) 
([]StreamingGPUInvocation, error) { + if r == nil || r.eng == nil { + return nil, ErrUnavailable + } + invocations := make([]StreamingGPUInvocation, len(jobs)) + active := make(map[int64]struct{}, len(jobs)) + for i, job := range jobs { + active[job.SignalID] = struct{}{} + state, err := r.getOrInitExtractState(job, r.eng.sampleRate) + if err != nil { + return nil, err + } + invocations[i] = StreamingGPUInvocation{ + SignalID: job.SignalID, + ConfigHash: state.ConfigHash, + OffsetHz: job.OffsetHz, + OutRate: job.OutRate, + Bandwidth: job.Bandwidth, + SampleRate: r.eng.sampleRate, + NumTaps: state.NumTaps, + Decim: state.Decim, + PhaseCountIn: state.PhaseCount, + NCOPhaseIn: state.NCOPhase, + HistoryLen: len(state.ShiftedHistory), + BaseTaps: append([]float32(nil), state.BaseTaps...), + PolyphaseTaps: append([]float32(nil), state.PolyphaseTaps...), + ShiftedHistory: append([]complex64(nil), state.ShiftedHistory...), + IQNew: iqNew, + } + } + for signalID := range r.streamState { + if _, ok := active[signalID]; !ok { + delete(r.streamState, signalID) + } + } + r.syncNativeStreamingStates(active) + return invocations, nil +} + +func (r *BatchRunner) applyStreamingGPUExecutionResults(results []StreamingGPUExecutionResult) []StreamingExtractResult { + out := make([]StreamingExtractResult, len(results)) + for i, res := range results { + state := r.streamState[res.SignalID] + if state != nil { + state.NCOPhase = res.NCOPhaseOut + state.PhaseCount = res.PhaseCountOut + state.ShiftedHistory = append(state.ShiftedHistory[:0], res.HistoryOut...) 
+ } + out[i] = StreamingExtractResult{ + SignalID: res.SignalID, + IQ: res.IQ, + Rate: res.Rate, + NOut: res.NOut, + PhaseCount: res.PhaseCountOut, + HistoryLen: res.HistoryLenOut, + } + } + return out +} diff --git a/internal/demod/gpudemod/streaming_gpu_stub.go b/internal/demod/gpudemod/streaming_gpu_stub.go new file mode 100644 index 0000000..500e235 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_stub.go @@ -0,0 +1,26 @@ +package gpudemod + +func updateShiftedHistory(prev []complex64, shiftedNew []complex64, numTaps int) []complex64 { + need := numTaps - 1 + if need <= 0 { + return nil + } + combined := append(append(make([]complex64, 0, len(prev)+len(shiftedNew)), prev...), shiftedNew...) + if len(combined) <= need { + out := make([]complex64, len(combined)) + copy(out, combined) + return out + } + out := make([]complex64, need) + copy(out, combined[len(combined)-need:]) + return out +} + +// StreamingExtractGPU is the production entry point for the stateful streaming +// extractor path. Execution strategy is selected by StreamingExtractGPUExec. 
+func (r *BatchRunner) StreamingExtractGPU(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) { + if r == nil || r.eng == nil { + return nil, ErrUnavailable + } + return r.StreamingExtractGPUExec(iqNew, jobs) +} diff --git a/internal/demod/gpudemod/streaming_gpu_stub_test.go b/internal/demod/gpudemod/streaming_gpu_stub_test.go new file mode 100644 index 0000000..2c947d3 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_stub_test.go @@ -0,0 +1,59 @@ +package gpudemod + +import "testing" + +func TestStreamingGPUUsesSafeProductionDefault(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + iq := makeDeterministicIQ(1000) + results, err := r.StreamingExtractGPU(iq, []StreamingExtractJob{job}) + if err != nil { + t.Fatalf("expected safe production default path, got error: %v", err) + } + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].NOut == 0 { + t.Fatalf("expected non-zero output count from safe production path") + } +} + +func TestStreamingGPUHostOracleAdvancesState(t *testing.T) { + r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)} + job := StreamingExtractJob{ + SignalID: 1, + OffsetHz: 12500, + Bandwidth: 20000, + OutRate: 200000, + NumTaps: 65, + ConfigHash: 777, + } + iq := makeDeterministicIQ(1000) + results, err := r.StreamingExtractGPUHostOracle(iq, []StreamingExtractJob{job}) + if err != nil { + t.Fatalf("unexpected host-oracle error: %v", err) + } + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + state := r.streamState[1] + if state == nil { + t.Fatalf("expected state to be initialized") + } + if state.NCOPhase == 0 { + t.Fatalf("expected phase to advance") + } + if 
len(state.ShiftedHistory) == 0 { + t.Fatalf("expected shifted history to be updated") + } + if results[0].NOut == 0 { + t.Fatalf("expected non-zero output count from host oracle path") + } +} diff --git a/internal/demod/gpudemod/streaming_gpu_validation_helpers_test.go b/internal/demod/gpudemod/streaming_gpu_validation_helpers_test.go new file mode 100644 index 0000000..b88b102 --- /dev/null +++ b/internal/demod/gpudemod/streaming_gpu_validation_helpers_test.go @@ -0,0 +1,213 @@ +package gpudemod + +import ( + "math" + "testing" +) + +type streamingValidationStep struct { + name string + iq []complex64 + jobs []StreamingExtractJob +} + +type streamingPreparedExecutor func(*BatchRunner, []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) + +func makeToneNoiseIQ(n int, phaseInc float64) []complex64 { + out := make([]complex64, n) + phase := 0.0 + for i := 0; i < n; i++ { + tone := complex(math.Cos(phase), math.Sin(phase)) + noiseI := 0.17*math.Cos(0.113*float64(i)+0.31) + 0.07*math.Sin(0.071*float64(i)) + noiseQ := 0.13*math.Sin(0.097*float64(i)+0.11) - 0.05*math.Cos(0.043*float64(i)) + out[i] = complex64(0.85*tone + 0.15*complex(noiseI, noiseQ)) + phase += phaseInc + } + return out +} + +func makeStreamingValidationSteps(iq []complex64, chunkSizes []int, jobs []StreamingExtractJob) []streamingValidationStep { + steps := make([]streamingValidationStep, 0, len(chunkSizes)+1) + pos := 0 + for idx, n := range chunkSizes { + if n < 0 { + n = 0 + } + end := pos + n + if end > len(iq) { + end = len(iq) + } + steps = append(steps, streamingValidationStep{ + name: "chunk", + iq: append([]complex64(nil), iq[pos:end]...), + jobs: append([]StreamingExtractJob(nil), jobs...), + }) + _ = idx + pos = end + } + if pos < len(iq) { + steps = append(steps, streamingValidationStep{ + name: "remainder", + iq: append([]complex64(nil), iq[pos:]...), + jobs: append([]StreamingExtractJob(nil), jobs...), + }) + } + return steps +} + +func requirePhaseClose(t *testing.T, got 
float64, want float64, tol float64) { + t.Helper() + diff := got - want + for diff > math.Pi { + diff -= 2 * math.Pi + } + for diff < -math.Pi { + diff += 2 * math.Pi + } + if math.Abs(diff) > tol { + t.Fatalf("phase mismatch: got=%0.12f want=%0.12f diff=%0.12f tol=%0.12f", got, want, diff, tol) + } +} + +func requireStreamingExtractResultMatchesOracle(t *testing.T, got StreamingExtractResult, want StreamingExtractResult) { + t.Helper() + if got.SignalID != want.SignalID { + t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID) + } + if got.Rate != want.Rate { + t.Fatalf("rate mismatch for signal %d: got=%d want=%d", got.SignalID, got.Rate, want.Rate) + } + if got.NOut != want.NOut { + t.Fatalf("n_out mismatch for signal %d: got=%d want=%d", got.SignalID, got.NOut, want.NOut) + } + if got.PhaseCount != want.PhaseCount { + t.Fatalf("phase count mismatch for signal %d: got=%d want=%d", got.SignalID, got.PhaseCount, want.PhaseCount) + } + if got.HistoryLen != want.HistoryLen { + t.Fatalf("history len mismatch for signal %d: got=%d want=%d", got.SignalID, got.HistoryLen, want.HistoryLen) + } +} + +func requirePreparedExecutionResultMatchesOracle(t *testing.T, got StreamingGPUExecutionResult, want StreamingExtractResult, oracleState *CPUOracleState, sampleTol float64, phaseTol float64) { + t.Helper() + if oracleState == nil { + t.Fatalf("missing oracle state for signal %d", got.SignalID) + } + if got.SignalID != want.SignalID { + t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID) + } + if got.Rate != want.Rate { + t.Fatalf("rate mismatch for signal %d: got=%d want=%d", got.SignalID, got.Rate, want.Rate) + } + if got.NOut != want.NOut { + t.Fatalf("n_out mismatch for signal %d: got=%d want=%d", got.SignalID, got.NOut, want.NOut) + } + if got.PhaseCountOut != oracleState.PhaseCount { + t.Fatalf("phase count mismatch for signal %d: got=%d want=%d", got.SignalID, got.PhaseCountOut, oracleState.PhaseCount) + } + 
requirePhaseClose(t, got.NCOPhaseOut, oracleState.NCOPhase, phaseTol) + if got.HistoryLenOut != len(oracleState.ShiftedHistory) { + t.Fatalf("history len mismatch for signal %d: got=%d want=%d", got.SignalID, got.HistoryLenOut, len(oracleState.ShiftedHistory)) + } + requireComplexSlicesClose(t, got.IQ, want.IQ, sampleTol) + requireComplexSlicesClose(t, got.HistoryOut, oracleState.ShiftedHistory, sampleTol) +} + +func requireExtractStateMatchesOracle(t *testing.T, got *ExtractStreamState, want *CPUOracleState, phaseTol float64, sampleTol float64) { + t.Helper() + if got == nil || want == nil { + t.Fatalf("state mismatch: got nil=%t want nil=%t", got == nil, want == nil) + } + if got.SignalID != want.SignalID { + t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID) + } + if got.ConfigHash != want.ConfigHash { + t.Fatalf("config hash mismatch for signal %d: got=%d want=%d", got.SignalID, got.ConfigHash, want.ConfigHash) + } + if got.Decim != want.Decim { + t.Fatalf("decim mismatch for signal %d: got=%d want=%d", got.SignalID, got.Decim, want.Decim) + } + if got.NumTaps != want.NumTaps { + t.Fatalf("num taps mismatch for signal %d: got=%d want=%d", got.SignalID, got.NumTaps, want.NumTaps) + } + if got.PhaseCount != want.PhaseCount { + t.Fatalf("phase count mismatch for signal %d: got=%d want=%d", got.SignalID, got.PhaseCount, want.PhaseCount) + } + requirePhaseClose(t, got.NCOPhase, want.NCOPhase, phaseTol) + requireComplexSlicesClose(t, got.ShiftedHistory, want.ShiftedHistory, sampleTol) +} + +func requireStateKeysMatchOracle(t *testing.T, got map[int64]*ExtractStreamState, want map[int64]*CPUOracleState) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("active state count mismatch: got=%d want=%d", len(got), len(want)) + } + for signalID := range want { + if got[signalID] == nil { + t.Fatalf("missing active state for signal %d", signalID) + } + } + for signalID := range got { + if want[signalID] == nil { + t.Fatalf("unexpected active 
state for signal %d", signalID) + } + } +} + +func runStreamingExecSequenceAgainstOracle(t *testing.T, runner *BatchRunner, steps []streamingValidationStep, sampleTol float64, phaseTol float64) { + t.Helper() + oracle := NewCPUOracleRunner(runner.eng.sampleRate) + for idx, step := range steps { + got, err := runner.StreamingExtractGPUExec(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): exec failed: %v", idx, step.name, err) + } + want, err := oracle.StreamingExtract(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): oracle failed: %v", idx, step.name, err) + } + if len(got) != len(want) { + t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", idx, step.name, len(got), len(want)) + } + for i, job := range step.jobs { + requireStreamingExtractResultMatchesOracle(t, got[i], want[i]) + requireComplexSlicesClose(t, got[i].IQ, want[i].IQ, sampleTol) + requireExtractStateMatchesOracle(t, runner.streamState[job.SignalID], oracle.States[job.SignalID], phaseTol, sampleTol) + } + requireStateKeysMatchOracle(t, runner.streamState, oracle.States) + } +} + +func runPreparedSequenceAgainstOracle(t *testing.T, runner *BatchRunner, exec streamingPreparedExecutor, steps []streamingValidationStep, sampleTol float64, phaseTol float64) { + t.Helper() + oracle := NewCPUOracleRunner(runner.eng.sampleRate) + for idx, step := range steps { + invocations, err := runner.buildStreamingGPUInvocations(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): build invocations failed: %v", idx, step.name, err) + } + got, err := exec(runner, invocations) + if err != nil { + t.Fatalf("step %d (%s): prepared exec failed: %v", idx, step.name, err) + } + want, err := oracle.StreamingExtract(step.iq, step.jobs) + if err != nil { + t.Fatalf("step %d (%s): oracle failed: %v", idx, step.name, err) + } + if len(got) != len(want) { + t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", idx, step.name, len(got), len(want)) + } + applied := 
runner.applyStreamingGPUExecutionResults(got) + if len(applied) != len(want) { + t.Fatalf("step %d (%s): applied result count mismatch: got=%d want=%d", idx, step.name, len(applied), len(want)) + } + for i, job := range step.jobs { + oracleState := oracle.States[job.SignalID] + requirePreparedExecutionResultMatchesOracle(t, got[i], want[i], oracleState, sampleTol, phaseTol) + requireStreamingExtractResultMatchesOracle(t, applied[i], want[i]) + requireComplexSlicesClose(t, applied[i].IQ, want[i].IQ, sampleTol) + requireExtractStateMatchesOracle(t, runner.streamState[job.SignalID], oracleState, phaseTol, sampleTol) + } + requireStateKeysMatchOracle(t, runner.streamState, oracle.States) + } +} diff --git a/internal/demod/gpudemod/streaming_host_core.go b/internal/demod/gpudemod/streaming_host_core.go new file mode 100644 index 0000000..f9b75aa --- /dev/null +++ b/internal/demod/gpudemod/streaming_host_core.go @@ -0,0 +1,64 @@ +package gpudemod + +import "math" + +func runStreamingPolyphaseHostCore( + iqNew []complex64, + sampleRate int, + offsetHz float64, + stateNCOPhase float64, + statePhaseCount int, + stateNumTaps int, + stateDecim int, + stateHistory []complex64, + polyphaseTaps []float32, +) ([]complex64, float64, int, []complex64) { + out := make([]complex64, 0, len(iqNew)/maxInt(1, stateDecim)+2) + phase := stateNCOPhase + phaseCount := statePhaseCount + hist := append([]complex64(nil), stateHistory...) 
+ phaseLen := PolyphasePhaseLen(len(polyphaseTaps)/maxInt(1, stateDecim)*maxInt(1, stateDecim), stateDecim) + if phaseLen == 0 { + phaseLen = PolyphasePhaseLen(len(polyphaseTaps), stateDecim) + } + phaseInc := -2.0 * math.Pi * offsetHz / float64(sampleRate) + for _, x := range iqNew { + rot := complex64(complex(math.Cos(phase), math.Sin(phase))) + s := x * rot + hist = append(hist, s) + phaseCount++ + if phaseCount == stateDecim { + var y complex64 + for p := 0; p < stateDecim; p++ { + for k := 0; k < phaseLen; k++ { + idxTap := p*phaseLen + k + if idxTap >= len(polyphaseTaps) { + continue + } + tap := polyphaseTaps[idxTap] + if tap == 0 { + continue + } + srcBack := p + k*stateDecim + idx := len(hist) - 1 - srcBack + if idx < 0 { + continue + } + y += complex(tap, 0) * hist[idx] + } + } + out = append(out, y) + phaseCount = 0 + } + if len(hist) > stateNumTaps-1 { + hist = hist[len(hist)-(stateNumTaps-1):] + } + phase += phaseInc + if phase >= math.Pi { + phase -= 2 * math.Pi + } else if phase < -math.Pi { + phase += 2 * math.Pi + } + } + return out, phase, phaseCount, append([]complex64(nil), hist...) 
+} diff --git a/internal/demod/gpudemod/streaming_host_core_test.go b/internal/demod/gpudemod/streaming_host_core_test.go new file mode 100644 index 0000000..099c755 --- /dev/null +++ b/internal/demod/gpudemod/streaming_host_core_test.go @@ -0,0 +1,40 @@ +package gpudemod + +import "testing" + +func TestRunStreamingPolyphaseHostCoreMatchesCPUOraclePolyphase(t *testing.T) { + cfg := OracleHarnessConfig{ + SignalID: 1, + ConfigHash: 123, + NCOPhase: 0, + Decim: 20, + NumTaps: 65, + PhaseInc: 0.017, + } + state := MakeCPUOracleState(cfg) + iq := MakeDeterministicIQ(12000) + oracle := CPUOracleExtractPolyphase(iq, state, cfg.PhaseInc) + + state2 := MakeCPUOracleState(cfg) + out, phase, phaseCount, hist := runStreamingPolyphaseHostCore( + iq, + 4000000, + -cfg.PhaseInc*4000000/(2*3.141592653589793), + state2.NCOPhase, + state2.PhaseCount, + state2.NumTaps, + state2.Decim, + state2.ShiftedHistory, + state2.PolyphaseTaps, + ) + requireComplexSlicesClose(t, oracle, out, 1e-5) + if phase == 0 && len(iq) > 0 { + t.Fatalf("expected phase to advance") + } + if phaseCount < 0 || phaseCount >= state2.Decim { + t.Fatalf("unexpected phaseCount: %d", phaseCount) + } + if len(hist) == 0 { + t.Fatalf("expected history to be retained") + } +} diff --git a/internal/demod/gpudemod/streaming_oracle_extract.go b/internal/demod/gpudemod/streaming_oracle_extract.go new file mode 100644 index 0000000..eb89b7e --- /dev/null +++ b/internal/demod/gpudemod/streaming_oracle_extract.go @@ -0,0 +1,111 @@ +package gpudemod + +import ( + "fmt" + + "sdr-wideband-suite/internal/dsp" +) + +type CPUOracleRunner struct { + SampleRate int + States map[int64]*CPUOracleState +} + +func (r *CPUOracleRunner) ResetAllStates() { + if r == nil { + return + } + r.States = make(map[int64]*CPUOracleState) +} + +func NewCPUOracleRunner(sampleRate int) *CPUOracleRunner { + return &CPUOracleRunner{ + SampleRate: sampleRate, + States: make(map[int64]*CPUOracleState), + } +} + +func (r *CPUOracleRunner) 
ResetSignalState(signalID int64) { + if r == nil || r.States == nil { + return + } + delete(r.States, signalID) +} + +func (r *CPUOracleRunner) getOrInitState(job StreamingExtractJob) (*CPUOracleState, error) { + if r == nil { + return nil, fmt.Errorf("nil CPUOracleRunner") + } + if r.States == nil { + r.States = make(map[int64]*CPUOracleState) + } + decim, err := ExactIntegerDecimation(r.SampleRate, job.OutRate) + if err != nil { + return nil, err + } + state := r.States[job.SignalID] + if state == nil { + state = &CPUOracleState{SignalID: job.SignalID} + r.States[job.SignalID] = state + } + ResetCPUOracleStateIfConfigChanged(state, job.ConfigHash) + state.Decim = decim + state.NumTaps = job.NumTaps + if state.NumTaps <= 0 { + state.NumTaps = 101 + } + cutoff := job.Bandwidth / 2 + if cutoff < 200 { + cutoff = 200 + } + base := dsp.LowpassFIR(cutoff, r.SampleRate, state.NumTaps) + state.BaseTaps = make([]float32, len(base)) + for i, v := range base { + state.BaseTaps[i] = float32(v) + } + state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim) + if state.ShiftedHistory == nil { + state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1)) + } + return state, nil +} + +func (r *CPUOracleRunner) StreamingExtract(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) { + results := make([]StreamingExtractResult, len(jobs)) + active := make(map[int64]struct{}, len(jobs)) + for i, job := range jobs { + active[job.SignalID] = struct{}{} + state, err := r.getOrInitState(job) + if err != nil { + return nil, err + } + out, phase, phaseCount, hist := runStreamingPolyphaseHostCore( + iqNew, + r.SampleRate, + job.OffsetHz, + state.NCOPhase, + state.PhaseCount, + state.NumTaps, + state.Decim, + state.ShiftedHistory, + state.PolyphaseTaps, + ) + state.NCOPhase = phase + state.PhaseCount = phaseCount + state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...) 
+ results[i] = StreamingExtractResult{ + SignalID: job.SignalID, + IQ: out, + Rate: job.OutRate, + NOut: len(out), + PhaseCount: state.PhaseCount, + HistoryLen: len(state.ShiftedHistory), + } + } + for signalID := range r.States { + if _, ok := active[signalID]; !ok { + delete(r.States, signalID) + } + } + return results, nil +} diff --git a/internal/demod/gpudemod/streaming_types.go b/internal/demod/gpudemod/streaming_types.go new file mode 100644 index 0000000..fb15cb3 --- /dev/null +++ b/internal/demod/gpudemod/streaming_types.go @@ -0,0 +1,64 @@ +package gpudemod + +import ( + "fmt" + "hash/fnv" +) + +type StreamingExtractJob struct { + SignalID int64 + OffsetHz float64 + Bandwidth float64 + OutRate int + NumTaps int + ConfigHash uint64 +} + +type StreamingExtractResult struct { + SignalID int64 + IQ []complex64 + Rate int + NOut int + PhaseCount int + HistoryLen int +} + +type ExtractStreamState struct { + SignalID int64 + ConfigHash uint64 + NCOPhase float64 + Decim int + PhaseCount int + NumTaps int + ShiftedHistory []complex64 + BaseTaps []float32 + PolyphaseTaps []float32 + Initialized bool +} + +func ResetExtractStreamState(state *ExtractStreamState, cfgHash uint64) { + if state == nil { + return + } + state.ConfigHash = cfgHash + state.NCOPhase = 0 + state.PhaseCount = 0 + state.ShiftedHistory = state.ShiftedHistory[:0] + state.Initialized = false +} + +func StreamingConfigHash(signalID int64, offsetHz float64, bandwidth float64, outRate int, numTaps int, sampleRate int) uint64 { + // Hash only structural parameters that change the FIR/decimation geometry. + // Offset is NOT included because the NCO phase_inc tracks it smoothly each frame. + // Bandwidth is NOT included because taps are rebuilt every frame in getOrInitExtractState. + // A state reset (zeroing NCO phase, history, phase count) is only needed when + // decimation factor, tap count, or sample rate changes — all of which affect + // buffer sizes and polyphase structure. 
+ // + // Previous bug: offset and bandwidth were formatted at %.9f precision, causing + // a new hash (and full state reset) every single frame because the detector's + // exponential smoothing changes CenterHz by sub-Hz fractions each frame. + h := fnv.New64a() + _, _ = h.Write([]byte(fmt.Sprintf("sig=%d|out=%d|taps=%d|sr=%d", signalID, outRate, numTaps, sampleRate))) + return h.Sum64() +} diff --git a/internal/demod/gpudemod/test_harness.go b/internal/demod/gpudemod/test_harness.go new file mode 100644 index 0000000..2a74d0b --- /dev/null +++ b/internal/demod/gpudemod/test_harness.go @@ -0,0 +1,78 @@ +package gpudemod + +import ( + "math" +) + +type OracleHarnessConfig struct { + SignalID int64 + ConfigHash uint64 + NCOPhase float64 + Decim int + NumTaps int + PhaseInc float64 +} + +func MakeDeterministicIQ(n int) []complex64 { + out := make([]complex64, n) + for i := 0; i < n; i++ { + a := 0.017 * float64(i) + b := 0.031 * float64(i) + out[i] = complex64(complex(math.Cos(a)+0.2*math.Cos(b), math.Sin(a)+0.15*math.Sin(b))) + } + return out +} + +func MakeToneIQ(n int, phaseInc float64) []complex64 { + out := make([]complex64, n) + phase := 0.0 + for i := 0; i < n; i++ { + out[i] = complex64(complex(math.Cos(phase), math.Sin(phase))) + phase += phaseInc + } + return out +} + +func MakeLowpassTaps(n int) []float32 { + out := make([]float32, n) + for i := range out { + out[i] = 1.0 / float32(n) + } + return out +} + +func MakeCPUOracleState(cfg OracleHarnessConfig) *CPUOracleState { + taps := MakeLowpassTaps(cfg.NumTaps) + return &CPUOracleState{ + SignalID: cfg.SignalID, + ConfigHash: cfg.ConfigHash, + NCOPhase: cfg.NCOPhase, + Decim: cfg.Decim, + PhaseCount: 0, + NumTaps: cfg.NumTaps, + ShiftedHistory: make([]complex64, 0, maxInt(0, cfg.NumTaps-1)), + BaseTaps: taps, + PolyphaseTaps: BuildPolyphaseTapsPhaseMajor(taps, cfg.Decim), + } +} + +func RunChunkedCPUOraclePolyphase(all []complex64, chunkSizes []int, mkState func() *CPUOracleState, phaseInc float64) 
[]complex64 { + state := mkState() + out := make([]complex64, 0) + pos := 0 + for _, n := range chunkSizes { + if pos >= len(all) { + break + } + end := pos + n + if end > len(all) { + end = len(all) + } + out = append(out, CPUOracleExtractPolyphase(all[pos:end], state, phaseInc)...) + pos = end + } + if pos < len(all) { + out = append(out, CPUOracleExtractPolyphase(all[pos:], state, phaseInc)...) + } + return out +} diff --git a/internal/demod/gpudemod/test_harness_test.go b/internal/demod/gpudemod/test_harness_test.go new file mode 100644 index 0000000..c4621b1 --- /dev/null +++ b/internal/demod/gpudemod/test_harness_test.go @@ -0,0 +1,39 @@ +package gpudemod + +import "testing" + +func requireComplexSlicesCloseHarness(t *testing.T, a []complex64, b []complex64, tol float64) { + t.Helper() + if len(a) != len(b) { + t.Fatalf("length mismatch: %d vs %d", len(a), len(b)) + } + for i := range a { + d := CompareComplexSlices([]complex64{a[i]}, []complex64{b[i]}) + if d.MaxAbsErr > tol { + t.Fatalf("slice mismatch at %d: %v vs %v (tol=%f)", i, a[i], b[i], tol) + } + } +} + +func TestHarnessChunkedCPUOraclePolyphase(t *testing.T) { + cfg := OracleHarnessConfig{ + SignalID: 1, + ConfigHash: 123, + NCOPhase: 0, + Decim: 20, + NumTaps: 65, + PhaseInc: 0.017, + } + iq := MakeDeterministicIQ(150000) + mk := func() *CPUOracleState { return MakeCPUOracleState(cfg) } + mono := CPUOracleExtractPolyphase(iq, mk(), cfg.PhaseInc) + chunked := RunChunkedCPUOraclePolyphase(iq, []int{4096, 5000, 8192, 27307}, mk, cfg.PhaseInc) + requireComplexSlicesCloseHarness(t, mono, chunked, 1e-5) +} + +func TestHarnessToneIQ(t *testing.T) { + iq := MakeToneIQ(1024, 0.05) + if len(iq) != 1024 { + t.Fatalf("unexpected tone iq length: %d", len(iq)) + } +} diff --git a/internal/demod/gpudemod/windows_bridge.go b/internal/demod/gpudemod/windows_bridge.go index 3371be7..fbfcc9a 100644 --- a/internal/demod/gpudemod/windows_bridge.go +++ b/internal/demod/gpudemod/windows_bridge.go @@ -4,7 +4,7 @@ package 
gpudemod /* #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include" -#cgo windows LDFLAGS: -lcudart64_13 -lkernel32 +#cgo windows LDFLAGS: -L"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/bin/x64" -l:cudart64_13.dll -lkernel32 #include #include #include @@ -26,6 +26,8 @@ typedef int (__stdcall *gpud_launch_decimate_stream_fn)(const gpud_float2* in, g typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor); typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n); typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start); +typedef int (__stdcall *gpud_launch_streaming_polyphase_prepare_fn)(const gpud_float2* in_new, int n_new, const gpud_float2* history_in, int history_len, const float* polyphase_taps, int polyphase_len, int decim, int num_taps, int phase_count_in, double phase_start, double phase_inc, gpud_float2* out, int* n_out, int* phase_count_out, double* phase_end_out, gpud_float2* history_out); +typedef int (__stdcall *gpud_launch_streaming_polyphase_stateful_fn)(const gpud_float2* in_new, int n_new, gpud_float2* shifted_new_tmp, const float* polyphase_taps, int polyphase_len, int decim, int num_taps, gpud_float2* history_state, gpud_float2* history_scratch, int history_cap, int* history_len_io, int* phase_count_state, double* phase_state, double phase_inc, gpud_float2* out, int out_cap, int* n_out); static HMODULE gpud_mod = NULL; static gpud_stream_create_fn gpud_p_stream_create = NULL; @@ -42,6 +44,8 @@ static gpud_launch_decimate_stream_fn gpud_p_launch_decimate_stream = NULL; static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL; static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL; static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL; +static gpud_launch_streaming_polyphase_prepare_fn 
gpud_p_launch_streaming_polyphase_prepare = NULL; +static gpud_launch_streaming_polyphase_stateful_fn gpud_p_launch_streaming_polyphase_stateful = NULL; static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); } static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); } @@ -67,6 +71,8 @@ static int gpud_load_library(const char* path) { gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda"); gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda"); gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda"); + gpud_p_launch_streaming_polyphase_prepare = (gpud_launch_streaming_polyphase_prepare_fn)GetProcAddress(gpud_mod, "gpud_launch_streaming_polyphase_prepare_cuda"); + gpud_p_launch_streaming_polyphase_stateful = (gpud_launch_streaming_polyphase_stateful_fn)GetProcAddress(gpud_mod, "gpud_launch_streaming_polyphase_stateful_cuda"); if (!gpud_p_stream_create || !gpud_p_stream_destroy || !gpud_p_stream_sync || !gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift_stream || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir_stream || !gpud_p_launch_fir || !gpud_p_launch_decimate_stream || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) { FreeLibrary(gpud_mod); gpud_mod = NULL; @@ -89,6 +95,8 @@ static int gpud_launch_decimate_stream(gpud_float2 *in, gpud_float2 *out, int n_ static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) { if (!gpud_p_launch_decimate) return -1; return gpud_p_launch_decimate(in, out, n_out, factor); } static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) { if (!gpud_p_launch_am_envelope) return -1; return gpud_p_launch_am_envelope(in, out, n); } static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, 
double phase_start) { if (!gpud_p_launch_ssb_product) return -1; return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start); } +static int gpud_launch_streaming_polyphase_prepare(gpud_float2 *in_new, int n_new, gpud_float2 *history_in, int history_len, float *polyphase_taps, int polyphase_len, int decim, int num_taps, int phase_count_in, double phase_start, double phase_inc, gpud_float2 *out, int *n_out, int *phase_count_out, double *phase_end_out, gpud_float2 *history_out) { if (!gpud_p_launch_streaming_polyphase_prepare) return -1; return gpud_p_launch_streaming_polyphase_prepare(in_new, n_new, history_in, history_len, polyphase_taps, polyphase_len, decim, num_taps, phase_count_in, phase_start, phase_inc, out, n_out, phase_count_out, phase_end_out, history_out); } +static int gpud_launch_streaming_polyphase_stateful(gpud_float2 *in_new, int n_new, gpud_float2 *shifted_new_tmp, float *polyphase_taps, int polyphase_len, int decim, int num_taps, gpud_float2 *history_state, gpud_float2 *history_scratch, int history_cap, int *history_len_io, int *phase_count_state, double *phase_state, double phase_inc, gpud_float2 *out, int out_cap, int *n_out) { if (!gpud_p_launch_streaming_polyphase_stateful) return -1; return gpud_p_launch_streaming_polyphase_stateful(in_new, n_new, shifted_new_tmp, polyphase_taps, polyphase_len, decim, num_taps, history_state, history_scratch, history_cap, history_len_io, phase_count_state, phase_state, phase_inc, out, out_cap, n_out); } */ import "C" @@ -103,38 +111,68 @@ func bridgeLoadLibrary(path string) int { defer C.free(unsafe.Pointer(cp)) return int(C.gpud_load_library(cp)) } -func bridgeCudaMalloc(ptr *unsafe.Pointer, bytes uintptr) int { return int(C.gpud_cuda_malloc(ptr, C.size_t(bytes))) } +func bridgeCudaMalloc(ptr *unsafe.Pointer, bytes uintptr) int { + return int(C.gpud_cuda_malloc(ptr, C.size_t(bytes))) +} func bridgeCudaFree(ptr unsafe.Pointer) int { return int(C.gpud_cuda_free(ptr)) } -func bridgeMemcpyH2D(dst 
unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_h2d(dst, src, C.size_t(bytes))) } -func bridgeMemcpyD2H(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_d2h(dst, src, C.size_t(bytes))) } +func bridgeMemcpyH2D(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { + return int(C.gpud_memcpy_h2d(dst, src, C.size_t(bytes))) +} +func bridgeMemcpyD2H(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { + return int(C.gpud_memcpy_d2h(dst, src, C.size_t(bytes))) +} func bridgeDeviceSync() int { return int(C.gpud_device_sync()) } -func bridgeUploadFIRTaps(taps *C.float, n int) int { return int(C.gpud_upload_fir_taps(taps, C.int(n))) } +func bridgeUploadFIRTaps(taps *C.float, n int) int { + return int(C.gpud_upload_fir_taps(taps, C.int(n))) +} func bridgeLaunchFreqShift(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64) int { return int(C.gpud_launch_freq_shift(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart))) } func bridgeLaunchFreqShiftStream(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64, stream streamHandle) int { return int(C.gpud_launch_freq_shift_stream(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart), C.gpud_stream_handle(stream))) } -func bridgeLaunchFIR(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int) int { return int(C.gpud_launch_fir(in, out, C.int(n), C.int(numTaps))) } +func bridgeLaunchFIR(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int) int { + return int(C.gpud_launch_fir(in, out, C.int(n), C.int(numTaps))) +} func bridgeLaunchFIRStream(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int, stream streamHandle) int { return int(C.gpud_launch_fir_stream(in, out, C.int(n), C.int(numTaps), C.gpud_stream_handle(stream))) } func bridgeLaunchFIRv2Stream(in *C.gpud_float2, out *C.gpud_float2, taps *C.float, n int, numTaps int, stream streamHandle) int { return 
int(C.gpud_launch_fir_v2_stream(in, out, taps, C.int(n), C.int(numTaps), C.gpud_stream_handle(stream))) } -func bridgeLaunchDecimate(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int) int { return int(C.gpud_launch_decimate(in, out, C.int(nOut), C.int(factor))) } +func bridgeLaunchDecimate(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int) int { + return int(C.gpud_launch_decimate(in, out, C.int(nOut), C.int(factor))) +} func bridgeLaunchDecimateStream(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int, stream streamHandle) int { return int(C.gpud_launch_decimate_stream(in, out, C.int(nOut), C.int(factor), C.gpud_stream_handle(stream))) } -func bridgeLaunchFMDiscrim(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_fm_discrim(in, out, C.int(n))) } -func bridgeLaunchAMEnvelope(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_am_envelope(in, out, C.int(n))) } +func bridgeLaunchFMDiscrim(in *C.gpud_float2, out *C.float, n int) int { + return int(C.gpud_launch_fm_discrim(in, out, C.int(n))) +} +func bridgeLaunchAMEnvelope(in *C.gpud_float2, out *C.float, n int) int { + return int(C.gpud_launch_am_envelope(in, out, C.int(n))) +} func bridgeLaunchSSBProduct(in *C.gpud_float2, out *C.float, n int, phaseInc float64, phaseStart float64) int { return int(C.gpud_launch_ssb_product(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart))) } + +// bridgeLaunchStreamingPolyphasePrepare is a transitional bridge for the +// legacy single-call prepare path. The stateful native path uses +// bridgeLaunchStreamingPolyphaseStateful. 
+func bridgeLaunchStreamingPolyphasePrepare(inNew *C.gpud_float2, nNew int, historyIn *C.gpud_float2, historyLen int, polyphaseTaps *C.float, polyphaseLen int, decim int, numTaps int, phaseCountIn int, phaseStart float64, phaseInc float64, out *C.gpud_float2, nOut *C.int, phaseCountOut *C.int, phaseEndOut *C.double, historyOut *C.gpud_float2) int { + return int(C.gpud_launch_streaming_polyphase_prepare(inNew, C.int(nNew), historyIn, C.int(historyLen), polyphaseTaps, C.int(polyphaseLen), C.int(decim), C.int(numTaps), C.int(phaseCountIn), C.double(phaseStart), C.double(phaseInc), out, nOut, phaseCountOut, phaseEndOut, historyOut)) +} +func bridgeLaunchStreamingPolyphaseStateful(inNew *C.gpud_float2, nNew int, shiftedNewTmp *C.gpud_float2, polyphaseTaps *C.float, polyphaseLen int, decim int, numTaps int, historyState *C.gpud_float2, historyScratch *C.gpud_float2, historyCap int, historyLenIO *C.int, phaseCountState *C.int, phaseState *C.double, phaseInc float64, out *C.gpud_float2, outCap int, nOut *C.int) int { + return int(C.gpud_launch_streaming_polyphase_stateful(inNew, C.int(nNew), shiftedNewTmp, polyphaseTaps, C.int(polyphaseLen), C.int(decim), C.int(numTaps), historyState, historyScratch, C.int(historyCap), historyLenIO, phaseCountState, phaseState, C.double(phaseInc), out, C.int(outCap), nOut)) +} func bridgeStreamCreate() (streamHandle, int) { var s C.gpud_stream_handle res := int(C.gpud_stream_create(&s)) return streamHandle(s), res } -func bridgeStreamDestroy(stream streamHandle) int { return int(C.gpud_stream_destroy(C.gpud_stream_handle(stream))) } -func bridgeStreamSync(stream streamHandle) int { return int(C.gpud_stream_sync(C.gpud_stream_handle(stream))) } +func bridgeStreamDestroy(stream streamHandle) int { + return int(C.gpud_stream_destroy(C.gpud_stream_handle(stream))) +} +func bridgeStreamSync(stream streamHandle) int { + return int(C.gpud_stream_sync(C.gpud_stream_handle(stream))) +} diff --git a/internal/dsp/decimating_fir.go 
b/internal/dsp/decimating_fir.go new file mode 100644 index 0000000..3cd958f --- /dev/null +++ b/internal/dsp/decimating_fir.go @@ -0,0 +1,95 @@ +package dsp + +// StatefulDecimatingFIRComplex combines FIR filtering and decimation into a +// single stateful stage. This avoids exposing FIR settling/transient output as +// ordinary block-leading samples before decimation. +type StatefulDecimatingFIRComplex struct { + taps []float64 + delayR []float64 + delayI []float64 + factor int + phase int // number of input samples until next output sample (0 => emit now) +} + +func (f *StatefulDecimatingFIRComplex) Phase() int { + if f == nil { + return 0 + } + return f.phase +} + +func (f *StatefulDecimatingFIRComplex) TapsLen() int { + if f == nil { + return 0 + } + return len(f.taps) +} + +func NewStatefulDecimatingFIRComplex(taps []float64, factor int) *StatefulDecimatingFIRComplex { + if factor < 1 { + factor = 1 + } + t := make([]float64, len(taps)) + copy(t, taps) + return &StatefulDecimatingFIRComplex{ + taps: t, + delayR: make([]float64, len(taps)), + delayI: make([]float64, len(taps)), + factor: factor, + phase: 0, + } +} + +func (f *StatefulDecimatingFIRComplex) Reset() { + for i := range f.delayR { + f.delayR[i] = 0 + f.delayI[i] = 0 + } + f.phase = 0 +} + +func (f *StatefulDecimatingFIRComplex) Process(iq []complex64) []complex64 { + if len(iq) == 0 || len(f.taps) == 0 { + return nil + } + if f.factor <= 1 { + out := make([]complex64, len(iq)) + for i := 0; i < len(iq); i++ { + copy(f.delayR[1:], f.delayR[:len(f.taps)-1]) + copy(f.delayI[1:], f.delayI[:len(f.taps)-1]) + f.delayR[0] = float64(real(iq[i])) + f.delayI[0] = float64(imag(iq[i])) + var accR, accI float64 + for k := 0; k < len(f.taps); k++ { + w := f.taps[k] + accR += f.delayR[k] * w + accI += f.delayI[k] * w + } + out[i] = complex(float32(accR), float32(accI)) + } + return out + } + + out := make([]complex64, 0, len(iq)/f.factor+1) + n := len(f.taps) + for i := 0; i < len(iq); i++ { + copy(f.delayR[1:], 
f.delayR[:n-1]) + copy(f.delayI[1:], f.delayI[:n-1]) + f.delayR[0] = float64(real(iq[i])) + f.delayI[0] = float64(imag(iq[i])) + + if f.phase == 0 { + var accR, accI float64 + for k := 0; k < n; k++ { + w := f.taps[k] + accR += f.delayR[k] * w + accI += f.delayI[k] * w + } + out = append(out, complex(float32(accR), float32(accI))) + f.phase = f.factor - 1 + } else { + f.phase-- + } + } + return out +} diff --git a/internal/dsp/decimating_fir_test.go b/internal/dsp/decimating_fir_test.go new file mode 100644 index 0000000..821cb09 --- /dev/null +++ b/internal/dsp/decimating_fir_test.go @@ -0,0 +1,57 @@ +package dsp + +import ( + "math/cmplx" + "testing" +) + +func TestStatefulDecimatingFIRComplexStreamContinuity(t *testing.T) { + taps := LowpassFIR(90000, 512000, 101) + factor := 2 + + input := make([]complex64, 8192) + for i := range input { + input[i] = complex(float32((i%17)-8)/8.0, float32((i%11)-5)/8.0) + } + + one := NewStatefulDecimatingFIRComplex(taps, factor) + whole := one.Process(input) + + chunkedProc := NewStatefulDecimatingFIRComplex(taps, factor) + var chunked []complex64 + for i := 0; i < len(input); i += 733 { + end := i + 733 + if end > len(input) { + end = len(input) + } + chunked = append(chunked, chunkedProc.Process(input[i:end])...) 
+ } + + if len(whole) != len(chunked) { + t.Fatalf("length mismatch whole=%d chunked=%d", len(whole), len(chunked)) + } + for i := range whole { + if cmplx.Abs(complex128(whole[i]-chunked[i])) > 1e-5 { + t.Fatalf("sample %d mismatch whole=%v chunked=%v", i, whole[i], chunked[i]) + } + } +} + +func TestStatefulDecimatingFIRComplexMatchesBlockPipelineLength(t *testing.T) { + taps := LowpassFIR(90000, 512000, 101) + factor := 2 + input := make([]complex64, 48640) + for i := range input { + input[i] = complex(float32((i%13)-6)/8.0, float32((i%7)-3)/8.0) + } + + stateful := NewStatefulDecimatingFIRComplex(taps, factor) + out := stateful.Process(input) + + filtered := ApplyFIR(input, taps) + dec := Decimate(filtered, factor) + + if len(out) != len(dec) { + t.Fatalf("unexpected output len got=%d want=%d", len(out), len(dec)) + } +} diff --git a/internal/recorder/recorder.go b/internal/recorder/recorder.go index 7e473a9..a03b378 100644 --- a/internal/recorder/recorder.go +++ b/internal/recorder/recorder.go @@ -12,6 +12,7 @@ import ( "sdr-wideband-suite/internal/demod/gpudemod" "sdr-wideband-suite/internal/detector" + "sdr-wideband-suite/internal/telemetry" ) type Policy struct { @@ -54,9 +55,10 @@ type Manager struct { streamer *Streamer streamedIDs map[int64]bool // signal IDs that were streamed (skip retroactive recording) streamedMu sync.Mutex + telemetry *telemetry.Collector } -func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeCommands map[string]string) *Manager { +func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeCommands map[string]string, coll *telemetry.Collector) *Manager { if policy.OutputDir == "" { policy.OutputDir = "data/recordings" } @@ -71,8 +73,9 @@ func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeC centerHz: centerHz, decodeCommands: decodeCommands, queue: make(chan detector.Event, 64), - streamer: newStreamer(policy, centerHz), + streamer: newStreamer(policy, 
centerHz, coll), streamedIDs: make(map[int64]bool), + telemetry: coll, } m.initGPUDemod(sampleRate, blockSize) m.workerWG.Add(1) @@ -103,6 +106,13 @@ func (m *Manager) Update(sampleRate int, blockSize int, policy Policy, centerHz if m.streamer != nil { m.streamer.updatePolicy(policy, centerHz) } + if m.telemetry != nil { + m.telemetry.Event("recorder_update", "info", "recorder policy updated", nil, map[string]any{ + "sample_rate": sampleRate, + "block_size": blockSize, + "enabled": policy.Enabled, + }) + } } func (m *Manager) Ingest(t0 time.Time, samples []complex64) { @@ -116,6 +126,9 @@ func (m *Manager) Ingest(t0 time.Time, samples []complex64) { return } ring.Push(t0, samples) + if m.telemetry != nil { + m.telemetry.SetGauge("recorder.ring.push_samples", float64(len(samples)), nil) + } } func (m *Manager) OnEvents(events []detector.Event) { @@ -134,8 +147,14 @@ func (m *Manager) OnEvents(events []detector.Event) { case m.queue <- ev: default: // drop if queue full + if m.telemetry != nil { + m.telemetry.IncCounter("recorder.event_queue.drop", 1, nil) + } } } + if m.telemetry != nil { + m.telemetry.SetGauge("recorder.event_queue.len", float64(len(m.queue)), nil) + } } func (m *Manager) worker() { @@ -357,6 +376,13 @@ func (m *Manager) StreamerRef() *Streamer { return m.streamer } +func (m *Manager) ResetStreams() { + if m == nil || m.streamer == nil { + return + } + m.streamer.ResetStreams() +} + func (m *Manager) RuntimeInfoBySignalID() map[int64]RuntimeSignalInfo { if m == nil || m.streamer == nil { return nil diff --git a/internal/recorder/streamer.go b/internal/recorder/streamer.go index 30d5248..d7fee44 100644 --- a/internal/recorder/streamer.go +++ b/internal/recorder/streamer.go @@ -10,7 +10,9 @@ import ( "math" "os" "path/filepath" + "strconv" "strings" + "sort" "sync" "time" @@ -19,6 +21,7 @@ import ( "sdr-wideband-suite/internal/detector" "sdr-wideband-suite/internal/dsp" "sdr-wideband-suite/internal/logging" + "sdr-wideband-suite/internal/telemetry" ) 
// --------------------------------------------------------------------------- @@ -26,6 +29,7 @@ import ( // --------------------------------------------------------------------------- type streamSession struct { + sessionID string signalID int64 centerHz float64 bwHz float64 @@ -37,11 +41,36 @@ type streamSession struct { playbackMode string stereoState string lastAudioTs time.Time + + debugDumpStart time.Time + debugDumpUntil time.Time + debugDumpBase string + + demodDump []float32 + finalDump []float32 lastAudioL float32 lastAudioR float32 prevAudioL float64 // second-to-last L sample for boundary transient detection lastAudioSet bool + lastDecIQ complex64 + prevDecIQ complex64 + lastDecIQSet bool + + lastExtractIQ complex64 + prevExtractIQ complex64 + lastExtractIQSet bool + + // FM discriminator cross-block bridging: carry the last IQ sample so the + // discriminator can compute the phase step across block boundaries. + lastDiscrimIQ complex64 + lastDiscrimIQSet bool + + lastDemodL float32 + prevDemodL float64 + lastDemodSet bool + snippetSeq uint64 + // listenOnly sessions have no WAV file and no disk I/O. // They exist solely to feed audio to live-listen subscribers. listenOnly bool @@ -60,6 +89,8 @@ type streamSession struct { // --- Persistent DSP state for click-free streaming --- // Overlap-save: tail of previous extracted IQ snippet. + // Currently unused for live demod after removing the extra discriminator + // overlap prepend, but kept in DSP snapshot state for compatibility. overlapIQ []complex64 // De-emphasis IIR state (persists across frames) @@ -96,13 +127,21 @@ type streamSession struct { pilotLPFHi *dsp.StatefulFIRReal // ~21kHz LP for pilot bandpass high pilotLPFLo *dsp.StatefulFIRReal // ~17kHz LP for pilot bandpass low + // WFM 15kHz audio LPF — removes pilot (19kHz), L-R subcarrier (23-53kHz), + // and RDS (57kHz) from the FM discriminator output before resampling. 
+ // Without this, the pilot leaks into the audio as a 19kHz tone (+55dB above + // noise floor) and L-R subcarrier energy causes audible click-like artifacts. + wfmAudioLPF *dsp.StatefulFIRReal + wfmAudioLPFRate int + // Stateful pre-demod anti-alias FIR (eliminates cold-start transients // and avoids per-frame FIR recomputation) - preDemodFIR *dsp.StatefulFIRComplex - preDemodDecim int // cached decimation factor - preDemodRate int // cached snipRate this FIR was built for - preDemodCutoff float64 // cached cutoff - preDemodDecimPhase int // stateful decimation phase (index offset into next frame) + preDemodFIR *dsp.StatefulFIRComplex + preDemodDecimator *dsp.StatefulDecimatingFIRComplex + preDemodDecim int // cached decimation factor + preDemodRate int // cached snipRate this FIR was built for + preDemodCutoff float64 // cached cutoff + preDemodDecimPhase int // retained for backward compatibility in snapshots/debug // AQ-2: De-emphasis config (µs, 0 = disabled) deemphasisUs float64 @@ -146,6 +185,54 @@ const ( resamplerTaps = 32 // taps per polyphase arm — good quality ) +var debugDumpDelay = func() time.Duration { + raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_DUMP_DELAY_SECONDS")) + if raw == "" { + return 5 * time.Second + } + v, err := strconv.Atoi(raw) + if err != nil || v < 0 { + return 5 * time.Second + } + return time.Duration(v) * time.Second +}() + +var debugDumpDuration = func() time.Duration { + raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_DUMP_DURATION_SECONDS")) + if raw == "" { + return 15 * time.Second + } + v, err := strconv.Atoi(raw) + if err != nil || v <= 0 { + return 15 * time.Second + } + return time.Duration(v) * time.Second +}() + +var audioDumpEnabled = func() bool { + raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_AUDIO_DUMP_ENABLED")) + if raw == "" { + return false + } + v, err := strconv.ParseBool(raw) + if err != nil { + return false + } + return v +}() + +var decHeadTrimSamples = func() int { + raw := 
strings.TrimSpace(os.Getenv("SDR_DEC_HEAD_TRIM")) + if raw == "" { + return 0 + } + v, err := strconv.Atoi(raw) + if err != nil || v < 0 { + return 0 + } + return v +}() + // --------------------------------------------------------------------------- // Streamer — manages all active streaming sessions // --------------------------------------------------------------------------- @@ -159,6 +246,7 @@ type streamFeedItem struct { type streamFeedMsg struct { traceID uint64 items []streamFeedItem + enqueuedAt time.Time } type Streamer struct { @@ -178,6 +266,7 @@ type Streamer struct { // pendingListens are subscribers waiting for a matching session. pendingListens map[int64]*pendingListen + telemetry *telemetry.Collector } type pendingListen struct { @@ -187,7 +276,7 @@ type pendingListen struct { ch chan []byte } -func newStreamer(policy Policy, centerHz float64) *Streamer { +func newStreamer(policy Policy, centerHz float64, coll *telemetry.Collector) *Streamer { st := &Streamer{ sessions: make(map[int64]*streamSession), policy: policy, @@ -195,6 +284,7 @@ func newStreamer(policy Policy, centerHz float64) *Streamer { feedCh: make(chan streamFeedMsg, 2), done: make(chan struct{}), pendingListens: make(map[int64]*pendingListen), + telemetry: coll, } go st.worker() return st @@ -282,18 +372,33 @@ func (st *Streamer) FeedSnippets(items []streamFeedItem, traceID uint64) { if (!recEnabled && !hasListeners) || len(items) == 0 { return } + if st.telemetry != nil { + st.telemetry.SetGauge("streamer.feed.queue_len", float64(len(st.feedCh)), nil) + st.telemetry.SetGauge("streamer.pending_listeners", float64(pending), nil) + st.telemetry.Observe("streamer.feed.batch_size", float64(len(items)), nil) + } select { - case st.feedCh <- streamFeedMsg{traceID: traceID, items: items}: + case st.feedCh <- streamFeedMsg{traceID: traceID, items: items, enqueuedAt: time.Now()}: default: st.droppedFeed++ logging.Warn("drop", "feed_drop", "count", st.droppedFeed) + if st.telemetry != nil { + 
st.telemetry.IncCounter("streamer.feed.drop", 1, nil) + st.telemetry.Event("stream_feed_drop", "warn", "feed queue full", nil, map[string]any{ + "trace_id": traceID, + "queue_len": len(st.feedCh), + }) + } } } // processFeed runs in the worker goroutine. func (st *Streamer) processFeed(msg streamFeedMsg) { + procStart := time.Now() + lockStart := time.Now() st.mu.Lock() + lockWait := time.Since(lockStart) recEnabled := st.policy.Enabled && (st.policy.RecordAudio || st.policy.RecordIQ) hasListeners := st.hasListenersLocked() now := time.Now() @@ -301,10 +406,24 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { gap := now.Sub(st.lastProcTS) if gap > 150*time.Millisecond { logging.Warn("gap", "process_gap", "gap_ms", gap.Milliseconds(), "trace", msg.traceID) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.process.gap.count", 1, nil) + st.telemetry.Observe("streamer.process.gap_ms", float64(gap.Milliseconds()), nil) + } } } st.lastProcTS = now defer st.mu.Unlock() + defer func() { + if st.telemetry != nil { + st.telemetry.Observe("streamer.process.total_ms", float64(time.Since(procStart).Microseconds())/1000.0, nil) + st.telemetry.Observe("streamer.lock_wait_ms", float64(lockWait.Microseconds())/1000.0, telemetry.TagsFromPairs("lock", "process")) + } + }() + if st.telemetry != nil { + st.telemetry.Observe("streamer.feed.enqueue_delay_ms", float64(now.Sub(msg.enqueuedAt).Microseconds())/1000.0, nil) + st.telemetry.SetGauge("streamer.sessions.active", float64(len(st.sessions)), nil) + } logging.Debug("trace", "process_feed", "trace", msg.traceID, "items", len(msg.items)) @@ -367,6 +486,9 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { if err != nil { log.Printf("STREAM: open failed signal=%d %.1fMHz: %v", sig.ID, sig.CenterHz/1e6, err) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.session.open_error", 1, telemetry.TagsFromPairs("kind", "recording")) + } continue } st.sessions[sig.ID] = s @@ -378,6 +500,13 @@ func (st 
*Streamer) processFeed(msg streamFeedMsg) { } // Attach any pending listeners st.attachPendingListeners(sess) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.session.open", 1, telemetry.TagsFromPairs("session_id", sess.sessionID, "signal_id", fmt.Sprintf("%d", sig.ID))) + st.telemetry.Event("session_open", "info", "stream session opened", telemetry.TagsFromPairs("session_id", sess.sessionID, "signal_id", fmt.Sprintf("%d", sig.ID)), map[string]any{ + "listen_only": sess.listenOnly, + "demod": sess.demodName, + }) + } } // Update metadata @@ -396,10 +525,17 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { // Demod with persistent state logging.Debug("trace", "demod_start", "trace", msg.traceID, "signal", sess.signalID, "snip_len", len(item.snippet), "snip_rate", item.snipRate) - audio, audioRate := sess.processSnippet(item.snippet, item.snipRate) + audioStart := time.Now() + audio, audioRate := sess.processSnippet(item.snippet, item.snipRate, st.telemetry) + if st.telemetry != nil { + st.telemetry.Observe("streamer.process_snippet_ms", float64(time.Since(audioStart).Microseconds())/1000.0, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } logging.Debug("trace", "demod_done", "trace", msg.traceID, "signal", sess.signalID, "audio_len", len(audio), "audio_rate", audioRate) if len(audio) == 0 { logging.Warn("gap", "audio_empty", "signal", sess.signalID, "snip_len", len(item.snippet), "snip_rate", item.snipRate) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.audio.empty", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } } if len(audio) > 0 { if sess.wavSamples == 0 && audioRate > 0 { @@ -426,6 +562,10 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { gap := time.Since(sess.lastAudioTs) if gap > 150*time.Millisecond { logging.Warn("gap", "audio_gap", "signal", sess.signalID, "gap_ms", gap.Milliseconds()) + if st.telemetry != nil { + 
st.telemetry.IncCounter("streamer.audio.gap.count", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + st.telemetry.Observe("streamer.audio.gap_ms", float64(gap.Milliseconds()), telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } } } // Transient click detector: finds short impulses (1-3 samples) @@ -452,6 +592,10 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { d2 := math.Abs(2*float64(sess.lastAudioL) - sess.prevAudioL - first) if d2 > 0.15 { logging.Warn("boundary", "boundary_click", "signal", sess.signalID, "d2", d2) + if st.telemetry != nil { + st.telemetry.IncCounter("audio.boundary_click.count", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + st.telemetry.Observe("audio.boundary_click.d2", d2, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } } } @@ -474,6 +618,10 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { } if nClicks > 0 { logging.Warn("boundary", "intra_click", "signal", sess.signalID, "clicks", nClicks, "maxD2", maxD2, "pos", maxD2Pos, "len", nFrames) + if st.telemetry != nil { + st.telemetry.IncCounter("audio.intra_click.count", float64(nClicks), telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + st.telemetry.Observe("audio.intra_click.max_d2", maxD2, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } } // Store last two samples for next frame's boundary check @@ -513,6 +661,13 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { s.audioSubs = oldSubs s.restoreDSPState(oldState) st.sessions[sig.ID] = s + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.session.reopen", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sig.ID))) + st.telemetry.Event("session_reopen", "info", "stream session rotated by max duration", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sig.ID)), map[string]any{ + 
"old_session": sess.sessionID, + "new_session": s.sessionID, + }) + } } } @@ -533,6 +688,13 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { if !sess.listenOnly { closeSession(sess, &st.policy) } + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.session.close", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", id), "session_id", sess.sessionID)) + st.telemetry.Event("session_close", "info", "stream session closed", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", id), "session_id", sess.sessionID), map[string]any{ + "reason": "signal_missing", + "listen_only": sess.listenOnly, + }) + } delete(st.sessions, id) } } @@ -578,8 +740,19 @@ func (st *Streamer) attachPendingListeners(sess *streamSession) { default: } + if audioDumpEnabled { + now := time.Now() + sess.debugDumpStart = now.Add(debugDumpDelay) + sess.debugDumpUntil = sess.debugDumpStart.Add(debugDumpDuration) + sess.debugDumpBase = filepath.Join("debug", fmt.Sprintf("signal-%d-window-%s", sess.signalID, now.Format("20060102-150405"))) + sess.demodDump = nil + sess.finalDump = nil + } log.Printf("STREAM: attached pending listener %d to signal %d (%.1fMHz %s ch=%d)", subID, sess.signalID, sess.centerHz/1e6, sess.demodName, sess.channels) + if audioDumpEnabled { + log.Printf("STREAM: debug dump armed signal=%d start=%s until=%s", sess.signalID, sess.debugDumpStart.Format(time.RFC3339), sess.debugDumpUntil.Format(time.RFC3339)) + } } } } @@ -615,12 +788,18 @@ func (st *Streamer) CloseAll() { if !sess.listenOnly { closeSession(sess, &st.policy) } + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.session.close", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", id), "session_id", sess.sessionID)) + } delete(st.sessions, id) } for _, pl := range st.pendingListens { close(pl.ch) } st.pendingListens = nil + if st.telemetry != nil { + st.telemetry.Event("streamer_close_all", "info", "all stream sessions closed", nil, nil) + } } // ActiveSessions returns the number 
of open streaming sessions. @@ -663,9 +842,23 @@ func (st *Streamer) SubscribeAudio(freq float64, bw float64, mode string) (int64 if bestSess != nil && bestDist < 200000 { bestSess.audioSubs = append(bestSess.audioSubs, audioSub{id: subID, ch: ch}) + if audioDumpEnabled { + now := time.Now() + bestSess.debugDumpStart = now.Add(debugDumpDelay) + bestSess.debugDumpUntil = bestSess.debugDumpStart.Add(debugDumpDuration) + bestSess.debugDumpBase = filepath.Join("debug", fmt.Sprintf("signal-%d-window-%s", bestSess.signalID, now.Format("20060102-150405"))) + bestSess.demodDump = nil + bestSess.finalDump = nil + } info := bestSess.audioInfo() log.Printf("STREAM: subscriber %d attached to signal %d (%.1fMHz %s)", subID, bestSess.signalID, bestSess.centerHz/1e6, bestSess.demodName) + if audioDumpEnabled { + log.Printf("STREAM: debug dump armed signal=%d start=%s until=%s", bestSess.signalID, bestSess.debugDumpStart.Format(time.RFC3339), bestSess.debugDumpUntil.Format(time.RFC3339)) + } + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.listener.attach", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", bestSess.signalID), "session_id", bestSess.sessionID)) + } return subID, ch, info, nil } @@ -679,6 +872,10 @@ func (st *Streamer) SubscribeAudio(freq float64, bw float64, mode string) (int64 info := defaultAudioInfoForMode(mode) log.Printf("STREAM: subscriber %d pending (freq=%.1fMHz)", subID, freq/1e6) log.Printf("LIVEAUDIO MATCH: subscriber=%d pending req=%.3fMHz bw=%.0f mode=%s", subID, freq/1e6, bw, mode) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.listener.pending", 1, nil) + st.telemetry.SetGauge("streamer.pending_listeners", float64(len(st.pendingListens)), nil) + } return subID, ch, info, nil } @@ -690,6 +887,10 @@ func (st *Streamer) UnsubscribeAudio(subID int64) { if pl, ok := st.pendingListens[subID]; ok { close(pl.ch) delete(st.pendingListens, subID) + if st.telemetry != nil { + 
st.telemetry.IncCounter("streamer.listener.unsubscribe", 1, telemetry.TagsFromPairs("kind", "pending")) + st.telemetry.SetGauge("streamer.pending_listeners", float64(len(st.pendingListens)), nil) + } return } @@ -698,6 +899,9 @@ func (st *Streamer) UnsubscribeAudio(subID int64) { if sub.id == subID { close(sub.ch) sess.audioSubs = append(sess.audioSubs[:i], sess.audioSubs[i+1:]...) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.listener.unsubscribe", 1, telemetry.TagsFromPairs("kind", "active", "session_id", sess.sessionID)) + } return } } @@ -711,10 +915,96 @@ func (st *Streamer) UnsubscribeAudio(subID int64) { // processSnippet takes a pre-extracted IQ snippet and demodulates it with // persistent state. Uses stateful FIR + polyphase resampler for exact 48kHz // output with zero transient artifacts. -func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]float32, int) { +type iqHeadProbeStats struct { + meanMag float64 + minMag float64 + maxStep float64 + p95Step float64 + lowMag int +} + +func probeIQHeadStats(iq []complex64, probeLen int) iqHeadProbeStats { + if probeLen <= 0 || len(iq) == 0 { + return iqHeadProbeStats{} + } + if len(iq) < probeLen { + probeLen = len(iq) + } + stats := iqHeadProbeStats{minMag: math.MaxFloat64} + steps := make([]float64, 0, probeLen) + var sum float64 + for i := 0; i < probeLen; i++ { + v := iq[i] + mag := math.Hypot(float64(real(v)), float64(imag(v))) + sum += mag + if mag < stats.minMag { + stats.minMag = mag + } + if mag < 0.02 { + stats.lowMag++ + } + if i > 0 { + p := iq[i-1] + num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) + den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) + step := math.Abs(math.Atan2(num, den)) + steps = append(steps, step) + if step > stats.maxStep { + stats.maxStep = step + } + } + } + stats.meanMag = sum / float64(probeLen) + if len(steps) > 0 { + sorted := append([]float64(nil), steps...) 
+ sort.Float64s(sorted) + idx := int(math.Round(0.95 * float64(len(sorted)-1))) + if idx < 0 { + idx = 0 + } + if idx >= len(sorted) { + idx = len(sorted) - 1 + } + stats.p95Step = sorted[idx] + } + if stats.minMag == math.MaxFloat64 { + stats.minMag = 0 + } + return stats +} + +func (sess *streamSession) processSnippet(snippet []complex64, snipRate int, coll *telemetry.Collector) ([]float32, int) { if len(snippet) == 0 || snipRate <= 0 { return nil, 0 } + baseTags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID) + if coll != nil { + coll.SetGauge("iq.stage.snippet.length", float64(len(snippet)), baseTags) + stats := probeIQHeadStats(snippet, 64) + coll.Observe("iq.snippet.head_mean_mag", stats.meanMag, baseTags) + coll.Observe("iq.snippet.head_min_mag", stats.minMag, baseTags) + coll.Observe("iq.snippet.head_max_step", stats.maxStep, baseTags) + coll.Observe("iq.snippet.head_p95_step", stats.p95Step, baseTags) + coll.SetGauge("iq.snippet.head_low_magnitude_count", float64(stats.lowMag), baseTags) + if sess.lastExtractIQSet { + prevMag := math.Hypot(float64(real(sess.lastExtractIQ)), float64(imag(sess.lastExtractIQ))) + currMag := math.Hypot(float64(real(snippet[0])), float64(imag(snippet[0]))) + deltaMag := math.Abs(currMag - prevMag) + num := float64(real(sess.lastExtractIQ))*float64(imag(snippet[0])) - float64(imag(sess.lastExtractIQ))*float64(real(snippet[0])) + den := float64(real(sess.lastExtractIQ))*float64(real(snippet[0])) + float64(imag(sess.lastExtractIQ))*float64(imag(snippet[0])) + deltaPhase := math.Abs(math.Atan2(num, den)) + d2 := float64(real(snippet[0]-sess.lastExtractIQ))*float64(real(snippet[0]-sess.lastExtractIQ)) + float64(imag(snippet[0]-sess.lastExtractIQ))*float64(imag(snippet[0]-sess.lastExtractIQ)) + coll.Observe("iq.extract.output.boundary.delta_mag", deltaMag, baseTags) + coll.Observe("iq.extract.output.boundary.delta_phase", deltaPhase, baseTags) + 
coll.Observe("iq.extract.output.boundary.d2", d2, baseTags) + coll.Observe("iq.extract.output.boundary.discontinuity_score", deltaMag+deltaPhase, baseTags) + } + } + if len(snippet) > 0 { + sess.prevExtractIQ = sess.lastExtractIQ + sess.lastExtractIQ = snippet[len(snippet)-1] + sess.lastExtractIQSet = true + } isWFMStereo := sess.demodName == "WFM_STEREO" isWFM := sess.demodName == "WFM" || isWFMStereo @@ -731,25 +1021,48 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] return nil, 0 } - // --- FM discriminator overlap: prepend 1 sample from previous frame --- - // The FM discriminator needs iq[i-1] to compute the first output. - // All FIR filtering is now stateful, so no additional overlap is needed. - var fullSnip []complex64 - trimSamples := 0 - _ = trimSamples - if len(sess.overlapIQ) == 1 { - fullSnip = make([]complex64, 1+len(snippet)) - fullSnip[0] = sess.overlapIQ[0] - copy(fullSnip[1:], snippet) - trimSamples = 1 - logging.Debug("discrim", "overlap_applied", "signal", sess.signalID, "snip", len(snippet)) - } else { - fullSnip = snippet - } + // The extra 1-sample discriminator overlap prepend was removed after it was + // shown to shift the downstream decimation phase and create heavy click + // artifacts in steady-state streaming/recording. The upstream extraction path + // and the stateful FIR/decimation stages already provide continuity. 
+ fullSnip := snippet + overlapApplied := false + prevTailValid := false - // Save last sample for next frame's FM discriminator - if len(snippet) > 0 { - sess.overlapIQ = []complex64{snippet[len(snippet)-1]} + if logging.EnabledCategory("prefir") && len(fullSnip) > 0 { + probeN := 64 + if len(fullSnip) < probeN { + probeN = len(fullSnip) + } + minPreMag := math.MaxFloat64 + minPreIdx := 0 + maxPreStep := 0.0 + maxPreStepIdx := 0 + for i := 0; i < probeN; i++ { + v := fullSnip[i] + mag := math.Hypot(float64(real(v)), float64(imag(v))) + if mag < minPreMag { + minPreMag = mag + minPreIdx = i + } + if i > 0 { + p := fullSnip[i-1] + num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) + den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) + step := math.Abs(math.Atan2(num, den)) + if step > maxPreStep { + maxPreStep = step + maxPreStepIdx = i - 1 + } + } + } + logging.Debug("prefir", "pre_fir_head_probe", "signal", sess.signalID, "probe_len", probeN, "min_mag", minPreMag, "min_idx", minPreIdx, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx, "snip_len", len(fullSnip)) + if minPreMag < 0.18 { + logging.Warn("prefir", "pre_fir_head_dip", "signal", sess.signalID, "probe_len", probeN, "min_mag", minPreMag, "min_idx", minPreIdx, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx) + } + if maxPreStep > 1.5 { + logging.Warn("prefir", "pre_fir_head_step", "signal", sess.signalID, "probe_len", probeN, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx, "min_mag", minPreMag, "min_idx", minPreIdx) + } } // --- Stateful anti-alias FIR + decimation to demod rate --- @@ -779,29 +1092,242 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] } // Lazy-init or reinit stateful FIR if parameters changed - if sess.preDemodFIR == nil || sess.preDemodRate != snipRate || sess.preDemodCutoff != cutoff { + if sess.preDemodDecimator == nil || sess.preDemodRate != snipRate || sess.preDemodCutoff != cutoff 
|| sess.preDemodDecim != decim1 { taps := dsp.LowpassFIR(cutoff, snipRate, 101) sess.preDemodFIR = dsp.NewStatefulFIRComplex(taps) + sess.preDemodDecimator = dsp.NewStatefulDecimatingFIRComplex(taps, decim1) sess.preDemodRate = snipRate sess.preDemodCutoff = cutoff sess.preDemodDecim = decim1 sess.preDemodDecimPhase = 0 + if coll != nil { + coll.IncCounter("dsp.pre_demod.init", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + coll.Event("prefir_reinit", "info", "pre-demod decimator reinitialized", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID), map[string]any{ + "snip_rate": snipRate, + "cutoff_hz": cutoff, + "decim": decim1, + }) + } } + decimPhaseBefore := sess.preDemodDecimPhase filtered := sess.preDemodFIR.ProcessInto(fullSnip, sess.growIQ(len(fullSnip))) - dec = dsp.DecimateStateful(filtered, decim1, &sess.preDemodDecimPhase) + dec = sess.preDemodDecimator.Process(fullSnip) + sess.preDemodDecimPhase = sess.preDemodDecimator.Phase() + if coll != nil { + coll.Observe("dsp.pre_demod.decimation_factor", float64(decim1), baseTags) + coll.SetGauge("iq.stage.pre_demod.length", float64(len(dec)), baseTags) + decStats := probeIQHeadStats(dec, 64) + coll.Observe("iq.pre_demod.head_mean_mag", decStats.meanMag, baseTags) + coll.Observe("iq.pre_demod.head_min_mag", decStats.minMag, baseTags) + coll.Observe("iq.pre_demod.head_max_step", decStats.maxStep, baseTags) + coll.Observe("iq.pre_demod.head_p95_step", decStats.p95Step, baseTags) + coll.SetGauge("iq.pre_demod.head_low_magnitude_count", float64(decStats.lowMag), baseTags) + } + logging.Debug("boundary", "snippet_path", "signal", sess.signalID, "overlap_applied", overlapApplied, "snip_len", len(snippet), "full_len", len(fullSnip), "filtered_len", len(filtered), "dec_len", len(dec), "decim1", decim1, "phase_before", decimPhaseBefore, "phase_after", sess.preDemodDecimPhase) } else { + logging.Debug("boundary", 
"snippet_path", "signal", sess.signalID, "overlap_applied", overlapApplied, "snip_len", len(snippet), "full_len", len(fullSnip), "filtered_len", len(fullSnip), "dec_len", len(fullSnip), "decim1", decim1, "phase_before", 0, "phase_after", 0) dec = fullSnip } - // --- FM Demod --- - audio := d.Demod(dec, actualDemodRate) + if decHeadTrimSamples > 0 && decHeadTrimSamples < len(dec) { + logging.Warn("boundary", "dec_head_trim_applied", "signal", sess.signalID, "trim", decHeadTrimSamples, "before_len", len(dec)) + dec = dec[decHeadTrimSamples:] + if coll != nil { + coll.IncCounter("dsp.pre_demod.head_trim", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } + } + + if logging.EnabledCategory("boundary") && len(dec) > 0 { + first := dec[0] + if sess.lastDecIQSet { + d2Re := math.Abs(2*float64(real(sess.lastDecIQ)) - float64(real(sess.prevDecIQ)) - float64(real(first))) + d2Im := math.Abs(2*float64(imag(sess.lastDecIQ)) - float64(imag(sess.prevDecIQ)) - float64(imag(first))) + d2Mag := math.Hypot(d2Re, d2Im) + if d2Mag > 0.15 { + logging.Warn("boundary", "dec_iq_boundary", "signal", sess.signalID, "d2", d2Mag) + if coll != nil { + coll.IncCounter("iq.dec.boundary.count", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + coll.Observe("iq.dec.boundary.d2", d2Mag, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } + } + } + + headN := 16 + if len(dec) < headN { + headN = len(dec) + } + tailN := 16 + if len(dec) < tailN { + tailN = len(dec) + } + var headSum, tailSum, minMag, maxMag float64 + minMag = math.MaxFloat64 + for i, v := range dec { + mag := math.Hypot(float64(real(v)), float64(imag(v))) + if mag < minMag { + minMag = mag + } + if mag > maxMag { + maxMag = mag + } + if i < headN { + headSum += mag + } + } + for i := len(dec) - tailN; i < len(dec); i++ { + if i >= 0 { + v := dec[i] + tailSum += math.Hypot(float64(real(v)), float64(imag(v))) + } + } + headAvg 
:= 0.0 + if headN > 0 { + headAvg = headSum / float64(headN) + } + tailAvg := 0.0 + if tailN > 0 { + tailAvg = tailSum / float64(tailN) + } + logging.Debug("boundary", "dec_iq_meter", "signal", sess.signalID, "len", len(dec), "head_avg", headAvg, "tail_avg", tailAvg, "min_mag", minMag, "max_mag", maxMag) + if tailAvg > 0 { + ratio := headAvg / tailAvg + if ratio < 0.75 || ratio > 1.25 { + logging.Warn("boundary", "dec_iq_head_tail_skew", "signal", sess.signalID, "head_avg", headAvg, "tail_avg", tailAvg, "ratio", ratio) + } + if coll != nil { + coll.Observe("iq.dec.head_tail_ratio", ratio, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } + } + + probeN := 64 + if len(dec) < probeN { + probeN = len(dec) + } + minHeadMag := math.MaxFloat64 + minHeadIdx := 0 + maxHeadStep := 0.0 + maxHeadStepIdx := 0 + for i := 0; i < probeN; i++ { + v := dec[i] + mag := math.Hypot(float64(real(v)), float64(imag(v))) + if mag < minHeadMag { + minHeadMag = mag + minHeadIdx = i + } + if i > 0 { + p := dec[i-1] + num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) + den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) + step := math.Abs(math.Atan2(num, den)) + if step > maxHeadStep { + maxHeadStep = step + maxHeadStepIdx = i - 1 + } + } + } + logging.Debug("boundary", "dec_iq_head_probe", "signal", sess.signalID, "probe_len", probeN, "min_mag", minHeadMag, "min_idx", minHeadIdx, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx) + if minHeadMag < 0.18 { + logging.Warn("boundary", "dec_iq_head_dip", "signal", sess.signalID, "probe_len", probeN, "min_mag", minHeadMag, "min_idx", minHeadIdx, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx) + } + if maxHeadStep > 1.5 { + logging.Warn("boundary", "dec_iq_head_step", "signal", sess.signalID, "probe_len", probeN, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx, "min_mag", minHeadMag, "min_idx", minHeadIdx) + } + if coll 
!= nil { + coll.Observe("iq.dec.magnitude.min", minMag, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + coll.Observe("iq.dec.magnitude.max", maxMag, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + coll.Observe("iq.dec.phase_step.max", maxHeadStep, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } + + if len(dec) >= 2 { + sess.prevDecIQ = dec[len(dec)-2] + sess.lastDecIQ = dec[len(dec)-1] + } else { + sess.prevDecIQ = sess.lastDecIQ + sess.lastDecIQ = dec[0] + } + sess.lastDecIQSet = true + } + + // --- FM/AM/etc Demod --- + // For FM demod (NFM/WFM): bridge the block boundary by prepending the + // previous block's last IQ sample. Without this, the discriminator loses + // the cross-boundary phase step (1 audio sample missing per block) and + // any phase discontinuity at the seam becomes an unsmoothed audio transient. + var audio []float32 + isFMDemod := demodName == "NFM" || demodName == "WFM" + if isFMDemod && sess.lastDiscrimIQSet && len(dec) > 0 { + bridged := make([]complex64, len(dec)+1) + bridged[0] = sess.lastDiscrimIQ + copy(bridged[1:], dec) + audio = d.Demod(bridged, actualDemodRate) + // bridged produced len(dec) audio samples (= len(bridged)-1) + // which is exactly the correct count for the new data + } else { + audio = d.Demod(dec, actualDemodRate) + } + if len(dec) > 0 { + sess.lastDiscrimIQ = dec[len(dec)-1] + sess.lastDiscrimIQSet = true + } if len(audio) == 0 { return nil, 0 } + if coll != nil { + coll.SetGauge("audio.stage.demod.length", float64(len(audio)), baseTags) + probe := 64 + if len(audio) < probe { + probe = len(audio) + } + if probe > 0 { + var headAbs, tailAbs float64 + for i := 0; i < probe; i++ { + headAbs += math.Abs(float64(audio[i])) + tailAbs += math.Abs(float64(audio[len(audio)-probe+i])) + } + coll.Observe("audio.demod.head_mean_abs", headAbs/float64(probe), 
baseTags) + coll.Observe("audio.demod.tail_mean_abs", tailAbs/float64(probe), baseTags) + coll.Observe("audio.demod.edge_delta_abs", math.Abs(float64(audio[0])-float64(audio[len(audio)-1])), baseTags) + } + } + if logging.EnabledCategory("boundary") { + stride := d.Channels() + if stride < 1 { + stride = 1 + } + nFrames := len(audio) / stride + if nFrames > 0 { + first := float64(audio[0]) + if sess.lastDemodSet { + d2 := math.Abs(2*float64(sess.lastDemodL) - sess.prevDemodL - first) + if d2 > 0.15 { + logging.Warn("boundary", "demod_boundary", "signal", sess.signalID, "d2", d2) + if coll != nil { + coll.IncCounter("audio.demod_boundary.count", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + coll.Observe("audio.demod_boundary.d2", d2, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID))) + } + } + } + if nFrames >= 2 { + sess.prevDemodL = float64(audio[(nFrames-2)*stride]) + sess.lastDemodL = audio[(nFrames-1)*stride] + } else { + sess.prevDemodL = float64(sess.lastDemodL) + sess.lastDemodL = audio[0] + } + sess.lastDemodSet = true + } + } + logging.Debug("boundary", "audio_path", "signal", sess.signalID, "demod", demodName, "actual_rate", actualDemodRate, "audio_len", len(audio), "channels", d.Channels(), "overlap_applied", overlapApplied, "prev_tail_valid", prevTailValid) - // --- Trim the 1-sample FM discriminator overlap --- - // TEMP: skip audio trim to test if per-block trimming causes ticks + shouldDump := !sess.debugDumpStart.IsZero() && !sess.debugDumpUntil.IsZero() + if shouldDump { + now := time.Now() + shouldDump = !now.Before(sess.debugDumpStart) && now.Before(sess.debugDumpUntil) + } + if shouldDump { + sess.demodDump = append(sess.demodDump, audio...) 
+ } // --- Stateful stereo decode with conservative lock/hysteresis --- channels := 1 @@ -829,6 +1355,11 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] audio = stereoAudio } else { sess.stereoState = "mono-fallback" + // Apply 15kHz LPF before output: the raw discriminator contains + // the 19kHz pilot (+55dB), L-R subcarrier (23-53kHz), and RDS (57kHz). + // Without filtering, the pilot leaks into audio and subcarrier + // energy produces audible click-like artifacts. + audio = sess.wfmAudioFilter(audio, actualDemodRate) dual := make([]float32, len(audio)*2) for i, s := range audio { dual[i*2] = s @@ -839,6 +1370,9 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] if (prevPlayback != sess.playbackMode || prevStereo != sess.stereoState) && len(sess.audioSubs) > 0 { sendAudioInfo(sess.audioSubs, sess.audioInfo()) } + } else if isWFM { + // Plain WFM (not stereo): also needs 15kHz LPF on discriminator output + audio = sess.wfmAudioFilter(audio, actualDemodRate) } // --- Polyphase resample to exact 48kHz --- @@ -848,6 +1382,12 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] logging.Info("resample", "reset", "mode", "stereo", "rate", actualDemodRate) sess.stereoResampler = dsp.NewStereoResampler(actualDemodRate, streamAudioRate, resamplerTaps) sess.stereoResamplerRate = actualDemodRate + if coll != nil { + coll.Event("resampler_reset", "info", "stereo resampler reset", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID), map[string]any{ + "mode": "stereo", + "rate": actualDemodRate, + }) + } } audio = sess.stereoResampler.Process(audio) } else { @@ -855,10 +1395,19 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] logging.Info("resample", "reset", "mode", "mono", "rate", actualDemodRate) sess.monoResampler = dsp.NewResampler(actualDemodRate, streamAudioRate, resamplerTaps) 
sess.monoResamplerRate = actualDemodRate + if coll != nil { + coll.Event("resampler_reset", "info", "mono resampler reset", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID), map[string]any{ + "mode": "mono", + "rate": actualDemodRate, + }) + } } audio = sess.monoResampler.Process(audio) } } + if coll != nil { + coll.SetGauge("audio.stage.output.length", float64(len(audio)), telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } // --- De-emphasis (configurable: 50µs Europe, 75µs US/Japan, 0=disabled) --- if isWFM && sess.deemphasisUs > 0 && streamAudioRate > 0 { @@ -890,6 +1439,24 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([] } } + if shouldDump { + sess.finalDump = append(sess.finalDump, audio...) + } else if !sess.debugDumpUntil.IsZero() && time.Now().After(sess.debugDumpUntil) && sess.debugDumpBase != "" { + _ = os.MkdirAll(filepath.Dir(sess.debugDumpBase), 0o755) + if len(sess.demodDump) > 0 { + _ = writeWAVFile(sess.debugDumpBase+"-demod.wav", sess.demodDump, actualDemodRate, d.Channels()) + } + if len(sess.finalDump) > 0 { + _ = writeWAVFile(sess.debugDumpBase+"-final.wav", sess.finalDump, streamAudioRate, channels) + } + logging.Warn("boundary", "debug_audio_dump_window", "signal", sess.signalID, "base", sess.debugDumpBase) + sess.debugDumpBase = "" + sess.demodDump = nil + sess.finalDump = nil + sess.debugDumpStart = time.Time{} + sess.debugDumpUntil = time.Time{} + } + return audio, streamAudioRate } @@ -908,6 +1475,20 @@ func pllCoefficients(loopBW, damping float64, sampleRate int) (float64, float64) return alpha, beta } +// wfmAudioFilter applies a stateful 15kHz lowpass to WFM discriminator output. +// Removes the 19kHz stereo pilot, L-R DSB-SC subcarrier (23-53kHz), and RDS (57kHz) +// that would otherwise leak into the audio output as clicks and tonal artifacts. 
+func (sess *streamSession) wfmAudioFilter(audio []float32, sampleRate int) []float32 { + if len(audio) == 0 || sampleRate <= 0 { + return audio + } + if sess.wfmAudioLPF == nil || sess.wfmAudioLPFRate != sampleRate { + sess.wfmAudioLPF = dsp.NewStatefulFIRReal(dsp.LowpassFIR(15000, sampleRate, 101)) + sess.wfmAudioLPFRate = sampleRate + } + return sess.wfmAudioLPF.Process(audio) +} + // stereoDecodeStateful: pilot-locked 38kHz oscillator for L-R extraction. // Uses persistent FIR filter state across frames for click-free stereo. // Reuses session scratch buffers to minimize allocations. @@ -1055,10 +1636,13 @@ type dspStateSnapshot struct { pilotLPFHi *dsp.StatefulFIRReal pilotLPFLo *dsp.StatefulFIRReal preDemodFIR *dsp.StatefulFIRComplex + preDemodDecimator *dsp.StatefulDecimatingFIRComplex preDemodDecim int preDemodRate int preDemodCutoff float64 preDemodDecimPhase int + wfmAudioLPF *dsp.StatefulFIRReal + wfmAudioLPFRate int } func (sess *streamSession) captureDSPState() dspStateSnapshot { @@ -1087,10 +1671,13 @@ func (sess *streamSession) captureDSPState() dspStateSnapshot { pilotLPFHi: sess.pilotLPFHi, pilotLPFLo: sess.pilotLPFLo, preDemodFIR: sess.preDemodFIR, + preDemodDecimator: sess.preDemodDecimator, preDemodDecim: sess.preDemodDecim, preDemodRate: sess.preDemodRate, preDemodCutoff: sess.preDemodCutoff, preDemodDecimPhase: sess.preDemodDecimPhase, + wfmAudioLPF: sess.wfmAudioLPF, + wfmAudioLPFRate: sess.wfmAudioLPFRate, } } @@ -1119,10 +1706,13 @@ func (sess *streamSession) restoreDSPState(s dspStateSnapshot) { sess.pilotLPFHi = s.pilotLPFHi sess.pilotLPFLo = s.pilotLPFLo sess.preDemodFIR = s.preDemodFIR + sess.preDemodDecimator = s.preDemodDecimator sess.preDemodDecim = s.preDemodDecim sess.preDemodRate = s.preDemodRate sess.preDemodCutoff = s.preDemodCutoff sess.preDemodDecimPhase = s.preDemodDecimPhase + sess.wfmAudioLPF = s.wfmAudioLPF + sess.wfmAudioLPFRate = s.wfmAudioLPFRate } // 
--------------------------------------------------------------------------- @@ -1157,6 +1747,7 @@ func (st *Streamer) openRecordingSession(sig *detector.Signal, now time.Time) (* playbackMode, stereoState := initialPlaybackState(demodName) sess := &streamSession{ + sessionID: fmt.Sprintf("%d-%d-r", sig.ID, now.UnixMilli()), signalID: sig.ID, centerHz: sig.CenterHz, bwHz: sig.BWHz, @@ -1201,6 +1792,7 @@ func (st *Streamer) openListenSession(sig *detector.Signal, now time.Time) *stre playbackMode, stereoState := initialPlaybackState(demodName) sess := &streamSession{ + sessionID: fmt.Sprintf("%d-%d-l", sig.ID, now.UnixMilli()), signalID: sig.ID, centerHz: sig.CenterHz, bwHz: sig.BWHz, @@ -1405,10 +1997,16 @@ func (st *Streamer) fanoutPCM(sess *streamSession, pcm []byte, pcmLen int) { default: st.droppedPCM++ logging.Warn("drop", "pcm_drop", "count", st.droppedPCM) + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.pcm.drop", 1, telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } } alive = append(alive, sub) } sess.audioSubs = alive + if st.telemetry != nil { + st.telemetry.SetGauge("streamer.subscribers.count", float64(len(alive)), telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sess.signalID), "session_id", sess.sessionID)) + } } func (st *Streamer) classAllowed(cls *classifier.Classification) bool { @@ -1433,6 +2031,15 @@ var ErrNoSession = errors.New("no active or pending session for this frequency") // WAV header helpers // --------------------------------------------------------------------------- +func writeWAVFile(path string, audio []float32, sampleRate int, channels int) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return writeWAVTo(f, audio, sampleRate, channels) +} + func writeStreamWAVHeader(f *os.File, sampleRate int, channels int) error { if channels <= 0 { channels = 1 @@ -1483,3 +2090,22 @@ func fixStreamWAVHeader(f *os.File, 
totalSamples int64, sampleRate int, channels } _, _ = f.Write(buf[:]) } + +// ResetStreams forces all active streaming sessions to discard their FIR states and decimation phases. +// This is used when the upstream DSP drops samples, creating a hard break in phase continuity. +func (st *Streamer) ResetStreams() { + st.mu.Lock() + defer st.mu.Unlock() + if st.telemetry != nil { + st.telemetry.IncCounter("streamer.reset.count", 1, nil) + st.telemetry.Event("stream_reset", "warn", "stream DSP state reset", nil, map[string]any{"sessions": len(st.sessions)}) + } + for _, sess := range st.sessions { + sess.preDemodFIR = nil + sess.preDemodDecimator = nil + sess.preDemodDecimPhase = 0 + sess.stereoResampler = nil + sess.monoResampler = nil + sess.wfmAudioLPF = nil + } +} diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go new file mode 100644 index 0000000..e57a6a1 --- /dev/null +++ b/internal/telemetry/telemetry.go @@ -0,0 +1,966 @@ +package telemetry + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" +) + +type Config struct { + Enabled bool `json:"enabled"` + HeavyEnabled bool `json:"heavy_enabled"` + HeavySampleEvery int `json:"heavy_sample_every"` + MetricSampleEvery int `json:"metric_sample_every"` + MetricHistoryMax int `json:"metric_history_max"` + EventHistoryMax int `json:"event_history_max"` + Retention time.Duration `json:"retention"` + PersistEnabled bool `json:"persist_enabled"` + PersistDir string `json:"persist_dir"` + RotateMB int `json:"rotate_mb"` + KeepFiles int `json:"keep_files"` +} + +func DefaultConfig() Config { + return Config{ + Enabled: true, + HeavyEnabled: false, + HeavySampleEvery: 12, + MetricSampleEvery: 2, + MetricHistoryMax: 12_000, + EventHistoryMax: 4_000, + Retention: 15 * time.Minute, + PersistEnabled: false, + PersistDir: "debug/telemetry", + RotateMB: 16, + KeepFiles: 8, + } +} + +type Tags map[string]string 
// MetricPoint is a single sampled metric observation kept in the in-memory
// history ring and optionally persisted as a JSONL line.
type MetricPoint struct {
	Timestamp time.Time `json:"ts"`
	Name      string    `json:"name"`
	Type      string    `json:"type"` // "counter", "gauge", or "distribution"
	Value     float64   `json:"value"`
	Tags      Tags      `json:"tags,omitempty"`
}

// Event is a structured, timestamped occurrence (reset, boundary hit, etc.)
// with a process-local monotonically increasing ID.
type Event struct {
	ID        uint64         `json:"id"`
	Timestamp time.Time      `json:"ts"`
	Name      string         `json:"name"`
	Level     string         `json:"level"` // lowercased; defaults to "info"
	Message   string         `json:"message,omitempty"`
	Tags      Tags           `json:"tags,omitempty"`
	Fields    map[string]any `json:"fields,omitempty"`
}

// SeriesValue is the current value of one counter or gauge series
// (name + tags) as exposed in a LiveSnapshot.
type SeriesValue struct {
	Name  string  `json:"name"`
	Value float64 `json:"value"`
	Tags  Tags    `json:"tags,omitempty"`
}

// DistValue is the aggregate view of one distribution series:
// lifetime count/min/max/mean/last plus a p95 estimated from a
// bounded ring of recent samples (see distMetric).
type DistValue struct {
	Name  string  `json:"name"`
	Count int64   `json:"count"`
	Min   float64 `json:"min"`
	Max   float64 `json:"max"`
	Mean  float64 `json:"mean"`
	Last  float64 `json:"last"`
	P95   float64 `json:"p95"`
	Tags  Tags    `json:"tags,omitempty"`
}

// LiveSnapshot is a point-in-time copy of all collector state, safe to
// serialize after the method returns (all nested maps/slices are cloned).
type LiveSnapshot struct {
	Now           time.Time      `json:"now"`
	StartedAt     time.Time      `json:"started_at"`
	UptimeMs      int64          `json:"uptime_ms"`
	Config        Config         `json:"config"`
	Counters      []SeriesValue  `json:"counters"`
	Gauges        []SeriesValue  `json:"gauges"`
	Distributions []DistValue    `json:"distributions"`
	RecentEvents  []Event        `json:"recent_events"`
	Status        map[string]any `json:"status,omitempty"`
}

// Query filters metric/event history lookups. Zero-valued fields are
// ignored; Tags entries must all match (logical AND).
type Query struct {
	From             time.Time
	To               time.Time
	Limit            int // capped/normalized by normalizeQuery
	Name             string
	NamePrefix       string
	Level            string // events only; case-insensitive match
	Tags             Tags
	IncludePersisted bool // also scan on-disk JSONL files
}

// collectorMetric holds the current value for one counter/gauge series.
type collectorMetric struct {
	name  string
	tags  Tags
	value float64
}

// distMetric holds lifetime aggregates plus a fixed-size (64 sample)
// ring buffer used to estimate p95 over recent observations.
type distMetric struct {
	name    string
	tags    Tags
	count   int64
	sum     float64
	min     float64
	max     float64
	last    float64
	samples []float64 // ring buffer, len 64
	next    int       // next write index into samples
	full    bool      // true once the ring has wrapped at least once
}

// persistedEnvelope is one JSONL line on disk: exactly one of Metric or
// Event is non-nil, discriminated by Kind ("metric" or "event").
type persistedEnvelope struct {
	Kind   string       `json:"kind"`
	Metric *MetricPoint `json:"metric,omitempty"`
	Event  *Event       `json:"event,omitempty"`
}

// Collector aggregates counters, gauges, distributions, and events under a
// single RWMutex, keeps bounded in-memory history, and optionally appends
// every stored point/event to a rotating JSONL writer.
type Collector struct {
	mu         sync.RWMutex
	cfg        Config
	startedAt  time.Time
	counterSeq uint64 // metric sampling sequence (atomic)
	heavySeq   uint64 // heavy-path sampling sequence (atomic)
	eventSeq   uint64 // event ID sequence (atomic)

	counters       map[string]*collectorMetric // keyed by metricKey(name, tags)
	gauges         map[string]*collectorMetric
	dists          map[string]*distMetric
	metricsHistory []MetricPoint
	events         []Event
	status         map[string]any

	writer *jsonlWriter // nil unless persistence is enabled
}

// New builds a Collector from a sanitized copy of cfg, opening the JSONL
// persistence writer when cfg.PersistEnabled is set.
func New(cfg Config) (*Collector, error) {
	cfg = sanitizeConfig(cfg)
	c := &Collector{
		cfg:            cfg,
		startedAt:      time.Now().UTC(),
		counters:       map[string]*collectorMetric{},
		gauges:         map[string]*collectorMetric{},
		dists:          map[string]*distMetric{},
		metricsHistory: make([]MetricPoint, 0, cfg.MetricHistoryMax),
		events:         make([]Event, 0, cfg.EventHistoryMax),
		status:         map[string]any{},
	}
	if cfg.PersistEnabled {
		writer, err := newJSONLWriter(cfg)
		if err != nil {
			return nil, err
		}
		c.writer = writer
	}
	return c, nil
}

// Close detaches and closes the persistence writer, if any.
// Safe to call on a nil receiver.
func (c *Collector) Close() error {
	if c == nil {
		return nil
	}
	c.mu.Lock()
	writer := c.writer
	c.writer = nil
	c.mu.Unlock()
	if writer != nil {
		return writer.Close()
	}
	return nil
}

// Configure swaps in a new (sanitized) config at runtime. A new JSONL
// writer is opened before the lock is taken; the previous writer is
// closed after the swap so in-flight writes are never left dangling.
func (c *Collector) Configure(cfg Config) error {
	if c == nil {
		return nil
	}
	cfg = sanitizeConfig(cfg)
	var writer *jsonlWriter
	var err error
	if cfg.PersistEnabled {
		writer, err = newJSONLWriter(cfg)
		if err != nil {
			return err
		}
	}
	c.mu.Lock()
	old := c.writer
	c.cfg = cfg
	c.writer = writer
	// Re-apply history caps/retention immediately under the new config.
	c.trimLocked(time.Now().UTC())
	c.mu.Unlock()
	if old != nil {
		_ = old.Close()
	}
	return nil
}

// Config returns a copy of the current configuration.
func (c *Collector) Config() Config {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.cfg
}

// Enabled reports whether the collector is accepting data.
// Safe to call on a nil receiver (reports false).
func (c *Collector) Enabled() bool {
	if c == nil {
		return false
	}
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.cfg.Enabled
}

// ShouldSampleHeavy gates expensive instrumentation: it returns true on
// every HeavySampleEvery-th call when heavy sampling is enabled.
func (c *Collector) ShouldSampleHeavy() bool {
	if c == nil {
		return false
	}
	c.mu.RLock()
	cfg := c.cfg
	c.mu.RUnlock()
	if !cfg.Enabled || !cfg.HeavyEnabled {
		return false
	}
	n := cfg.HeavySampleEvery
	if n <= 1 {
		return true
	}
	seq := atomic.AddUint64(&c.heavySeq, 1)
	return seq%uint64(n) == 0
}

// SetStatus stores an arbitrary key/value in the snapshot status map.
func (c *Collector) SetStatus(key string, value any) {
	if c == nil {
		return
	}
	c.mu.Lock()
	c.status[key] = value
	c.mu.Unlock()
}

// IncCounter adds delta to the named counter series.
func (c *Collector) IncCounter(name string, delta float64, tags Tags) {
	c.recordMetric("counter", name, delta, tags, true)
}

// SetGauge sets the named gauge series to value.
func (c *Collector) SetGauge(name string, value float64, tags Tags) {
	c.recordMetric("gauge", name, value, tags, false)
}

// Observe records one sample into the named distribution series.
func (c *Collector) Observe(name string, value float64, tags Tags) {
	c.recordMetric("distribution", name, value, tags, false)
}

// Event appends a structured event to history (level lowercased, empty
// level mapped to "info") and persists it when a writer is attached.
// The JSONL write happens outside the collector lock.
func (c *Collector) Event(name string, level string, message string, tags Tags, fields map[string]any) {
	if c == nil {
		return
	}
	now := time.Now().UTC()
	c.mu.Lock()
	if !c.cfg.Enabled {
		c.mu.Unlock()
		return
	}
	ev := Event{
		ID:        atomic.AddUint64(&c.eventSeq, 1),
		Timestamp: now,
		Name:      name,
		Level:     strings.TrimSpace(strings.ToLower(level)),
		Message:   message,
		Tags:      cloneTags(tags),
		Fields:    cloneFields(fields),
	}
	if ev.Level == "" {
		ev.Level = "info"
	}
	c.events = append(c.events, ev)
	c.trimLocked(now)
	writer := c.writer
	c.mu.Unlock()
	if writer != nil {
		_ = writer.Write(persistedEnvelope{Kind: "event", Event: &ev})
	}
}

// recordMetric is the shared backend for IncCounter/SetGauge/Observe.
// It updates the live series under the lock, then appends a MetricPoint
// to history subject to sampling: counters are always stored, boundary
// diagnostics (iq.extract.*.boundary.*) are force-stored, everything else
// is kept every MetricSampleEvery-th call. Persistence happens outside
// the lock.
func (c *Collector) recordMetric(kind string, name string, value float64, tags Tags, add bool) {
	if c == nil || strings.TrimSpace(name) == "" {
		return
	}
	now := time.Now().UTC()
	c.mu.Lock()
	if !c.cfg.Enabled {
		c.mu.Unlock()
		return
	}
	key := metricKey(name, tags)
	switch kind {
	case "counter":
		m := c.counters[key]
		if m == nil {
			m = &collectorMetric{name: name, tags: cloneTags(tags)}
			c.counters[key] = m
		}
		if add {
			m.value += value
		} else {
			m.value = value
		}
	case "gauge":
		m := c.gauges[key]
		if m == nil {
			m = &collectorMetric{name: name, tags: cloneTags(tags)}
			c.gauges[key] = m
		}
		m.value = value
	case "distribution":
		d := c.dists[key]
		if d == nil {
			d = &distMetric{
				name:    name,
				tags:    cloneTags(tags),
				min:     value,
				max:     value,
				samples: make([]float64, 64),
			}
			c.dists[key] = d
		}
		d.count++
		d.sum += value
		d.last = value
		if d.count == 1 || value < d.min {
			d.min = value
		}
		if d.count == 1 || value > d.max {
			d.max = value
		}
		if len(d.samples) > 0 {
			// Ring-buffer write for the p95 window.
			d.samples[d.next] = value
			d.next++
			if d.next >= len(d.samples) {
				d.next = 0
				d.full = true
			}
		}
	}
	sampleN := c.cfg.MetricSampleEvery
	seq := atomic.AddUint64(&c.counterSeq, 1)
	forceStore := strings.HasPrefix(name, "iq.extract.raw.boundary.") || strings.HasPrefix(name, "iq.extract.trimmed.boundary.")
	shouldStore := forceStore || sampleN <= 1 || seq%uint64(sampleN) == 0 || kind == "counter"
	var mp MetricPoint
	if shouldStore {
		mp = MetricPoint{
			Timestamp: now,
			Name:      name,
			Type:      kind,
			Value:     value,
			Tags:      cloneTags(tags),
		}
		c.metricsHistory = append(c.metricsHistory, mp)
	}
	c.trimLocked(now)
	writer := c.writer
	c.mu.Unlock()

	if writer != nil && shouldStore {
		_ = writer.Write(persistedEnvelope{Kind: "metric", Metric: &mp})
	}
}

// LiveSnapshot returns a deep copy of current collector state, with the
// series slices sorted by name (sorting happens after the read lock is
// released). RecentEvents carries at most the 40 newest events; the
// window size is derived from the slice capacity set in the make() above.
func (c *Collector) LiveSnapshot() LiveSnapshot {
	now := time.Now().UTC()
	c.mu.RLock()
	cfg := c.cfg
	out := LiveSnapshot{
		Now:           now,
		StartedAt:     c.startedAt,
		UptimeMs:      now.Sub(c.startedAt).Milliseconds(),
		Config:        cfg,
		Counters:      make([]SeriesValue, 0, len(c.counters)),
		Gauges:        make([]SeriesValue, 0, len(c.gauges)),
		Distributions: make([]DistValue, 0, len(c.dists)),
		RecentEvents:  make([]Event, 0, min(40, len(c.events))),
		Status:        cloneFields(c.status),
	}
	for _, m := range c.counters {
		out.Counters = append(out.Counters, SeriesValue{Name: m.name, Value: m.value, Tags: cloneTags(m.tags)})
	}
	for _, m := range c.gauges {
		out.Gauges = append(out.Gauges, SeriesValue{Name: m.name, Value: m.value, Tags: cloneTags(m.tags)})
	}
	for _, d := range c.dists {
		mean := 0.0
		if d.count > 0 {
			mean = d.sum / float64(d.count)
		}
		out.Distributions = append(out.Distributions, DistValue{
			Name:  d.name,
			Count: d.count,
			Min:   d.min,
			Max:   d.max,
			Mean:  mean,
			Last:  d.last,
			P95:   p95FromDist(d),
			Tags:  cloneTags(d.tags),
		})
	}
	// NOTE(review): the window size is read back via cap(out.RecentEvents);
	// this works because the make() above sizes capacity to min(40, len).
	start := len(c.events) - cap(out.RecentEvents)
	if start < 0 {
		start = 0
	}
	for _, ev := range c.events[start:] {
		out.RecentEvents = append(out.RecentEvents, copyEvent(ev))
	}
	c.mu.RUnlock()
	sort.Slice(out.Counters, func(i, j int) bool { return out.Counters[i].Name < out.Counters[j].Name })
	sort.Slice(out.Gauges, func(i, j int) bool { return out.Gauges[i].Name < out.Gauges[j].Name })
	sort.Slice(out.Distributions, func(i, j int) bool { return out.Distributions[i].Name < out.Distributions[j].Name })
	return out
}

// QueryMetrics returns matching in-memory metric points, optionally merged
// with persisted JSONL history, sorted by timestamp with the newest
// q.Limit entries retained. Missing persistence files are not an error.
func (c *Collector) QueryMetrics(q Query) ([]MetricPoint, error) {
	if c == nil {
		return nil, nil
	}
	q = normalizeQuery(q)
	c.mu.RLock()
	items := make([]MetricPoint, 0, len(c.metricsHistory))
	for _, m := range c.metricsHistory {
		if metricMatch(m, q) {
			items = append(items, copyMetric(m))
		}
	}
	cfg := c.cfg
	c.mu.RUnlock()
	if q.IncludePersisted && cfg.PersistEnabled {
		persisted, err := readPersistedMetrics(cfg, q)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		items = append(items, persisted...)
	}
	sort.Slice(items, func(i, j int) bool {
		return items[i].Timestamp.Before(items[j].Timestamp)
	})
	if q.Limit > 0 && len(items) > q.Limit {
		items = items[len(items)-q.Limit:]
	}
	return items, nil
}

// QueryEvents is the event-side counterpart of QueryMetrics: same merge,
// sort, and tail-limit semantics.
func (c *Collector) QueryEvents(q Query) ([]Event, error) {
	if c == nil {
		return nil, nil
	}
	q = normalizeQuery(q)
	c.mu.RLock()
	items := make([]Event, 0, len(c.events))
	for _, ev := range c.events {
		if eventMatch(ev, q) {
			items = append(items, copyEvent(ev))
		}
	}
	cfg := c.cfg
	c.mu.RUnlock()
	if q.IncludePersisted && cfg.PersistEnabled {
		persisted, err := readPersistedEvents(cfg, q)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		items = append(items, persisted...)
	}
	sort.Slice(items, func(i, j int) bool {
		return items[i].Timestamp.Before(items[j].Timestamp)
	})
	if q.Limit > 0 && len(items) > q.Limit {
		items = items[len(items)-q.Limit:]
	}
	return items, nil
}

// trimLocked enforces the history caps and the time-based retention
// window. Caller must hold c.mu. Trims reallocate the slices so the old
// backing arrays (and the entries they pin) can be collected.
func (c *Collector) trimLocked(now time.Time) {
	if c.cfg.MetricHistoryMax > 0 && len(c.metricsHistory) > c.cfg.MetricHistoryMax {
		c.metricsHistory = append([]MetricPoint(nil), c.metricsHistory[len(c.metricsHistory)-c.cfg.MetricHistoryMax:]...)
	}
	if c.cfg.EventHistoryMax > 0 && len(c.events) > c.cfg.EventHistoryMax {
		c.events = append([]Event(nil), c.events[len(c.events)-c.cfg.EventHistoryMax:]...)
	}
	ret := c.cfg.Retention
	if ret <= 0 {
		return
	}
	cut := now.Add(-ret)
	mStart := 0
	for mStart < len(c.metricsHistory) && c.metricsHistory[mStart].Timestamp.Before(cut) {
		mStart++
	}
	if mStart > 0 {
		c.metricsHistory = append([]MetricPoint(nil), c.metricsHistory[mStart:]...)
	}
	eStart := 0
	for eStart < len(c.events) && c.events[eStart].Timestamp.Before(cut) {
		eStart++
	}
	if eStart > 0 {
		c.events = append([]Event(nil), c.events[eStart:]...)
	}
}

// sanitizeConfig backfills non-positive / empty fields from DefaultConfig
// so the collector never runs with degenerate limits.
func sanitizeConfig(cfg Config) Config {
	def := DefaultConfig()
	if cfg.HeavySampleEvery <= 0 {
		cfg.HeavySampleEvery = def.HeavySampleEvery
	}
	if cfg.MetricSampleEvery <= 0 {
		cfg.MetricSampleEvery = def.MetricSampleEvery
	}
	if cfg.MetricHistoryMax <= 0 {
		cfg.MetricHistoryMax = def.MetricHistoryMax
	}
	if cfg.EventHistoryMax <= 0 {
		cfg.EventHistoryMax = def.EventHistoryMax
	}
	if cfg.Retention <= 0 {
		cfg.Retention = def.Retention
	}
	if strings.TrimSpace(cfg.PersistDir) == "" {
		cfg.PersistDir = def.PersistDir
	}
	if cfg.RotateMB <= 0 {
		cfg.RotateMB = def.RotateMB
	}
	if cfg.KeepFiles <= 0 {
		cfg.KeepFiles = def.KeepFiles
	}
	return cfg
}

// normalizeQuery clamps Limit to (0, 5000] with a default of 500 and
// ensures Tags is non-nil so match loops can range over it.
func normalizeQuery(q Query) Query {
	if q.Limit <= 0 || q.Limit > 5000 {
		q.Limit = 500
	}
	if q.Tags == nil {
		q.Tags = Tags{}
	}
	return q
}

// metricMatch reports whether m satisfies every set filter in q.
func metricMatch(m MetricPoint, q Query) bool {
	if !q.From.IsZero() && m.Timestamp.Before(q.From) {
		return false
	}
	if !q.To.IsZero() && m.Timestamp.After(q.To) {
		return false
	}
	if q.Name != "" && m.Name != q.Name {
		return false
	}
	if q.NamePrefix != "" && !strings.HasPrefix(m.Name, q.NamePrefix) {
		return false
	}
	for k, v := range q.Tags {
		if m.Tags[k] != v {
			return false
		}
	}
	return true
}

// eventMatch reports whether ev satisfies every set filter in q,
// including the case-insensitive level match.
func eventMatch(ev Event, q Query) bool {
	if !q.From.IsZero() && ev.Timestamp.Before(q.From) {
		return false
	}
	if !q.To.IsZero() && ev.Timestamp.After(q.To) {
		return false
	}
	if q.Name != "" && ev.Name != q.Name {
		return false
	}
	if q.NamePrefix != "" && !strings.HasPrefix(ev.Name, q.NamePrefix) {
		return false
	}
	if q.Level != "" && !strings.EqualFold(q.Level, ev.Level) {
		return false
	}
	for k, v := range q.Tags {
		if ev.Tags[k] != v {
			return false
		}
	}
	return true
}

// metricKey builds the series map key: the name plus "|k=v" pairs with
// tag keys sorted, so the same tag set always yields the same key.
func metricKey(name string, tags Tags) string {
	if len(tags) == 0 {
		return name
	}
	keys := make([]string, 0, len(tags))
	for k := range tags {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	var b strings.Builder
	b.Grow(len(name) + len(keys)*16)
	b.WriteString(name)
	for _, k := range keys {
		b.WriteString("|")
		b.WriteString(k)
		b.WriteString("=")
		b.WriteString(tags[k])
	}
	return b.String()
}

// cloneTags returns a shallow copy of tags, or nil for an empty map.
func cloneTags(tags Tags) Tags {
	if len(tags) == 0 {
		return nil
	}
	out := make(Tags, len(tags))
	for k, v := range tags {
		out[k] = v
	}
	return out
}

// cloneFields returns a shallow copy of fields, or nil for an empty map.
// Values themselves are not deep-copied.
func cloneFields(fields map[string]any) map[string]any {
	if len(fields) == 0 {
		return nil
	}
	out := make(map[string]any, len(fields))
	for k, v := range fields {
		out[k] = v
	}
	return out
}

// copyMetric returns m with its Tags map cloned.
func copyMetric(m MetricPoint) MetricPoint {
	return MetricPoint{
		Timestamp: m.Timestamp,
		Name:      m.Name,
		Type:      m.Type,
		Value:     m.Value,
		Tags:      cloneTags(m.Tags),
	}
}

// copyEvent returns ev with its Tags and Fields maps cloned.
func copyEvent(ev Event) Event {
	return Event{
		ID:        ev.ID,
		Timestamp: ev.Timestamp,
		Name:      ev.Name,
		Level:     ev.Level,
		Message:   ev.Message,
		Tags:      cloneTags(ev.Tags),
		Fields:    cloneFields(ev.Fields),
	}
}

// p95FromDist estimates the 95th percentile from the ring buffer of
// recent samples (floor index over the sorted window), falling back to
// the last value when the window is empty.
func p95FromDist(d *distMetric) float64 {
	if d == nil || d.count == 0 {
		return 0
	}
	n := d.next
	if d.full {
		n = len(d.samples)
	}
	if n <= 0 {
		return d.last
	}
	buf := make([]float64, n)
	copy(buf, d.samples[:n])
	sort.Float64s(buf)
	idx := int(float64(n-1) * 0.95)
	if idx < 0 {
		idx = 0
	}
	if idx >= n {
		idx = n - 1
	}
	return buf[idx]
}

// jsonlWriter appends persistedEnvelope lines to size-rotated JSONL files
// under one directory, pruning old files down to cfg.KeepFiles.
type jsonlWriter struct {
	cfg         Config
	mu          sync.Mutex
	dir         string
	f           *os.File
	w           *bufio.Writer
	currentPath string
	currentSize int64
	seq         int64 // per-writer rotation counter embedded in filenames
}

// newJSONLWriter creates the persistence directory and opens the first
// output file.
func newJSONLWriter(cfg Config) (*jsonlWriter, error) {
	dir := filepath.Clean(cfg.PersistDir)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return nil, err
	}
	w := &jsonlWriter{cfg: cfg, dir: dir}
	if err := w.rotateLocked(); err != nil {
		return nil, err
	}
	return w, nil
}

// Write serializes v as one JSON line, rotating first if the line would
// push the current file past RotateMB. Writes after Close are silently
// dropped. Each write is flushed so a crash loses at most one line.
func (w *jsonlWriter) Write(v persistedEnvelope) error {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.f == nil || w.w == nil {
		return nil
	}
	line, err := json.Marshal(v)
	if err != nil {
		return err
	}
	line = append(line, '\n')
	if w.currentSize+int64(len(line)) > int64(w.cfg.RotateMB)*1024*1024 {
		if err := w.rotateLocked(); err != nil {
			return err
		}
	}
	n, err := w.w.Write(line)
	w.currentSize += int64(n)
	if err != nil {
		return err
	}
	return w.w.Flush()
}

// Close flushes and closes the current file; subsequent Writes are no-ops.
func (w *jsonlWriter) Close() error {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.w != nil {
		_ = w.w.Flush()
	}
	if w.f != nil {
		err := w.f.Close()
		w.f = nil
		w.w = nil
		return err
	}
	return nil
}

// rotateLocked closes the current file (if any), opens a fresh
// timestamp+sequence named file, and prunes old files. Caller must hold
// w.mu (or be the constructor, before the writer is shared).
func (w *jsonlWriter) rotateLocked() error {
	if w.w != nil {
		_ = w.w.Flush()
	}
	if w.f != nil {
		_ = w.f.Close()
	}
	w.seq++
	name := fmt.Sprintf("telemetry-%s-%04d.jsonl", time.Now().UTC().Format("20060102-150405"), w.seq)
	path := filepath.Join(w.dir, name)
	f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	info, _ := f.Stat()
	size := int64(0)
	if info != nil {
		size = info.Size()
	}
	w.f = f
	w.w = bufio.NewWriterSize(f, 64*1024)
	w.currentPath = path
	w.currentSize = size
	_ = pruneFiles(w.dir, w.cfg.KeepFiles)
	return nil
}

// pruneFiles deletes the oldest telemetry-*.jsonl files in dir so at most
// keep remain. Ordering relies on the lexicographic sort of the
// timestamped filenames.
func pruneFiles(dir string, keep int) error {
	if keep <= 0 {
		return nil
	}
	ents, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	files := make([]string, 0, len(ents))
	for _, ent := range ents {
		if ent.IsDir() {
			continue
		}
		name := ent.Name()
		if !strings.HasPrefix(name, "telemetry-") || !strings.HasSuffix(name, ".jsonl") {
			continue
		}
		files = append(files, filepath.Join(dir, name))
	}
	if len(files) <= keep {
		return nil
	}
	sort.Strings(files)
	for _, path := range files[:len(files)-keep] {
		_ = os.Remove(path)
	}
	return nil
}

// readPersistedMetrics scans every persisted file and returns the metric
// points matching q. Individual unreadable files are skipped.
func readPersistedMetrics(cfg Config, q Query) ([]MetricPoint, error) {
	files, err := listPersistedFiles(cfg.PersistDir)
	if err != nil {
		return nil, err
	}
	out := make([]MetricPoint, 0, 256)
	for _, path := range files {
		points, err := parsePersistedFile(path, q)
		if err != nil {
			continue
		}
		for _, p := range points.metrics {
			if metricMatch(p, q) {
				out = append(out, p)
			}
		}
	}
	return out, nil
}

// readPersistedEvents scans every persisted file and returns the events
// matching q. Individual unreadable files are skipped.
func readPersistedEvents(cfg Config, q Query) ([]Event, error) {
	files, err := listPersistedFiles(cfg.PersistDir)
	if err != nil {
		return nil, err
	}
	out := make([]Event, 0, 128)
	for _, path := range files {
		points, err := parsePersistedFile(path, q)
		if err != nil {
			continue
		}
		for _, ev := range points.events {
			if eventMatch(ev, q) {
				out = append(out, ev)
			}
		}
	}
	return out, nil
}

// parsedFile is the decoded contents of one JSONL file, split by kind.
type parsedFile struct {
	metrics []MetricPoint
	events  []Event
}

// parsePersistedFile decodes one JSONL file, skipping blank or malformed
// lines, and keeps memory bounded while scanning by trimming each slice
// to q.Limit once the combined total exceeds q.Limit*2.
func parsePersistedFile(path string, q Query) (parsedFile, error) {
	f, err := os.Open(path)
	if err != nil {
		return parsedFile{}, err
	}
	defer f.Close()
	out := parsedFile{
		metrics: make([]MetricPoint, 0, 64),
		events:  make([]Event, 0, 32),
	}
	s := bufio.NewScanner(f)
	// Allow lines up to 1 MiB (default Scanner limit is 64 KiB).
	s.Buffer(make([]byte, 0, 32*1024), 1024*1024)
	for s.Scan() {
		line := s.Bytes()
		if len(line) == 0 {
			continue
		}
		var env persistedEnvelope
		if err := json.Unmarshal(line, &env); err != nil {
			continue
		}
		if env.Metric != nil {
			out.metrics = append(out.metrics, *env.Metric)
		} else if env.Event != nil {
			out.events = append(out.events, *env.Event)
		}
		if q.Limit > 0 && len(out.metrics)+len(out.events) > q.Limit*2 {
			// keep bounded while scanning
			if len(out.metrics) > q.Limit {
				out.metrics = out.metrics[len(out.metrics)-q.Limit:]
			}
			if len(out.events) > q.Limit {
				out.events = out.events[len(out.events)-q.Limit:]
			}
		}
	}
	return out, s.Err()
}

// listPersistedFiles returns the telemetry-*.jsonl files in dir, sorted
// lexicographically (oldest first, given the timestamped names).
func listPersistedFiles(dir string) ([]string, error) {
	ents, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}
	files := make([]string, 0, len(ents))
	for _, ent := range ents {
		if ent.IsDir() {
			continue
		}
		name := ent.Name()
		if strings.HasPrefix(name, "telemetry-") && strings.HasSuffix(name, ".jsonl") {
			files = append(files, filepath.Join(dir, name))
		}
	}
	sort.Strings(files)
	return files, nil
}

// ParseTimeQuery parses a URL time parameter: empty string yields the
// zero time; a bare integer is treated as Unix milliseconds when larger
// than 1e12, otherwise Unix seconds; then RFC3339Nano and RFC3339 are
// tried. All results are normalized to UTC.
func ParseTimeQuery(raw string) (time.Time, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return time.Time{}, nil
	}
	if ms, err := strconv.ParseInt(raw, 10, 64); err == nil {
		if ms > 1e12 {
			return time.UnixMilli(ms).UTC(), nil
		}
		return time.Unix(ms, 0).UTC(), nil
	}
	if t, err := time.Parse(time.RFC3339Nano, raw); err == nil {
		return t.UTC(), nil
	}
	if t, err := time.Parse(time.RFC3339, raw); err == nil {
		return t.UTC(), nil
	}
	return time.Time{}, errors.New("invalid time query")
}

// TagsWith returns a copy of base with key set to fmt.Sprint(value).
// The base map is never mutated.
func TagsWith(base Tags, key string, value any) Tags {
	out := cloneTags(base)
	if out == nil {
		out = Tags{}
	}
	out[key] = fmt.Sprint(value)
	return out
}

// TagsFromPairs builds a Tags map from alternating key, value strings.
// Keys are trimmed; empty keys and a trailing unpaired value are
// dropped. Returns nil when no valid pairs remain.
func TagsFromPairs(kv ...string) Tags {
	if len(kv) < 2 {
		return nil
	}
	out := Tags{}
	for i := 0; i+1 < len(kv); i += 2 {
		k := strings.TrimSpace(kv[i])
		if k == "" {
			continue
		}
		out[k] = kv[i+1]
	}
	if len(out) == 0 {
		return nil
	}
	return out
}

// min is a local integer helper.
// NOTE(review): shadows the Go 1.21+ builtin; presumably kept for older
// toolchains — confirm the module's go directive before removing.
func min(a int, b int) int {
	if a < b {
		return a
	}
	return b
}