Просмотр исходного кода

Merge branch 'refactor/stateful-streaming-extractor'

master
Jan Svabenik 6 часов назад
Родитель
Commit
bf71f8acfd
77 измененных файлов: 8674 добавлений и 325 удалений
  1. +299
    -0
      AGENTS.md
  2. +26
    -0
      README.md
  3. +0
    -184
      STATE.md
  4. +0
    -23
      TODO.md
  5. +17
    -4
      build-gpudemod-dll.ps1
  6. +6
    -0
      build-sdrplay.ps1
  7. +101
    -3
      cmd/sdrd/dsp_loop.go
  8. +434
    -3
      cmd/sdrd/helpers.go
  9. +185
    -3
      cmd/sdrd/http_handlers.go
  10. +6
    -0
      cmd/sdrd/legacy_extract.go
  11. +24
    -4
      cmd/sdrd/main.go
  12. +177
    -3
      cmd/sdrd/pipeline_runtime.go
  13. +2
    -2
      cmd/sdrd/pipeline_runtime_test.go
  14. +41
    -2
      cmd/sdrd/source_manager.go
  15. +45
    -0
      cmd/sdrd/streaming_compare.go
  16. +27
    -0
      cmd/sdrd/streaming_monitoring.go
  17. +50
    -0
      cmd/sdrd/streaming_production.go
  18. +137
    -0
      cmd/sdrd/streaming_refactor.go
  19. +2
    -0
      cmd/sdrd/types.go
  20. +343
    -0
      config.autosave.yaml
  21. +20
    -5
      config.yaml
  22. +1077
    -0
      docs/audio-click-debug-notes-2026-03-24.md
  23. +48
    -0
      docs/gpu-streaming-refactor-plan-2026-03-25.md
  24. +196
    -0
      docs/known-issues.md
  25. +711
    -0
      docs/telemetry-api.md
  26. +100
    -0
      docs/telemetry-debug-runbook.md
  27. +62
    -0
      internal/config/config.go
  28. +35
    -1
      internal/demod/fm.go
  29. +0
    -34
      internal/demod/gpudemod/README.md
  30. +1
    -1
      internal/demod/gpudemod/batch.go
  31. +13
    -4
      internal/demod/gpudemod/batch_runner.go
  32. +3
    -3
      internal/demod/gpudemod/batch_runner_windows.go
  33. Двоичные данные
      internal/demod/gpudemod/build/gpudemod_kernels.lib
  34. +47
    -0
      internal/demod/gpudemod/compare.go
  35. +19
    -0
      internal/demod/gpudemod/compare_gpu.go
  36. +10
    -0
      internal/demod/gpudemod/compare_oracle.go
  37. +27
    -0
      internal/demod/gpudemod/compare_pipeline.go
  38. +32
    -0
      internal/demod/gpudemod/compare_pipeline_test.go
  39. +12
    -0
      internal/demod/gpudemod/compare_state.go
  40. +18
    -0
      internal/demod/gpudemod/compare_test.go
  41. +170
    -0
      internal/demod/gpudemod/cpu_oracle.go
  42. +89
    -0
      internal/demod/gpudemod/cpu_oracle_test.go
  43. +309
    -0
      internal/demod/gpudemod/native/exports.cu
  44. +31
    -0
      internal/demod/gpudemod/oracle_runner_test.go
  45. +45
    -0
      internal/demod/gpudemod/oracle_validation_test.go
  46. +28
    -0
      internal/demod/gpudemod/polyphase.go
  47. +22
    -0
      internal/demod/gpudemod/polyphase_test.go
  48. +57
    -0
      internal/demod/gpudemod/state_reset_test.go
  49. +70
    -0
      internal/demod/gpudemod/stream_state.go
  50. +31
    -0
      internal/demod/gpudemod/stream_state_test.go
  51. +39
    -0
      internal/demod/gpudemod/streaming_gpu_contract.go
  52. +29
    -0
      internal/demod/gpudemod/streaming_gpu_exec.go
  53. +112
    -0
      internal/demod/gpudemod/streaming_gpu_exec_test.go
  54. +30
    -0
      internal/demod/gpudemod/streaming_gpu_host_exec.go
  55. +49
    -0
      internal/demod/gpudemod/streaming_gpu_host_oracle.go
  56. +35
    -0
      internal/demod/gpudemod/streaming_gpu_host_oracle_test.go
  57. +4
    -0
      internal/demod/gpudemod/streaming_gpu_modes.go
  58. +284
    -0
      internal/demod/gpudemod/streaming_gpu_native_prepare.go
  59. +44
    -0
      internal/demod/gpudemod/streaming_gpu_native_prepare_stub.go
  60. +206
    -0
      internal/demod/gpudemod/streaming_gpu_native_prepare_test.go
  61. +28
    -0
      internal/demod/gpudemod/streaming_gpu_native_state.go
  62. +61
    -0
      internal/demod/gpudemod/streaming_gpu_prepare.go
  63. +26
    -0
      internal/demod/gpudemod/streaming_gpu_stub.go
  64. +59
    -0
      internal/demod/gpudemod/streaming_gpu_stub_test.go
  65. +213
    -0
      internal/demod/gpudemod/streaming_gpu_validation_helpers_test.go
  66. +64
    -0
      internal/demod/gpudemod/streaming_host_core.go
  67. +40
    -0
      internal/demod/gpudemod/streaming_host_core_test.go
  68. +111
    -0
      internal/demod/gpudemod/streaming_oracle_extract.go
  69. +64
    -0
      internal/demod/gpudemod/streaming_types.go
  70. +78
    -0
      internal/demod/gpudemod/test_harness.go
  71. +39
    -0
      internal/demod/gpudemod/test_harness_test.go
  72. +49
    -11
      internal/demod/gpudemod/windows_bridge.go
  73. +95
    -0
      internal/dsp/decimating_fir.go
  74. +57
    -0
      internal/dsp/decimating_fir_test.go
  75. +28
    -2
      internal/recorder/recorder.go
  76. +659
    -33
      internal/recorder/streamer.go
  77. +966
    -0
      internal/telemetry/telemetry.go

+ 299
- 0
AGENTS.md Просмотреть файл

@@ -0,0 +1,299 @@
# AGENTS.md

This file is the repo-level working guide for humans, coding agents, and LLMs.
Read it before making changes.

---

## 1. Purpose of this file

Use this file as the canonical "how to work in this repo" guide.
It is intentionally practical and operational.

Use it to answer questions like:
- Where should changes go?
- What must not be committed?
- How should builds/tests be run?
- Which docs are canonical?
- How should debugging work be documented?
- How should agents behave when touching this repo?

---

## 2. Repo intent

`sdr-wideband-suite` is a Go-based SDR analysis and streaming system with:
- live spectrum/waterfall UI
- signal detection/classification
- extraction / demodulation / recording
- GPU-assisted paths
- streaming audio paths
- extensive telemetry/debugging support

This repo has gone through active streaming-path and audio-click debugging.
Do not assume older comments, notes, or experimental code paths are still authoritative.
Prefer current code, current docs in `docs/`, and current branch state over historical assumptions.

---

## 3. Canonical documentation

### Keep as primary references
- `README.md`
- high-level project overview
- build/run basics
- feature summary
- `ROADMAP.md`
- longer-lived architectural direction
- `docs/known-issues.md`
- curated open engineering issues
- `docs/telemetry-api.md`
- telemetry endpoint documentation
- `docs/telemetry-debug-runbook.md`
- telemetry/debug operating guide
- `docs/audio-click-debug-notes-2026-03-24.md`
- historical incident record and final resolution notes for the audio-click investigation

### Treat as historical / contextual docs
Anything in `docs/` that reads like an incident log, deep debug note, or one-off investigation should be treated as supporting context, not automatic source of truth.

### Do not create multiple competing issue lists
If new open problems are found:
- update `docs/known-issues.md`
- keep raw reviewer/ad-hoc reports out of the main repo flow unless they are converted into curated docs

---

## 4. Branching and workflow rules

### Current working model
- Use focused branches for real feature/fix work.
- Do not keep long-lived junk/debug branches alive once the useful work has been transferred.
- Prefer short-lived cleanup branches for docs/config cleanup.

### Branch hygiene
- Do not pile unrelated work onto one branch if it can be split cleanly.
- Keep bugfixes, config cleanup, and large refactors logically separable when possible.
- Before deleting an old branch, ensure all useful work is already present in the active branch or merged into the main line.

### Mainline policy
- Do not merge to `master` blindly.
- Before merge, prefer at least a short sanity pass on:
- live playback
- recording
- WFM / WFM_STEREO / at least one non-WFM mode if relevant
- restart behavior if the change affects runtime state

---

## 5. Commit policy

### Commit what matters
Good commits are:
- real code fixes
- clear docs improvements
- deliberate config-default changes
- cleanup that reduces confusion

### Do not commit accidental noise
Do **not** commit unless explicitly intended:
- local debug dumps
- ad-hoc telemetry exports
- generated WAV debug windows
- temporary patch files
- throwaway reviewer JSON snapshots
- local-only runtime artifacts

### Prefer small, readable commit scopes
Examples of good separate commit scopes:
- code fix
- config default cleanup
- doc cleanup
- known-issues update

---

## 6. Files and paths that need extra care

### Config files
- `config.yaml`
- `config.autosave.yaml`

Rules:
- These can drift during debugging.
- Do not commit config changes accidentally.
- Only commit them when the intent is to change repo defaults.
- Keep in mind that `config.autosave.yaml` can override expected runtime behavior after restart.

### Debug / dump artifacts
Examples:
- `debug/`
- `tele-*.json`
- ad-hoc patch/report scratch files
- generated WAV capture windows

Rules:
- Treat these as local investigation material unless intentionally promoted into docs.
- Do not leave them hanging around as tracked repo clutter.

### Root docs
The repo root should stay relatively clean.
Keep only genuinely canonical top-level docs there.
One-off investigation output belongs in `docs/` or should be deleted.

---

## 7. Build and test rules

### General rule
Prefer the repo's own scripts and established workflow over ad-hoc raw build commands.

### Important operational rule
Before coding/build/test sessions on this repo:
- stop the browser UI
- stop `sdrd.exe`

This avoids file locks, stale runtime state, and misleading live-test behavior.

### Build preference
Use the project scripts where applicable, especially for the real app flows.
Examples already used during this project include:
- `build-sdrplay.ps1`
- `start-sdr.ps1`

Do **not** default to random raw `go build` commands for full workflow validation unless the goal is a narrow compile-only sanity check.

### GPU / native-path caution
If working on GPU/native streaming code:
- do not assume the CPU oracle path is currently trustworthy unless you have just validated it
- do not assume old README notes inside subdirectories are current
- check the current code and current docs first

---

## 8. Debugging rules

### Telemetry-first, but disciplined
Telemetry is available and useful.
However:
- heavy telemetry can distort runtime behavior
- debug config can accidentally persist via autosave
- not every one-off probe belongs in permanent code

### When debugging
Prefer this order:
1. existing telemetry and current docs
2. focused additional instrumentation
3. short-lived dumps / captures
4. cleanup afterward

### If you add debugging support
Ask:
- Is this reusable for future incidents?
- Should it live in `docs/known-issues.md` or a runbook?
- Is it temporary and should be removed after use?

### If a reviewer provides a raw report
Do not blindly keep raw snapshots as canonical repo docs.
Instead:
- extract the durable findings
- update `docs/known-issues.md`
- keep only the cleaned/curated version in the main repo flow

---

## 9. Documentation rules

### Prefer curated docs over raw dumps
Good:
- `docs/known-issues.md`
- runbooks
- architectural notes
- incident summaries with clear final status

Bad:
- random JSON reviewer dumps as primary docs
- duplicate issue lists
- stale TODO/STATE files that nobody maintains

### If a doc becomes stale
Choose one:
- update it
- move it into `docs/` as historical context
- delete it

Do not keep stale docs in prominent locations if they compete with current truth.

---

## 10. Known lessons from recent work

These are important enough to keep visible:

### Audio-click investigation lessons
- The final click bug was not a single simple DSP bug.
- Real causes included:
- shared-buffer mutation / aliasing
- extractor reset churn from unstable config hashing
- streaming-path batch rejection / fallback behavior
- Secondary contributing issues existed in discriminator bridging and WFM mono/plain-path filtering.

### Practical repo lessons
- Silent fallback paths are dangerous; keep important fallthrough/fallback visibility.
- Shared IQ buffers should be treated very carefully.
- Debug artifacts should not become permanent repo clutter.
- Curated issue tracking in Git is better than keeping raw review snapshots around.

---

## 11. Agent behavior expectations

If you are an AI coding agent / LLM working in this repo:

### Do
- read this file first
- prefer current code and current docs over old assumptions
- keep changes scoped and explainable
- separate config cleanup from code fixes when possible
- leave the repo cleaner than you found it
- promote durable findings into curated docs

### Do not
- commit local debug noise by default
- create duplicate status/todo/issue files without a strong reason
- assume experimental comments or old subdirectory READMEs are still correct
- leave raw reviewer output as the only source of truth
- hide fallback behavior or silently ignore critical path failures

---

## 12. Recommended doc update pattern after meaningful work

When a meaningful fix or investigation lands:
1. update code
2. update any relevant canonical docs
3. update `docs/known-issues.md` if open issues changed
4. remove or archive temporary debug artifacts
5. keep the repo root and branch state clean

---

## 13. Minimal pre-commit checklist

Before committing, quickly check:
- Am I committing only intended files?
- Are config changes intentional?
- Am I accidentally committing dumps/logs/debug exports?
- Should any reviewer findings be moved into `docs/known-issues.md`?
- Did I leave stale temporary files behind?

---

## 14. If unsure

If a file looks ambiguous:
- canonical + actively maintained -> keep/update
- historical but useful -> move or keep in `docs/`
- stale and confusing -> delete

Clarity beats nostalgia.

+ 26
- 0
README.md Просмотреть файл

@@ -192,6 +192,32 @@ go build -tags sdrplay ./cmd/sdrd
- `GET /api/signals` -> current live signals
- `GET /api/events?limit=&since=` -> recent events

### Debug Telemetry
- `GET /api/debug/telemetry/live` -> current telemetry snapshot (counters, gauges, distributions, recent events, collector status/config)
- `GET /api/debug/telemetry/history` -> historical metric samples with filtering by time/name/prefix/tags
- `GET /api/debug/telemetry/events` -> telemetry event/anomaly history with filtering by time/name/prefix/level/tags
- `GET /api/debug/telemetry/config` -> current collector config plus `debug.telemetry` runtime config
- `POST /api/debug/telemetry/config` -> update telemetry settings at runtime and persist them to autosave config

Telemetry query params (`history` / `events`) include:
- `since`, `until` -> unix seconds, unix milliseconds, or RFC3339 timestamps
- `limit`
- `name`, `prefix`
- `signal_id`, `session_id`, `stage`, `trace_id`, `component`
- `tag_<key>=<value>` for arbitrary tag filters
- `include_persisted=true|false` (default `true`)
- `level` on the events endpoint

Telemetry config lives under `debug.telemetry`:
- `enabled`, `heavy_enabled`, `heavy_sample_every`
- `metric_sample_every`, `metric_history_max`, `event_history_max`
- `retention_seconds`
- `persist_enabled`, `persist_dir`, `rotate_mb`, `keep_files`

See also:
- `docs/telemetry-api.md` for the full telemetry API reference
- `docs/telemetry-debug-runbook.md` for the short operational debug flow

### Recordings
- `GET /api/recordings`
- `GET /api/recordings/:id` (meta.json)


+ 0
- 184
STATE.md Просмотреть файл

@@ -1,184 +0,0 @@
# SDR Wideband Suite - Current State

This file is the practical handoff / resume state for future work.
Use it together with `ROADMAP.md`.

- `ROADMAP.md` = long-term architecture and phase roadmap
- `STATE.md` = current repo state, working conventions, and next recommended entry point

## Current Milestone State

- **Phase 1 complete**
- **Phase 2 complete**
- **Phase 3 complete**
- **Phase 4 complete**

Current project state should be treated as:
- Phase 1 = architecture foundation landed
- Phase 2 = multi-resolution surveillance semantics landed
- Phase 3 = conservative runtime prioritization/admission/rebalance landed
- Phase 4 = monitor-window operating model landed

Do not reopen these phases unless there is a concrete bug, mismatch, or regression.

---

## Most Recent Relevant Commits

These are the most important recent milestone commits that define the current state:

### Phase 4 monitor-window operating model
- `efe137b` Add monitor window goals for multi-span gating
- `ac64d6b` Add monitor window matches and stats
- `d7e457d` Expose monitor window summaries in runtime debug
- `c520423` Add monitor window priority bias
- `838c941` Add window-based record/decode actions
- `962cf06` Add window zone biases for record/decode actions
- `402a772` Consolidate monitor window summary in debug outputs
- `8545b62` Add per-window outcome summaries for admission pressure
- `65b9845` test: cover overlapping monitor windows
- `efe3215` docs: capture Phase-4 monitor-window status

### Phase 3 runtime intelligence milestone
- `4ebd51d` Add priority tiers and admission classes to pipeline
- `18b179b` Expose admission metadata in debug output and tests
- `ba9adca` Add budget preference and pressure modeling
- `7a75367` Expose arbitration pressure summary
- `592fa03` pipeline: deepen hold/displacement semantics
- `30a5d11` pipeline: apply intent holds and family tier floors
- `1f5d4ab` pipeline: add intent and family priority tests
- `822829c` Add conservative budget rebalance layer
- `da5fa22` Update Phase-3 Wave 3E status

### Documentation / stable defaults
- `fd718d5` docs: finalize phase milestones and ukf test config

If resuming after a long pause, inspect the current `git log` around these commits first.

---

## Current Important Files / Subsystems

### Long-term guidance
- `ROADMAP.md` - durable roadmap across phases
- `STATE.md` - practical resume/handoff state
- `PLAN.md` - project plan / narrative (may be less pristine than ROADMAP.md)
- `README.md` - user-facing/current feature status

### Config / runtime surface
- `config.yaml` - current committed default config
- `config.autosave.yaml` - local autosave; intentionally not tracked in git
- `internal/config/config.go`
- `internal/runtime/runtime.go`

### Phase 3 core runtime intelligence
- `internal/pipeline/arbiter.go`
- `internal/pipeline/arbitration.go`
- `internal/pipeline/arbitration_state.go`
- `internal/pipeline/priority.go`
- `internal/pipeline/budget.go`
- `internal/pipeline/pressure.go`
- `internal/pipeline/rebalance.go`
- `internal/pipeline/decision_queue.go`

### Phase 2 surveillance/evidence model
- `internal/pipeline/types.go`
- `internal/pipeline/evidence.go`
- `internal/pipeline/candidate_fusion.go`
- `internal/pipeline/scheduler.go`
- `cmd/sdrd/pipeline_runtime.go`

### Phase 4 monitor-window model
- `internal/pipeline/monitor_rules.go`
- `cmd/sdrd/window_summary.go`
- `cmd/sdrd/level_summary.go`
- `cmd/sdrd/http_handlers.go`
- `cmd/sdrd/decision_compact.go`
- `cmd/sdrd/dsp_loop.go`

---

## Current Default Operator / Test Posture

The repo was intentionally switched to an FM/UKW-friendly default test posture.

### Current committed config defaults
- band: `87.5-108.0 MHz`
- center: `99.5 MHz`
- sample rate: `2.048 MHz`
- FFT: `4096`
- profile: `wideband-balanced`
- intent: `broadcast-monitoring`
- priorities include `wfm`, `rds`, `broadcast`, `digital`

### Important config note
- `config.yaml` is committed and intended as the stable default reference
- `config.autosave.yaml` is **not** git-tracked and may diverge locally
- if behavior seems odd, compare the active runtime config against `config.yaml`

---

## Working Conventions That Matter

### Codex invocation on Windows
Preferred stable flow:
1. write prompt to `codex_prompt.txt`
2. create/use `run_codex.ps1` containing:
- read prompt file
- pipe to `codex exec --yolo`
3. run with PTY/background from the repo root
4. remove `codex_prompt.txt` and `run_codex.ps1` after the run

This was adopted specifically to avoid PowerShell quoting failures.

### Expectations for coding runs
- before every commit: `go test ./...` and `go build ./cmd/sdrd`
- commit in coherent blocks with clear messages
- push after successful validation
- avoid reopening already-closed phase work without a concrete reason

---

## Known Practical Caveats

- `PLAN.md` has had encoding/character issues in some reads; treat `ROADMAP.md` + `STATE.md` as the cleaner authoritative continuity docs.
- README is generally useful, but `ROADMAP.md`/`STATE.md` are better for architectural continuity.
- `config.autosave.yaml` can become misleading because it is local/autosaved and not tracked.

---

## Recommended Next Entry Point

If resuming technical work after this checkpoint:

### Start with **Phase 5**
Do **not** reopen Phase 1-4 unless there is a concrete bug or regression.

### Recommended Phase 5 direction
Move from monitor windows inside a single capture span toward richer span / operating orchestration:
- span / zone groups
- span-aware resource allocation
- stronger profile-driven operating modes
- retune / scan / dwell semantics where needed

### Avoid jumping ahead prematurely to
- full adaptive QoS engine (Phase 6)
- major GPU/performance re-architecture (Phase 7)
- heavy UX/product polish (Phase 8)

Those should build on Phase 5, not bypass it.

---

## Resume Checklist For A Future Agent

1. Read `ROADMAP.md`
2. Read `STATE.md`
3. Check current `git log` near the commits listed above
4. Inspect `config.yaml`
5. Confirm current repo state with:
- `go test ./...`
- `go build ./cmd/sdrd`
6. Then start Phase 5 planning from the actual repo state

If these steps still match the repo, continuation should be seamless enough even after a hard context reset.

+ 0
- 23
TODO.md Просмотреть файл

@@ -1,23 +0,0 @@
# TODO — SDR Visual Suite

## UI
- [ ] RDS RadioText (RT) Anzeige hinzufügen:
- Overlay: 1 Zeile, sanfter Fade bei Updates, Ellipsis bei Überlänge, optional kleines „RT“-Badge.
- Detail-Panel: 2 Zeilen Auto-Wrap; bei Überlänge Ellipsis + Expand (Modal/Zone) für Volltext.
- Update-Logik: RT nur bei stabilem Text (z. B. 2–3 identische Blöcke), optional „RT · HH:MM“ Timestamp.

## Band Settings Profiles (v1.2)
- [ ] Backend: built-in Profile-Struktur + embedded JSON (6 Profile)
- [ ] Backend: Apply-Helper (shared mit /api/config) inkl. source/dsp/save
- [ ] Backend: Merge-Patch mit Feld-Präsenz (nur explizite Felder anwenden)
- [ ] Backend: DisallowUnknownFields + Config-Validierung → 400
- [ ] Backend: Endpoints GET /api/profiles, POST /api/profiles/apply, POST /api/profiles/undo, GET /api/profiles/suggest
- [ ] Backend: Undo-Snapshot (1 Level) + Active Profile ID (Runtime-State)
- [ ] Optional: Active Profile ID über Neustart persistieren (falls gewünscht)
- [ ] UI: Dropdown + Split-Apply (full/dsp_only) + Undo + Active-Badge
- [ ] UI: Suggest-Toast bei center_hz Wechsel, Dismiss-Schutz (>5 MHz)
- [ ] UX: Loading-Indicator während Profilwechsel (1–3s Reset)
- [ ] Tests: Patch-Semantik, dsp_only (center_hz/gain_db bleiben), Unknown Fields, Suggest-Match

## Notes
- Ab jetzt hier die Todo-Liste führen.

+ 17
- 4
build-gpudemod-dll.ps1 Просмотреть файл

@@ -16,12 +16,25 @@ if (!(Test-Path $outDir)) { New-Item -ItemType Directory -Path $outDir | Out-Nul

Remove-Item $dll,$lib,$exp -Force -ErrorAction SilentlyContinue

$cmd = @"
call "$vcvars" && "$nvcc" -shared "$src" -o "$dll" -cudart=hybrid -Xcompiler "/MD" -arch=sm_75 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90
$bat = Join-Path $env:TEMP 'build-gpudemod-dll.bat'
$batContent = @"
@echo off
call "$vcvars"
if errorlevel 1 exit /b %errorlevel%
"$nvcc" -shared "$src" -o "$dll" -cudart=hybrid -Xcompiler "/MD" -arch=sm_75 ^
-gencode arch=compute_75,code=sm_75 ^
-gencode arch=compute_80,code=sm_80 ^
-gencode arch=compute_86,code=sm_86 ^
-gencode arch=compute_89,code=sm_89 ^
-gencode arch=compute_90,code=sm_90
exit /b %errorlevel%
"@
Set-Content -Path $bat -Value $batContent -Encoding ASCII

Write-Host 'Building gpudemod CUDA DLL...' -ForegroundColor Cyan
cmd.exe /c $cmd
if ($LASTEXITCODE -ne 0) { throw 'gpudemod DLL build failed' }
cmd.exe /c ""$bat""
$exitCode = $LASTEXITCODE
Remove-Item $bat -Force -ErrorAction SilentlyContinue
if ($exitCode -ne 0) { throw 'gpudemod DLL build failed' }

Write-Host "Built: $dll" -ForegroundColor Green

+ 6
- 0
build-sdrplay.ps1 Просмотреть файл

@@ -21,10 +21,13 @@ if (Test-Path $sdrplayBin) { $env:PATH = "$sdrplayBin;" + $env:PATH }
# CUDA runtime / cuFFT
$cudaInc = 'C:\CUDA\include'
$cudaBin = 'C:\CUDA\bin'
$cudaBinX64 = 'C:\CUDA\bin\x64'
if (-not (Test-Path $cudaInc)) { $cudaInc = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\include' }
if (-not (Test-Path $cudaBin)) { $cudaBin = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\bin' }
if (-not (Test-Path $cudaBinX64)) { $cudaBinX64 = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\bin\x64' }
$cudaMingw = Join-Path $PSScriptRoot 'cuda-mingw'
if (Test-Path $cudaInc) { $env:CGO_CFLAGS = "$env:CGO_CFLAGS -I$cudaInc" }
if (Test-Path $cudaBinX64) { $env:PATH = "$cudaBinX64;" + $env:PATH }
if (Test-Path $cudaBin) { $env:PATH = "$cudaBin;" + $env:PATH }
if (Test-Path $cudaMingw) { $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaMingw -lcudart64_13 -lcufft64_12 -lkernel32" }

@@ -68,8 +71,11 @@ if ($dllSrc) {
}

$cudartCandidates = @(
(Join-Path $cudaBinX64 'cudart64_13.dll'),
(Join-Path $cudaBin 'cudart64_13.dll'),
'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin\x64\cudart64_13.dll',
'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin\cudart64_13.dll',
'C:\CUDA\bin\x64\cudart64_13.dll',
'C:\CUDA\bin\cudart64_13.dll'
)
$cudartSrc = $cudartCandidates | Where-Object { $_ -and (Test-Path $_) } | Select-Object -First 1


+ 101
- 3
cmd/sdrd/dsp_loop.go Просмотреть файл

@@ -3,6 +3,7 @@ package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"runtime/debug"
@@ -16,15 +17,16 @@ import (
"sdr-wideband-suite/internal/logging"
"sdr-wideband-suite/internal/pipeline"
"sdr-wideband-suite/internal/recorder"
"sdr-wideband-suite/internal/telemetry"
)

func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, eventMu *sync.RWMutex, updates <-chan dspUpdate, gpuState *gpuStatus, rec *recorder.Manager, sigSnap *signalSnapshot, extractMgr *extractionManager, phaseSnap *phaseSnapshot) {
func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, eventMu *sync.RWMutex, updates <-chan dspUpdate, gpuState *gpuStatus, rec *recorder.Manager, sigSnap *signalSnapshot, extractMgr *extractionManager, phaseSnap *phaseSnapshot, coll *telemetry.Collector) {
defer func() {
if r := recover(); r != nil {
log.Printf("FATAL: runDSP goroutine panic: %v\n%s", r, debug.Stack())
}
}()
rt := newDSPRuntime(cfg, det, window, gpuState)
rt := newDSPRuntime(cfg, det, window, gpuState, coll)
ticker := time.NewTicker(cfg.FrameInterval())
defer ticker.Stop()
logTicker := time.NewTicker(5 * time.Second)
@@ -33,6 +35,9 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
dcBlocker := dsp.NewDCBlocker(0.995)
state := &phaseState{}
var frameID uint64
prevDisplayed := map[int64]detector.Signal{}
lastSourceDrops := uint64(0)
lastSourceResets := uint64(0)
for {
select {
case <-ctx.Done():
@@ -40,11 +45,28 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
case <-logTicker.C:
st := srcMgr.Stats()
log.Printf("stats: buf=%d drop=%d reset=%d last=%dms", st.BufferSamples, st.Dropped, st.Resets, st.LastSampleAgoMs)
if coll != nil {
coll.SetGauge("source.buffer_samples", float64(st.BufferSamples), nil)
coll.SetGauge("source.last_sample_ago_ms", float64(st.LastSampleAgoMs), nil)
if st.Dropped > lastSourceDrops {
coll.IncCounter("source.drop.count", float64(st.Dropped-lastSourceDrops), nil)
}
if st.Resets > lastSourceResets {
coll.IncCounter("source.reset.count", float64(st.Resets-lastSourceResets), nil)
coll.Event("source_reset", "warn", "source reset observed", nil, map[string]any{"resets": st.Resets})
}
lastSourceDrops = st.Dropped
lastSourceResets = st.Resets
}
case upd := <-updates:
rt.applyUpdate(upd, srcMgr, rec, gpuState)
dcBlocker.Reset()
ticker.Reset(rt.cfg.FrameInterval())
if coll != nil {
coll.IncCounter("dsp.update.apply", 1, nil)
}
case <-ticker.C:
frameStart := time.Now()
frameID++
art, err := rt.captureSpectrum(srcMgr, rec, dcBlocker, gpuState)
if err != nil {
@@ -61,8 +83,19 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
rt.gotSamples = true
}
logging.Debug("trace", "capture_done", "trace", frameID, "allIQ", len(art.allIQ), "detailIQ", len(art.detailIQ))
if coll != nil {
coll.Observe("stage.capture.duration_ms", float64(time.Since(frameStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID)))
}
survStart := time.Now()
state.surveillance = rt.buildSurveillanceResult(art)
if coll != nil {
coll.Observe("stage.surveillance.duration_ms", float64(time.Since(survStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID)))
}
refineStart := time.Now()
state.refinement = rt.runRefinement(art, state.surveillance, extractMgr, rec)
if coll != nil {
coll.Observe("stage.refinement.duration_ms", float64(time.Since(refineStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID)))
}
finished := state.surveillance.Finished
thresholds := state.surveillance.Thresholds
noiseFloor := state.surveillance.NoiseFloor
@@ -75,11 +108,44 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
streamSignals = stableSignals
}
if rec != nil && len(art.allIQ) > 0 {
if art.streamDropped {
rt.streamOverlap = &streamIQOverlap{}
for k := range rt.streamPhaseState {
rt.streamPhaseState[k].phase = 0
}
resetStreamingOracleRunner()
rec.ResetStreams()
logging.Warn("gap", "iq_dropped", "msg", "buffer bloat caused extraction drop; overlap reset")
if coll != nil {
coll.IncCounter("capture.stream_reset", 1, nil)
coll.Event("iq_dropped", "warn", "stream overlap reset after dropped IQ", nil, map[string]any{"frame_id": frameID})
}
}
if rt.cfg.Recorder.DebugLiveAudio {
log.Printf("LIVEAUDIO DSP: detailIQ=%d displaySignals=%d streamSignals=%d stableSignals=%d allIQ=%d", len(art.detailIQ), len(displaySignals), len(streamSignals), len(stableSignals), len(art.allIQ))
}
aqCfg := extractionConfig{firTaps: rt.cfg.Recorder.ExtractionTaps, bwMult: rt.cfg.Recorder.ExtractionBwMult}
streamSnips, streamRates := extractForStreaming(extractMgr, art.allIQ, rt.cfg.SampleRate, rt.cfg.CenterHz, streamSignals, rt.streamPhaseState, rt.streamOverlap, aqCfg)
extractStart := time.Now()
streamSnips, streamRates := extractForStreaming(extractMgr, art.allIQ, rt.cfg.SampleRate, rt.cfg.CenterHz, streamSignals, rt.streamPhaseState, rt.streamOverlap, aqCfg, rt.telemetry)
if coll != nil {
coll.Observe("stage.extract_stream.duration_ms", float64(time.Since(extractStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID)))
coll.SetGauge("stage.extract_stream.signals", float64(len(streamSignals)), nil)
if coll.ShouldSampleHeavy() {
for i := range streamSnips {
if i >= len(streamSignals) {
break
}
tags := telemetry.TagsFromPairs(
"signal_id", fmt.Sprintf("%d", streamSignals[i].ID),
"stage", "extract_stream",
)
coll.SetGauge("iq.stage.extract.length", float64(len(streamSnips[i])), tags)
if len(streamSnips[i]) > 0 {
observeIQStats(coll, "extract_stream", streamSnips[i], tags)
}
}
}
}
nonEmpty := 0
minLen := 0
maxLen := 0
@@ -127,10 +193,18 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
log.Printf("LIVEAUDIO DSP: feedItems=%d", len(items))
}
if len(items) > 0 {
feedStart := time.Now()
rec.FeedSnippets(items, frameID)
if coll != nil {
coll.Observe("stage.feed_enqueue.duration_ms", float64(time.Since(feedStart).Microseconds())/1000.0, telemetry.TagsFromPairs("frame_id", fmt.Sprintf("%d", frameID)))
coll.SetGauge("stage.feed.items", float64(len(items)), nil)
}
logging.Debug("trace", "feed", "trace", frameID, "items", len(items), "signals", len(streamSignals), "allIQ", len(art.allIQ))
} else {
logging.Warn("gap", "feed_empty", "signals", len(streamSignals), "trace", frameID)
if coll != nil {
coll.IncCounter("stage.feed.empty", 1, nil)
}
}
}
rt.maintenance(displaySignals, rec)
@@ -156,6 +230,27 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
if sigSnap != nil {
sigSnap.set(displaySignals)
}
if coll != nil {
coll.SetGauge("signals.display.count", float64(len(displaySignals)), nil)
current := make(map[int64]detector.Signal, len(displaySignals))
for _, s := range displaySignals {
current[s.ID] = s
if _, ok := prevDisplayed[s.ID]; !ok {
coll.Event("signal_create", "info", "signal entered display set", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", s.ID)), map[string]any{
"center_hz": s.CenterHz,
"bw_hz": s.BWHz,
})
}
}
for id, prev := range prevDisplayed {
if _, ok := current[id]; !ok {
coll.Event("signal_remove", "info", "signal left display set", telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", id)), map[string]any{
"center_hz": prev.CenterHz,
})
}
}
prevDisplayed = current
}
eventMu.Lock()
for _, ev := range finished {
_ = enc.Encode(ev)
@@ -244,6 +339,9 @@ func runDSP(ctx context.Context, srcMgr *sourceManager, cfg config.Config, det *
debugInfo.Refinement = refinementDebug
}
h.broadcast(SpectrumFrame{Timestamp: art.now.UnixMilli(), CenterHz: rt.cfg.CenterHz, SampleHz: rt.cfg.SampleRate, FFTSize: rt.cfg.FFTSize, Spectrum: art.surveillanceSpectrum, Signals: displaySignals, Debug: debugInfo})
if coll != nil {
coll.Observe("dsp.frame.duration_ms", float64(time.Since(frameStart).Microseconds())/1000.0, nil)
}
}
}
}

+ 434
- 3
cmd/sdrd/helpers.go Просмотреть файл

@@ -1,10 +1,13 @@
package main

import (
"fmt"
"log"
"math"
"os"
"sort"
"strconv"
"strings"
"time"

"sdr-wideband-suite/internal/config"
@@ -12,6 +15,7 @@ import (
"sdr-wideband-suite/internal/detector"
"sdr-wideband-suite/internal/dsp"
"sdr-wideband-suite/internal/logging"
"sdr-wideband-suite/internal/telemetry"
)

func mustParseDuration(raw string, fallback time.Duration) time.Duration {
@@ -227,15 +231,30 @@ type extractionConfig struct {

const streamOverlapLen = 512 // must be >= FIR tap count with margin
const (
wfmStreamOutRate = 500000
wfmStreamOutRate = 512000
wfmStreamMinBW = 250000
)

// forceCPUStreamExtract reports whether the SDR_FORCE_CPU_STREAM_EXTRACT
// environment variable requests the CPU-only streaming extraction path.
// An unset, blank, or unparsable value leaves the flag off.
var forceCPUStreamExtract = func() bool {
	val := strings.TrimSpace(os.Getenv("SDR_FORCE_CPU_STREAM_EXTRACT"))
	if val == "" {
		return false
	}
	parsed, err := strconv.ParseBool(val)
	return err == nil && parsed
}()

// extractForStreaming performs GPU-accelerated extraction with:
// - Per-signal phase-continuous FreqShift (via PhaseStart in ExtractJob)
// - IQ overlap prepended to allIQ so FIR kernel has real data in halo
//
// Returns extracted snippets with overlap trimmed, and updates phase state.
// extractForStreaming is the current legacy production path.
// It still relies on overlap-prepend + trim semantics and is intentionally
// kept separate from the new streaming refactor/oracle path under development.
func extractForStreaming(
extractMgr *extractionManager,
allIQ []complex64,
@@ -245,7 +264,57 @@ func extractForStreaming(
phaseState map[int64]*streamExtractState,
overlap *streamIQOverlap,
aqCfg extractionConfig,
coll *telemetry.Collector,
) ([][]complex64, []int) {
if useStreamingProductionPath {
out, rates, err := extractForStreamingProduction(extractMgr, allIQ, sampleRate, centerHz, signals, aqCfg, coll)
if err == nil {
logging.Debug("extract", "path_active", "path", "streaming_production", "signals", len(signals), "allIQ", len(allIQ))
if coll != nil {
coll.IncCounter("extract.path.streaming_production", 1, nil)
}
return out, rates
}
// CRITICAL: the streaming production path failed — log WHY before falling through
log.Printf("EXTRACT PATH FALLTHROUGH: streaming production failed: %v — using legacy overlap+trim", err)
logging.Warn("extract", "streaming_production_fallthrough",
"err", err.Error(),
"signals", len(signals),
"allIQ", len(allIQ),
"sampleRate", sampleRate,
)
if coll != nil {
coll.IncCounter("extract.path.streaming_production_failed", 1, nil)
coll.Event("extraction_path_fallthrough", "warn",
"streaming production path failed, using legacy overlap+trim", nil,
map[string]any{
"error": err.Error(),
"signals": len(signals),
"allIQ_len": len(allIQ),
"sampleRate": sampleRate,
})
}
}
if useStreamingOraclePath {
out, rates, err := extractForStreamingOracle(allIQ, sampleRate, centerHz, signals, aqCfg, coll)
if err == nil {
logging.Debug("extract", "path_active", "path", "streaming_oracle", "signals", len(signals))
if coll != nil {
coll.IncCounter("extract.path.streaming_oracle", 1, nil)
}
return out, rates
}
log.Printf("EXTRACT PATH FALLTHROUGH: streaming oracle failed: %v", err)
logging.Warn("extract", "streaming_oracle_fallthrough", "err", err.Error())
if coll != nil {
coll.IncCounter("extract.path.streaming_oracle_failed", 1, nil)
}
}
// If we reach here, the legacy overlap+trim path is running
logging.Warn("extract", "path_active", "path", "legacy_overlap_trim", "signals", len(signals), "allIQ", len(allIQ))
if coll != nil {
coll.IncCounter("extract.path.legacy_overlap_trim", 1, nil)
}
out := make([][]complex64, len(signals))
rates := make([]int, len(signals))
if len(allIQ) == 0 || sampleRate <= 0 || len(signals) == 0 {
@@ -286,6 +355,18 @@ func extractForStreaming(
bwMult = 1.0
}

if coll != nil {
coll.SetGauge("iq.extract.input.length", float64(len(allIQ)), nil)
coll.SetGauge("iq.extract.input.overlap_length", float64(overlapLen), nil)
headMean, tailMean, boundaryScore, _ := boundaryMetrics(overlap.tail, allIQ, 32)
coll.SetGauge("iq.extract.input.head_mean_mag", headMean, nil)
coll.SetGauge("iq.extract.input.prev_tail_mean_mag", tailMean, nil)
coll.Observe("iq.extract.input.discontinuity_score", boundaryScore, nil)
}

rawBoundary := make(map[int64]boundaryProbeState, len(signals))
trimmedBoundary := make(map[int64]boundaryProbeState, len(signals))

// Build jobs with per-signal phase
jobs := make([]gpudemod.ExtractJob, len(signals))
for i, sig := range signals {
@@ -323,11 +404,45 @@ func extractForStreaming(
OutRate: jobOutRate,
PhaseStart: gpuPhaseStart,
}
if coll != nil {
tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", sig.ID), "path", "gpu")
inputHead := probeHead(gpuIQ, 16, 1e-6)
coll.SetGauge("iq.extract.input_head.zero_count", float64(inputHead.zeroCount), tags)
coll.SetGauge("iq.extract.input_head.first_nonzero_index", float64(inputHead.firstNonZeroIndex), tags)
coll.SetGauge("iq.extract.input_head.max_step", inputHead.maxStep, tags)
coll.Event("extract_input_head_probe", "info", "extractor input head probe", tags, map[string]any{
"mags": inputHead.mags,
"zero_count": inputHead.zeroCount,
"first_nonzero_index": inputHead.firstNonZeroIndex,
"head_max_step": inputHead.maxStep,
"center_offset_hz": jobs[i].OffsetHz,
"bandwidth_hz": bw,
"out_rate": jobOutRate,
"trim_samples": (overlapLen + int(math.Max(1, math.Round(float64(sampleRate)/float64(jobOutRate)))) - 1) / int(math.Max(1, math.Round(float64(sampleRate)/float64(jobOutRate)))),
})
}
}

// Try GPU BatchRunner with phase
runner := extractMgr.get(len(gpuIQ), sampleRate)
// Try GPU BatchRunner with phase unless CPU-only debug is forced.
var runner *gpudemod.BatchRunner
if forceCPUStreamExtract {
logging.Warn("boundary", "force_cpu_stream_extract", "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "signals", len(signals))
} else {
runner = extractMgr.get(len(gpuIQ), sampleRate)
}
if runner != nil {
if coll != nil && len(gpuIQ) > 0 {
inputProbe := probeHead(gpuIQ, 16, 1e-6)
coll.Event("gpu_kernel_input_head_probe", "info", "gpu kernel input head probe", nil, map[string]any{
"mags": inputProbe.mags,
"zero_count": inputProbe.zeroCount,
"first_nonzero_index": inputProbe.firstNonZeroIndex,
"head_max_step": inputProbe.maxStep,
"gpuIQ_len": len(gpuIQ),
"sample_rate": sampleRate,
"signals": len(signals),
})
}
results, err := runner.ShiftFilterDecimateBatchWithPhase(gpuIQ, jobs)
if err == nil && len(results) == len(signals) {
for i, res := range results {
@@ -356,9 +471,95 @@ func extractForStreaming(

// Trim overlap from output
iq := res.IQ
rawLen := len(iq)
if trimSamples > 0 && trimSamples < len(iq) {
iq = iq[trimSamples:]
}
if i == 0 {
logging.Debug("boundary", "extract_trim", "path", "gpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(iq), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID)
logExtractorHeadComparison(signals[i].ID, "gpu", overlapLen, res.IQ, trimSamples, iq)
}
if coll != nil {
tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", signals[i].ID), "path", "gpu")
kernelProbe := probeHead(res.IQ, 16, 1e-6)
coll.Event("gpu_kernel_output_head_probe", "info", "gpu kernel output head probe", tags, map[string]any{
"mags": kernelProbe.mags,
"zero_count": kernelProbe.zeroCount,
"first_nonzero_index": kernelProbe.firstNonZeroIndex,
"head_max_step": kernelProbe.maxStep,
"raw_len": rawLen,
"out_rate": outRate,
"trim_samples": trimSamples,
})
stats := computeIQHeadStats(iq, 64)
coll.SetGauge("iq.extract.output.length", float64(len(iq)), tags)
coll.Observe("iq.extract.output.head_mean_mag", stats.meanMag, tags)
coll.Observe("iq.extract.output.head_min_mag", stats.minMag, tags)
coll.Observe("iq.extract.output.head_max_step", stats.maxStep, tags)
coll.Observe("iq.extract.output.head_p95_step", stats.p95Step, tags)
coll.Observe("iq.extract.output.head_tail_ratio", stats.headTail, tags)
coll.SetGauge("iq.extract.output.head_low_magnitude_count", float64(stats.lowMag), tags)
coll.SetGauge("iq.extract.raw.length", float64(rawLen), tags)
coll.SetGauge("iq.extract.trim.trim_samples", float64(trimSamples), tags)
if rawLen > 0 {
coll.SetGauge("iq.extract.raw.head_mag", math.Hypot(float64(real(res.IQ[0])), float64(imag(res.IQ[0]))), tags)
coll.SetGauge("iq.extract.raw.tail_mag", math.Hypot(float64(real(res.IQ[rawLen-1])), float64(imag(res.IQ[rawLen-1]))), tags)
rawHead := probeHead(res.IQ, 16, 1e-6)
coll.SetGauge("iq.extract.raw.head_zero_count", float64(rawHead.zeroCount), tags)
coll.SetGauge("iq.extract.raw.first_nonzero_index", float64(rawHead.firstNonZeroIndex), tags)
coll.SetGauge("iq.extract.raw.head_max_step", rawHead.maxStep, tags)
coll.Event("extract_raw_head_probe", "info", "raw extractor head probe", tags, map[string]any{
"mags": rawHead.mags,
"zero_count": rawHead.zeroCount,
"first_nonzero_index": rawHead.firstNonZeroIndex,
"head_max_step": rawHead.maxStep,
"trim_samples": trimSamples,
})
}
if len(iq) > 0 {
coll.SetGauge("iq.extract.trimmed.head_mag", math.Hypot(float64(real(iq[0])), float64(imag(iq[0]))), tags)
coll.SetGauge("iq.extract.trimmed.tail_mag", math.Hypot(float64(real(iq[len(iq)-1])), float64(imag(iq[len(iq)-1]))), tags)
trimmedHead := probeHead(iq, 16, 1e-6)
coll.SetGauge("iq.extract.trimmed.head_zero_count", float64(trimmedHead.zeroCount), tags)
coll.SetGauge("iq.extract.trimmed.first_nonzero_index", float64(trimmedHead.firstNonZeroIndex), tags)
coll.SetGauge("iq.extract.trimmed.head_max_step", trimmedHead.maxStep, tags)
coll.Event("extract_trimmed_head_probe", "info", "trimmed extractor head probe", tags, map[string]any{
"mags": trimmedHead.mags,
"zero_count": trimmedHead.zeroCount,
"first_nonzero_index": trimmedHead.firstNonZeroIndex,
"head_max_step": trimmedHead.maxStep,
"trim_samples": trimSamples,
})
}
if rb := rawBoundary[signals[i].ID]; rb.set && rawLen > 0 {
prevMag := math.Hypot(float64(real(rb.last)), float64(imag(rb.last)))
currMag := math.Hypot(float64(real(res.IQ[0])), float64(imag(res.IQ[0])))
coll.SetGauge("iq.extract.raw.boundary.prev_tail_mag", prevMag, tags)
coll.SetGauge("iq.extract.raw.boundary.curr_head_mag", currMag, tags)
coll.Event("extract_raw_boundary", "info", "raw extractor boundary", tags, map[string]any{
"delta_mag": math.Abs(currMag - prevMag),
"trim_samples": trimSamples,
"raw_len": rawLen,
})
}
if tb := trimmedBoundary[signals[i].ID]; tb.set && len(iq) > 0 {
prevMag := math.Hypot(float64(real(tb.last)), float64(imag(tb.last)))
currMag := math.Hypot(float64(real(iq[0])), float64(imag(iq[0])))
coll.SetGauge("iq.extract.trimmed.boundary.prev_tail_mag", prevMag, tags)
coll.SetGauge("iq.extract.trimmed.boundary.curr_head_mag", currMag, tags)
coll.Event("extract_trimmed_boundary", "info", "trimmed extractor boundary", tags, map[string]any{
"delta_mag": math.Abs(currMag - prevMag),
"trim_samples": trimSamples,
"out_len": len(iq),
})
}
}
if rawLen > 0 {
rawBoundary[signals[i].ID] = boundaryProbeState{last: res.IQ[rawLen-1], set: true}
}
if len(iq) > 0 {
trimmedBoundary[signals[i].ID] = boundaryProbeState{last: iq[len(iq)-1], set: true}
}
out[i] = iq
rates[i] = res.Rate
}
@@ -424,10 +625,240 @@ func extractForStreaming(
if i == 0 {
logging.Debug("extract", "cpu_result", "outRate", outRate, "decim", decim, "trim", trimSamples)
}
rawIQ := decimated
rawLen := len(rawIQ)
if trimSamples > 0 && trimSamples < len(decimated) {
decimated = decimated[trimSamples:]
}
if i == 0 {
logging.Debug("boundary", "extract_trim", "path", "cpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(decimated), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID)
logExtractorHeadComparison(signals[i].ID, "cpu", overlapLen, decimated, trimSamples, decimated)
}
if coll != nil {
tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", signals[i].ID), "path", "cpu")
stats := computeIQHeadStats(decimated, 64)
coll.SetGauge("iq.extract.output.length", float64(len(decimated)), tags)
coll.Observe("iq.extract.output.head_mean_mag", stats.meanMag, tags)
coll.Observe("iq.extract.output.head_min_mag", stats.minMag, tags)
coll.Observe("iq.extract.output.head_max_step", stats.maxStep, tags)
coll.Observe("iq.extract.output.head_p95_step", stats.p95Step, tags)
coll.Observe("iq.extract.output.head_tail_ratio", stats.headTail, tags)
coll.SetGauge("iq.extract.output.head_low_magnitude_count", float64(stats.lowMag), tags)
coll.SetGauge("iq.extract.raw.length", float64(rawLen), tags)
coll.SetGauge("iq.extract.trim.trim_samples", float64(trimSamples), tags)
if rb := rawBoundary[signals[i].ID]; rb.set && rawLen > 0 {
observeBoundarySample(coll, "iq.extract.raw.boundary", tags, rb.last, rawIQ[0])
}
if tb := trimmedBoundary[signals[i].ID]; tb.set && len(decimated) > 0 {
observeBoundarySample(coll, "iq.extract.trimmed.boundary", tags, tb.last, decimated[0])
}
}
if rawLen > 0 {
rawBoundary[signals[i].ID] = boundaryProbeState{last: rawIQ[rawLen-1], set: true}
}
if len(decimated) > 0 {
trimmedBoundary[signals[i].ID] = boundaryProbeState{last: decimated[len(decimated)-1], set: true}
}
out[i] = decimated
}
return out, rates
}

// iqHeadStats holds magnitude and phase-step statistics gathered by
// computeIQHeadStats over a window at the head of an IQ buffer. It is used
// by the boundary/discontinuity debug instrumentation.
type iqHeadStats struct {
	length int // number of samples actually inspected
	minMag float64 // smallest sample magnitude in the window
	maxMag float64 // largest sample magnitude in the window
	meanMag float64 // mean sample magnitude over the window
	lowMag int // count of samples with magnitude below the 0.05 floor
	maxStep float64 // largest absolute phase step between adjacent samples
	maxStepIdx int // index of the first sample of the largest-step pair (-1 if none)
	p95Step float64 // 95th-percentile phase step across the window
	headTail float64 // mean magnitude of first 16 samples / mean of last 16 (0 when unavailable)
	headMinIdx int // index of the minimum-magnitude sample (-1 when empty)
	stepSamples []float64 // per-adjacent-pair phase steps, in sample order
}

// boundaryProbeState remembers the final sample emitted for a signal so the
// next frame's first sample can be compared against it at the chunk seam.
type boundaryProbeState struct {
	last complex64 // last sample seen for this signal
	set bool // true once last holds a real sample (distinguishes from zero value)
}

// headProbe summarizes the leading samples of an IQ buffer: how many are
// effectively zero, where the first non-zero sample appears, and the largest
// phase jump observed between consecutive samples in the probed window.
type headProbe struct {
	zeroCount         int
	firstNonZeroIndex int
	maxStep           float64
	mags              []float64
}

// probeHead inspects up to n leading samples of the buffer. A sample whose
// magnitude is at or below zeroThreshold counts as zero; the first sample
// above the threshold fixes firstNonZeroIndex (-1 if none was seen).
// maxStep is the largest absolute phase difference between adjacent samples.
// A non-positive zeroThreshold falls back to 1e-6.
func probeHead(samples []complex64, n int, zeroThreshold float64) headProbe {
	if n <= 0 || len(samples) == 0 {
		return headProbe{firstNonZeroIndex: -1}
	}
	n = min(n, len(samples))
	if zeroThreshold <= 0 {
		zeroThreshold = 1e-6
	}
	probe := headProbe{firstNonZeroIndex: -1, mags: make([]float64, 0, n)}
	for idx, s := range samples[:n] {
		mag := math.Hypot(float64(real(s)), float64(imag(s)))
		probe.mags = append(probe.mags, mag)
		switch {
		case mag <= zeroThreshold:
			probe.zeroCount++
		case probe.firstNonZeroIndex < 0:
			probe.firstNonZeroIndex = idx
		}
		if idx == 0 {
			continue
		}
		prev := samples[idx-1]
		// Phase step via atan2 of cross/dot products of adjacent samples.
		cross := float64(real(prev))*float64(imag(s)) - float64(imag(prev))*float64(real(s))
		dot := float64(real(prev))*float64(real(s)) + float64(imag(prev))*float64(imag(s))
		if step := math.Abs(math.Atan2(cross, dot)); step > probe.maxStep {
			probe.maxStep = step
		}
	}
	return probe
}

// observeBoundarySample records continuity metrics for the seam between the
// previous chunk's last sample and the current chunk's first sample under
// metricPrefix: magnitude delta, phase delta, squared Euclidean distance,
// and a combined magnitude+phase discontinuity score.
func observeBoundarySample(coll *telemetry.Collector, metricPrefix string, tags map[string]string, prev complex64, curr complex64) {
	magPrev := math.Hypot(float64(real(prev)), float64(imag(prev)))
	magCurr := math.Hypot(float64(real(curr)), float64(imag(curr)))
	magDelta := math.Abs(magCurr - magPrev)
	// Phase delta via atan2 of cross/dot products of the two samples.
	cross := float64(real(prev))*float64(imag(curr)) - float64(imag(prev))*float64(real(curr))
	dot := float64(real(prev))*float64(real(curr)) + float64(imag(prev))*float64(imag(curr))
	phaseDelta := math.Abs(math.Atan2(cross, dot))
	diff := curr - prev
	dist2 := float64(real(diff))*float64(real(diff)) + float64(imag(diff))*float64(imag(diff))
	coll.Observe(metricPrefix+".delta_mag", magDelta, tags)
	coll.Observe(metricPrefix+".delta_phase", phaseDelta, tags)
	coll.Observe(metricPrefix+".d2", dist2, tags)
	coll.Observe(metricPrefix+".discontinuity_score", magDelta+phaseDelta, tags)
}

// computeIQHeadStats gathers magnitude and phase-step statistics over the
// first headLen samples of iq (the whole buffer when headLen <= 0 or exceeds
// its length). It tracks min/max/mean magnitude, the count of samples below
// a 0.05 magnitude floor, the largest and 95th-percentile phase step between
// neighbours, and the ratio of the mean magnitude of the first 16 samples to
// that of the last 16 within the window.
func computeIQHeadStats(iq []complex64, headLen int) iqHeadStats {
	stats := iqHeadStats{minMag: math.MaxFloat64, headMinIdx: -1, maxStepIdx: -1}
	if len(iq) == 0 {
		// Report 0, not MaxFloat64, as the minimum of an empty buffer.
		stats.minMag = 0
		return stats
	}
	window := len(iq)
	if headLen > 0 && headLen < window {
		window = headLen
	}
	stats.length = window
	stats.stepSamples = make([]float64, 0, max(0, window-1))
	var magSum, headSum, tailSum float64
	tailCount := 0
	headSpan := min(16, window)
	tailStart := max(0, window-16)
	for idx := 0; idx < window; idx++ {
		s := iq[idx]
		mag := math.Hypot(float64(real(s)), float64(imag(s)))
		if mag < stats.minMag {
			stats.minMag = mag
			stats.headMinIdx = idx
		}
		if mag > stats.maxMag {
			stats.maxMag = mag
		}
		magSum += mag
		if mag < 0.05 {
			stats.lowMag++
		}
		if idx < headSpan {
			headSum += mag
		}
		if idx >= tailStart {
			tailSum += mag
			tailCount++
		}
		if idx == 0 {
			continue
		}
		prev := iq[idx-1]
		// Phase step via atan2 of cross/dot products of adjacent samples.
		cross := float64(real(prev))*float64(imag(s)) - float64(imag(prev))*float64(real(s))
		dot := float64(real(prev))*float64(real(s)) + float64(imag(prev))*float64(imag(s))
		step := math.Abs(math.Atan2(cross, dot))
		if step > stats.maxStep {
			stats.maxStep = step
			stats.maxStepIdx = idx - 1
		}
		stats.stepSamples = append(stats.stepSamples, step)
	}
	stats.meanMag = magSum / float64(window)
	if len(stats.stepSamples) == 0 {
		// Single-sample window: no adjacent pairs, fall back to maxStep (0).
		stats.p95Step = stats.maxStep
	} else {
		ordered := append([]float64(nil), stats.stepSamples...)
		sort.Float64s(ordered)
		stats.p95Step = ordered[int(float64(len(ordered)-1)*0.95)]
	}
	if headSum > 0 && tailCount > 0 {
		headMean := headSum / float64(headSpan)
		if tailMean := tailSum / float64(tailCount); tailMean > 0 {
			stats.headTail = headMean / tailMean
		}
	}
	return stats
}

// observeIQStats publishes whole-buffer magnitude and phase-step statistics
// for one pipeline stage under a "stage"-tagged metric set. It is a no-op
// when the collector is nil or the buffer is empty.
func observeIQStats(coll *telemetry.Collector, stage string, iq []complex64, tags telemetry.Tags) {
	if coll == nil || len(iq) == 0 {
		return
	}
	st := computeIQHeadStats(iq, len(iq))
	withStage := telemetry.TagsWith(tags, "stage", stage)
	coll.Observe("iq.magnitude.min", st.minMag, withStage)
	coll.Observe("iq.magnitude.max", st.maxMag, withStage)
	coll.Observe("iq.magnitude.mean", st.meanMag, withStage)
	coll.Observe("iq.phase_step.max", st.maxStep, withStage)
	coll.Observe("iq.phase_step.p95", st.p95Step, withStage)
	coll.Observe("iq.low_magnitude.count", float64(st.lowMag), withStage)
	coll.SetGauge("iq.length", float64(st.length), withStage)
}

// logExtractorHeadComparison emits debug logs comparing head statistics of an
// extractor's raw output against its overlap-trimmed output, then re-probes
// the trimmed head at a few small sample offsets. Used while chasing audio
// discontinuities at chunk seams.
func logExtractorHeadComparison(signalID int64, path string, overlapLen int, raw []complex64, trimSamples int, out []complex64) {
	rawStats := computeIQHeadStats(raw, 96)
	outStats := computeIQHeadStats(out, 96)
	logging.Debug("boundary", "extract_head_compare",
		"signal", signalID,
		"path", path,
		"raw_len", len(raw),
		"trim", trimSamples,
		"out_len", len(out),
		"overlap_len", overlapLen,
		"raw_min_mag", rawStats.minMag,
		"raw_min_idx", rawStats.headMinIdx,
		"raw_max_step", rawStats.maxStep,
		"raw_max_step_idx", rawStats.maxStepIdx,
		"raw_head_tail", rawStats.headTail,
		"trimmed_min_mag", outStats.minMag,
		"trimmed_min_idx", outStats.headMinIdx,
		"trimmed_max_step", outStats.maxStep,
		"trimmed_max_step_idx", outStats.maxStepIdx,
		"trimmed_head_tail", outStats.headTail,
	)
	for _, offset := range []int{2, 4, 8, 16} {
		// Skip offsets that leave too little data to probe meaningfully.
		if len(out) <= offset+8 {
			continue
		}
		shifted := computeIQHeadStats(out[offset:], 96)
		logging.Debug("boundary", "extract_head_offset_compare",
			"signal", signalID,
			"path", path,
			"offset", offset,
			"base_min_mag", outStats.minMag,
			"base_min_idx", outStats.headMinIdx,
			"base_max_step", outStats.maxStep,
			"base_max_step_idx", outStats.maxStepIdx,
			"offset_min_mag", shifted.minMag,
			"offset_min_idx", shifted.headMinIdx,
			"offset_max_step", shifted.maxStep,
			"offset_max_step_idx", shifted.maxStepIdx,
			"offset_head_tail", shifted.headTail,
		)
	}
}

+ 185
- 3
cmd/sdrd/http_handlers.go Просмотреть файл

@@ -3,6 +3,7 @@ package main
import (
"context"
"encoding/json"
"errors"
"log"
"net/http"
"os"
@@ -19,9 +20,10 @@ import (
"sdr-wideband-suite/internal/pipeline"
"sdr-wideband-suite/internal/recorder"
"sdr-wideband-suite/internal/runtime"
"sdr-wideband-suite/internal/telemetry"
)

func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot) {
func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot, telem *telemetry.Collector) {
mux.HandleFunc("/api/config", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
switch r.Method {
@@ -378,16 +380,196 @@ func registerAPIHandlers(mux *http.ServeMux, cfgPath string, cfgManager *runtime
w.Header().Set("Content-Type", "audio/wav")
_, _ = w.Write(data)
})
mux.HandleFunc("/api/debug/telemetry/live", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
if telem == nil {
_ = json.NewEncoder(w).Encode(map[string]any{"enabled": false, "error": "telemetry unavailable"})
return
}
_ = json.NewEncoder(w).Encode(telem.LiveSnapshot())
})
mux.HandleFunc("/api/debug/telemetry/history", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
if telem == nil {
http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable)
return
}
query, err := telemetryQueryFromRequest(r)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
items, err := telem.QueryMetrics(query)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
_ = json.NewEncoder(w).Encode(map[string]any{"items": items, "count": len(items)})
})
mux.HandleFunc("/api/debug/telemetry/events", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
if telem == nil {
http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable)
return
}
query, err := telemetryQueryFromRequest(r)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
items, err := telem.QueryEvents(query)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
_ = json.NewEncoder(w).Encode(map[string]any{"items": items, "count": len(items)})
})
mux.HandleFunc("/api/debug/telemetry/config", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
if telem == nil {
http.Error(w, "telemetry unavailable", http.StatusServiceUnavailable)
return
}
switch r.Method {
case http.MethodGet:
_ = json.NewEncoder(w).Encode(map[string]any{
"collector": telem.Config(),
"config": cfgManager.Snapshot().Debug.Telemetry,
})
case http.MethodPost:
var update struct {
Enabled *bool `json:"enabled"`
HeavyEnabled *bool `json:"heavy_enabled"`
HeavySampleEvery *int `json:"heavy_sample_every"`
MetricSampleEvery *int `json:"metric_sample_every"`
MetricHistoryMax *int `json:"metric_history_max"`
EventHistoryMax *int `json:"event_history_max"`
RetentionSeconds *int `json:"retention_seconds"`
PersistEnabled *bool `json:"persist_enabled"`
PersistDir *string `json:"persist_dir"`
RotateMB *int `json:"rotate_mb"`
KeepFiles *int `json:"keep_files"`
}
if err := json.NewDecoder(r.Body).Decode(&update); err != nil {
http.Error(w, "invalid json", http.StatusBadRequest)
return
}
next := cfgManager.Snapshot()
cur := next.Debug.Telemetry
if update.Enabled != nil {
cur.Enabled = *update.Enabled
}
if update.HeavyEnabled != nil {
cur.HeavyEnabled = *update.HeavyEnabled
}
if update.HeavySampleEvery != nil {
cur.HeavySampleEvery = *update.HeavySampleEvery
}
if update.MetricSampleEvery != nil {
cur.MetricSampleEvery = *update.MetricSampleEvery
}
if update.MetricHistoryMax != nil {
cur.MetricHistoryMax = *update.MetricHistoryMax
}
if update.EventHistoryMax != nil {
cur.EventHistoryMax = *update.EventHistoryMax
}
if update.RetentionSeconds != nil {
cur.RetentionSeconds = *update.RetentionSeconds
}
if update.PersistEnabled != nil {
cur.PersistEnabled = *update.PersistEnabled
}
if update.PersistDir != nil && *update.PersistDir != "" {
cur.PersistDir = *update.PersistDir
}
if update.RotateMB != nil {
cur.RotateMB = *update.RotateMB
}
if update.KeepFiles != nil {
cur.KeepFiles = *update.KeepFiles
}
next.Debug.Telemetry = cur
cfgManager.Replace(next)
if err := config.Save(cfgPath, next); err != nil {
log.Printf("telemetry config save failed: %v", err)
}
err := telem.Configure(telemetry.Config{
Enabled: cur.Enabled,
HeavyEnabled: cur.HeavyEnabled,
HeavySampleEvery: cur.HeavySampleEvery,
MetricSampleEvery: cur.MetricSampleEvery,
MetricHistoryMax: cur.MetricHistoryMax,
EventHistoryMax: cur.EventHistoryMax,
Retention: time.Duration(cur.RetentionSeconds) * time.Second,
PersistEnabled: cur.PersistEnabled,
PersistDir: cur.PersistDir,
RotateMB: cur.RotateMB,
KeepFiles: cur.KeepFiles,
})
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
_ = json.NewEncoder(w).Encode(map[string]any{"ok": true, "collector": telem.Config(), "config": cur})
default:
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}
})
}

func newHTTPServer(addr string, webRoot string, h *hub, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot) *http.Server {
// newHTTPServer wires the websocket endpoints, the JSON API (including the
// telemetry debug routes), and the static web UI file server onto one mux and
// returns the configured http.Server for the given listen address.
func newHTTPServer(addr string, webRoot string, h *hub, cfgPath string, cfgManager *runtime.Manager, srcMgr *sourceManager, dspUpdates chan dspUpdate, gpuState *gpuStatus, recMgr *recorder.Manager, sigSnap *signalSnapshot, eventMu *sync.RWMutex, phaseSnap *phaseSnapshot, telem *telemetry.Collector) *http.Server {
	router := http.NewServeMux()
	registerWSHandlers(router, h, recMgr)
	registerAPIHandlers(router, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap, telem)
	// Static assets are registered last so the explicit routes above win.
	router.Handle("/", http.FileServer(http.Dir(webRoot)))
	return &http.Server{Addr: addr, Handler: router}
}

// telemetryQueryFromRequest builds a telemetry.Query from URL parameters:
// since/until time bounds, limit, name / prefix / level filters, persisted
// history opt-out, plus tag filters supplied either as tag_<key>=value pairs
// or via the well-known shorthand keys (signal_id, session_id, stage,
// trace_id, component). Returns an error only for unparsable time bounds.
func telemetryQueryFromRequest(r *http.Request) (telemetry.Query, error) {
	params := r.URL.Query()
	var query telemetry.Query
	var err error
	if query.From, err = telemetry.ParseTimeQuery(params.Get("since")); err != nil {
		return query, errors.New("invalid since")
	}
	if query.To, err = telemetry.ParseTimeQuery(params.Get("until")); err != nil {
		return query, errors.New("invalid until")
	}
	if raw := params.Get("limit"); raw != "" {
		// Malformed limits are ignored rather than rejected.
		if n, convErr := strconv.Atoi(raw); convErr == nil {
			query.Limit = n
		}
	}
	query.Name = params.Get("name")
	query.NamePrefix = params.Get("prefix")
	query.Level = params.Get("level")
	// Persisted history is included unless explicitly disabled.
	query.IncludePersisted = true
	if raw := params.Get("include_persisted"); raw != "" {
		if flag, convErr := strconv.ParseBool(raw); convErr == nil {
			query.IncludePersisted = flag
		}
	}
	tags := telemetry.Tags{}
	for key, values := range params {
		if len(values) == 0 {
			continue
		}
		if name, ok := strings.CutPrefix(key, "tag_"); ok {
			tags[name] = values[0]
		}
	}
	// Shorthand keys override any identically-named tag_ entries.
	for _, key := range []string{"signal_id", "session_id", "stage", "trace_id", "component"} {
		if v := params.Get(key); v != "" {
			tags[key] = v
		}
	}
	if len(tags) > 0 {
		query.Tags = tags
	}
	return query, nil
}

func shutdownServer(server *http.Server) {
ctxTimeout, cancelTimeout := context.WithTimeout(context.Background(), 5*time.Second)
defer cancelTimeout()


+ 6
- 0
cmd/sdrd/legacy_extract.go Просмотреть файл

@@ -0,0 +1,6 @@
package main

// NOTE: Legacy extractor logic still lives in helpers.go for now.
// This file is intentionally reserved for the later explicit move once the
// production-path rewrite is far enough along that the split can be done in one
// safe pass instead of a risky mechanical half-step.

+ 24
- 4
cmd/sdrd/main.go Просмотреть файл

@@ -23,6 +23,7 @@ import (
"sdr-wideband-suite/internal/runtime"
"sdr-wideband-suite/internal/sdr"
"sdr-wideband-suite/internal/sdrplay"
"sdr-wideband-suite/internal/telemetry"
)

func main() {
@@ -51,6 +52,25 @@ func main() {

cfgManager := runtime.New(cfg)
gpuState := &gpuStatus{Available: gpufft.Available()}
telemetryCfg := telemetry.Config{
Enabled: cfg.Debug.Telemetry.Enabled,
HeavyEnabled: cfg.Debug.Telemetry.HeavyEnabled,
HeavySampleEvery: cfg.Debug.Telemetry.HeavySampleEvery,
MetricSampleEvery: cfg.Debug.Telemetry.MetricSampleEvery,
MetricHistoryMax: cfg.Debug.Telemetry.MetricHistoryMax,
EventHistoryMax: cfg.Debug.Telemetry.EventHistoryMax,
Retention: time.Duration(cfg.Debug.Telemetry.RetentionSeconds) * time.Second,
PersistEnabled: cfg.Debug.Telemetry.PersistEnabled,
PersistDir: cfg.Debug.Telemetry.PersistDir,
RotateMB: cfg.Debug.Telemetry.RotateMB,
KeepFiles: cfg.Debug.Telemetry.KeepFiles,
}
telemetryCollector, err := telemetry.New(telemetryCfg)
if err != nil {
log.Fatalf("telemetry init failed: %v", err)
}
defer telemetryCollector.Close()
telemetryCollector.SetStatus("build", "sdrd")

newSource := func(cfg config.Config) (sdr.Source, error) {
if mockFlag {
@@ -74,7 +94,7 @@ func main() {
if err != nil {
log.Fatalf("sdrplay init failed: %v (try --mock or build with -tags sdrplay)", err)
}
srcMgr := newSourceManager(src, newSource)
srcMgr := newSourceManagerWithTelemetry(src, newSource, telemetryCollector)
if err := srcMgr.Start(); err != nil {
log.Fatalf("source start: %v", err)
}
@@ -118,7 +138,7 @@ func main() {
DeemphasisUs: cfg.Recorder.DeemphasisUs,
ExtractionTaps: cfg.Recorder.ExtractionTaps,
ExtractionBwMult: cfg.Recorder.ExtractionBwMult,
}, cfg.CenterHz, decodeMap)
}, cfg.CenterHz, decodeMap, telemetryCollector)
defer recMgr.Close()

sigSnap := &signalSnapshot{}
@@ -126,9 +146,9 @@ func main() {
defer extractMgr.reset()

phaseSnap := &phaseSnapshot{}
go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, eventMu, dspUpdates, gpuState, recMgr, sigSnap, extractMgr, phaseSnap)
go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, eventMu, dspUpdates, gpuState, recMgr, sigSnap, extractMgr, phaseSnap, telemetryCollector)

server := newHTTPServer(cfg.WebAddr, cfg.WebRoot, h, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap)
server := newHTTPServer(cfg.WebAddr, cfg.WebRoot, h, cfgPath, cfgManager, srcMgr, dspUpdates, gpuState, recMgr, sigSnap, eventMu, phaseSnap, telemetryCollector)
go func() {
log.Printf("web listening on %s", cfg.WebAddr)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {


+ 177
- 3
cmd/sdrd/pipeline_runtime.go Просмотреть файл

@@ -3,6 +3,8 @@ package main
import (
"fmt"
"math"
"os"
"strconv"
"strings"
"sync"
"sync/atomic"
@@ -19,6 +21,7 @@ import (
"sdr-wideband-suite/internal/pipeline"
"sdr-wideband-suite/internal/rds"
"sdr-wideband-suite/internal/recorder"
"sdr-wideband-suite/internal/telemetry"
)

type rdsState struct {
@@ -29,6 +32,18 @@ type rdsState struct {
mu sync.Mutex
}

// forceFixedStreamReadSamples, when positive, overrides the adaptive
// "drain the buffer" read sizing in captureSpectrum with a fixed sample
// count. It is read exactly once at package init from the
// SDR_FORCE_FIXED_STREAM_READ_SAMPLES environment variable (debug aid for
// boundary-click investigation); an empty, non-numeric, or non-positive
// value leaves the override disabled (0).
var forceFixedStreamReadSamples = func() int {
	raw := strings.TrimSpace(os.Getenv("SDR_FORCE_FIXED_STREAM_READ_SAMPLES"))
	if raw == "" {
		return 0
	}
	v, err := strconv.Atoi(raw)
	if err != nil || v <= 0 {
		return 0
	}
	return v
}()

type dspRuntime struct {
cfg config.Config
det *detector.Detector
@@ -52,10 +67,13 @@ type dspRuntime struct {
arbiter *pipeline.Arbiter
arbitration pipeline.ArbitrationState
gotSamples bool
telemetry *telemetry.Collector
lastAllIQTail []complex64
}

type spectrumArtifacts struct {
allIQ []complex64
streamDropped bool
surveillanceIQ []complex64
detailIQ []complex64
surveillanceSpectrum []float64
@@ -94,7 +112,7 @@ type surveillancePlan struct {

const derivedIDBlock = int64(1_000_000_000)

func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64, gpuState *gpuStatus) *dspRuntime {
func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64, gpuState *gpuStatus, coll *telemetry.Collector) *dspRuntime {
detailFFT := cfg.Refinement.DetailFFTSize
if detailFFT <= 0 {
detailFFT = cfg.FFTSize
@@ -119,6 +137,7 @@ func newDSPRuntime(cfg config.Config, det *detector.Detector, window []float64,
streamPhaseState: map[int64]*streamExtractState{},
streamOverlap: &streamIQOverlap{},
arbiter: pipeline.NewArbiter(),
telemetry: coll,
}
if rt.useGPU && gpuState != nil {
snap := gpuState.snapshot()
@@ -216,6 +235,15 @@ func (rt *dspRuntime) applyUpdate(upd dspUpdate, srcMgr *sourceManager, rec *rec
gpuState.set(false, nil)
}
}
if rt.telemetry != nil {
rt.telemetry.Event("dsp_config_update", "info", "dsp runtime configuration updated", nil, map[string]any{
"fft_size": rt.cfg.FFTSize,
"sample_rate": rt.cfg.SampleRate,
"use_gpu_fft": rt.cfg.UseGPUFFT,
"detail_fft": rt.detailFFT,
"surv_strategy": rt.cfg.Surveillance.Strategy,
})
}
}

func (rt *dspRuntime) spectrumFromIQ(iq []complex64, gpuState *gpuStatus) []float64 {
@@ -334,26 +362,112 @@ func (rt *dspRuntime) decimateSurveillanceIQ(iq []complex64, factor int) []compl
return dsp.Decimate(filtered, factor)
}

// meanMagComplex returns the average magnitude of the given IQ samples.
// An empty (or nil) slice yields 0.
func meanMagComplex(samples []complex64) float64 {
	n := len(samples)
	if n == 0 {
		return 0
	}
	total := 0.0
	for _, s := range samples {
		total += math.Hypot(float64(real(s)), float64(imag(s)))
	}
	return total / float64(n)
}

// phaseStepAbs returns the absolute phase difference |arg(b) - arg(a)|
// between two consecutive complex samples, wrapped to [0, pi]. It is
// computed via atan2 of the cross and dot products of a and b, which is
// numerically stable near zero phase difference.
func phaseStepAbs(a, b complex64) float64 {
	ar, ai := float64(real(a)), float64(imag(a))
	br, bi := float64(real(b)), float64(imag(b))
	cross := ar*bi - ai*br
	dot := ar*br + ai*bi
	return math.Abs(math.Atan2(cross, dot))
}

// boundaryMetrics compares the head of the current IQ chunk against the tail
// of the previous chunk and reports (headMeanMag, tailMeanMag,
// discontinuityScore, headWindowLen). The score is the absolute magnitude
// delta plus the absolute phase jump across the boundary sample pair. A
// window of <= 0 defaults to 16 samples; with no previous tail the tail mean
// and score are 0.
func boundaryMetrics(prevTail []complex64, curr []complex64, window int) (float64, float64, float64, int) {
	if len(curr) == 0 {
		return 0, 0, 0, 0
	}
	if window <= 0 {
		window = 16
	}
	headN := window
	if headN > len(curr) {
		headN = len(curr)
	}
	headMean := meanMagComplex(curr[:headN])
	if len(prevTail) == 0 {
		return headMean, 0, 0, headN
	}
	tailN := window
	if tailN > len(prevTail) {
		tailN = len(prevTail)
	}
	tailStart := len(prevTail) - tailN
	tailMean := meanMagComplex(prevTail[tailStart:])
	deltaMag := math.Abs(headMean - tailMean)
	phaseJump := phaseStepAbs(prevTail[len(prevTail)-1], curr[0])
	return headMean, tailMean, deltaMag + phaseJump, headN
}

// tailWindowComplex returns an independent copy of up to the last n samples
// of src. It returns nil when n <= 0 or src is empty; the result never
// aliases src's backing array, so callers may retain it across reuse of src.
func tailWindowComplex(src []complex64, n int) []complex64 {
	if n <= 0 || len(src) == 0 {
		return nil
	}
	start := len(src) - n
	if start < 0 {
		start = 0
	}
	out := make([]complex64, len(src)-start)
	copy(out, src[start:])
	return out
}

func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manager, dcBlocker *dsp.DCBlocker, gpuState *gpuStatus) (*spectrumArtifacts, error) {
start := time.Now()
required := rt.cfg.FFTSize
if rt.detailFFT > required {
required = rt.detailFFT
}
available := required
st := srcMgr.Stats()
if st.BufferSamples > required {
if rt.telemetry != nil {
rt.telemetry.SetGauge("source.buffer_samples", float64(st.BufferSamples), nil)
rt.telemetry.SetGauge("source.last_sample_ago_ms", float64(st.LastSampleAgoMs), nil)
rt.telemetry.SetGauge("source.dropped", float64(st.Dropped), nil)
rt.telemetry.SetGauge("source.resets", float64(st.Resets), nil)
}
if forceFixedStreamReadSamples > 0 {
available = forceFixedStreamReadSamples
if available < required {
available = required
}
available = (available / required) * required
if available < required {
available = required
}
logging.Warn("boundary", "fixed_stream_read_samples", "configured", forceFixedStreamReadSamples, "effective", available, "required", required)
} else if st.BufferSamples > required {
available = (st.BufferSamples / required) * required
if available < required {
available = required
}
}
logging.Debug("capture", "read_iq", "required", required, "available", available, "buf", st.BufferSamples, "reset", st.Resets, "drop", st.Dropped)
readStart := time.Now()
allIQ, err := srcMgr.ReadIQ(available)
if err != nil {
if rt.telemetry != nil {
rt.telemetry.IncCounter("capture.read.error", 1, nil)
}
return nil, err
}
if rt.telemetry != nil {
rt.telemetry.Observe("capture.read.duration_ms", float64(time.Since(readStart).Microseconds())/1000.0, nil)
rt.telemetry.Observe("capture.read.samples", float64(len(allIQ)), nil)
}
if rec != nil {
ingestStart := time.Now()
rec.Ingest(time.Now(), allIQ)
if rt.telemetry != nil {
rt.telemetry.Observe("capture.ingest.duration_ms", float64(time.Since(ingestStart).Microseconds())/1000.0, nil)
}
}
// Cap allIQ for downstream extraction to prevent buffer bloat.
// Without this cap, buffer accumulation during processing stalls causes
@@ -366,8 +480,17 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag
maxStreamSamples = required
}
maxStreamSamples = (maxStreamSamples / required) * required
streamDropped := false
if len(allIQ) > maxStreamSamples {
allIQ = allIQ[len(allIQ)-maxStreamSamples:]
streamDropped = true
if rt.telemetry != nil {
rt.telemetry.IncCounter("capture.stream_drop.count", 1, nil)
rt.telemetry.Event("iq_dropped", "warn", "capture IQ dropped before extraction", nil, map[string]any{
"max_stream_samples": maxStreamSamples,
"required": required,
})
}
}
logging.Debug("capture", "iq_len", "len", len(allIQ), "surv_fft", rt.cfg.FFTSize, "detail_fft", rt.detailFFT)
survIQ := allIQ
@@ -380,14 +503,60 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag
}
if rt.dcEnabled {
dcBlocker.Apply(allIQ)
if rt.telemetry != nil {
rt.telemetry.IncCounter("dsp.dc_block.apply", 1, nil)
}
}
if rt.iqEnabled {
// IQBalance must NOT modify allIQ in-place: allIQ goes to the extraction
// pipeline and any in-place modification creates a phase/amplitude
// discontinuity at the survIQ boundary (len-FFTSize) that the polyphase
// extractor then sees as paired click artifacts in the FM discriminator.
detailIsSurv := sameIQBuffer(detailIQ, survIQ)
survIQ = append([]complex64(nil), survIQ...)
dsp.IQBalance(survIQ)
if !sameIQBuffer(detailIQ, survIQ) {
if detailIsSurv {
detailIQ = survIQ
} else {
detailIQ = append([]complex64(nil), detailIQ...)
dsp.IQBalance(detailIQ)
}
}
if rt.telemetry != nil {
rt.telemetry.SetGauge("iq.stage.all.length", float64(len(allIQ)), nil)
rt.telemetry.SetGauge("iq.stage.surveillance.length", float64(len(survIQ)), nil)
rt.telemetry.SetGauge("iq.stage.detail.length", float64(len(detailIQ)), nil)
rt.telemetry.Observe("capture.total.duration_ms", float64(time.Since(start).Microseconds())/1000.0, nil)

headMean, tailMean, boundaryScore, boundaryWindow := boundaryMetrics(rt.lastAllIQTail, allIQ, 32)
rt.telemetry.SetGauge("iq.boundary.all.head_mean_mag", headMean, nil)
rt.telemetry.SetGauge("iq.boundary.all.prev_tail_mean_mag", tailMean, nil)
rt.telemetry.Observe("iq.boundary.all.discontinuity_score", boundaryScore, nil)
if len(rt.lastAllIQTail) > 0 && len(allIQ) > 0 {
deltaMag := math.Abs(math.Hypot(float64(real(allIQ[0])), float64(imag(allIQ[0]))) - math.Hypot(float64(real(rt.lastAllIQTail[len(rt.lastAllIQTail)-1])), float64(imag(rt.lastAllIQTail[len(rt.lastAllIQTail)-1]))))
phaseJump := phaseStepAbs(rt.lastAllIQTail[len(rt.lastAllIQTail)-1], allIQ[0])
rt.telemetry.Observe("iq.boundary.all.delta_mag", deltaMag, nil)
rt.telemetry.Observe("iq.boundary.all.delta_phase", phaseJump, nil)
if rt.telemetry.ShouldSampleHeavy() {
rt.telemetry.Event("alliq_boundary", "info", "allIQ boundary snapshot", nil, map[string]any{
"window": boundaryWindow,
"head_mean_mag": headMean,
"prev_tail_mean_mag": tailMean,
"delta_mag": deltaMag,
"delta_phase": phaseJump,
"discontinuity_score": boundaryScore,
"alliq_len": len(allIQ),
"stream_dropped": streamDropped,
})
}
}
if rt.telemetry.ShouldSampleHeavy() {
observeIQStats(rt.telemetry, "capture_all", allIQ, nil)
observeIQStats(rt.telemetry, "capture_surveillance", survIQ, nil)
observeIQStats(rt.telemetry, "capture_detail", detailIQ, nil)
}
}
rt.lastAllIQTail = tailWindowComplex(allIQ, 32)
survSpectrum := rt.spectrumFromIQ(survIQ, gpuState)
sanitizeSpectrum(survSpectrum)
detailSpectrum := survSpectrum
@@ -430,8 +599,13 @@ func (rt *dspRuntime) captureSpectrum(srcMgr *sourceManager, rec *recorder.Manag
}
now := time.Now()
finished, detected := rt.det.Process(now, survSpectrum, rt.cfg.CenterHz)
if rt.telemetry != nil {
rt.telemetry.SetGauge("signals.detected.count", float64(len(detected)), nil)
rt.telemetry.SetGauge("signals.finished.count", float64(len(finished)), nil)
}
return &spectrumArtifacts{
allIQ: allIQ,
streamDropped: streamDropped,
surveillanceIQ: survIQ,
detailIQ: detailIQ,
surveillanceSpectrum: survSpectrum,


+ 2
- 2
cmd/sdrd/pipeline_runtime_test.go Просмотреть файл

@@ -13,7 +13,7 @@ func TestNewDSPRuntime(t *testing.T) {
cfg := config.Default()
det := detector.New(cfg.Detector, cfg.SampleRate, cfg.FFTSize)
window := fftutil.Hann(cfg.FFTSize)
rt := newDSPRuntime(cfg, det, window, &gpuStatus{})
rt := newDSPRuntime(cfg, det, window, &gpuStatus{}, nil)
if rt == nil {
t.Fatalf("runtime is nil")
}
@@ -47,7 +47,7 @@ func TestSurveillanceLevelsRespectStrategy(t *testing.T) {
cfg := config.Default()
det := detector.New(cfg.Detector, cfg.SampleRate, cfg.FFTSize)
window := fftutil.Hann(cfg.FFTSize)
rt := newDSPRuntime(cfg, det, window, &gpuStatus{})
rt := newDSPRuntime(cfg, det, window, &gpuStatus{}, nil)
policy := pipeline.Policy{SurveillanceStrategy: "single-resolution"}
plan := rt.buildSurveillancePlan(policy)
if len(plan.Levels) != 1 {


+ 41
- 2
cmd/sdrd/source_manager.go Просмотреть файл

@@ -1,11 +1,16 @@
package main

import (
"fmt"
"time"

"sdr-wideband-suite/internal/config"
"sdr-wideband-suite/internal/sdr"
"sdr-wideband-suite/internal/telemetry"
)

func (m *sourceManager) Restart(cfg config.Config) error {
start := time.Now()
m.mu.Lock()
defer m.mu.Unlock()
old := m.src
@@ -14,15 +19,27 @@ func (m *sourceManager) Restart(cfg config.Config) error {
if err != nil {
_ = old.Start()
m.src = old
if m.telemetry != nil {
m.telemetry.IncCounter("source.restart.error", 1, nil)
m.telemetry.Event("source_restart_failed", "warn", "source restart failed", nil, map[string]any{"error": err.Error()})
}
return err
}
if err := next.Start(); err != nil {
_ = next.Stop()
_ = old.Start()
m.src = old
if m.telemetry != nil {
m.telemetry.IncCounter("source.restart.error", 1, nil)
m.telemetry.Event("source_restart_failed", "warn", "source restart failed", nil, map[string]any{"error": err.Error()})
}
return err
}
m.src = next
if m.telemetry != nil {
m.telemetry.IncCounter("source.restart.count", 1, nil)
m.telemetry.Observe("source.restart.duration_ms", float64(time.Since(start).Milliseconds()), nil)
}
return nil
}

@@ -44,7 +61,11 @@ func (m *sourceManager) Flush() {
}

func newSourceManager(src sdr.Source, newSource func(cfg config.Config) (sdr.Source, error)) *sourceManager {
return &sourceManager{src: src, newSource: newSource}
return newSourceManagerWithTelemetry(src, newSource, nil)
}

func newSourceManagerWithTelemetry(src sdr.Source, newSource func(cfg config.Config) (sdr.Source, error), coll *telemetry.Collector) *sourceManager {
return &sourceManager{src: src, newSource: newSource, telemetry: coll}
}

func (m *sourceManager) Start() error {
@@ -60,9 +81,27 @@ func (m *sourceManager) Stop() error {
}

func (m *sourceManager) ReadIQ(n int) ([]complex64, error) {
waitStart := time.Now()
m.mu.RLock()
wait := time.Since(waitStart)
defer m.mu.RUnlock()
return m.src.ReadIQ(n)
if m.telemetry != nil {
m.telemetry.Observe("source.lock_wait_ms", float64(wait.Microseconds())/1000.0, telemetry.TagsFromPairs("lock", "read"))
if wait > 2*time.Millisecond {
m.telemetry.IncCounter("source.lock_contention.count", 1, telemetry.TagsFromPairs("lock", "read"))
}
}
readStart := time.Now()
out, err := m.src.ReadIQ(n)
if m.telemetry != nil {
tags := telemetry.TagsFromPairs("requested", fmt.Sprintf("%d", n))
m.telemetry.Observe("source.read.duration_ms", float64(time.Since(readStart).Microseconds())/1000.0, tags)
m.telemetry.SetGauge("source.read.samples", float64(len(out)), nil)
if err != nil {
m.telemetry.IncCounter("source.read.error", 1, nil)
}
}
return out, err
}

func (m *sourceManager) ApplyConfig(cfg config.Config) error {


+ 45
- 0
cmd/sdrd/streaming_compare.go Просмотреть файл

@@ -0,0 +1,45 @@
package main

import (
"fmt"

"sdr-wideband-suite/internal/demod/gpudemod"
"sdr-wideband-suite/internal/telemetry"
)

// observeStreamingComparison publishes telemetry comparing the CPU-oracle
// streaming extraction result against the production (GPU) result for the
// same signal. It is a no-op when coll is nil.
//
// Emitted data: the metrics from gpudemod.CompareOracleAndGPUHostOracle
// (n_out, phase_count, history_len, reference max-abs/RMS error, compare
// count), both paths' output rates and lengths, head-of-output IQ stats for
// each path, and one summary event. All metrics are tagged by
// oracle.SignalID — NOTE(review): assumes oracle and prod describe the same
// signal; confirm callers always pair matching IDs.
func observeStreamingComparison(coll *telemetry.Collector, oracle gpudemod.StreamingExtractResult, prod gpudemod.StreamingExtractResult) {
	if coll == nil {
		return
	}
	metrics, stats := gpudemod.CompareOracleAndGPUHostOracle(oracle, prod)
	tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", oracle.SignalID), "path", "streaming_compare")
	coll.SetGauge("streaming.compare.n_out", float64(metrics.NOut), tags)
	coll.SetGauge("streaming.compare.phase_count", float64(metrics.PhaseCount), tags)
	coll.SetGauge("streaming.compare.history_len", float64(metrics.HistoryLen), tags)
	coll.Observe("streaming.compare.ref_max_abs_err", metrics.RefMaxAbsErr, tags)
	coll.Observe("streaming.compare.ref_rms_err", metrics.RefRMSErr, tags)
	coll.SetGauge("streaming.compare.compare_count", float64(stats.Count), tags)
	coll.SetGauge("streaming.compare.oracle_rate", float64(oracle.Rate), tags)
	coll.SetGauge("streaming.compare.production_rate", float64(prod.Rate), tags)
	coll.SetGauge("streaming.compare.oracle_output_len", float64(len(oracle.IQ)), tags)
	coll.SetGauge("streaming.compare.production_output_len", float64(len(prod.IQ)), tags)
	if len(oracle.IQ) > 0 {
		// Stats over the first 64 output samples of each path.
		oracleStats := computeIQHeadStats(oracle.IQ, 64)
		coll.Observe("streaming.compare.oracle_head_mean_mag", oracleStats.meanMag, tags)
		coll.Observe("streaming.compare.oracle_head_max_step", oracleStats.maxStep, tags)
	}
	if len(prod.IQ) > 0 {
		prodStats := computeIQHeadStats(prod.IQ, 64)
		coll.Observe("streaming.compare.production_head_mean_mag", prodStats.meanMag, tags)
		coll.Observe("streaming.compare.production_head_max_step", prodStats.maxStep, tags)
	}
	coll.Event("streaming_compare_snapshot", "info", "streaming comparison snapshot", tags, map[string]any{
		"oracle_rate":           oracle.Rate,
		"production_rate":       prod.Rate,
		"oracle_output_len":     len(oracle.IQ),
		"production_output_len": len(prod.IQ),
		"ref_max_abs_err":       metrics.RefMaxAbsErr,
		"ref_rms_err":           metrics.RefRMSErr,
		"compare_count":         stats.Count,
	})
}

+ 27
- 0
cmd/sdrd/streaming_monitoring.go Просмотреть файл

@@ -0,0 +1,27 @@
package main

import (
"fmt"

"sdr-wideband-suite/internal/demod/gpudemod"
"sdr-wideband-suite/internal/telemetry"
)

// observeStreamingResult publishes per-signal telemetry for a single
// streaming extraction result under the given metric name prefix (callers
// in this package use "streaming.production" and "streaming.oracle").
// It is a no-op when coll is nil. Metrics are tagged with the signal ID and
// the prefix as "path".
func observeStreamingResult(coll *telemetry.Collector, prefix string, res gpudemod.StreamingExtractResult) {
	if coll == nil {
		return
	}
	tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", res.SignalID), "path", prefix)
	coll.SetGauge(prefix+".n_out", float64(res.NOut), tags)
	coll.SetGauge(prefix+".phase_count", float64(res.PhaseCount), tags)
	coll.SetGauge(prefix+".history_len", float64(res.HistoryLen), tags)
	coll.SetGauge(prefix+".rate", float64(res.Rate), tags)
	coll.SetGauge(prefix+".output_len", float64(len(res.IQ)), tags)
	if len(res.IQ) > 0 {
		// Stats over the first 64 output samples; large steps here indicate
		// a discontinuity at the chunk boundary.
		stats := computeIQHeadStats(res.IQ, 64)
		coll.Observe(prefix+".head_mean_mag", stats.meanMag, tags)
		coll.Observe(prefix+".head_max_step", stats.maxStep, tags)
		coll.Observe(prefix+".head_p95_step", stats.p95Step, tags)
		coll.SetGauge(prefix+".head_low_magnitude_count", float64(stats.lowMag), tags)
	}
}

+ 50
- 0
cmd/sdrd/streaming_production.go Просмотреть файл

@@ -0,0 +1,50 @@
package main

import (
"fmt"

"sdr-wideband-suite/internal/demod/gpudemod"
"sdr-wideband-suite/internal/detector"
"sdr-wideband-suite/internal/telemetry"
)

// extractForStreamingProduction runs the stateful streaming GPU extraction
// path over the freshly captured IQ for all detected signals.
//
// Returns per-signal baseband IQ and output sample rates, both parallel to
// signals. It fails when the jobs cannot be built (e.g. no exact integer
// decimation rate) or when no batch runner is available for this IQ length
// and sample rate.
//
// When useStreamingOraclePath is set, each production result is additionally
// compared against the CPU oracle and the comparison is published via
// observeStreamingComparison. The oracle call is best-effort: its error is
// deliberately discarded, and a short/empty oracleResults slice simply skips
// the comparison for trailing signals.
func extractForStreamingProduction(
	extractMgr *extractionManager,
	allIQ []complex64,
	sampleRate int,
	centerHz float64,
	signals []detector.Signal,
	aqCfg extractionConfig,
	coll *telemetry.Collector,
) ([][]complex64, []int, error) {
	out := make([][]complex64, len(signals))
	rates := make([]int, len(signals))
	jobs, err := buildStreamingJobs(sampleRate, centerHz, signals, aqCfg)
	if err != nil {
		return nil, nil, err
	}
	runner := extractMgr.get(len(allIQ), sampleRate)
	if runner == nil {
		return nil, nil, fmt.Errorf("streaming production path unavailable: no batch runner")
	}
	results, err := runner.StreamingExtractGPU(allIQ, jobs)
	if err != nil {
		return nil, nil, err
	}
	var oracleResults []gpudemod.StreamingExtractResult
	if useStreamingOraclePath {
		// Recreating the package-level runner on a sample-rate change also
		// discards any per-signal streaming state it carried.
		if streamingOracleRunner == nil || streamingOracleRunner.SampleRate != sampleRate {
			streamingOracleRunner = gpudemod.NewCPUOracleRunner(sampleRate)
		}
		oracleResults, _ = streamingOracleRunner.StreamingExtract(allIQ, jobs)
	}
	for i, res := range results {
		out[i] = res.IQ
		rates[i] = res.Rate
		observeStreamingResult(coll, "streaming.production", res)
		if i < len(oracleResults) {
			observeStreamingComparison(coll, oracleResults[i], res)
		}
	}
	return out, rates, nil
}

+ 137
- 0
cmd/sdrd/streaming_refactor.go Просмотреть файл

@@ -0,0 +1,137 @@
package main

import (
"math"

"sdr-wideband-suite/internal/demod/gpudemod"
"sdr-wideband-suite/internal/detector"
"sdr-wideband-suite/internal/telemetry"
)

// Bring-up feature switches for the stateful streaming extractor. These are
// compile-time constants rather than config, so flipping them requires a
// rebuild.
const useStreamingOraclePath = false // temporarily disable oracle during bring-up to isolate production-path runtime behavior
const useStreamingProductionPath = true // route top-level extraction through the new production path during bring-up/validation

// streamingOracleRunner is the lazily created CPU oracle shared by the
// oracle and production paths; package-level mutable state.
// NOTE(review): assumes single-goroutine access from the DSP loop — confirm.
var streamingOracleRunner *gpudemod.CPUOracleRunner

// buildStreamingJobs converts detected signals into streaming extraction
// jobs for the stateful streaming path, one job per signal.
//
// The extraction bandwidth is sig.BWHz widened by aqCfg.bwMult (defaulting
// to 1.0 when unset) and the FIR length defaults to 101 taps when
// aqCfg.firTaps is unset. Signals in the FM broadcast band (87.5-108 MHz) or
// classified WFM/WFM_STEREO use the fixed wfmStreamOutRate with a
// wfmStreamMinBW floor; all other signals get the nearest output rate that
// divides sampleRate exactly (see nearestExactDecimationRate) with a 20 kHz
// bandwidth floor. Any rate that still fails the exact integer decimation
// check aborts the whole batch with an error.
func buildStreamingJobs(sampleRate int, centerHz float64, signals []detector.Signal, aqCfg extractionConfig) ([]gpudemod.StreamingExtractJob, error) {
	jobs := make([]gpudemod.StreamingExtractJob, len(signals))
	bwMult := aqCfg.bwMult
	if bwMult <= 0 {
		bwMult = 1.0
	}
	firTaps := aqCfg.firTaps
	if firTaps <= 0 {
		firTaps = 101
	}
	for i, sig := range signals {
		bw := sig.BWHz * bwMult
		sigMHz := sig.CenterHz / 1e6
		// WFM if the frequency sits in the broadcast band OR the classifier
		// already labeled it as wide FM.
		isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) ||
			(sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO"))
		var outRate int
		if isWFM {
			outRate = wfmStreamOutRate
			if bw < wfmStreamMinBW {
				bw = wfmStreamMinBW
			}
		} else {
			// Non-WFM target: must be an exact integer divisor of sampleRate.
			// The old hardcoded 200000 fails for common SDR rates (e.g. 4096000/200000=20.48).
			// Find the nearest valid rate >= 128000 (enough for NFM/AM/SSB).
			outRate = nearestExactDecimationRate(sampleRate, 200000, 128000)
			if bw < 20000 {
				bw = 20000
			}
		}
		if _, err := gpudemod.ExactIntegerDecimation(sampleRate, outRate); err != nil {
			return nil, err
		}
		// Offset of the signal relative to the capture center; the hash lets
		// the runner detect config changes and reset per-signal state.
		offset := sig.CenterHz - centerHz
		jobs[i] = gpudemod.StreamingExtractJob{
			SignalID:   sig.ID,
			OffsetHz:   offset,
			Bandwidth:  bw,
			OutRate:    outRate,
			NumTaps:    firTaps,
			ConfigHash: gpudemod.StreamingConfigHash(sig.ID, offset, bw, outRate, firTaps, sampleRate),
		}
	}
	return jobs, nil
}

// resetStreamingOracleRunner clears all per-signal streaming state held by
// the package-level CPU oracle runner, if one has been created. The runner
// itself (and its SampleRate) is kept.
func resetStreamingOracleRunner() {
	if streamingOracleRunner != nil {
		streamingOracleRunner.ResetAllStates()
	}
}

// extractForStreamingOracle runs the CPU-oracle streaming extraction path
// for the detected signals, mirroring extractForStreamingProduction but
// using the reference CPU implementation instead of the GPU batch runner.
//
// Returns per-signal baseband IQ and output sample rates, both parallel to
// signals. The package-level oracle runner is lazily (re)created whenever
// the sample rate changes; recreating it discards any carried per-signal
// streaming state.
func extractForStreamingOracle(
	allIQ []complex64,
	sampleRate int,
	centerHz float64,
	signals []detector.Signal,
	aqCfg extractionConfig,
	coll *telemetry.Collector,
) ([][]complex64, []int, error) {
	out := make([][]complex64, len(signals))
	rates := make([]int, len(signals))
	jobs, err := buildStreamingJobs(sampleRate, centerHz, signals, aqCfg)
	if err != nil {
		return nil, nil, err
	}
	if streamingOracleRunner == nil || streamingOracleRunner.SampleRate != sampleRate {
		streamingOracleRunner = gpudemod.NewCPUOracleRunner(sampleRate)
	}
	results, err := streamingOracleRunner.StreamingExtract(allIQ, jobs)
	if err != nil {
		return nil, nil, err
	}
	for i, res := range results {
		out[i] = res.IQ
		rates[i] = res.Rate
		observeStreamingResult(coll, "streaming.oracle", res)
	}
	return out, rates, nil
}

// phaseIncForOffset returns the per-sample NCO phase increment in radians
// that shifts a signal at offsetHz (relative to capture center) down to
// baseband when sampled at sampleRate. The sign is negative so a positive
// offset is mixed downward.
func phaseIncForOffset(sampleRate int, offsetHz float64) float64 {
	cyclesPerSample := offsetHz / float64(sampleRate)
	return -2.0 * math.Pi * cyclesPerSample
}

// nearestExactDecimationRate finds the output rate closest to targetRate
// (but not below minRate) that is an exact integer divisor of sampleRate.
// This avoids the ExactIntegerDecimation check failing for rates like
// 4096000/200000=20.48 which silently killed the entire streaming batch.
//
// Unlike the previous windowed search (decimation factors within +/-2 of the
// target factor), this enumerates every divisor of sampleRate, so a valid
// rate is found whenever one exists at or above minRate. Ties in distance
// prefer the higher rate. If no divisor >= minRate exists, targetRate is
// returned unchanged so the downstream ExactIntegerDecimation check surfaces
// the error to the caller.
func nearestExactDecimationRate(sampleRate int, targetRate int, minRate int) int {
	if sampleRate <= 0 || targetRate <= 0 {
		return targetRate
	}
	if sampleRate%targetRate == 0 {
		return targetRate // already exact
	}
	bestRate := 0
	bestDist := sampleRate // impossibly large
	consider := func(rate int) {
		if rate < minRate || sampleRate%rate != 0 {
			return
		}
		dist := targetRate - rate
		if dist < 0 {
			dist = -dist
		}
		// Strictly closer wins; on a tie keep the higher rate (matches the
		// old search order, which visited higher rates first).
		if dist < bestDist || (dist == bestDist && rate > bestRate) {
			bestDist = dist
			bestRate = rate
		}
	}
	// Every exact output rate is a divisor of sampleRate; enumerate divisor
	// pairs (d, sampleRate/d) up to sqrt(sampleRate). O(sqrt(n)) — trivial
	// for SDR sample rates.
	for d := 1; d*d <= sampleRate; d++ {
		if sampleRate%d != 0 {
			continue
		}
		consider(d)
		consider(sampleRate / d)
	}
	if bestRate > 0 {
		return bestRate
	}
	return targetRate // fallback — will fail ExactIntegerDecimation and surface the error
}

+ 2
- 0
cmd/sdrd/types.go Просмотреть файл

@@ -11,6 +11,7 @@ import (
"sdr-wideband-suite/internal/detector"
"sdr-wideband-suite/internal/pipeline"
"sdr-wideband-suite/internal/sdr"
"sdr-wideband-suite/internal/telemetry"
)

type SpectrumDebug struct {
@@ -110,6 +111,7 @@ type sourceManager struct {
mu sync.RWMutex
src sdr.Source
newSource func(cfg config.Config) (sdr.Source, error)
telemetry *telemetry.Collector
}

type extractionManager struct {


+ 343
- 0
config.autosave.yaml Просмотреть файл

@@ -0,0 +1,343 @@
bands:
- name: uk-fm-broadcast
start_hz: 8.75e+07
end_hz: 1.08e+08
center_hz: 1.02e+08
sample_rate: 4096000
fft_size: 512
gain_db: 32
tuner_bw_khz: 5000
use_gpu_fft: true
classifier_mode: combined
agc: true
dc_block: true
iq_balance: true
pipeline:
mode: wideband-balanced
profile: wideband-balanced
goals:
intent: broadcast-monitoring
monitor_start_hz: 8.8e+07
monitor_end_hz: 1.08e+08
monitor_span_hz: 2e+07
monitor_windows:
- label: ""
zone: focus
start_hz: 8.75e+07
end_hz: 1.08e+08
center_hz: 0
span_hz: 0
priority: 1.25
auto_record: false
auto_decode: false
- label: ""
zone: decode
start_hz: 8.75e+07
end_hz: 1.08e+08
center_hz: 0
span_hz: 0
priority: 1.35
auto_record: false
auto_decode: false
signal_priorities:
- wfm
- rds
- broadcast
auto_record_classes:
- WFM
- WFM_STEREO
auto_decode_classes:
- WFM
- WFM_STEREO
- RDS
surveillance:
analysis_fft_size: 512
frame_rate: 12
strategy: multi-resolution
display_bins: 2048
display_fps: 12
derived_detection: auto
refinement:
enabled: true
max_concurrent: 24
detail_fft_size: 4096
min_candidate_snr_db: -3
min_span_hz: 60000
max_span_hz: 250000
auto_span: true
resources:
prefer_gpu: true
max_refinement_jobs: 24
max_recording_streams: 32
max_decode_jobs: 16
decision_hold_ms: 2500
profiles:
- name: legacy
description: Current single-band pipeline behavior
pipeline:
mode: legacy
profile: legacy
goals:
intent: general-monitoring
monitor_start_hz: 0
monitor_end_hz: 0
monitor_span_hz: 0
monitor_windows: []
signal_priorities: []
auto_record_classes: []
auto_decode_classes: []
surveillance:
analysis_fft_size: 2048
frame_rate: 15
strategy: single-resolution
display_bins: 2048
display_fps: 15
derived_detection: auto
refinement:
enabled: true
max_concurrent: 8
detail_fft_size: 2048
min_candidate_snr_db: 0
min_span_hz: 0
max_span_hz: 0
auto_span: true
resources:
prefer_gpu: false
max_refinement_jobs: 8
max_recording_streams: 16
max_decode_jobs: 16
decision_hold_ms: 2000
- name: wideband-balanced
description: Baseline multi-resolution wideband surveillance
pipeline:
mode: wideband-balanced
profile: wideband-balanced
goals:
intent: broadcast-monitoring
monitor_start_hz: 0
monitor_end_hz: 0
monitor_span_hz: 0
monitor_windows: []
signal_priorities:
- wfm
- rds
- broadcast
auto_record_classes:
- WFM
- WFM_STEREO
auto_decode_classes:
- WFM
- WFM_STEREO
- RDS
surveillance:
analysis_fft_size: 4096
frame_rate: 12
strategy: multi-resolution
display_bins: 2048
display_fps: 12
derived_detection: auto
refinement:
enabled: true
max_concurrent: 24
detail_fft_size: 4096
min_candidate_snr_db: -3
min_span_hz: 60000
max_span_hz: 250000
auto_span: true
resources:
prefer_gpu: true
max_refinement_jobs: 24
max_recording_streams: 32
max_decode_jobs: 16
decision_hold_ms: 2500
- name: wideband-aggressive
description: Higher surveillance/refinement budgets for dense wideband monitoring
pipeline:
mode: wideband-aggressive
profile: wideband-aggressive
goals:
intent: high-density-wideband-surveillance
monitor_start_hz: 0
monitor_end_hz: 0
monitor_span_hz: 0
monitor_windows: []
signal_priorities:
- wfm
- rds
- broadcast
- digital
auto_record_classes: []
auto_decode_classes: []
surveillance:
analysis_fft_size: 8192
frame_rate: 10
strategy: multi-resolution
display_bins: 4096
display_fps: 10
derived_detection: auto
refinement:
enabled: true
max_concurrent: 32
detail_fft_size: 8192
min_candidate_snr_db: -3
min_span_hz: 50000
max_span_hz: 280000
auto_span: true
resources:
prefer_gpu: true
max_refinement_jobs: 32
max_recording_streams: 40
max_decode_jobs: 24
decision_hold_ms: 2500
- name: archive
description: Record-first monitoring profile
pipeline:
mode: archive
profile: archive
goals:
intent: archive-and-triage
monitor_start_hz: 0
monitor_end_hz: 0
monitor_span_hz: 0
monitor_windows: []
signal_priorities:
- wfm
- broadcast
- digital
auto_record_classes: []
auto_decode_classes: []
surveillance:
analysis_fft_size: 4096
frame_rate: 12
strategy: single-resolution
display_bins: 2048
display_fps: 12
derived_detection: auto
refinement:
enabled: true
max_concurrent: 16
detail_fft_size: 4096
min_candidate_snr_db: -2
min_span_hz: 50000
max_span_hz: 250000
auto_span: true
resources:
prefer_gpu: true
max_refinement_jobs: 16
max_recording_streams: 40
max_decode_jobs: 16
decision_hold_ms: 3000
- name: digital-hunting
description: Digital-first refinement and decode focus
pipeline:
mode: digital-hunting
profile: digital-hunting
goals:
intent: digital-surveillance
monitor_start_hz: 0
monitor_end_hz: 0
monitor_span_hz: 0
monitor_windows: []
signal_priorities:
- rds
- digital
- wfm
auto_record_classes: []
auto_decode_classes: []
surveillance:
analysis_fft_size: 4096
frame_rate: 12
strategy: multi-resolution
display_bins: 2048
display_fps: 12
derived_detection: auto
refinement:
enabled: true
max_concurrent: 20
detail_fft_size: 4096
min_candidate_snr_db: -2
min_span_hz: 50000
max_span_hz: 200000
auto_span: true
resources:
prefer_gpu: true
max_refinement_jobs: 20
max_recording_streams: 20
max_decode_jobs: 24
decision_hold_ms: 2500
detector:
threshold_db: -60
min_duration_ms: 500
hold_ms: 1500
ema_alpha: 0.025
hysteresis_db: 10
min_stable_frames: 4
gap_tolerance_ms: 2000
cfar_mode: GOSCA
cfar_guard_hz: 200000
cfar_train_hz: 100000
cfar_guard_cells: 3
cfar_train_cells: 24
cfar_rank: 36
cfar_scale_db: 23
cfar_wrap_around: true
edge_margin_db: 6
max_signal_bw_hz: 260000
merge_gap_hz: 20000
class_history_size: 10
class_switch_ratio: 0.6
recorder:
enabled: false
min_snr_db: 0
min_duration: 500ms
max_duration: 300s
preroll_ms: 500
record_iq: false
record_audio: true
auto_demod: true
auto_decode: false
max_disk_mb: 0
output_dir: data/recordings
class_filter: []
ring_seconds: 12
deemphasis_us: 50
extraction_fir_taps: 101
extraction_bw_mult: 1.35
debug_live_audio: false
decoder:
ft8_cmd: C:/WSJT/wsjtx-2.7.0-rc6/bin/jt9.exe -8 {audio}
wspr_cmd: C:/WSJT/wsjtx-2.7.0-rc6/bin/wsprd.exe {audio}
dmr_cmd: tools/dsd-neo/bin/dsd-neo.exe -fs -i {audio} -s {sr} -o null
dstar_cmd: tools/dsd-neo/bin/dsd-neo.exe -fd -i {audio} -s {sr} -o null
fsk_cmd: tools/fsk/fsk_decoder --iq {iq} --sample-rate {sr}
psk_cmd: tools/psk/psk_decoder --iq {iq} --sample-rate {sr}
debug:
audio_dump_enabled: false
cpu_monitoring: false
telemetry:
enabled: true
heavy_enabled: false
heavy_sample_every: 12
metric_sample_every: 8
metric_history_max: 6000
event_history_max: 1500
retention_seconds: 900
persist_enabled: false
persist_dir: debug/telemetry
rotate_mb: 16
keep_files: 8
logging:
level: error
categories: []
rate_limit_ms: 1000
stdout: true
stdout_color: true
file: logs/trace.log
file_level: error
time_format: "15:04:05"
disable_time: false
web_addr: :8080
event_path: data/events.jsonl
frame_rate: 12
waterfall_lines: 200
web_root: web

+ 20
- 5
config.yaml Просмотреть файл

@@ -248,14 +248,29 @@ decoder:
dstar_cmd: tools/dsd-neo/bin/dsd-neo.exe -fd -i {audio} -s {sr} -o null
fsk_cmd: tools/fsk/fsk_decoder --iq {iq} --sample-rate {sr}
psk_cmd: tools/psk/psk_decoder --iq {iq} --sample-rate {sr}
debug:
audio_dump_enabled: false
cpu_monitoring: false
telemetry:
enabled: true
heavy_enabled: false
heavy_sample_every: 12
metric_sample_every: 8
metric_history_max: 6000
event_history_max: 1500
retention_seconds: 900
persist_enabled: true
persist_dir: debug/telemetry
rotate_mb: 16
keep_files: 8
logging:
level: debug
categories: [capture, extract, demod, resample, drop, ws, boundary]
rate_limit_ms: 500
level: error
categories: []
rate_limit_ms: 1000
stdout: true
stdout_color: true
file: logs/trace.log
file_level: debug
file: ""
file_level: error
time_format: "15:04:05"
disable_time: false
web_addr: :8080


+ 1077
- 0
docs/audio-click-debug-notes-2026-03-24.md
Разница между файлами не показана из-за своего большого размера
Просмотреть файл


+ 48
- 0
docs/gpu-streaming-refactor-plan-2026-03-25.md Просмотреть файл

@@ -0,0 +1,48 @@
# GPU Streaming Refactor Plan (2026-03-25)

## Goal
Replace the current overlap+trim GPU extractor model with a true stateful per-signal streaming architecture, and build a corrected CPU oracle/reference path for validation.

## Non-negotiables
- No production start-index-only patch.
- No production overlap-prepend + trim continuity model.
- Exact integer decimation only in the new streaming production path.
- Persistent per-signal state must include NCO phase, FIR history, and decimator phase/residue.
- GPU validation must compare against a corrected CPU oracle, not the legacy CPU fallback.

## Work order
1. Introduce explicit stateful streaming types in `gpudemod`.
2. Add a clean CPU oracle implementation and monolithic-vs-chunked tests.
3. Add per-signal state ownership in batch runner.
4. Implement new streaming extractor semantics in Go using NEW IQ samples only.
5. Replace legacy GPU-path assumptions (rounding decimation, overlap-prepend, trim-defined validity) in the new path.
6. Add production telemetry that proves state continuity (`phase_count`, `history_len`, `n_out`, reference error).
7. Keep legacy path isolated only for temporary comparison if needed.

## Initial files in scope
- `internal/demod/gpudemod/batch.go`
- `internal/demod/gpudemod/batch_runner.go`
- `internal/demod/gpudemod/batch_runner_windows.go`
- `internal/demod/gpudemod/kernels.cu`
- `internal/demod/gpudemod/native/exports.cu`
- `cmd/sdrd/helpers.go`

## Immediate implementation strategy
### Phase 1
- Create explicit streaming state structs in Go.
- Add CPU oracle/reference path with exact semantics and tests.
- Introduce exact integer-decimation checks.

### Phase 2
- Rework batch runner to own persistent per-signal state.
- Add config-hash-based resets.
- Stop modeling continuity via overlap tail in the new path.

### Phase 3
- Introduce a real streaming GPU entry path that consumes NEW shifted samples plus carried state.
- Move to a stateful polyphase decimator model.

## Validation expectations
- CPU oracle monolithic == CPU oracle chunked within tolerance.
- GPU streaming output == CPU oracle chunked within tolerance.
- Former periodic block-boundary clicks gone in real-world testing.

+ 196
- 0
docs/known-issues.md Просмотреть файл

@@ -0,0 +1,196 @@
# Known Issues

This file tracks durable open engineering issues that remain after the 2026-03-25 audio-click fix.

Primary source:
- `docs/open-issues-report-2026-03-25.json`

Status values used here:
- `open`
- `deferred`
- `info`

---

## High Priority

### OI-02 — `lastDiscrimIQ` missing from `dspStateSnapshot`
- Status: `open`
- Severity: High
- Category: state-continuity
- File: `internal/recorder/streamer.go`
- Summary: FM discriminator bridging state is not preserved across `captureDSPState()` / `restoreDSPState()`, so recording segment splits can lose the final IQ sample and create a micro-click at the segment boundary.
- Recommended fix: add `lastDiscrimIQ` and `lastDiscrimIQSet` to `dspStateSnapshot`.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-02)

### OI-03 — CPU oracle path not yet usable as validation baseline
- Status: `open`
- Severity: High
- Category: architecture
- File: `cmd/sdrd/streaming_refactor.go`, `internal/demod/gpudemod/cpu_oracle.go`
- Summary: the CPU oracle exists, but the production comparison/integration path is not trusted yet. That means GPU-path regressions still cannot be checked automatically with confidence.
- Recommended fix: repair oracle integration and restore GPU-vs-CPU validation flow.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-03)

### OI-18 — planned C2-C validation gate never completed
- Status: `open`
- Severity: Info
- Category: architecture
- File: `docs/audio-click-debug-notes-2026-03-24.md`
- Summary: the final native streaming path works in practice, but the planned formal GPU-vs-oracle validation gate was never completed.
- Recommended fix: complete this together with OI-03.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-18)

---

## Medium Priority

### OI-14 — no regression test for `allIQ` immutability through spectrum/detection pipeline
- Status: `open`
- Severity: Low
- Category: test-coverage
- File: `cmd/sdrd/pipeline_runtime.go`
- Summary: the `IQBalance` aliasing bug showed that shared-buffer mutation can slip in undetected. There is still no test asserting that `allIQ` remains unchanged after capture/detection-side processing.
- Recommended fix: add an integration test that compares `allIQ` before and after the relevant pipeline stage.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-14)

### OI-15 — very low test coverage for `processSnippet` audio pipeline
- Status: `open`
- Severity: Low
- Category: test-coverage
- File: `internal/recorder/streamer.go`
- Summary: the main live audio pipeline still lacks focused tests for boundary continuity, WFM mono/stereo behavior, resampling, and demod-path regressions.
- Recommended fix: add synthetic fixtures and continuity-oriented tests around repeated `processSnippet` calls.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-15)

### OI-07 — taps are recalculated every frame
- Status: `open`
- Severity: Medium
- Category: correctness
- File: `internal/demod/gpudemod/stream_state.go`
- Summary: FIR/polyphase taps are recomputed every frame even when parameters do not change, which is unnecessary work and makes it easier for host/GPU tap state to drift apart.
- Recommended fix: only rebuild taps when tap-relevant inputs actually change.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-07)

### OI-17 — bandwidth changes can change Go-side taps without GPU tap re-upload
- Status: `open`
- Severity: Low-Medium
- Category: correctness
- File: `internal/demod/gpudemod/streaming_gpu_native_prepare.go`, `internal/demod/gpudemod/stream_state.go`
- Summary: after the config-hash fix, a bandwidth change may rebuild taps on the Go side while the GPU still keeps older uploaded taps unless a reset happens.
- Recommended fix: add a separate tap-change detection/re-upload path without forcing full extractor reset.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-17)

### OI-09 — streaming feature flags are compile-time constants
- Status: `open`
- Severity: Medium
- Category: architecture
- File: `cmd/sdrd/streaming_refactor.go`, `internal/demod/gpudemod/streaming_gpu_modes.go`
- Summary: switching between production/oracle/native-host modes still requires code changes and rebuilds, which makes field debugging and A/B validation harder than necessary.
- Recommended fix: expose these as config or environment-driven switches.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-09)

### OI-05 — feed channel is shallow and can drop frames under pressure
- Status: `open`
- Severity: Medium
- Category: reliability
- File: `internal/recorder/streamer.go`
- Summary: `feedCh` has a buffer of only 2. Under heavier processing or debug load, dropped feed messages can create audible gaps.
- Recommended fix: increase channel depth or redesign backpressure behavior.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-05)

### OI-06 — legacy overlap/trim extractor path is now mostly legacy baggage
- Status: `deferred`
- Severity: Medium
- Category: dead-code
- File: `cmd/sdrd/helpers.go`
- Summary: the old overlap/trim path is now mainly fallback/legacy code and adds complexity plus old instrumentation noise.
- Recommended fix: isolate, simplify, or remove it once the production path and fallback strategy are formally settled.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-06)

### OI-04 — telemetry history storage still uses append+copy trim
- Status: `deferred`
- Severity: Medium
- Category: telemetry
- File: `internal/telemetry/telemetry.go`
- Summary: heavy telemetry can still create avoidable allocation/copy pressure because history trimming is O(n) and happens under lock.
- Recommended fix: replace with a ring-buffer design.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-04)

---

## Lower Priority / Nice-to-Have

### OI-01 — `DCBlocker.Apply(allIQ)` still mutates extraction input in-place
- Status: `deferred`
- Severity: High
- Category: data-integrity
- File: `cmd/sdrd/pipeline_runtime.go`
- Summary: unlike the old `IQBalance` bug this does not create a boundary artifact, but it does mean live extraction and recorded/replayed data are not semantically identical.
- Recommended fix: clarify the contract or move to immutable/copy-based handling.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-01)

### OI-08 — WFM audio LPF could reject pilot more strongly
- Status: `deferred`
- Severity: Medium
- Category: audio-quality
- File: `internal/recorder/streamer.go`
- Summary: the current 15 kHz LPF is good enough functionally, but a steeper filter could further improve pilot suppression.
- Recommended fix: more taps or a dedicated pilot notch.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-08)

### OI-10 — `demod.wav` debug dumps can clip and mislead analysis
- Status: `deferred`
- Severity: Medium
- Category: correctness
- File: `internal/recorder/streamer.go`, `internal/recorder/wavwriter.go`
- Summary: raw discriminator output can exceed the WAV writer's `[-1,+1]` clip range, so debug dumps can show artifacts that are not part of the real downstream audio path.
- Recommended fix: scale by `1/pi` before dumping or use float WAV output.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-10)

### OI-11 — browser AudioContext resync still causes audible micro-gaps
- Status: `deferred`
- Severity: Low
- Category: reliability
- File: `web/app.js`
- Summary: underrun recovery is softened with a fade-in, but repeated resyncs still create audible stutter on the browser side.
- Recommended fix: prefer the AudioWorklet/ring-player path wherever possible.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-11)

### OI-12 — tiny per-frame tail copy for boundary telemetry
- Status: `info`
- Severity: Low
- Category: performance
- File: `cmd/sdrd/pipeline_runtime.go`
- Summary: the last-32-sample copy is trivial and not urgent, but it is one more small allocation in a path that already has several.
- Recommended fix: none needed unless a broader allocation cleanup happens.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-12)

### OI-13 — temporary patch artifacts should not live in the repo long-term
- Status: `deferred`
- Severity: Low
- Category: dead-code
- File: `patches/*`
- Summary: reviewer/debug patch artifacts were useful during the investigation, but they should either be removed or archived under docs rather than kept as loose patch files.
- Recommended fix: delete or archive them once no longer needed.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-13)

### OI-16 — `config.autosave.yaml` can re-enable unwanted debug telemetry after restart
- Status: `deferred`
- Severity: Low
- Category: config
- File: `config.autosave.yaml`
- Summary: autosave can silently restore debug-heavy telemetry settings after restart and distort future runs.
- Recommended fix: stop persisting debug telemetry knobs to autosave or explicitly ignore them.
- Source: `docs/open-issues-report-2026-03-25.json` (OI-16)

---

## Suggested next execution order

1. Fix OI-02 (`lastDiscrimIQ` snapshot/restore)
2. Repair OI-03 and close OI-18 (oracle + formal validation path)
3. Add OI-14 and OI-15 regression tests
4. Consolidate OI-07 and OI-17 (tap rebuild / tap upload logic)
5. Expose OI-09 feature flags via config or env
6. Revisit OI-05 / OI-06 / OI-04 when doing reliability/cleanup work

+ 711
- 0
docs/telemetry-api.md Просмотреть файл

@@ -0,0 +1,711 @@
# Telemetry API Reference

This document describes the server-side telemetry collector, its runtime configuration, and the HTTP API exposed by `sdrd`.

The telemetry system is intended for debugging and performance analysis of the SDR pipeline, especially around source cadence, extraction, DSP timing, boundary artifacts, queue pressure, and other runtime anomalies.

## Goals

The telemetry layer gives you three different views of runtime state:

1. **Live snapshot**
- Current counters, gauges, distributions, recent events, and collector status.
2. **Historical metrics**
- Timestamped metric samples that can be filtered by name, prefix, or tags.
3. **Historical events**
- Structured anomalies / warnings / debug events with optional fields.

It is designed to be lightweight in normal operation and more detailed when `heavy_enabled` is turned on.

---

## Base URLs

All telemetry endpoints live under:

- `/api/debug/telemetry/live`
- `/api/debug/telemetry/history`
- `/api/debug/telemetry/events`
- `/api/debug/telemetry/config`

Responses are JSON.

---

## Data model

### Metric types

Telemetry metrics are stored in three logical groups:

- **counter**
- Accumulating values, usually incremented over time.
- **gauge**
- Latest current value.
- **distribution**
- Observed numeric samples with summary stats.

A historical metric sample is returned as:

```json
{
"ts": "2026-03-25T12:00:00Z",
"name": "stage.extract_stream.duration_ms",
"type": "distribution",
"value": 4.83,
"tags": {
"stage": "extract_stream",
"signal_id": "1"
}
}
```

### Events

Telemetry events are structured anomaly/debug records:

```json
{
"id": 123,
"ts": "2026-03-25T12:00:02Z",
"name": "demod_boundary",
"level": "warn",
"message": "boundary discontinuity detected",
"tags": {
"signal_id": "1",
"stage": "demod"
},
"fields": {
"d2": 0.3358,
"index": 25
}
}
```

### Tags

Tags are string key/value metadata used for filtering and correlation.

Common tag keys already supported by the HTTP layer:

- `signal_id`
- `session_id`
- `stage`
- `trace_id`
- `component`

You can also filter on arbitrary tags via `tag_<key>=<value>` query parameters.

---

## Endpoint: `GET /api/debug/telemetry/live`

Returns a live snapshot of the in-memory collector state.

### Response shape

```json
{
"now": "2026-03-25T12:00:05Z",
"started_at": "2026-03-25T11:52:10Z",
"uptime_ms": 472500,
"config": {
"enabled": true,
"heavy_enabled": false,
"heavy_sample_every": 12,
"metric_sample_every": 2,
"metric_history_max": 12000,
"event_history_max": 4000,
"retention": 900000000000,
"persist_enabled": false,
"persist_dir": "debug/telemetry",
"rotate_mb": 16,
"keep_files": 8
},
"counters": [
{
"name": "source.resets",
"value": 1,
"tags": {
"component": "source"
}
}
],
"gauges": [
{
"name": "source.buffer_samples",
"value": 304128,
"tags": {
"component": "source"
}
}
],
"distributions": [
{
"name": "dsp.frame.duration_ms",
"count": 96,
"min": 82.5,
"max": 212.4,
"mean": 104.8,
"last": 98.3,
"p95": 149.2,
"tags": {
"stage": "dsp"
}
}
],
"recent_events": [],
"status": {
"source_state": "running"
}
}
```

### Notes

- `counters`, `gauges`, and `distributions` are sorted by metric name.
- `recent_events` contains the most recent in-memory event slice.
- `status` is optional and contains arbitrary runtime status published by code using `SetStatus(...)`.
- If telemetry is unavailable, the server returns a small JSON object instead of a full snapshot.

### Typical uses

- Check whether telemetry is enabled.
- Look for timing hotspots in `*.duration_ms` distributions.
- Inspect current queue or source gauges.
- See recent anomaly events without querying history.

---

## Endpoint: `GET /api/debug/telemetry/history`

Returns historical metric samples from in-memory history and, optionally, persisted JSONL files.

### Response shape

```json
{
"items": [
{
"ts": "2026-03-25T12:00:01Z",
"name": "stage.extract_stream.duration_ms",
"type": "distribution",
"value": 5.2,
"tags": {
"stage": "extract_stream",
"signal_id": "2"
}
}
],
"count": 1
}
```

### Supported query parameters

#### Time filters

- `since`
- `until`

Accepted formats:

- Unix seconds
- Unix milliseconds
- RFC3339
- RFC3339Nano

Examples:

- `?since=1711368000`
- `?since=1711368000123`
- `?since=2026-03-25T12:00:00Z`

#### Result shaping

- `limit`
  - Maximum number of returned items. Defaults to 500 when omitted.
  - Values above 5000 are clamped down by the collector query layer.

#### Name filters

- `name=<exact_metric_name>`
- `prefix=<metric_name_prefix>`

Examples:

- `?name=source.read.duration_ms`
- `?prefix=stage.`
- `?prefix=iq.extract.`

#### Tag filters

Special convenience query params map directly to tag filters:

- `signal_id`
- `session_id`
- `stage`
- `trace_id`
- `component`

Arbitrary tag filters:

- `tag_<key>=<value>`

Examples:

- `?signal_id=1`
- `?stage=extract_stream`
- `?tag_path=gpu`
- `?tag_zone=broadcast`

#### Persistence control

- `include_persisted=true|false`
- Default: `true`

When enabled and persistence is active, the server reads matching data from rotated JSONL telemetry files in addition to in-memory history.

### Notes

- Results are sorted by timestamp ascending.
- If `limit` is hit, the most recent matching items are retained.
- Exact retention depends on both in-memory retention and persisted file availability.
- A small set of boundary-related IQ metrics is force-stored regardless of the normal metric sample cadence.

### Typical queries

Get all stage timing since a specific start:

```text
/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=stage.
```

Get extraction metrics for a single signal:

```text
/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=extract.&signal_id=2
```

Get source cadence metrics only from in-memory history:

```text
/api/debug/telemetry/history?prefix=source.&include_persisted=false
```

---

## Endpoint: `GET /api/debug/telemetry/events`

Returns historical telemetry events from memory and, optionally, persisted storage.

### Response shape

```json
{
"items": [
{
"id": 991,
"ts": "2026-03-25T12:00:03Z",
"name": "source_reset",
"level": "warn",
"message": "source reader reset observed",
"tags": {
"component": "source"
},
"fields": {
"reason": "short_read"
}
}
],
"count": 1
}
```

### Supported query parameters

All `history` filters are also supported here, plus:

- `level=<debug|info|warn|error|...>`

Examples:

- `?since=2026-03-25T12:00:00Z&level=warn`
- `?prefix=audio.&signal_id=1`
- `?name=demod_boundary&signal_id=1`

### Notes

- Event matching supports `name`, `prefix`, `level`, time range, and tags.
- Event `level` matching is case-insensitive.
- Results are timestamp-sorted ascending.

### Typical queries

Get warnings during a reproduction run:

```text
/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&level=warn
```

Get boundary-related events for one signal:

```text
/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&signal_id=1&prefix=demod_
```

---

## Endpoint: `GET /api/debug/telemetry/config`

Returns both:

1. the active collector configuration, and
2. the current runtime config under `debug.telemetry`

### Response shape

```json
{
"collector": {
"enabled": true,
"heavy_enabled": false,
"heavy_sample_every": 12,
"metric_sample_every": 2,
"metric_history_max": 12000,
"event_history_max": 4000,
"retention": 900000000000,
"persist_enabled": false,
"persist_dir": "debug/telemetry",
"rotate_mb": 16,
"keep_files": 8
},
"config": {
"enabled": true,
"heavy_enabled": false,
"heavy_sample_every": 12,
"metric_sample_every": 2,
"metric_history_max": 12000,
"event_history_max": 4000,
"retention_seconds": 900,
"persist_enabled": false,
"persist_dir": "debug/telemetry",
"rotate_mb": 16,
"keep_files": 8
}
}
```

### Important distinction

- `collector.retention` is a Go duration serialized in nanoseconds.
- `config.retention_seconds` is the config-facing field used by YAML and the POST update API.

If you are writing tooling, prefer `config.retention_seconds` for human-facing config edits.

---

## Endpoint: `POST /api/debug/telemetry/config`

Updates telemetry settings at runtime and writes them back via the autosave config path.

### Request body

All fields are optional. Only provided fields are changed.

```json
{
"enabled": true,
"heavy_enabled": true,
"heavy_sample_every": 8,
"metric_sample_every": 1,
"metric_history_max": 20000,
"event_history_max": 6000,
"retention_seconds": 1800,
"persist_enabled": true,
"persist_dir": "debug/telemetry",
"rotate_mb": 32,
"keep_files": 12
}
```

### Response shape

```json
{
"ok": true,
"collector": {
"enabled": true,
"heavy_enabled": true,
"heavy_sample_every": 8,
"metric_sample_every": 1,
"metric_history_max": 20000,
"event_history_max": 6000,
"retention": 1800000000000,
"persist_enabled": true,
"persist_dir": "debug/telemetry",
"rotate_mb": 32,
"keep_files": 12
},
"config": {
"enabled": true,
"heavy_enabled": true,
"heavy_sample_every": 8,
"metric_sample_every": 1,
"metric_history_max": 20000,
"event_history_max": 6000,
"retention_seconds": 1800,
"persist_enabled": true,
"persist_dir": "debug/telemetry",
"rotate_mb": 32,
"keep_files": 12
}
}
```

### Persistence behavior

A POST updates:

- the runtime manager snapshot/config
- the in-process collector config
- the autosave config file via `config.Save(...)`

That means these updates are runtime-effective immediately and also survive restarts through autosave, unless manually reverted.

### Error cases

- Invalid JSON -> `400 Bad Request`
- Invalid collector reconfiguration -> `400 Bad Request`
- Telemetry unavailable -> `503 Service Unavailable`

---

## Configuration fields (`debug.telemetry`)

Telemetry config lives under:

```yaml
debug:
telemetry:
enabled: true
heavy_enabled: false
heavy_sample_every: 12
metric_sample_every: 2
metric_history_max: 12000
event_history_max: 4000
retention_seconds: 900
persist_enabled: false
persist_dir: debug/telemetry
rotate_mb: 16
keep_files: 8
```

### Field reference

#### `enabled`
Master on/off switch for telemetry collection.

If false:
- metrics are not recorded
- events are not recorded
- live snapshot remains effectively empty/minimal

#### `heavy_enabled`
Enables more expensive / more detailed telemetry paths that should not be left on permanently unless needed.

Use this for deep extractor/IQ/boundary debugging.

#### `heavy_sample_every`
Sampling cadence for heavy telemetry.

- `1` means every eligible heavy sample
- higher numbers reduce cost by sampling less often

#### `metric_sample_every`
Sampling cadence for normal historical metric point storage.

Collector summaries still update live, but historical storage becomes less dense when this value is greater than 1.

#### `metric_history_max`
Maximum number of in-memory historical metric samples retained.

#### `event_history_max`
Maximum number of in-memory telemetry events retained.

#### `retention_seconds`
Time-based in-memory retention window.

Older in-memory metrics/events are trimmed once they fall outside this retention period.

#### `persist_enabled`
When enabled, telemetry metrics/events are also appended to rotated JSONL files.

#### `persist_dir`
Directory where rotated telemetry JSONL files are written.

Default:

- `debug/telemetry`

#### `rotate_mb`
Approximate JSONL file rotation threshold in megabytes.

#### `keep_files`
How many rotated telemetry files to retain in `persist_dir`.

Older files beyond this count are pruned.

---

## Collector behavior and caveats

### In-memory vs persisted data

The query endpoints can read from both:

- current in-memory collector state/history
- persisted JSONL files

This means a request may return data older than current in-memory retention if:

- `persist_enabled=true`, and
- `include_persisted=true`

### Sampling behavior

Not every observation necessarily becomes a historical metric point.

The collector:

- always updates live counters/gauges/distributions while enabled
- stores historical points according to `metric_sample_every`
- force-stores selected boundary IQ metrics even when sampling would normally skip them

So the live snapshot and historical series density are intentionally different.

### Distribution summaries

Distribution values in the live snapshot include:

- `count`
- `min`
- `max`
- `mean`
- `last`
- `p95`

The p95 estimate is based on the collector's bounded rolling sample buffer, not an unbounded full-history quantile computation.

### Config serialization detail

The collector's `retention` field is a Go duration. In JSON this appears as an integer nanosecond count.

This is expected.

---

## Recommended workflows

### Fast low-overhead runtime watch

Use:

- `enabled=true`
- `heavy_enabled=false`
- `persist_enabled=false` or `true` if you want an archive

Then query:

- `/api/debug/telemetry/live`
- `/api/debug/telemetry/history?prefix=stage.`
- `/api/debug/telemetry/events?level=warn`

### 5-10 minute anomaly capture

Suggested settings:

- `enabled=true`
- `heavy_enabled=false`
- `persist_enabled=true`
- moderate `metric_sample_every`

Then:

1. note start time
2. reproduce workload
3. fetch live snapshot
4. inspect warning events
5. inspect `stage.*`, `streamer.*`, and `source.*` history

### Deep extractor / boundary investigation

Temporarily enable:

- `heavy_enabled=true`
- `heavy_sample_every` > 1 unless you really need every sample
- `persist_enabled=true`

Then inspect:

- `iq.*`
- `extract.*`
- `audio.*`
- boundary/anomaly events for specific `signal_id` or `session_id`

Turn heavy telemetry back off once done.

---

## Example requests

### Fetch live snapshot

```bash
curl http://localhost:8080/api/debug/telemetry/live
```

### Fetch stage timings from the last 10 minutes

```bash
curl "http://localhost:8080/api/debug/telemetry/history?since=2026-03-25T12:00:00Z&prefix=stage."
```

### Fetch source metrics for one signal

```bash
curl "http://localhost:8080/api/debug/telemetry/history?prefix=source.&signal_id=1"
```

### Fetch warning events only

```bash
curl "http://localhost:8080/api/debug/telemetry/events?since=2026-03-25T12:00:00Z&level=warn"
```

### Fetch events with a custom tag filter

```bash
curl "http://localhost:8080/api/debug/telemetry/events?tag_zone=broadcast"
```

### Enable persistence and heavy telemetry temporarily

```bash
curl -X POST http://localhost:8080/api/debug/telemetry/config \
-H "Content-Type: application/json" \
-d '{
"heavy_enabled": true,
"heavy_sample_every": 8,
"persist_enabled": true
}'
```

---

## Related docs

- `README.md` - high-level project overview and endpoint summary
- `docs/telemetry-debug-runbook.md` - quick operational runbook for short debug sessions
- `internal/telemetry/telemetry.go` - collector implementation details
- `cmd/sdrd/http_handlers.go` - HTTP wiring for telemetry endpoints

+ 100
- 0
docs/telemetry-debug-runbook.md Просмотреть файл

@@ -0,0 +1,100 @@
# Debug Telemetry Runbook

This project now includes structured server-side telemetry for the audio/DSP pipeline.

## Endpoints

- `GET /api/debug/telemetry/live`
- Current counters/gauges/distributions and recent events.
- `GET /api/debug/telemetry/history`
- Historical metric samples.
- Query params:
- `since`, `until`: unix seconds/ms or RFC3339
- `limit`
- `name`, `prefix`
- `signal_id`, `session_id`, `stage`, `trace_id`, `component`
- `tag_<key>=<value>` for arbitrary tag filters
- `include_persisted=true|false`
- `GET /api/debug/telemetry/events`
- Historical events/anomalies.
- Same filters as history plus `level`.
- `GET /api/debug/telemetry/config`
- Active telemetry config from runtime + collector.
- `POST /api/debug/telemetry/config`
- Runtime config update (also saved to autosave config).

## Config knobs

`debug.telemetry` in config:

- `enabled`
- `heavy_enabled`
- `heavy_sample_every`
- `metric_sample_every`
- `metric_history_max`
- `event_history_max`
- `retention_seconds`
- `persist_enabled`
- `persist_dir`
- `rotate_mb`
- `keep_files`

Persisted JSONL files rotate in `persist_dir` (default: `debug/telemetry`).

## 5-10 minute debug flow

1. Keep `enabled=true`, `heavy_enabled=false`, `persist_enabled=true`.
2. Run workload for 5-10 minutes.
3. Pull live state:
- `GET /api/debug/telemetry/live`
4. Pull anomalies:
- `GET /api/debug/telemetry/events?since=<start>&level=warn`
5. Pull pipeline timing and queue/backpressure:
- `GET /api/debug/telemetry/history?since=<start>&prefix=stage.`
- `GET /api/debug/telemetry/history?since=<start>&prefix=streamer.`
6. If IQ boundary issues persist, temporarily set `heavy_enabled=true` (keep sampling coarse with `heavy_sample_every` > 1), rerun, then inspect `iq.*` metrics and `audio.*` anomalies by `signal_id`/`session_id`.

## 2026-03-25 audio click incident — final resolved summary

Status: **SOLVED**

The March 2026 live-audio click investigation ultimately converged on a combination of three real root causes plus two secondary fixes:

### Root causes

1. **Shared `allIQ` corruption by `IQBalance` aliasing**
- `cmd/sdrd/pipeline_runtime.go`
- `survIQ` aliased the tail of `allIQ`
- `dsp.IQBalance(survIQ)` modified `allIQ` in-place
- extractor then saw a corrupted boundary inside the shared buffer
- final fix: copy `survIQ` before `IQBalance`

2. **Per-frame extractor reset due to `StreamingConfigHash` jitter**
- `internal/demod/gpudemod/streaming_types.go`
- smoothed tuning values changed slightly every frame
- offset/bandwidth in the hash caused repeated state resets
- final fix: hash only structural parameters

3. **Streaming path batch rejection for non-WFM exact-decimation mismatch**
- `cmd/sdrd/streaming_refactor.go`
- one non-WFM signal could reject the whole batch and silently force fallback to the legacy path
- final fix: choose nearest exact integer-divisor output rate and keep fallback logging visible

### Secondary fixes

- FM discriminator cross-block carry in `internal/recorder/streamer.go`
- WFM mono/plain-path 15 kHz audio lowpass in `internal/recorder/streamer.go`

### Verification notes

- major discontinuities dropped sharply after the config-hash fix
- remaining fine clicks were eliminated only after the `IQBalance` aliasing fix in `pipeline_runtime.go`
- final confirmation was by operator listening test, backed by prior telemetry and WAV analysis

### Practical lesson

When the same captured `allIQ` buffer feeds both:
- surveillance/detail analysis
- and extraction/streaming

then surveillance-side DSP helpers must not mutate a shared sub-slice in-place unless that mutation is intentionally part of the extraction contract.

+ 62
- 0
internal/config/config.go Просмотреть файл

@@ -96,6 +96,26 @@ type DecoderConfig struct {
PSKCmd string `yaml:"psk_cmd" json:"psk_cmd"`
}

// DebugConfig groups debug-oriented runtime settings.
// It is serialized under the top-level `debug` key in YAML/JSON config
// (see the `Debug DebugConfig` field on Config).
type DebugConfig struct {
AudioDumpEnabled bool `yaml:"audio_dump_enabled" json:"audio_dump_enabled"` // gates audio debug dumps (name-derived; confirm against usage sites)
CPUMonitoring bool `yaml:"cpu_monitoring" json:"cpu_monitoring"` // gates CPU monitoring (name-derived; confirm against usage sites)
Telemetry TelemetryConfig `yaml:"telemetry" json:"telemetry"` // telemetry collector settings; documented in docs/telemetry-api.md
}

// TelemetryConfig is the config-facing view of the telemetry collector
// settings, living under `debug.telemetry` in YAML/JSON. Zero/empty values
// are replaced with defaults by applyDefaults (e.g. HeavySampleEvery=12,
// MetricSampleEvery=2, RetentionSeconds=900, PersistDir="debug/telemetry").
// Full field semantics are documented in docs/telemetry-api.md.
type TelemetryConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"` // master on/off switch for telemetry collection
HeavyEnabled bool `yaml:"heavy_enabled" json:"heavy_enabled"` // enables more expensive/detailed telemetry paths; not meant to stay on permanently
HeavySampleEvery int `yaml:"heavy_sample_every" json:"heavy_sample_every"` // heavy-telemetry sampling cadence; 1 = every eligible sample, higher = sparser
MetricSampleEvery int `yaml:"metric_sample_every" json:"metric_sample_every"` // cadence for storing historical metric points; live summaries always update
MetricHistoryMax int `yaml:"metric_history_max" json:"metric_history_max"` // max in-memory historical metric samples retained
EventHistoryMax int `yaml:"event_history_max" json:"event_history_max"` // max in-memory telemetry events retained
RetentionSeconds int `yaml:"retention_seconds" json:"retention_seconds"` // time-based in-memory retention window, in seconds (collector-side `retention` is nanoseconds)
PersistEnabled bool `yaml:"persist_enabled" json:"persist_enabled"` // when true, metrics/events are also appended to rotated JSONL files
PersistDir string `yaml:"persist_dir" json:"persist_dir"` // directory for rotated telemetry JSONL files
RotateMB int `yaml:"rotate_mb" json:"rotate_mb"` // approximate JSONL rotation threshold, in megabytes
KeepFiles int `yaml:"keep_files" json:"keep_files"` // number of rotated files retained in PersistDir; older files are pruned
}

type PipelineGoalConfig struct {
Intent string `yaml:"intent" json:"intent"`
MonitorStartHz float64 `yaml:"monitor_start_hz" json:"monitor_start_hz"`
@@ -169,6 +189,7 @@ type Config struct {
Detector DetectorConfig `yaml:"detector" json:"detector"`
Recorder RecorderConfig `yaml:"recorder" json:"recorder"`
Decoder DecoderConfig `yaml:"decoder" json:"decoder"`
Debug DebugConfig `yaml:"debug" json:"debug"`
Logging LogConfig `yaml:"logging" json:"logging"`
WebAddr string `yaml:"web_addr" json:"web_addr"`
EventPath string `yaml:"event_path" json:"event_path"`
@@ -421,6 +442,23 @@ func Default() Config {
ExtractionBwMult: 1.2,
},
Decoder: DecoderConfig{},
Debug: DebugConfig{
AudioDumpEnabled: false,
CPUMonitoring: false,
Telemetry: TelemetryConfig{
Enabled: true,
HeavyEnabled: false,
HeavySampleEvery: 12,
MetricSampleEvery: 2,
MetricHistoryMax: 12000,
EventHistoryMax: 4000,
RetentionSeconds: 900,
PersistEnabled: false,
PersistDir: "debug/telemetry",
RotateMB: 16,
KeepFiles: 8,
},
},
Logging: LogConfig{
Level: "informal",
Categories: []string{},
@@ -664,6 +702,30 @@ func applyDefaults(cfg Config) Config {
if cfg.Recorder.ExtractionBwMult <= 0 {
cfg.Recorder.ExtractionBwMult = 1.2
}
if cfg.Debug.Telemetry.HeavySampleEvery <= 0 {
cfg.Debug.Telemetry.HeavySampleEvery = 12
}
if cfg.Debug.Telemetry.MetricSampleEvery <= 0 {
cfg.Debug.Telemetry.MetricSampleEvery = 2
}
if cfg.Debug.Telemetry.MetricHistoryMax <= 0 {
cfg.Debug.Telemetry.MetricHistoryMax = 12000
}
if cfg.Debug.Telemetry.EventHistoryMax <= 0 {
cfg.Debug.Telemetry.EventHistoryMax = 4000
}
if cfg.Debug.Telemetry.RetentionSeconds <= 0 {
cfg.Debug.Telemetry.RetentionSeconds = 900
}
if cfg.Debug.Telemetry.PersistDir == "" {
cfg.Debug.Telemetry.PersistDir = "debug/telemetry"
}
if cfg.Debug.Telemetry.RotateMB <= 0 {
cfg.Debug.Telemetry.RotateMB = 16
}
if cfg.Debug.Telemetry.KeepFiles <= 0 {
cfg.Debug.Telemetry.KeepFiles = 8
}
return cfg
}



+ 35
- 1
internal/demod/fm.go Просмотреть файл

@@ -4,6 +4,7 @@ import (
"math"

"sdr-wideband-suite/internal/dsp"
"sdr-wideband-suite/internal/logging"
)

type NFM struct{}
@@ -45,12 +46,45 @@ func fmDiscrim(iq []complex64) []float32 {
return nil
}
out := make([]float32, len(iq)-1)
maxAbs := 0.0
maxIdx := 0
largeSteps := 0
minMag := math.MaxFloat64
maxMag := 0.0
for i := 1; i < len(iq); i++ {
p := iq[i-1]
c := iq[i]
pmag := math.Hypot(float64(real(p)), float64(imag(p)))
cmag := math.Hypot(float64(real(c)), float64(imag(c)))
if pmag < minMag {
minMag = pmag
}
if cmag < minMag {
minMag = cmag
}
if pmag > maxMag {
maxMag = pmag
}
if cmag > maxMag {
maxMag = cmag
}
num := float64(real(p))*float64(imag(c)) - float64(imag(p))*float64(real(c))
den := float64(real(p))*float64(real(c)) + float64(imag(p))*float64(imag(c))
out[i-1] = float32(math.Atan2(num, den))
step := math.Atan2(num, den)
if a := math.Abs(step); a > maxAbs {
maxAbs = a
maxIdx = i - 1
}
if math.Abs(step) > 1.5 {
largeSteps++
}
out[i-1] = float32(step)
}
if logging.EnabledCategory("discrim") {
logging.Debug("discrim", "fm_meter", "iq_len", len(iq), "audio_len", len(out), "min_mag", minMag, "max_mag", maxMag, "max_abs_step", maxAbs, "max_idx", maxIdx, "large_steps", largeSteps)
if largeSteps > 0 {
logging.Warn("discrim", "fm_large_steps", "iq_len", len(iq), "large_steps", largeSteps, "max_abs_step", maxAbs, "max_idx", maxIdx, "min_mag", minMag, "max_mag", maxMag)
}
}
return out
}


+ 0
- 34
internal/demod/gpudemod/README.md Просмотреть файл

@@ -1,34 +0,0 @@
# gpudemod

Phase 1 CUDA demod scaffolding.

## Current state

- Standard Go builds use `gpudemod_stub.go` (`!cufft`).
- `cufft` builds allocate GPU buffers and cross the CGO/CUDA launch boundary.
- If CUDA launch wrappers are not backed by compiled kernels yet, the code falls back to CPU DSP.
- The shifted IQ path is already wired so a successful GPU freq-shift result can be copied back and reused immediately.
- Build orchestration should now be considered OS-specific; see `docs/build-cuda.md`.

## First real kernel

`kernels.cu` contains the first candidate implementation:
- `gpud_freq_shift_kernel`

This is **not compiled automatically yet** in the current environment because the machine currently lacks a CUDA compiler toolchain in PATH (`nvcc` not found).

## Next machine-side step

On a CUDA-capable dev machine with toolchain installed:

1. Compile `kernels.cu` into an object file and archive it into a linkable library
- helper script: `tools/build-gpudemod-kernel.ps1`
2. On Jan's Windows machine, the working kernel-build path currently relies on `nvcc` + MSVC `cl.exe` in PATH
3. Link `gpudemod_kernels.lib` into the `cufft` build
4. Replace `gpud_launch_freq_shift(...)` stub body with the real kernel launch
5. Validate copied-back shifted IQ against `dsp.FreqShift`
6. Only then move the next stage (FM discriminator) onto the GPU

## Why this is still useful

The runtime/buffer/recorder/fallback structure is already in place, so once kernel compilation is available, real acceleration can be inserted without another architecture rewrite.

+ 1
- 1
internal/demod/gpudemod/batch.go Просмотреть файл

@@ -6,7 +6,7 @@ type ExtractJob struct {
OffsetHz float64
BW float64
OutRate int
PhaseStart float64 // FreqShift starting phase (0 for stateless, carry over for streaming)
PhaseStart float64 // legacy batch phase field; retained only while migrating to streaming extractor semantics
}

// ExtractResult holds the output of a batch extraction including the ending


+ 13
- 4
internal/demod/gpudemod/batch_runner.go Просмотреть файл

@@ -10,10 +10,12 @@ type batchSlot struct {
}

type BatchRunner struct {
eng *Engine
slots []batchSlot
slotBufs []slotBuffers
eng *Engine
slots []batchSlot
slotBufs []slotBuffers
slotBufSize int // number of IQ samples the slot buffers were allocated for
streamState map[int64]*ExtractStreamState
nativeState map[int64]*nativeStreamingSignalState
}

func NewBatchRunner(maxSamples int, sampleRate int) (*BatchRunner, error) {
@@ -21,7 +23,11 @@ func NewBatchRunner(maxSamples int, sampleRate int) (*BatchRunner, error) {
if err != nil {
return nil, err
}
return &BatchRunner{eng: eng}, nil
return &BatchRunner{
eng: eng,
streamState: make(map[int64]*ExtractStreamState),
nativeState: make(map[int64]*nativeStreamingSignalState),
}, nil
}

func (r *BatchRunner) Close() {
@@ -29,9 +35,12 @@ func (r *BatchRunner) Close() {
return
}
r.freeSlotBuffers()
r.freeAllNativeStreamingStates()
r.eng.Close()
r.eng = nil
r.slots = nil
r.streamState = nil
r.nativeState = nil
}

func (r *BatchRunner) prepare(jobs []ExtractJob) {


+ 3
- 3
internal/demod/gpudemod/batch_runner_windows.go Просмотреть файл

@@ -160,9 +160,9 @@ func (r *BatchRunner) shiftFilterDecimateSlotParallel(iq []complex64, job Extrac
if bridgeMemcpyH2D(buf.dTaps, unsafe.Pointer(&taps[0]), tapsBytes) != 0 {
return 0, 0, errors.New("taps H2D failed")
}
decim := int(math.Round(float64(e.sampleRate) / float64(job.OutRate)))
if decim < 1 {
decim = 1
decim, err := ExactIntegerDecimation(e.sampleRate, job.OutRate)
if err != nil {
return 0, 0, err
}
nOut := n / decim
if nOut <= 0 {


Двоичные данные
internal/demod/gpudemod/build/gpudemod_kernels.lib Просмотреть файл


+ 47
- 0
internal/demod/gpudemod/compare.go Просмотреть файл

@@ -0,0 +1,47 @@
package gpudemod

import "math/cmplx"

// CompareStats summarizes the elementwise difference between two complex
// sample streams.
type CompareStats struct {
	MaxAbsErr float64 // largest |a[i]-b[i]| over the compared prefix
	RMSErr    float64 // root-mean-square of |a[i]-b[i]| over the compared prefix
	Count     int     // number of elements actually compared (min of the two lengths)
}

// CompareComplexSlices compares the common prefix of a and b and reports the
// maximum and RMS absolute error. Slices of unequal length are compared up to
// the shorter length; if either input is empty a zero CompareStats is returned.
func CompareComplexSlices(a []complex64, b []complex64) CompareStats {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	if n == 0 {
		return CompareStats{}
	}
	var sumSq float64
	var maxAbs float64
	for i := 0; i < n; i++ {
		err := cmplx.Abs(complex128(a[i] - b[i]))
		if err > maxAbs {
			maxAbs = err
		}
		sumSq += err * err
	}
	return CompareStats{
		MaxAbsErr: maxAbs,
		RMSErr:    mathSqrt(sumSq / float64(n)),
		Count:     n,
	}
}

// mathSqrt is a tiny self-contained Newton-iteration square-root shim kept so
// this compare helper can move between packages without extra imports.
//
// Fix: the previous version ran a fixed 12 iterations starting from x=v.
// Each early Newton step only roughly halves the estimate, so for large v
// (e.g. 1e12) twelve iterations were nowhere near converged. Iterate until
// the estimate stops changing, with a generous bound to guarantee termination.
func mathSqrt(v float64) float64 {
	if v <= 0 {
		return 0
	}
	x := v
	for i := 0; i < 100; i++ {
		nx := 0.5 * (x + v/x)
		if nx == x {
			break
		}
		x = nx
	}
	return x
}

+ 19
- 0
internal/demod/gpudemod/compare_gpu.go Просмотреть файл

@@ -0,0 +1,19 @@
package gpudemod

// BuildGPUStubDebugMetrics copies the bookkeeping fields of a streaming
// extract result into a debug-metrics record (GPU-stub flavor).
func BuildGPUStubDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics {
	var m ExtractDebugMetrics
	m.SignalID = res.SignalID
	m.PhaseCount = res.PhaseCount
	m.HistoryLen = res.HistoryLen
	m.NOut = res.NOut
	return m
}

// BuildGPUHostOracleDebugMetrics copies the bookkeeping fields of a streaming
// extract result into a debug-metrics record (host-oracle flavor).
func BuildGPUHostOracleDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics {
	var m ExtractDebugMetrics
	m.SignalID = res.SignalID
	m.PhaseCount = res.PhaseCount
	m.HistoryLen = res.HistoryLen
	m.NOut = res.NOut
	return m
}

+ 10
- 0
internal/demod/gpudemod/compare_oracle.go Просмотреть файл

@@ -0,0 +1,10 @@
package gpudemod

// BuildOracleDebugMetrics copies the bookkeeping fields of a streaming
// extract result into a debug-metrics record (CPU-oracle flavor).
func BuildOracleDebugMetrics(res StreamingExtractResult) ExtractDebugMetrics {
	var m ExtractDebugMetrics
	m.SignalID = res.SignalID
	m.PhaseCount = res.PhaseCount
	m.HistoryLen = res.HistoryLen
	m.NOut = res.NOut
	return m
}

+ 27
- 0
internal/demod/gpudemod/compare_pipeline.go Просмотреть файл

@@ -0,0 +1,27 @@
package gpudemod

// CompareOracleAndGPUStub diffs the oracle and GPU-stub IQ outputs and packs
// the result into both a debug-metrics record and raw compare stats. The
// SignalID comes from the oracle result; the streaming bookkeeping fields
// (phase count, history length, output count) come from the GPU result.
func CompareOracleAndGPUStub(oracle StreamingExtractResult, gpu StreamingExtractResult) (ExtractDebugMetrics, CompareStats) {
	diff := CompareComplexSlices(oracle.IQ, gpu.IQ)
	m := ExtractDebugMetrics{SignalID: oracle.SignalID}
	m.PhaseCount = gpu.PhaseCount
	m.HistoryLen = gpu.HistoryLen
	m.NOut = gpu.NOut
	m.RefMaxAbsErr = diff.MaxAbsErr
	m.RefRMSErr = diff.RMSErr
	return m, diff
}

// CompareOracleAndGPUHostOracle diffs the oracle and GPU-host-oracle IQ
// outputs and packs the result into a debug-metrics record plus raw compare
// stats. SignalID comes from the oracle result; the streaming bookkeeping
// fields come from the GPU result.
func CompareOracleAndGPUHostOracle(oracle StreamingExtractResult, gpu StreamingExtractResult) (ExtractDebugMetrics, CompareStats) {
	diff := CompareComplexSlices(oracle.IQ, gpu.IQ)
	m := ExtractDebugMetrics{SignalID: oracle.SignalID}
	m.PhaseCount = gpu.PhaseCount
	m.HistoryLen = gpu.HistoryLen
	m.NOut = gpu.NOut
	m.RefMaxAbsErr = diff.MaxAbsErr
	m.RefRMSErr = diff.RMSErr
	return m, diff
}

+ 32
- 0
internal/demod/gpudemod/compare_pipeline_test.go Просмотреть файл

@@ -0,0 +1,32 @@
package gpudemod

import "testing"

// TestCompareOracleAndGPUStub checks that the oracle-vs-GPU-stub comparison
// carries the oracle's signal id, compares the full common prefix, and
// reports a positive max-abs error for IQ streams that differ in one sample.
func TestCompareOracleAndGPUStub(t *testing.T) {
	oracle := StreamingExtractResult{
		SignalID:   1,
		IQ:         []complex64{1 + 1i, 2 + 2i},
		Rate:       200000,
		NOut:       2,
		PhaseCount: 0,
		HistoryLen: 64,
	}
	// Same stream with one perturbed sample and different bookkeeping values.
	gpu := StreamingExtractResult{
		SignalID:   1,
		IQ:         []complex64{1 + 1i, 2.1 + 2i},
		Rate:       200000,
		NOut:       2,
		PhaseCount: 3,
		HistoryLen: 64,
	}
	metrics, stats := CompareOracleAndGPUStub(oracle, gpu)
	if metrics.SignalID != 1 {
		t.Fatalf("unexpected signal id: %d", metrics.SignalID)
	}
	if stats.Count != 2 {
		t.Fatalf("unexpected compare count: %d", stats.Count)
	}
	if metrics.RefMaxAbsErr <= 0 {
		t.Fatalf("expected positive max abs error")
	}
}

+ 12
- 0
internal/demod/gpudemod/compare_state.go Просмотреть файл

@@ -0,0 +1,12 @@
package gpudemod

// ExtractDebugMetrics is a per-signal debug snapshot of one streaming extract
// step, optionally including error statistics against a reference
// implementation.
type ExtractDebugMetrics struct {
	SignalID      int64   // signal this snapshot belongs to
	PhaseCount    int     // decimation phase carry after the chunk
	HistoryLen    int     // carried shifted-history length
	NOut          int     // samples emitted for the chunk
	RefMaxAbsErr  float64 // max |ref - actual| when a reference comparison ran
	RefRMSErr     float64 // RMS error vs the reference when a comparison ran
	BoundaryDelta float64 // NOTE(review): not populated anywhere in this view — presumably a chunk-boundary discontinuity metric; confirm at call sites
	BoundaryD2    float64 // NOTE(review): likewise unset here — presumably the second difference at the boundary; confirm
}

+ 18
- 0
internal/demod/gpudemod/compare_test.go Просмотреть файл

@@ -0,0 +1,18 @@
package gpudemod

import "testing"

// TestCompareComplexSlices sanity-checks the compare helper on two
// nearly-equal slices: full element count, and strictly positive max-abs and
// RMS errors when the inputs differ.
func TestCompareComplexSlices(t *testing.T) {
	a := []complex64{1 + 1i, 2 + 2i, 3 + 3i}
	b := []complex64{1 + 1i, 2.1 + 2i, 2.9 + 3.2i}
	stats := CompareComplexSlices(a, b)
	if stats.Count != 3 {
		t.Fatalf("unexpected count: %d", stats.Count)
	}
	if stats.MaxAbsErr <= 0 {
		t.Fatalf("expected positive max abs error")
	}
	if stats.RMSErr <= 0 {
		t.Fatalf("expected positive rms error")
	}
}

+ 170
- 0
internal/demod/gpudemod/cpu_oracle.go Просмотреть файл

@@ -0,0 +1,170 @@
package gpudemod

import (
"fmt"
"math"
)

// CPUOracleState is the per-signal carry state for the CPU reference
// ("oracle") streaming extractor. It holds everything needed for chunked
// extraction to match a single monolithic run over the same samples.
type CPUOracleState struct {
	SignalID       int64       // owning signal id
	ConfigHash     uint64      // hash of the extraction config; a change triggers a reset
	NCOPhase       float64     // NCO phase carried between chunks
	Decim          int         // integer decimation factor
	PhaseCount     int         // samples consumed since the last emitted output (mod Decim)
	NumTaps        int         // length of the prototype lowpass FIR
	ShiftedHistory []complex64 // newest NumTaps-1 frequency-shifted samples carried over
	BaseTaps       []float32   // prototype FIR taps (direct form)
	PolyphaseTaps  []float32   // phase-major polyphase layout of BaseTaps (built lazily in some paths)
}

// ResetCPUOracleStateIfConfigChanged clears the oracle's streaming carry
// state (NCO phase, decimation phase counter, shifted-sample history) when
// the configuration hash changes, recording the new hash. A nil state or an
// unchanged hash is a no-op; taps and decimation settings are left intact.
func ResetCPUOracleStateIfConfigChanged(state *CPUOracleState, newHash uint64) {
	if state == nil || state.ConfigHash == newHash {
		return
	}
	state.ConfigHash = newHash
	state.NCOPhase = 0
	state.PhaseCount = 0
	state.ShiftedHistory = state.ShiftedHistory[:0]
}

// CPUOracleExtract is the reference (golden) streaming extractor. It
// frequency-shifts each new input sample by the carried NCO phase, appends it
// to the shifted-sample history, and emits one direct-form FIR output every
// Decim-th input sample. All carry state (NCO phase, decimation phase
// counter, filter history) lives in state, so feeding the same samples in
// chunks produces the same output as a single monolithic call.
//
// Returns nil when state is unusable (nil, non-positive taps/decim, or fewer
// base taps than NumTaps).
func CPUOracleExtract(iqNew []complex64, state *CPUOracleState, phaseInc float64) []complex64 {
	if state == nil || state.NumTaps <= 0 || state.Decim <= 0 || len(state.BaseTaps) < state.NumTaps {
		return nil
	}
	out := make([]complex64, 0, len(iqNew)/maxInt(1, state.Decim)+2)
	phase := state.NCOPhase
	// Local copy of the carried history; written back once at the end.
	hist := append([]complex64(nil), state.ShiftedHistory...)

	for _, x := range iqNew {
		// Mix the sample down by the current NCO phase.
		rot := complex64(complex(math.Cos(phase), math.Sin(phase)))
		s := x * rot
		hist = append(hist, s)
		state.PhaseCount++

		if state.PhaseCount == state.Decim {
			// Decimation boundary: run the full FIR over the newest NumTaps
			// shifted samples; indices before the stream start read as zero.
			var y complex64
			for k := 0; k < state.NumTaps; k++ {
				idx := len(hist) - 1 - k
				var sample complex64
				if idx >= 0 {
					sample = hist[idx]
				}
				y += complex(state.BaseTaps[k], 0) * sample
			}
			out = append(out, y)
			state.PhaseCount = 0
		}

		// Keep only the NumTaps-1 newest shifted samples as carry history.
		if len(hist) > state.NumTaps-1 {
			hist = hist[len(hist)-(state.NumTaps-1):]
		}

		// Advance the NCO and wrap the phase into [-pi, pi).
		phase += phaseInc
		if phase >= math.Pi {
			phase -= 2 * math.Pi
		} else if phase < -math.Pi {
			phase += 2 * math.Pi
		}
	}

	state.NCOPhase = phase
	state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...)
	return out
}

// CPUOracleExtractPolyphase keeps the same streaming state semantics as CPUOracleExtract,
// but computes outputs using the explicit phase-major polyphase tap layout.
//
// The polyphase table is built lazily from BaseTaps on first use. Each output
// accumulates taps[p][k] * shifted[newest - (p + k*Decim)] over all phases p
// and per-phase taps k, which is the same sum as the direct FIR form.
func CPUOracleExtractPolyphase(iqNew []complex64, state *CPUOracleState, phaseInc float64) []complex64 {
	if state == nil || state.NumTaps <= 0 || state.Decim <= 0 || len(state.BaseTaps) < state.NumTaps {
		return nil
	}
	if len(state.PolyphaseTaps) == 0 {
		state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim)
	}
	phaseLen := PolyphasePhaseLen(len(state.BaseTaps), state.Decim)
	out := make([]complex64, 0, len(iqNew)/maxInt(1, state.Decim)+2)
	phase := state.NCOPhase
	// Local copy of the carried history; written back once at the end.
	hist := append([]complex64(nil), state.ShiftedHistory...)

	for _, x := range iqNew {
		// Mix the sample down by the current NCO phase.
		rot := complex64(complex(math.Cos(phase), math.Sin(phase)))
		s := x * rot
		hist = append(hist, s)
		state.PhaseCount++

		if state.PhaseCount == state.Decim {
			var y complex64
			for p := 0; p < state.Decim; p++ {
				for k := 0; k < phaseLen; k++ {
					tap := state.PolyphaseTaps[p*phaseLen+k]
					if tap == 0 {
						continue // zero tap (incl. zero padding) contributes nothing
					}
					srcBack := p + k*state.Decim
					idx := len(hist) - 1 - srcBack
					if idx < 0 {
						continue // before stream start: implicit zero sample
					}
					y += complex(tap, 0) * hist[idx]
				}
			}
			out = append(out, y)
			state.PhaseCount = 0
		}

		// Keep only the NumTaps-1 newest shifted samples as carry history.
		if len(hist) > state.NumTaps-1 {
			hist = hist[len(hist)-(state.NumTaps-1):]
		}

		// Advance the NCO and wrap the phase into [-pi, pi).
		phase += phaseInc
		if phase >= math.Pi {
			phase -= 2 * math.Pi
		} else if phase < -math.Pi {
			phase += 2 * math.Pi
		}
	}

	state.NCOPhase = phase
	state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...)
	return out
}

// RunChunkedCPUOracle feeds all through the CPU oracle using the given chunk
// sizes (each clamped to the remaining samples), then flushes any leftover
// tail as one final chunk, concatenating every extracted output. mkState
// constructs the fresh per-run streaming state; phaseInc is the NCO phase
// increment per input sample.
func RunChunkedCPUOracle(all []complex64, chunkSizes []int, mkState func() *CPUOracleState, phaseInc float64) []complex64 {
	st := mkState()
	combined := make([]complex64, 0)
	pos := 0
	for _, size := range chunkSizes {
		if pos >= len(all) {
			break
		}
		end := pos + size
		if end > len(all) {
			end = len(all)
		}
		combined = append(combined, CPUOracleExtract(all[pos:end], st, phaseInc)...)
		pos = end
	}
	if pos < len(all) {
		combined = append(combined, CPUOracleExtract(all[pos:], st, phaseInc)...)
	}
	return combined
}

// ExactIntegerDecimation returns sampleRate/outRate when the division is
// exact. It errors for non-positive rates, or when outRate does not divide
// sampleRate evenly — the streaming polyphase extractor only supports
// integer decimation factors.
func ExactIntegerDecimation(sampleRate int, outRate int) (int, error) {
	switch {
	case sampleRate <= 0 || outRate <= 0:
		return 0, fmt.Errorf("invalid sampleRate/outRate: %d/%d", sampleRate, outRate)
	case sampleRate%outRate != 0:
		return 0, fmt.Errorf("streaming polyphase extractor requires integer decimation: sampleRate=%d outRate=%d", sampleRate, outRate)
	default:
		return sampleRate / outRate, nil
	}
}

// maxInt returns the larger of a and b.
func maxInt(a int, b int) int {
	if b > a {
		return b
	}
	return a
}

+ 89
- 0
internal/demod/gpudemod/cpu_oracle_test.go Просмотреть файл

@@ -0,0 +1,89 @@
package gpudemod

import (
"math"
"math/cmplx"
"testing"
)

// makeDeterministicIQ synthesizes n repeatable IQ samples as the sum of two
// fixed-frequency tones, so tests get a stable, non-trivial input stream.
func makeDeterministicIQ(n int) []complex64 {
	samples := make([]complex64, n)
	for i := range samples {
		t1 := float64(i) * 0.017
		t2 := float64(i) * 0.031
		re := math.Cos(t1) + 0.2*math.Cos(t2)
		im := math.Sin(t1) + 0.15*math.Sin(t2)
		samples[i] = complex64(complex(re, im))
	}
	return samples
}

// makeLowpassTaps returns n uniform (moving-average) taps, each 1/n.
func makeLowpassTaps(n int) []float32 {
	taps := make([]float32, n)
	gain := 1.0 / float32(n)
	for i := range taps {
		taps[i] = gain
	}
	return taps
}

// requireComplexSlicesClose fails the test unless a and b have identical
// length and agree elementwise within tol (absolute complex distance).
func requireComplexSlicesClose(t *testing.T, a []complex64, b []complex64, tol float64) {
	t.Helper()
	if len(a) != len(b) {
		t.Fatalf("length mismatch: %d vs %d", len(a), len(b))
	}
	for i := range a {
		if cmplx.Abs(complex128(a[i]-b[i])) > tol {
			t.Fatalf("slice mismatch at %d: %v vs %v (tol=%f)", i, a[i], b[i], tol)
		}
	}
}

// TestCPUOracleMonolithicVsChunked verifies the core streaming invariant of
// the direct-form oracle: feeding irregular chunks yields the same output as
// a single monolithic pass over the identical input and configuration.
func TestCPUOracleMonolithicVsChunked(t *testing.T) {
	iq := makeDeterministicIQ(200000)
	// mk builds an identical fresh state for each run so both passes start
	// from the same configuration.
	mk := func() *CPUOracleState {
		return &CPUOracleState{
			SignalID:       1,
			ConfigHash:     123,
			NCOPhase:       0,
			Decim:          20,
			PhaseCount:     0,
			NumTaps:        65,
			ShiftedHistory: make([]complex64, 0, 64),
			BaseTaps:       makeLowpassTaps(65),
		}
	}
	phaseInc := 0.017
	monoState := mk()
	mono := CPUOracleExtract(iq, monoState, phaseInc)
	chunked := RunChunkedCPUOracle(iq, []int{4096, 5000, 8192, 27307}, mk, phaseInc)
	requireComplexSlicesClose(t, mono, chunked, 1e-5)
}

// TestExactIntegerDecimation covers the exact-division success case and the
// rejection of a non-integer sampleRate/outRate ratio.
func TestExactIntegerDecimation(t *testing.T) {
	if d, err := ExactIntegerDecimation(4000000, 200000); err != nil || d != 20 {
		t.Fatalf("unexpected exact decim result: d=%d err=%v", d, err)
	}
	if _, err := ExactIntegerDecimation(4000000, 192000); err == nil {
		t.Fatalf("expected non-integer decimation error")
	}
}

// TestCPUOracleDirectVsPolyphase confirms the direct-form and polyphase
// oracle implementations produce matching output for identical state and
// configuration.
func TestCPUOracleDirectVsPolyphase(t *testing.T) {
	iq := makeDeterministicIQ(50000)
	mk := func() *CPUOracleState {
		taps := makeLowpassTaps(65)
		return &CPUOracleState{
			SignalID:       1,
			ConfigHash:     123,
			NCOPhase:       0,
			Decim:          20,
			PhaseCount:     0,
			NumTaps:        65,
			ShiftedHistory: make([]complex64, 0, 64),
			BaseTaps:       taps,
			PolyphaseTaps:  BuildPolyphaseTapsPhaseMajor(taps, 20),
		}
	}
	phaseInc := 0.017
	direct := CPUOracleExtract(iq, mk(), phaseInc)
	poly := CPUOracleExtractPolyphase(iq, mk(), phaseInc)
	requireComplexSlicesClose(t, direct, poly, 1e-5)
}

+ 309
- 0
internal/demod/gpudemod/native/exports.cu Просмотреть файл

@@ -11,6 +11,10 @@

typedef void* gpud_stream_handle;

// Integer max helper shared by the streaming launch wrappers below.
static __forceinline__ int gpud_max_i(int a, int b) {
    if (a < b) {
        return b;
    }
    return a;
}

GPUD_API int GPUD_CALL gpud_stream_create(gpud_stream_handle* out) {
if (!out) return -1;
cudaStream_t stream;
@@ -320,3 +324,308 @@ GPUD_API int GPUD_CALL gpud_launch_ssb_product_cuda(
gpud_ssb_product_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
return (int)cudaGetLastError();
}

__global__ void gpud_streaming_polyphase_accum_kernel(
const float2* __restrict__ history_state,
int history_len,
const float2* __restrict__ shifted_new,
int n_new,
const float* __restrict__ polyphase_taps,
int polyphase_len,
int decim,
int phase_len,
int start_idx,
int n_out,
float2* __restrict__ out
);

__global__ void gpud_streaming_history_tail_kernel(
const float2* __restrict__ history_state,
int history_len,
const float2* __restrict__ shifted_new,
int n_new,
int keep,
float2* __restrict__ history_out
);

static __forceinline__ double gpud_reduce_phase(double phase);

// Transitional legacy entrypoint retained for bring-up and comparison.
// The production-native streaming path is gpud_launch_streaming_polyphase_stateful_cuda,
// which preserves per-signal carry state across NEW-samples-only chunks.
//
// One call performs: (1) NCO frequency shift of the n_new input samples into
// a per-call scratch buffer, (2) phase-major polyphase filtering + decimation
// over the virtual stream [history_in | shifted], (3) computation of the next
// chunk's history tail. Carry values (phase counter, NCO phase, history) are
// returned through the *_out parameters rather than kept in device-side
// state. Returns 0 on success, a negative argument-contract code, or a
// cudaError_t from a kernel launch / allocation.
GPUD_API int GPUD_CALL gpud_launch_streaming_polyphase_prepare_cuda(
    const float2* in_new,
    int n_new,
    const float2* history_in,
    int history_len,
    const float* polyphase_taps,
    int polyphase_len,
    int decim,
    int num_taps,
    int phase_count_in,
    double phase_start,
    double phase_inc,
    float2* out,
    int* n_out,
    int* phase_count_out,
    double* phase_end_out,
    float2* history_out
) {
    if (n_new < 0 || !polyphase_taps || polyphase_len <= 0 || decim <= 0 || num_taps <= 0) return -1;
    // phase_len = ceil(num_taps / decim); the flattened tap table must cover decim phases.
    const int phase_len = (num_taps + decim - 1) / decim;
    if (polyphase_len < decim * phase_len) return -2;

    // The FIR needs at most num_taps-1 samples of history; clamp what the caller passed.
    const int keep = num_taps > 1 ? num_taps - 1 : 0;
    int clamped_history_len = history_len;
    if (clamped_history_len < 0) clamped_history_len = 0;
    if (clamped_history_len > keep) clamped_history_len = keep;
    if (clamped_history_len > 0 && !history_in) return -5;

    // Stage 1: frequency-shift the new samples into a scratch device buffer.
    float2* shifted = NULL;
    cudaError_t err = cudaSuccess;
    if (n_new > 0) {
        if (!in_new) return -3;
        err = cudaMalloc((void**)&shifted, (size_t)gpud_max_i(1, n_new) * sizeof(float2));
        if (err != cudaSuccess) return (int)err;
        const int block = 256;
        const int grid_shift = (n_new + block - 1) / block;
        gpud_freq_shift_kernel<<<grid_shift, block>>>(in_new, shifted, n_new, phase_inc, phase_start);
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            cudaFree(shifted);
            return (int)err;
        }
    }

    // Stage 2: one output per completed decimation interval, counting the
    // carried-in phase offset.
    int phase_count = phase_count_in;
    if (phase_count < 0) phase_count = 0;
    if (phase_count >= decim) phase_count %= decim;
    const int total_phase = phase_count + n_new;
    const int out_count = total_phase / decim;
    if (out_count > 0) {
        if (!out) {
            cudaFree(shifted);
            return -4;
        }
        const int block = 256;
        const int grid = (out_count + block - 1) / block;
        // Index (within the new samples) of the first decimation boundary.
        const int start_idx = decim - phase_count - 1;
        gpud_streaming_polyphase_accum_kernel<<<grid, block>>>(
            history_in,
            clamped_history_len,
            shifted,
            n_new,
            polyphase_taps,
            polyphase_len,
            decim,
            phase_len,
            start_idx,
            out_count,
            out
        );
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            cudaFree(shifted);
            return (int)err;
        }
    }

    // Stage 3: materialize the trailing `keep` samples as the next chunk's history.
    if (history_out && keep > 0) {
        const int new_history_len = clamped_history_len + n_new < keep ? clamped_history_len + n_new : keep;
        if (new_history_len > 0) {
            const int block = 256;
            const int grid = (new_history_len + block - 1) / block;
            gpud_streaming_history_tail_kernel<<<grid, block>>>(
                history_in,
                clamped_history_len,
                shifted,
                n_new,
                new_history_len,
                history_out
            );
            err = cudaGetLastError();
            if (err != cudaSuccess) {
                cudaFree(shifted);
                return (int)err;
            }
        }
    }

    // Publish carry-out scalars (each optional).
    if (n_out) *n_out = out_count;
    if (phase_count_out) *phase_count_out = total_phase % decim;
    if (phase_end_out) *phase_end_out = gpud_reduce_phase(phase_start + phase_inc * (double)n_new);

    if (shifted) cudaFree(shifted);
    return 0;
}

// Reads logical sample idx from the virtual concatenation
// [history_state (history_len) | shifted_new (n_new)], returning zero for any
// out-of-range index so filter taps that reach before the stream start see
// silence.
static __device__ __forceinline__ float2 gpud_stream_sample_at(
    const float2* __restrict__ history_state,
    int history_len,
    const float2* __restrict__ shifted_new,
    int n_new,
    int idx
) {
    if (idx < 0) return make_float2(0.0f, 0.0f);
    if (idx < history_len) return history_state[idx];
    int shifted_idx = idx - history_len;
    if (shifted_idx < 0 || shifted_idx >= n_new) return make_float2(0.0f, 0.0f);
    return shifted_new[shifted_idx];
}

// One thread per decimated output sample. Output out_idx corresponds to
// logical sample (history_len + start_idx + out_idx*decim) of the virtual
// stream [history_state | shifted_new]; the phase-major tap table is
// accumulated backwards from that newest sample. Zero taps are skipped, and
// out-of-range samples read as zero via gpud_stream_sample_at.
__global__ void gpud_streaming_polyphase_accum_kernel(
    const float2* __restrict__ history_state,
    int history_len,
    const float2* __restrict__ shifted_new,
    int n_new,
    const float* __restrict__ polyphase_taps,
    int polyphase_len,
    int decim,
    int phase_len,
    int start_idx,
    int n_out,
    float2* __restrict__ out
) {
    int out_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (out_idx >= n_out) return;

    // Newest logical sample contributing to this output.
    int newest = history_len + start_idx + out_idx * decim;
    float acc_r = 0.0f;
    float acc_i = 0.0f;
    for (int p = 0; p < decim; ++p) {
        for (int k = 0; k < phase_len; ++k) {
            int tap_idx = p * phase_len + k;
            if (tap_idx >= polyphase_len) continue;
            float tap = polyphase_taps[tap_idx];
            if (tap == 0.0f) continue; // zero tap (incl. padding) contributes nothing
            int src_back = p + k * decim;
            int src_idx = newest - src_back;
            float2 sample = gpud_stream_sample_at(history_state, history_len, shifted_new, n_new, src_idx);
            acc_r += sample.x * tap;
            acc_i += sample.y * tap;
        }
    }
    out[out_idx] = make_float2(acc_r, acc_i);
}

// Copies the last `keep` samples of the virtual stream
// [history_state | shifted_new] into history_out, one thread per carried
// sample; logical indices before the stream start read as zero.
__global__ void gpud_streaming_history_tail_kernel(
    const float2* __restrict__ history_state,
    int history_len,
    const float2* __restrict__ shifted_new,
    int n_new,
    int keep,
    float2* __restrict__ history_out
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= keep) return;
    int combined_len = history_len + n_new;
    int src_idx = combined_len - keep + idx;
    history_out[idx] = gpud_stream_sample_at(history_state, history_len, shifted_new, n_new, src_idx);
}

// Wraps an accumulated NCO phase back to its principal range (about
// [-pi, pi], via round-to-nearest multiples of 2*pi) so the double-precision
// phase carry does not grow without bound across chunks.
static __forceinline__ double gpud_reduce_phase(double phase) {
    const double TWO_PI = 6.283185307179586;
    return phase - rint(phase / TWO_PI) * TWO_PI;
}

// Production-native candidate entrypoint for the stateful streaming extractor.
// Callers provide only NEW samples; overlap+trim is intentionally not part of this path.
//
// Unlike the *_prepare_cuda bring-up path, all carry state is caller-owned:
// device history buffers (history_state, with history_scratch as a same-size
// staging buffer) plus host scalars passed by pointer (history_len_io,
// phase_count_state, phase_state). No device memory is allocated per call;
// shifted_new_tmp must hold n_new samples. Returns 0 on success, a negative
// contract code (-10..-16), or a cudaError_t.
GPUD_API int GPUD_CALL gpud_launch_streaming_polyphase_stateful_cuda(
    const float2* in_new,
    int n_new,
    float2* shifted_new_tmp,
    const float* polyphase_taps,
    int polyphase_len,
    int decim,
    int num_taps,
    float2* history_state,
    float2* history_scratch,
    int history_cap,
    int* history_len_io,
    int* phase_count_state,
    double* phase_state,
    double phase_inc,
    float2* out,
    int out_cap,
    int* n_out
) {
    if (!polyphase_taps || decim <= 0 || num_taps <= 0 || !history_len_io || !phase_count_state || !phase_state || !n_out) return -10;
    if (n_new < 0 || out_cap < 0 || history_cap < 0) return -11;
    const int phase_len = (num_taps + decim - 1) / decim;
    if (polyphase_len < decim * phase_len) return -12;

    // Sanitize carried-in scalars before use.
    int history_len = *history_len_io;
    if (history_len < 0) history_len = 0;
    if (history_len > history_cap) history_len = history_cap;

    int phase_count = *phase_count_state;
    if (phase_count < 0) phase_count = 0;
    if (phase_count >= decim) phase_count %= decim;

    // Stage 1: NCO frequency shift of the new samples into the scratch buffer.
    double phase_start = *phase_state;
    if (n_new > 0) {
        if (!in_new || !shifted_new_tmp) return -13;
        const int block = 256;
        const int grid = (n_new + block - 1) / block;
        gpud_freq_shift_kernel<<<grid, block>>>(in_new, shifted_new_tmp, n_new, phase_inc, phase_start);
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess) return (int)err;
    }

    // Stage 2: one output per completed decimation interval, including the
    // carried-in phase offset.
    const int total_phase = phase_count + n_new;
    const int out_count = total_phase / decim;
    if (out_count > out_cap) return -14;

    if (out_count > 0) {
        if (!out) return -15;
        const int block = 256;
        const int grid = (out_count + block - 1) / block;
        // Index (within the new samples) of the first decimation boundary.
        const int start_idx = decim - phase_count - 1;
        gpud_streaming_polyphase_accum_kernel<<<grid, block>>>(
            history_state,
            history_len,
            shifted_new_tmp,
            n_new,
            polyphase_taps,
            polyphase_len,
            decim,
            phase_len,
            start_idx,
            out_count,
            out
        );
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess) return (int)err;
    }

    // Stage 3: rebuild the carried history tail into the scratch buffer, then
    // copy it back into history_state (device-to-device).
    int new_history_len = history_len;
    if (history_cap > 0) {
        new_history_len = history_len + n_new;
        if (new_history_len > history_cap) new_history_len = history_cap;
        if (new_history_len > 0) {
            if (!history_state || !history_scratch) return -16;
            const int block = 256;
            const int grid = (new_history_len + block - 1) / block;
            gpud_streaming_history_tail_kernel<<<grid, block>>>(
                history_state,
                history_len,
                shifted_new_tmp,
                n_new,
                new_history_len,
                history_scratch
            );
            cudaError_t err = cudaGetLastError();
            if (err != cudaSuccess) return (int)err;
            err = cudaMemcpy(history_state, history_scratch, (size_t)new_history_len * sizeof(float2), cudaMemcpyDeviceToDevice);
            if (err != cudaSuccess) return (int)err;
        }
    } else {
        new_history_len = 0;
    }

    // Publish updated carry state for the next chunk.
    *history_len_io = new_history_len;
    *phase_count_state = total_phase % decim;
    *phase_state = gpud_reduce_phase(phase_start + phase_inc * (double)n_new);
    *n_out = out_count;
    return 0;
}

+ 31
- 0
internal/demod/gpudemod/oracle_runner_test.go Просмотреть файл

@@ -0,0 +1,31 @@
package gpudemod

import "testing"

// TestCPUOracleRunnerCleansUpDisappearedSignals verifies that the oracle
// runner keeps one state entry per active job and evicts the entry of any
// signal missing from a subsequent StreamingExtract call.
func TestCPUOracleRunnerCleansUpDisappearedSignals(t *testing.T) {
	r := NewCPUOracleRunner(4000000)
	jobs1 := []StreamingExtractJob{
		{SignalID: 1, OffsetHz: 1000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 101},
		{SignalID: 2, OffsetHz: 2000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 102},
	}
	_, err := r.StreamingExtract(makeDeterministicIQ(4096), jobs1)
	if err != nil {
		t.Fatalf("unexpected error on first extract: %v", err)
	}
	if len(r.States) != 2 {
		t.Fatalf("expected 2 states, got %d", len(r.States))
	}
	// Signal 1 disappears from the job list; its state must be removed.
	jobs2 := []StreamingExtractJob{
		{SignalID: 2, OffsetHz: 2000, Bandwidth: 20000, OutRate: 200000, NumTaps: 65, ConfigHash: 102},
	}
	_, err = r.StreamingExtract(makeDeterministicIQ(2048), jobs2)
	if err != nil {
		t.Fatalf("unexpected error on second extract: %v", err)
	}
	if len(r.States) != 1 {
		t.Fatalf("expected 1 state after cleanup, got %d", len(r.States))
	}
	if _, ok := r.States[1]; ok {
		t.Fatalf("expected signal 1 state to be cleaned up")
	}
}

+ 45
- 0
internal/demod/gpudemod/oracle_validation_test.go Просмотреть файл

@@ -0,0 +1,45 @@
package gpudemod

import "testing"

// TestCPUOracleMonolithicVsChunkedPolyphase repeats the chunked-vs-monolithic
// streaming invariant check for the polyphase implementation, with its own
// set of irregular chunk sizes.
func TestCPUOracleMonolithicVsChunkedPolyphase(t *testing.T) {
	iq := makeDeterministicIQ(120000)
	// mk builds an identical fresh state so both passes start the same.
	mk := func() *CPUOracleState {
		taps := makeLowpassTaps(65)
		return &CPUOracleState{
			SignalID:       1,
			ConfigHash:     999,
			NCOPhase:       0,
			Decim:          20,
			PhaseCount:     0,
			NumTaps:        65,
			ShiftedHistory: make([]complex64, 0, 64),
			BaseTaps:       taps,
			PolyphaseTaps:  BuildPolyphaseTapsPhaseMajor(taps, 20),
		}
	}
	phaseInc := 0.013
	mono := CPUOracleExtractPolyphase(iq, mk(), phaseInc)
	chunked := func() []complex64 {
		state := mk()
		out := make([]complex64, 0)
		chunks := []int{4096, 3000, 8192, 7777, 12000}
		pos := 0
		for _, n := range chunks {
			if pos >= len(iq) {
				break
			}
			end := pos + n
			if end > len(iq) {
				end = len(iq)
			}
			out = append(out, CPUOracleExtractPolyphase(iq[pos:end], state, phaseInc)...)
			pos = end
		}
		// Flush whatever the fixed chunk list did not cover.
		if pos < len(iq) {
			out = append(out, CPUOracleExtractPolyphase(iq[pos:], state, phaseInc)...)
		}
		return out
	}()
	requireComplexSlicesClose(t, mono, chunked, 1e-5)
}

+ 28
- 0
internal/demod/gpudemod/polyphase.go Просмотреть файл

@@ -0,0 +1,28 @@
package gpudemod

// BuildPolyphaseTapsPhaseMajor builds a phase-major polyphase tap layout:
// tapsByPhase[p][k] = h[p + k*D]
// Flattened as: [phase0 taps..., phase1 taps..., ...]
// Phases shorter than ceil(len(base)/decim) are zero-padded at the end.
// Returns nil for an empty prototype or non-positive decimation.
func BuildPolyphaseTapsPhaseMajor(base []float32, decim int) []float32 {
	if decim <= 0 || len(base) == 0 {
		return nil
	}
	phaseLen := (len(base) + decim - 1) / decim
	flat := make([]float32, decim*phaseLen)
	// Walk the prototype once and scatter each tap into its phase row;
	// positions past len(base) keep their zero padding.
	for src, tap := range base {
		p := src % decim
		k := src / decim
		flat[p*phaseLen+k] = tap
	}
	return flat
}

// PolyphasePhaseLen returns the per-phase tap count, ceil(baseLen/decim),
// or 0 for non-positive inputs.
func PolyphasePhaseLen(baseLen int, decim int) int {
	if baseLen <= 0 || decim <= 0 {
		return 0
	}
	return (baseLen + decim - 1) / decim
}

+ 22
- 0
internal/demod/gpudemod/polyphase_test.go Просмотреть файл

@@ -0,0 +1,22 @@
package gpudemod

import "testing"

// TestBuildPolyphaseTapsPhaseMajor pins the phase-major flattening of a
// 7-tap prototype at decimation 3, including zero padding of short phases.
func TestBuildPolyphaseTapsPhaseMajor(t *testing.T) {
	base := []float32{1, 2, 3, 4, 5, 6, 7}
	got := BuildPolyphaseTapsPhaseMajor(base, 3)
	// phase-major with phase len ceil(7/3)=3
	want := []float32{
		1, 4, 7,
		2, 5, 0,
		3, 6, 0,
	}
	if len(got) != len(want) {
		t.Fatalf("len mismatch: got %d want %d", len(got), len(want))
	}
	for i := range want {
		if got[i] != want[i] {
			t.Fatalf("mismatch at %d: got %v want %v", i, got[i], want[i])
		}
	}
}

+ 57
- 0
internal/demod/gpudemod/state_reset_test.go Просмотреть файл

@@ -0,0 +1,57 @@
package gpudemod

import "testing"

// TestResetCPUOracleStateIfConfigChanged verifies that a config-hash change
// clears all oracle carry state (phase, phase counter, history) while
// adopting the new hash.
func TestResetCPUOracleStateIfConfigChanged(t *testing.T) {
	state := &CPUOracleState{
		SignalID:       1,
		ConfigHash:     111,
		NCOPhase:       1.23,
		Decim:          20,
		PhaseCount:     7,
		NumTaps:        65,
		ShiftedHistory: []complex64{1 + 1i, 2 + 2i},
	}
	ResetCPUOracleStateIfConfigChanged(state, 222)
	if state.ConfigHash != 222 {
		t.Fatalf("config hash not updated")
	}
	if state.NCOPhase != 0 {
		t.Fatalf("expected phase reset")
	}
	if state.PhaseCount != 0 {
		t.Fatalf("expected phase count reset")
	}
	if len(state.ShiftedHistory) != 0 {
		t.Fatalf("expected shifted history reset")
	}
}

// TestResetExtractStreamState verifies the batch-runner state reset clears
// carry values, empties the history, and marks the state uninitialized.
func TestResetExtractStreamState(t *testing.T) {
	state := &ExtractStreamState{
		SignalID:       1,
		ConfigHash:     111,
		NCOPhase:       2.34,
		Decim:          20,
		PhaseCount:     9,
		NumTaps:        65,
		ShiftedHistory: []complex64{3 + 3i, 4 + 4i},
		Initialized:    true,
	}
	ResetExtractStreamState(state, 333)
	if state.ConfigHash != 333 {
		t.Fatalf("config hash not updated")
	}
	if state.NCOPhase != 0 {
		t.Fatalf("expected phase reset")
	}
	if state.PhaseCount != 0 {
		t.Fatalf("expected phase count reset")
	}
	if len(state.ShiftedHistory) != 0 {
		t.Fatalf("expected shifted history reset")
	}
	if state.Initialized {
		t.Fatalf("expected initialized=false after reset")
	}
}

+ 70
- 0
internal/demod/gpudemod/stream_state.go Просмотреть файл

@@ -0,0 +1,70 @@
package gpudemod

import (
"log"

"sdr-wideband-suite/internal/dsp"
)

// ResetSignalState drops all streaming carry state for a single signal —
// both the host-side extract state and the native GPU-side state — so its
// next chunk starts from a clean extractor.
func (r *BatchRunner) ResetSignalState(signalID int64) {
	if r == nil {
		return
	}
	if r.streamState == nil {
		return
	}
	delete(r.streamState, signalID)
	r.resetNativeStreamingState(signalID)
}

// ResetAllSignalStates discards the streaming carry state of every signal,
// replacing the host-side state map and clearing all native GPU-side states.
func (r *BatchRunner) ResetAllSignalStates() {
	if r == nil {
		return
	}
	r.resetAllNativeStreamingStates()
	r.streamState = make(map[int64]*ExtractStreamState)
}

// getOrInitExtractState fetches (or lazily creates) the streaming carry state
// for job.SignalID and re-derives its filter configuration from the job:
// integer decimation, prototype lowpass taps, and the phase-major polyphase
// layout. If the job's ConfigHash differs from the stored one, the carry
// state (phase, phase counter, history) is reset first.
//
// Note that taps are recomputed on every call, not only on config change;
// only the carry state survives across calls.
func (r *BatchRunner) getOrInitExtractState(job StreamingExtractJob, sampleRate int) (*ExtractStreamState, error) {
	if r == nil {
		return nil, ErrUnavailable
	}
	if r.streamState == nil {
		r.streamState = make(map[int64]*ExtractStreamState)
	}
	// Streaming extraction only supports exact integer decimation.
	decim, err := ExactIntegerDecimation(sampleRate, job.OutRate)
	if err != nil {
		return nil, err
	}
	state := r.streamState[job.SignalID]
	if state == nil {
		state = &ExtractStreamState{SignalID: job.SignalID}
		r.streamState[job.SignalID] = state
	}
	if state.ConfigHash != job.ConfigHash {
		// Log only resets of live state; a brand-new state resetting is noise.
		if state.Initialized {
			log.Printf("STREAMING STATE RESET: signal=%d oldHash=%d newHash=%d historyLen=%d",
				job.SignalID, state.ConfigHash, job.ConfigHash, len(state.ShiftedHistory))
		}
		ResetExtractStreamState(state, job.ConfigHash)
	}
	state.Decim = decim
	state.NumTaps = job.NumTaps
	if state.NumTaps <= 0 {
		// Default prototype length when the job does not specify one.
		state.NumTaps = 101
	}
	// Half-band cutoff, floored to keep the FIR design sane for tiny bandwidths.
	cutoff := job.Bandwidth / 2
	if cutoff < 200 {
		cutoff = 200
	}
	base := dsp.LowpassFIR(cutoff, sampleRate, state.NumTaps)
	state.BaseTaps = make([]float32, len(base))
	for i, v := range base {
		state.BaseTaps[i] = float32(v)
	}
	state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim)
	// Ensure the history buffer can hold NumTaps-1 carried samples.
	// NOTE(review): the else-if branch below looks unreachable — a nil slice
	// has cap 0, which the first condition already catches for NumTaps > 1.
	if cap(state.ShiftedHistory) < maxInt(0, state.NumTaps-1) {
		state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1))
	} else if state.ShiftedHistory == nil {
		state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1))
	}
	state.Initialized = true
	return state, nil
}

+ 31
- 0
internal/demod/gpudemod/stream_state_test.go Просмотреть файл

@@ -0,0 +1,31 @@
package gpudemod

import "testing"

// TestGetOrInitExtractStateInitializesPolyphaseAndHistory checks that a fresh
// state derives integer decimation, builds base and polyphase taps of the
// requested length, and pre-sizes the history buffer for NumTaps-1 samples.
func TestGetOrInitExtractStateInitializesPolyphaseAndHistory(t *testing.T) {
	r := &BatchRunner{streamState: make(map[int64]*ExtractStreamState)}
	job := StreamingExtractJob{
		SignalID:   7,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 555,
	}
	state, err := r.getOrInitExtractState(job, 4000000)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if state.Decim != 20 {
		t.Fatalf("unexpected decim: %d", state.Decim)
	}
	if len(state.BaseTaps) != 65 {
		t.Fatalf("unexpected base taps len: %d", len(state.BaseTaps))
	}
	if len(state.PolyphaseTaps) == 0 {
		t.Fatalf("expected polyphase taps")
	}
	if cap(state.ShiftedHistory) < 64 {
		t.Fatalf("expected shifted history capacity >= 64, got %d", cap(state.ShiftedHistory))
	}
}

+ 39
- 0
internal/demod/gpudemod/streaming_gpu_contract.go Просмотреть файл

@@ -0,0 +1,39 @@
package gpudemod

// StreamingGPUExecutionMode identifies which backend actually executed a
// streaming extract invocation.
type StreamingGPUExecutionMode string

const (
	// StreamingGPUExecUnavailable means no execution backend could run the job.
	StreamingGPUExecUnavailable StreamingGPUExecutionMode = "unavailable"
	// StreamingGPUExecHostOracle means the host-side (CPU) oracle ran the job.
	StreamingGPUExecHostOracle StreamingGPUExecutionMode = "host_oracle"
	// StreamingGPUExecCUDA means the native CUDA path ran the job.
	StreamingGPUExecCUDA StreamingGPUExecutionMode = "cuda"
)

// StreamingGPUInvocation is the fully-resolved input for one signal's
// streaming extract step: job identity and configuration, derived filter
// parameters, the carried state snapshot, and the NEW IQ samples for this
// chunk.
type StreamingGPUInvocation struct {
	SignalID       int64
	ConfigHash     uint64
	OffsetHz       float64
	OutRate        int
	Bandwidth      float64
	SampleRate     int
	NumTaps        int
	Decim          int     // integer decimation factor
	PhaseCountIn   int     // decimation phase carry entering this chunk
	NCOPhaseIn     float64 // NCO phase entering this chunk
	HistoryLen     int     // valid length of ShiftedHistory
	BaseTaps       []float32
	PolyphaseTaps  []float32   // phase-major layout of BaseTaps
	ShiftedHistory []complex64 // carried shifted samples from the previous chunk
	IQNew          []complex64 // NEW input samples only (this path has no overlap/trim)
}

// StreamingGPUExecutionResult carries one signal's extracted output plus the
// updated carry state that must be written back before the next chunk.
type StreamingGPUExecutionResult struct {
	SignalID      int64
	Mode          StreamingGPUExecutionMode // backend that produced this result
	IQ            []complex64               // decimated baseband output
	Rate          int                       // output sample rate
	NOut          int                       // number of valid samples in IQ
	PhaseCountOut int                       // decimation phase carry after the chunk
	NCOPhaseOut   float64                   // NCO phase after the chunk
	HistoryOut    []complex64               // new carried history
	HistoryLenOut int                       // valid length of HistoryOut
}

+ 29
- 0
internal/demod/gpudemod/streaming_gpu_exec.go Просмотреть файл

@@ -0,0 +1,29 @@
package gpudemod

// StreamingExtractGPUExec is the internal execution selector for the new
// production-path semantics. It intentionally keeps the public API stable while
// allowing the implementation to evolve from host-side oracle execution toward
// a real GPU polyphase path.
//
// Selection order: try the native prepared GPU path first (when enabled); if
// it fails, fall back to the host-oracle path when that is also enabled,
// otherwise surface the native error. With neither mode enabled, the call
// returns ErrUnavailable.
func (r *BatchRunner) StreamingExtractGPUExec(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) {
	invocations, err := r.buildStreamingGPUInvocations(iqNew, jobs)
	if err != nil {
		return nil, err
	}
	if useGPUNativePreparedExecution {
		execResults, err := r.executeStreamingGPUNativePrepared(invocations)
		if err == nil {
			return r.applyStreamingGPUExecutionResults(execResults), nil
		}
		// Native path failed: only fall through when the host oracle can take over.
		if !useGPUHostOracleExecution {
			return nil, err
		}
	}
	if useGPUHostOracleExecution {
		execResults, err := r.executeStreamingGPUHostOraclePrepared(invocations)
		if err != nil {
			return nil, err
		}
		return r.applyStreamingGPUExecutionResults(execResults), nil
	}
	return nil, ErrUnavailable
}

+ 112
- 0
internal/demod/gpudemod/streaming_gpu_exec_test.go Просмотреть файл

@@ -0,0 +1,112 @@
package gpudemod

import "testing"

// TestStreamingExtractGPUExecUsesSafeDefaultMode checks that the default gate
// configuration yields streaming output rather than an error.
// NOTE(review): with useGPUNativePreparedExecution=true and
// useGPUHostOracleExecution=false, a build where the native path is
// unavailable makes the selector return an error — confirm the gate settings
// keep this test green on non-CUDA builds.
func TestStreamingExtractGPUExecUsesSafeDefaultMode(t *testing.T) {
	r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	res, err := r.StreamingExtractGPUExec(makeDeterministicIQ(2048), []StreamingExtractJob{job})
	if err != nil {
		t.Fatalf("expected safe default execution path, got error: %v", err)
	}
	if len(res) != 1 {
		t.Fatalf("expected 1 result, got %d", len(res))
	}
	if res[0].Rate != job.OutRate {
		t.Fatalf("expected output rate %d, got %d", job.OutRate, res[0].Rate)
	}
	if res[0].NOut <= 0 {
		t.Fatalf("expected streaming output samples")
	}
}

// TestStreamingGPUExecMatchesCPUOracleAcrossChunkPatterns replays two waveform
// families through the exec selector in irregular chunk sizes — including
// empty chunks and sizes straddling the 64-sample history window implied by
// NumTaps=65 — and requires per-step agreement with the CPU oracle.
func TestStreamingGPUExecMatchesCPUOracleAcrossChunkPatterns(t *testing.T) {
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	t.Run("DeterministicIQ", func(t *testing.T) {
		r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
		steps := makeStreamingValidationSteps(
			makeDeterministicIQ(1500),
			[]int{0, 1, 2, 17, 63, 64, 65, 129, 511},
			[]StreamingExtractJob{job},
		)
		runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9)
	})
	t.Run("ToneNoiseIQ", func(t *testing.T) {
		r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
		steps := makeStreamingValidationSteps(
			makeToneNoiseIQ(4096, 0.023),
			[]int{7, 20, 3, 63, 64, 65, 777},
			[]StreamingExtractJob{job},
		)
		runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9)
	})
}

// TestStreamingGPUExecLifecycleMatchesCPUOracle drives the exec selector
// through state lifecycle events — a config-hash change with no new samples,
// a signal disappearing and reappearing, and a chunk sized exactly at the
// history boundary — and checks each step against the CPU oracle. It finally
// asserts that signal B never acquired native state (mirroring the code's own
// "gate is off" expectation).
func TestStreamingGPUExecLifecycleMatchesCPUOracle(t *testing.T) {
	r := &BatchRunner{
		eng:         &Engine{sampleRate: 4000000},
		streamState: make(map[int64]*ExtractStreamState),
		nativeState: make(map[int64]*nativeStreamingSignalState),
	}
	baseA := StreamingExtractJob{
		SignalID:   11,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 1001,
	}
	baseB := StreamingExtractJob{
		SignalID:   22,
		OffsetHz:   -18750,
		Bandwidth:  16000,
		OutRate:    100000,
		NumTaps:    33,
		ConfigHash: 2002,
	}
	steps := []streamingValidationStep{
		{
			name: "prime_both_signals",
			iq:   makeDeterministicIQ(512),
			jobs: []StreamingExtractJob{baseA, baseB},
		},
		{
			// Bumping ConfigHash with nil input exercises reset-without-data.
			name: "config_reset_with_zero_new",
			iq:   nil,
			jobs: []StreamingExtractJob{{SignalID: baseA.SignalID, OffsetHz: baseA.OffsetHz, Bandwidth: baseA.Bandwidth, OutRate: baseA.OutRate, NumTaps: baseA.NumTaps, ConfigHash: baseA.ConfigHash + 1}, baseB},
		},
		{
			name: "signal_b_disappears",
			iq:   makeToneNoiseIQ(96, 0.041),
			jobs: []StreamingExtractJob{baseA},
		},
		{
			name: "signal_b_reappears_fresh",
			iq:   makeDeterministicIQ(160),
			jobs: []StreamingExtractJob{baseA, baseB},
		},
		{
			name: "small_history_boundary_chunk",
			iq:   makeToneNoiseIQ(65, 0.017),
			jobs: []StreamingExtractJob{baseA, baseB},
		},
	}
	runStreamingExecSequenceAgainstOracle(t, r, steps, 1e-5, 1e-9)
	if _, ok := r.nativeState[baseB.SignalID]; ok {
		t.Fatalf("expected safe host-oracle path to keep native state inactive while gate is off")
	}
}

+ 30
- 0
internal/demod/gpudemod/streaming_gpu_host_exec.go Просмотреть файл

@@ -0,0 +1,30 @@
package gpudemod

// executeStreamingGPUHostOraclePrepared runs every prepared invocation through
// the host-side polyphase core and packages the outputs as execution results
// tagged with the host-oracle mode. It never fails.
func (r *BatchRunner) executeStreamingGPUHostOraclePrepared(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) {
	results := make([]StreamingGPUExecutionResult, len(invocations))
	for idx := range invocations {
		inv := &invocations[idx]
		samples, ncoPhase, phaseCount, history := runStreamingPolyphaseHostCore(
			inv.IQNew,
			inv.SampleRate,
			inv.OffsetHz,
			inv.NCOPhaseIn,
			inv.PhaseCountIn,
			inv.NumTaps,
			inv.Decim,
			inv.ShiftedHistory,
			inv.PolyphaseTaps,
		)
		results[idx] = StreamingGPUExecutionResult{
			SignalID:      inv.SignalID,
			Mode:          StreamingGPUExecHostOracle,
			IQ:            samples,
			Rate:          inv.OutRate,
			NOut:          len(samples),
			PhaseCountOut: phaseCount,
			NCOPhaseOut:   ncoPhase,
			HistoryOut:    history,
			HistoryLenOut: len(history),
		}
	}
	return results, nil
}

+ 49
- 0
internal/demod/gpudemod/streaming_gpu_host_oracle.go Просмотреть файл

@@ -0,0 +1,49 @@
package gpudemod

// StreamingExtractGPUHostOracle is a temporary host-side execution of the
// intended streaming semantics using GPU-owned stream state. It is not the
// final GPU production implementation, but it lets the new production
// entrypoint move from pure stub semantics toward real NEW-samples-only
// streaming behavior without reintroducing overlap+trim.
func (r *BatchRunner) StreamingExtractGPUHostOracle(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) {
	if r == nil || r.eng == nil {
		return nil, ErrUnavailable
	}
	seen := make(map[int64]struct{}, len(jobs))
	results := make([]StreamingExtractResult, len(jobs))
	for i := range jobs {
		job := jobs[i]
		seen[job.SignalID] = struct{}{}
		state, err := r.getOrInitExtractState(job, r.eng.sampleRate)
		if err != nil {
			return nil, err
		}
		out, ncoPhase, phaseCount, history := runStreamingPolyphaseHostCore(
			iqNew,
			r.eng.sampleRate,
			job.OffsetHz,
			state.NCOPhase,
			state.PhaseCount,
			state.NumTaps,
			state.Decim,
			state.ShiftedHistory,
			state.PolyphaseTaps,
		)
		// Persist the advanced carry state for the next chunk.
		state.NCOPhase = ncoPhase
		state.PhaseCount = phaseCount
		state.ShiftedHistory = append(state.ShiftedHistory[:0], history...)
		results[i] = StreamingExtractResult{
			SignalID:   job.SignalID,
			IQ:         out,
			Rate:       job.OutRate,
			NOut:       len(out),
			PhaseCount: state.PhaseCount,
			HistoryLen: len(state.ShiftedHistory),
		}
	}
	// Drop state for any signal that was not part of this batch.
	for signalID := range r.streamState {
		if _, ok := seen[signalID]; !ok {
			delete(r.streamState, signalID)
		}
	}
	return results, nil
}

+ 35
- 0
internal/demod/gpudemod/streaming_gpu_host_oracle_test.go Просмотреть файл

@@ -0,0 +1,35 @@
package gpudemod

import "testing"

// TestStreamingGPUHostOracleComparableToCPUOracle runs one large chunk
// through the host-oracle path and the CPU oracle runner and requires their
// outputs to agree within a 1e-5 max absolute error.
func TestStreamingGPUHostOracleComparableToCPUOracle(t *testing.T) {
	r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	iq := makeDeterministicIQ(16000)
	gpuLike, err := r.StreamingExtractGPUHostOracle(iq, []StreamingExtractJob{job})
	if err != nil {
		t.Fatalf("unexpected host-oracle error: %v", err)
	}
	oracleRunner := NewCPUOracleRunner(4000000)
	oracle, err := oracleRunner.StreamingExtract(iq, []StreamingExtractJob{job})
	if err != nil {
		t.Fatalf("unexpected oracle error: %v", err)
	}
	if len(gpuLike) != 1 || len(oracle) != 1 {
		t.Fatalf("unexpected result lengths: gpuLike=%d oracle=%d", len(gpuLike), len(oracle))
	}
	metrics, stats := CompareOracleAndGPUHostOracle(oracle[0], gpuLike[0])
	if stats.Count == 0 {
		t.Fatalf("expected compare count > 0")
	}
	if metrics.RefMaxAbsErr > 1e-5 {
		t.Fatalf("expected host-oracle path to match cpu oracle closely, got max abs err %f", metrics.RefMaxAbsErr)
	}
}

+ 4
- 0
internal/demod/gpudemod/streaming_gpu_modes.go Просмотреть файл

@@ -0,0 +1,4 @@
package gpudemod

// useGPUHostOracleExecution gates the host-side reference execution path
// (also used as the fallback when the native path fails, see
// StreamingExtractGPUExec).
const useGPUHostOracleExecution = false

// useGPUNativePreparedExecution gates the native CUDA prepared execution path.
const useGPUNativePreparedExecution = true

+ 284
- 0
internal/demod/gpudemod/streaming_gpu_native_prepare.go Просмотреть файл

@@ -0,0 +1,284 @@
//go:build cufft && windows

package gpudemod

/*
#cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
#include <cuda_runtime.h>
typedef struct { float x; float y; } gpud_float2;
*/
import "C"

import (
"math"
"unsafe"
)

// executeStreamingGPUNativePrepared runs the prepared invocations through the
// native CUDA stateful polyphase export. Sample, tap, and history buffers are
// device-resident per signal; only the phase-carry scalars, the decimated
// output, and the retained history cross the host/device boundary each call.
// Any bridge failure is collapsed to ErrUnavailable.
func (r *BatchRunner) executeStreamingGPUNativePrepared(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) {
	if r == nil || r.eng == nil {
		return nil, ErrUnavailable
	}
	if r.nativeState == nil {
		r.nativeState = make(map[int64]*nativeStreamingSignalState)
	}
	results := make([]StreamingGPUExecutionResult, len(invocations))
	for i, inv := range invocations {
		state, err := r.getOrInitNativeStreamingState(inv)
		if err != nil {
			return nil, err
		}
		// Upload only the NEW samples for this chunk; history stays on device.
		if len(inv.IQNew) > 0 {
			if err := ensureNativeBuffer(&state.dInNew, &state.inNewCap, len(inv.IQNew), unsafe.Sizeof(C.gpud_float2{})); err != nil {
				return nil, err
			}
			if bridgeMemcpyH2D(state.dInNew, unsafe.Pointer(&inv.IQNew[0]), uintptr(len(inv.IQNew))*unsafe.Sizeof(complex64(0))) != 0 {
				return nil, ErrUnavailable
			}
		}
		// Output bound: len/decim plus slack — presumably for boundary
		// samples carried by history/phase; confirm against the kernel contract.
		outCap := len(inv.IQNew)/maxInt(1, inv.Decim) + 2
		if outCap > 0 {
			if err := ensureNativeBuffer(&state.dOut, &state.outCap, outCap, unsafe.Sizeof(C.gpud_float2{})); err != nil {
				return nil, err
			}
		}

		phaseInc := -2.0 * math.Pi * inv.OffsetHz / float64(inv.SampleRate)
		// The native export consumes phase carry as host scalars while sample/history
		// buffers remain device-resident, so keep these counters in nativeState.
		var nOut C.int
		historyLen := C.int(state.historyLen)
		phaseCount := C.int(state.phaseCount)
		phaseNCO := C.double(state.phaseNCO)
		res := bridgeLaunchStreamingPolyphaseStateful(
			(*C.gpud_float2)(state.dInNew),
			len(inv.IQNew),
			(*C.gpud_float2)(state.dShifted),
			(*C.float)(state.dTaps),
			state.tapsLen,
			state.decim,
			state.numTaps,
			(*C.gpud_float2)(state.dHistory),
			(*C.gpud_float2)(state.dHistoryScratch),
			state.historyCap,
			&historyLen,
			&phaseCount,
			&phaseNCO,
			phaseInc,
			(*C.gpud_float2)(state.dOut),
			outCap,
			&nOut,
		)
		if res != 0 {
			return nil, ErrUnavailable
		}
		// Persist the advanced carry scalars for the next chunk.
		state.historyLen = int(historyLen)
		state.phaseCount = int(phaseCount)
		state.phaseNCO = float64(phaseNCO)

		// Copy the decimated output and the retained history back to the host.
		outHost := make([]complex64, int(nOut))
		if len(outHost) > 0 {
			if bridgeMemcpyD2H(unsafe.Pointer(&outHost[0]), state.dOut, uintptr(len(outHost))*unsafe.Sizeof(complex64(0))) != 0 {
				return nil, ErrUnavailable
			}
		}
		histHost := make([]complex64, state.historyLen)
		if state.historyLen > 0 {
			if bridgeMemcpyD2H(unsafe.Pointer(&histHost[0]), state.dHistory, uintptr(state.historyLen)*unsafe.Sizeof(complex64(0))) != 0 {
				return nil, ErrUnavailable
			}
		}

		results[i] = StreamingGPUExecutionResult{
			SignalID:      inv.SignalID,
			Mode:          StreamingGPUExecCUDA,
			IQ:            outHost,
			Rate:          inv.OutRate,
			NOut:          len(outHost),
			PhaseCountOut: state.phaseCount,
			NCOPhaseOut:   state.phaseNCO,
			HistoryOut:    histHost,
			HistoryLenOut: len(histHost),
		}
	}
	return results, nil
}

// getOrInitNativeStreamingState returns the device-side state for inv's
// signal, (re)allocating buffers lazily. Any change to the config hash, the
// filter geometry (decim/taps), or the history capacity releases all existing
// device allocations and reseeds the carry state from the invocation snapshot.
func (r *BatchRunner) getOrInitNativeStreamingState(inv StreamingGPUInvocation) (*nativeStreamingSignalState, error) {
	state := r.nativeState[inv.SignalID]
	needReset := false
	historyCap := maxInt(0, inv.NumTaps-1)
	if state == nil {
		state = &nativeStreamingSignalState{signalID: inv.SignalID}
		r.nativeState[inv.SignalID] = state
		needReset = true
	}
	if state.configHash != inv.ConfigHash {
		needReset = true
	}
	if state.decim != inv.Decim || state.numTaps != inv.NumTaps || state.tapsLen != len(inv.PolyphaseTaps) {
		needReset = true
	}
	if state.historyCap != historyCap {
		needReset = true
	}
	if needReset {
		// Frees every device buffer and zeroes bookkeeping; buffers are then
		// re-created lazily below.
		releaseNativeStreamingSignalState(state)
	}
	if len(inv.PolyphaseTaps) == 0 {
		return nil, ErrUnavailable
	}
	if state.dTaps == nil && len(inv.PolyphaseTaps) > 0 {
		if bridgeCudaMalloc(&state.dTaps, uintptr(len(inv.PolyphaseTaps))*unsafe.Sizeof(C.float(0))) != 0 {
			return nil, ErrUnavailable
		}
		if bridgeMemcpyH2D(state.dTaps, unsafe.Pointer(&inv.PolyphaseTaps[0]), uintptr(len(inv.PolyphaseTaps))*unsafe.Sizeof(float32(0))) != 0 {
			return nil, ErrUnavailable
		}
		state.tapsLen = len(inv.PolyphaseTaps)
	}
	if state.dShifted == nil {
		minCap := maxInt(1, len(inv.IQNew))
		if bridgeCudaMalloc(&state.dShifted, uintptr(minCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 {
			return nil, ErrUnavailable
		}
		state.shiftedCap = minCap
	}
	if state.shiftedCap < len(inv.IQNew) {
		// Grow by free+malloc; prior contents are not preserved — presumably
		// the kernel rewrites this buffer from the NEW samples each launch.
		if bridgeCudaFree(state.dShifted) != 0 {
			return nil, ErrUnavailable
		}
		state.dShifted = nil
		state.shiftedCap = 0
		if bridgeCudaMalloc(&state.dShifted, uintptr(len(inv.IQNew))*unsafe.Sizeof(C.gpud_float2{})) != 0 {
			return nil, ErrUnavailable
		}
		state.shiftedCap = len(inv.IQNew)
	}
	if state.dHistory == nil && historyCap > 0 {
		if bridgeCudaMalloc(&state.dHistory, uintptr(historyCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 {
			return nil, ErrUnavailable
		}
	}
	if state.dHistoryScratch == nil && historyCap > 0 {
		if bridgeCudaMalloc(&state.dHistoryScratch, uintptr(historyCap)*unsafe.Sizeof(C.gpud_float2{})) != 0 {
			return nil, ErrUnavailable
		}
		state.historyScratchCap = historyCap
	}
	if needReset {
		// Reseed carry state from the host snapshot, keeping at most the
		// last historyCap samples of the provided history.
		state.phaseCount = inv.PhaseCountIn
		state.phaseNCO = inv.NCOPhaseIn
		state.historyLen = minInt(len(inv.ShiftedHistory), historyCap)
		if state.historyLen > 0 {
			if bridgeMemcpyH2D(state.dHistory, unsafe.Pointer(&inv.ShiftedHistory[len(inv.ShiftedHistory)-state.historyLen]), uintptr(state.historyLen)*unsafe.Sizeof(complex64(0))) != 0 {
				return nil, ErrUnavailable
			}
		}
	}
	state.decim = inv.Decim
	state.numTaps = inv.NumTaps
	state.historyCap = historyCap
	state.historyScratchCap = historyCap
	state.configHash = inv.ConfigHash
	return state, nil
}

// ensureNativeBuffer guarantees that *ptr points at device storage for at
// least need elements of elemSize bytes, reallocating (free then malloc) when
// the tracked capacity is insufficient. A need of zero or less is a no-op.
func ensureNativeBuffer(ptr *unsafe.Pointer, capRef *int, need int, elemSize uintptr) error {
	if need <= 0 {
		return nil
	}
	if *ptr != nil {
		if *capRef >= need {
			return nil
		}
		if bridgeCudaFree(*ptr) != 0 {
			return ErrUnavailable
		}
		*ptr = nil
		*capRef = 0
	}
	if bridgeCudaMalloc(ptr, uintptr(need)*elemSize) != 0 {
		return ErrUnavailable
	}
	*capRef = need
	return nil
}

// syncNativeStreamingStates releases device state for every signal that is no
// longer present in the active set.
func (r *BatchRunner) syncNativeStreamingStates(active map[int64]struct{}) {
	if r == nil || r.nativeState == nil {
		return
	}
	for id, state := range r.nativeState {
		_, stillActive := active[id]
		if stillActive {
			continue
		}
		releaseNativeStreamingSignalState(state)
		delete(r.nativeState, id)
	}
}

// resetNativeStreamingState frees and forgets device state for one signal.
func (r *BatchRunner) resetNativeStreamingState(signalID int64) {
	if r == nil || r.nativeState == nil {
		return
	}
	state, ok := r.nativeState[signalID]
	if ok && state != nil {
		releaseNativeStreamingSignalState(state)
	}
	delete(r.nativeState, signalID)
}

// resetAllNativeStreamingStates frees every per-signal device state and then
// reinitializes the tracking map so the runner is ready for fresh signals.
func (r *BatchRunner) resetAllNativeStreamingStates() {
	if r == nil {
		return
	}
	r.freeAllNativeStreamingStates()
	r.nativeState = map[int64]*nativeStreamingSignalState{}
}

// freeAllNativeStreamingStates releases device memory for every tracked
// signal and removes the corresponding map entries (the map itself survives).
func (r *BatchRunner) freeAllNativeStreamingStates() {
	if r == nil || r.nativeState == nil {
		return
	}
	for id := range r.nativeState {
		releaseNativeStreamingSignalState(r.nativeState[id])
		delete(r.nativeState, id)
	}
}

// releaseNativeStreamingSignalState frees every device allocation owned by
// state and zeroes all bookkeeping, preserving only the signal id.
func releaseNativeStreamingSignalState(state *nativeStreamingSignalState) {
	if state == nil {
		return
	}
	buffers := []*unsafe.Pointer{
		&state.dInNew,
		&state.dShifted,
		&state.dOut,
		&state.dTaps,
		&state.dHistory,
		&state.dHistoryScratch,
	}
	for _, buf := range buffers {
		if *buf == nil {
			continue
		}
		// Best-effort free; the pointer is cleared regardless of the result.
		_ = bridgeCudaFree(*buf)
		*buf = nil
	}
	// All pointers are nil now, so a zero-value struct reset is equivalent to
	// clearing each capacity/length/phase field individually.
	*state = nativeStreamingSignalState{signalID: state.signalID}
}

// minInt returns the smaller of two ints.
func minInt(a int, b int) int {
	if b < a {
		return b
	}
	return a
}

+ 44
- 0
internal/demod/gpudemod/streaming_gpu_native_prepare_stub.go Просмотреть файл

@@ -0,0 +1,44 @@
//go:build !cufft || !windows

package gpudemod

// executeStreamingGPUNativePrepared is the non-CUDA build stub: the native
// prepared path can never run here, so it always reports ErrUnavailable.
// The blank parameter replaces the original's dead `_ = invocations`.
func (r *BatchRunner) executeStreamingGPUNativePrepared(_ []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) {
	return nil, ErrUnavailable
}

// syncNativeStreamingStates (non-CUDA stub) keeps the bookkeeping map in step
// with the active signal set; there is no device memory to release here.
// The original's `_ = active` was dead code — active is genuinely used below.
func (r *BatchRunner) syncNativeStreamingStates(active map[int64]struct{}) {
	if r == nil {
		return
	}
	if r.nativeState == nil {
		r.nativeState = make(map[int64]*nativeStreamingSignalState)
	}
	for id := range r.nativeState {
		if _, ok := active[id]; !ok {
			delete(r.nativeState, id)
		}
	}
}

// resetNativeStreamingState (non-CUDA stub) forgets bookkeeping for a signal.
func (r *BatchRunner) resetNativeStreamingState(signalID int64) {
	if r != nil && r.nativeState != nil {
		delete(r.nativeState, signalID)
	}
}

// resetAllNativeStreamingStates (non-CUDA stub) replaces the bookkeeping map
// with a fresh empty one.
func (r *BatchRunner) resetAllNativeStreamingStates() {
	if r == nil {
		return
	}
	r.nativeState = map[int64]*nativeStreamingSignalState{}
}

// freeAllNativeStreamingStates (non-CUDA stub) drops the bookkeeping map
// entirely. NOTE(review): the CUDA build keeps an empty map instead — confirm
// callers tolerate a nil map here.
func (r *BatchRunner) freeAllNativeStreamingStates() {
	if r != nil {
		r.nativeState = nil
	}
}

+ 206
- 0
internal/demod/gpudemod/streaming_gpu_native_prepare_test.go Просмотреть файл

@@ -0,0 +1,206 @@
//go:build cufft && windows

package gpudemod

import (
"os"
"path/filepath"
"testing"
)

// configureNativePreparedDLLPath points GPUMOD_DLL at the first kernel DLL it
// finds among the known build locations. If none exists the environment is
// left untouched so the loader can fall back to its own defaults.
func configureNativePreparedDLLPath(t *testing.T) {
	t.Helper()
	for _, candidate := range []string{
		filepath.Join("build", "gpudemod_kernels.dll"),
		filepath.Join("internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"),
		"gpudemod_kernels.dll",
	} {
		if _, statErr := os.Stat(candidate); statErr != nil {
			continue
		}
		abs, absErr := filepath.Abs(candidate)
		if absErr != nil {
			t.Fatalf("resolve native prepared DLL path: %v", absErr)
		}
		t.Setenv("GPUMOD_DLL", abs)
		return
	}
}

// requireNativePreparedTestRunner returns a batch runner backed by the native
// prepared path, skipping the calling test when the DLL, the CUDA device, or
// the runner itself is unavailable on this machine.
func requireNativePreparedTestRunner(t *testing.T) *BatchRunner {
	t.Helper()
	configureNativePreparedDLLPath(t)
	if loadErr := ensureDLLLoaded(); loadErr != nil {
		t.Skipf("native prepared path unavailable: %v", loadErr)
	}
	if !Available() {
		t.Skip("native prepared path unavailable: cuda device not available")
	}
	runner, newErr := NewBatchRunner(32768, 4000000)
	if newErr != nil {
		t.Skipf("native prepared path unavailable: %v", newErr)
	}
	t.Cleanup(runner.Close)
	return runner
}

// TestStreamingGPUNativePreparedMatchesCPUOracleAcrossChunkPatterns feeds the
// native prepared executor irregular chunk sizes — including empty and
// history-boundary-sized chunks — for two waveform families and requires
// close agreement (1e-4 sample, 1e-8 phase) with the CPU oracle at each step.
func TestStreamingGPUNativePreparedMatchesCPUOracleAcrossChunkPatterns(t *testing.T) {
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	exec := func(r *BatchRunner, invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) {
		return r.executeStreamingGPUNativePrepared(invocations)
	}
	t.Run("DeterministicIQ", func(t *testing.T) {
		r := requireNativePreparedTestRunner(t)
		steps := makeStreamingValidationSteps(
			makeDeterministicIQ(8192),
			[]int{0, 1, 2, 17, 63, 64, 65, 129, 511, 2048},
			[]StreamingExtractJob{job},
		)
		runPreparedSequenceAgainstOracle(t, r, exec, steps, 1e-4, 1e-8)
	})
	t.Run("ToneNoiseIQ", func(t *testing.T) {
		r := requireNativePreparedTestRunner(t)
		steps := makeStreamingValidationSteps(
			makeToneNoiseIQ(12288, 0.023),
			[]int{7, 20, 3, 63, 64, 65, 777, 2048, 4096},
			[]StreamingExtractJob{job},
		)
		runPreparedSequenceAgainstOracle(t, r, exec, steps, 1e-4, 1e-8)
	})
}

// TestStreamingGPUNativePreparedLifecycleResetAndCapacity walks the native
// prepared executor through lifecycle events — device-buffer capacity growth,
// a config reset with zero new samples, a signal disappearing and
// reappearing, and a history-boundary chunk — cross-checking every step
// against the CPU oracle while also asserting the device-side bookkeeping
// (config hash, decim, taps, history lengths, buffer capacities) stays
// consistent with the oracle's view.
func TestStreamingGPUNativePreparedLifecycleResetAndCapacity(t *testing.T) {
	r := requireNativePreparedTestRunner(t)
	exec := func(invocations []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error) {
		return r.executeStreamingGPUNativePrepared(invocations)
	}
	jobA := StreamingExtractJob{
		SignalID:   11,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 3001,
	}
	jobB := StreamingExtractJob{
		SignalID:   22,
		OffsetHz:   -18750,
		Bandwidth:  16000,
		OutRate:    100000,
		NumTaps:    33,
		ConfigHash: 4002,
	}

	steps := []streamingValidationStep{
		{
			name: "prime_both_signals",
			iq:   makeDeterministicIQ(256),
			jobs: []StreamingExtractJob{jobA, jobB},
		},
		{
			// 4096 > 256 forces the device input/shifted buffers to grow.
			name: "grow_capacity",
			iq:   makeToneNoiseIQ(4096, 0.037),
			jobs: []StreamingExtractJob{jobA, jobB},
		},
		{
			name: "config_reset_zero_new",
			iq:   nil,
			jobs: []StreamingExtractJob{{SignalID: jobA.SignalID, OffsetHz: jobA.OffsetHz, Bandwidth: jobA.Bandwidth, OutRate: jobA.OutRate, NumTaps: jobA.NumTaps, ConfigHash: jobA.ConfigHash + 1}, jobB},
		},
		{
			name: "signal_b_disappears",
			iq:   makeDeterministicIQ(64),
			jobs: []StreamingExtractJob{jobA},
		},
		{
			name: "signal_b_reappears",
			iq:   makeToneNoiseIQ(96, 0.017),
			jobs: []StreamingExtractJob{jobA, jobB},
		},
		{
			name: "history_boundary",
			iq:   makeDeterministicIQ(65),
			jobs: []StreamingExtractJob{jobA, jobB},
		},
	}

	oracle := NewCPUOracleRunner(r.eng.sampleRate)
	var grownCap int
	for idx, step := range steps {
		invocations, err := r.buildStreamingGPUInvocations(step.iq, step.jobs)
		if err != nil {
			t.Fatalf("step %d (%s): build invocations failed: %v", idx, step.name, err)
		}
		got, err := exec(invocations)
		if err != nil {
			t.Fatalf("step %d (%s): native prepared exec failed: %v", idx, step.name, err)
		}
		want, err := oracle.StreamingExtract(step.iq, step.jobs)
		if err != nil {
			t.Fatalf("step %d (%s): oracle failed: %v", idx, step.name, err)
		}
		if len(got) != len(want) {
			t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", idx, step.name, len(got), len(want))
		}
		applied := r.applyStreamingGPUExecutionResults(got)
		for i, job := range step.jobs {
			// Compare raw execution results, the applied public results, and
			// the persisted stream state against the oracle.
			oracleState := oracle.States[job.SignalID]
			requirePreparedExecutionResultMatchesOracle(t, got[i], want[i], oracleState, 1e-4, 1e-8)
			requireStreamingExtractResultMatchesOracle(t, applied[i], want[i])
			requireExtractStateMatchesOracle(t, r.streamState[job.SignalID], oracleState, 1e-8, 1e-4)

			state := r.nativeState[job.SignalID]
			if state == nil {
				t.Fatalf("step %d (%s): missing native state for signal %d", idx, step.name, job.SignalID)
			}
			if state.configHash != job.ConfigHash {
				t.Fatalf("step %d (%s): native config hash mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.configHash, job.ConfigHash)
			}
			if state.decim != oracleState.Decim {
				t.Fatalf("step %d (%s): native decim mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.decim, oracleState.Decim)
			}
			if state.numTaps != oracleState.NumTaps {
				t.Fatalf("step %d (%s): native num taps mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.numTaps, oracleState.NumTaps)
			}
			if state.historyCap != maxInt(0, oracleState.NumTaps-1) {
				t.Fatalf("step %d (%s): native history cap mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.historyCap, maxInt(0, oracleState.NumTaps-1))
			}
			if state.historyLen != len(oracleState.ShiftedHistory) {
				t.Fatalf("step %d (%s): native history len mismatch for signal %d: got=%d want=%d", idx, step.name, job.SignalID, state.historyLen, len(oracleState.ShiftedHistory))
			}
			if len(step.iq) > 0 && state.shiftedCap < len(step.iq) {
				t.Fatalf("step %d (%s): native shifted capacity too small for signal %d: got=%d need>=%d", idx, step.name, job.SignalID, state.shiftedCap, len(step.iq))
			}
			if state.outCap < got[i].NOut {
				t.Fatalf("step %d (%s): native out capacity too small for signal %d: got=%d need>=%d", idx, step.name, job.SignalID, state.outCap, got[i].NOut)
			}
			if job.SignalID == jobA.SignalID && state.shiftedCap > grownCap {
				grownCap = state.shiftedCap
			}
		}
		if step.name == "grow_capacity" && grownCap < len(step.iq) {
			t.Fatalf("expected capacity growth for signal %d, got=%d want>=%d", jobA.SignalID, grownCap, len(step.iq))
		}
		if step.name == "config_reset_zero_new" {
			state := r.nativeState[jobA.SignalID]
			if state == nil {
				t.Fatalf("missing native state for signal %d after config reset", jobA.SignalID)
			}
			if state.historyLen != 0 {
				t.Fatalf("expected cleared native history after config reset, got=%d", state.historyLen)
			}
		}
		if step.name == "signal_b_disappears" {
			if _, ok := r.nativeState[jobB.SignalID]; ok {
				t.Fatalf("expected native state for signal %d to be removed on disappearance", jobB.SignalID)
			}
		}
	}
}

+ 28
- 0
internal/demod/gpudemod/streaming_gpu_native_state.go Просмотреть файл

@@ -0,0 +1,28 @@
package gpudemod

import "unsafe"

// nativeStreamingSignalState tracks the device-resident buffers and host-side
// carry scalars for one signal on the native CUDA streaming path.
type nativeStreamingSignalState struct {
	signalID int64

	configHash uint64 // config fingerprint; a mismatch triggers a full reset
	decim      int
	numTaps    int

	// Device pointers (allocated via the CUDA bridge); nil until first use.
	dInNew          unsafe.Pointer // NEW input samples for the current chunk
	dShifted        unsafe.Pointer // per-chunk working buffer sized to the input — presumably the NCO-shifted samples
	dOut            unsafe.Pointer // decimated output samples
	dTaps           unsafe.Pointer // polyphase tap bank
	dHistory        unsafe.Pointer // retained filter history (at most numTaps-1 samples)
	dHistoryScratch unsafe.Pointer // scratch buffer sized like dHistory

	// Capacities and lengths are element counts, not bytes.
	inNewCap          int
	shiftedCap        int
	outCap            int
	tapsLen           int
	historyCap        int
	historyLen        int
	historyScratchCap int
	phaseCount        int     // polyphase phase carry across chunks
	phaseNCO          float64 // NCO phase carry across chunks
}

+ 61
- 0
internal/demod/gpudemod/streaming_gpu_prepare.go Просмотреть файл

@@ -0,0 +1,61 @@
package gpudemod

// buildStreamingGPUInvocations snapshots each job's stream state into a
// self-contained invocation record and prunes host- and native-side state for
// signals absent from this batch.
func (r *BatchRunner) buildStreamingGPUInvocations(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingGPUInvocation, error) {
	if r == nil || r.eng == nil {
		return nil, ErrUnavailable
	}
	seen := make(map[int64]struct{}, len(jobs))
	invocations := make([]StreamingGPUInvocation, len(jobs))
	for i := range jobs {
		job := jobs[i]
		seen[job.SignalID] = struct{}{}
		state, err := r.getOrInitExtractState(job, r.eng.sampleRate)
		if err != nil {
			return nil, err
		}
		// Taps and history are copied so the invocation owns its data;
		// iqNew is shared across all invocations of the batch.
		invocations[i] = StreamingGPUInvocation{
			SignalID:       job.SignalID,
			ConfigHash:     state.ConfigHash,
			OffsetHz:       job.OffsetHz,
			OutRate:        job.OutRate,
			Bandwidth:      job.Bandwidth,
			SampleRate:     r.eng.sampleRate,
			NumTaps:        state.NumTaps,
			Decim:          state.Decim,
			PhaseCountIn:   state.PhaseCount,
			NCOPhaseIn:     state.NCOPhase,
			HistoryLen:     len(state.ShiftedHistory),
			BaseTaps:       append([]float32(nil), state.BaseTaps...),
			PolyphaseTaps:  append([]float32(nil), state.PolyphaseTaps...),
			ShiftedHistory: append([]complex64(nil), state.ShiftedHistory...),
			IQNew:          iqNew,
		}
	}
	for signalID := range r.streamState {
		if _, ok := seen[signalID]; !ok {
			delete(r.streamState, signalID)
		}
	}
	r.syncNativeStreamingStates(seen)
	return invocations, nil
}

// applyStreamingGPUExecutionResults writes the post-execution carry state back
// into the per-signal stream states and converts the execution results into
// the public streaming result shape.
func (r *BatchRunner) applyStreamingGPUExecutionResults(results []StreamingGPUExecutionResult) []StreamingExtractResult {
	out := make([]StreamingExtractResult, len(results))
	for i := range results {
		res := &results[i]
		if state, ok := r.streamState[res.SignalID]; ok && state != nil {
			state.NCOPhase = res.NCOPhaseOut
			state.PhaseCount = res.PhaseCountOut
			state.ShiftedHistory = append(state.ShiftedHistory[:0], res.HistoryOut...)
		}
		out[i] = StreamingExtractResult{
			SignalID:   res.SignalID,
			IQ:         res.IQ,
			Rate:       res.Rate,
			NOut:       res.NOut,
			PhaseCount: res.PhaseCountOut,
			HistoryLen: res.HistoryLenOut,
		}
	}
	return out
}

+ 26
- 0
internal/demod/gpudemod/streaming_gpu_stub.go Просмотреть файл

@@ -0,0 +1,26 @@
package gpudemod

// updateShiftedHistory returns the last numTaps-1 shifted samples seen so
// far, drawn from the previous history followed by the new chunk. It returns
// nil when no history is needed (numTaps <= 1) and a shorter slice when fewer
// than numTaps-1 samples exist in total. Unlike the original, it avoids
// materializing the full prev+new concatenation when the new chunk alone
// already covers the window.
func updateShiftedHistory(prev []complex64, shiftedNew []complex64, numTaps int) []complex64 {
	need := numTaps - 1
	if need <= 0 {
		return nil
	}
	// Fast path: the new chunk alone covers the window; prev is irrelevant.
	if len(shiftedNew) >= need {
		out := make([]complex64, need)
		copy(out, shiftedNew[len(shiftedNew)-need:])
		return out
	}
	total := len(prev) + len(shiftedNew)
	if total <= need {
		// Not enough samples yet: keep everything we have.
		out := make([]complex64, 0, total)
		out = append(out, prev...)
		return append(out, shiftedNew...)
	}
	// Window spans the tail of prev plus all of shiftedNew.
	out := make([]complex64, 0, need)
	fromPrev := need - len(shiftedNew)
	out = append(out, prev[len(prev)-fromPrev:]...)
	return append(out, shiftedNew...)
}

// StreamingExtractGPU is the production entry point for the stateful
// streaming extractor path. The concrete execution strategy is chosen inside
// StreamingExtractGPUExec.
func (r *BatchRunner) StreamingExtractGPU(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) {
	if r == nil {
		return nil, ErrUnavailable
	}
	if r.eng == nil {
		return nil, ErrUnavailable
	}
	return r.StreamingExtractGPUExec(iqNew, jobs)
}

+ 59
- 0
internal/demod/gpudemod/streaming_gpu_stub_test.go Просмотреть файл

@@ -0,0 +1,59 @@
package gpudemod

import "testing"

// TestStreamingGPUUsesSafeProductionDefault checks that the production entry
// point yields output rather than an error under the default gate settings.
// NOTE(review): see the mode constants — on a build where the native path is
// unavailable and the host-oracle gate is off, the selector errors; confirm
// the gates keep this test green everywhere it runs.
func TestStreamingGPUUsesSafeProductionDefault(t *testing.T) {
	r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	iq := makeDeterministicIQ(1000)
	results, err := r.StreamingExtractGPU(iq, []StreamingExtractJob{job})
	if err != nil {
		t.Fatalf("expected safe production default path, got error: %v", err)
	}
	if len(results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(results))
	}
	if results[0].NOut == 0 {
		t.Fatalf("expected non-zero output count from safe production path")
	}
}

// TestStreamingGPUHostOracleAdvancesState checks that one host-oracle call
// both returns output and mutates the per-signal stream state: the NCO phase
// advances away from zero and the shifted history is populated.
func TestStreamingGPUHostOracleAdvancesState(t *testing.T) {
	r := &BatchRunner{eng: &Engine{sampleRate: 4000000}, streamState: make(map[int64]*ExtractStreamState)}
	job := StreamingExtractJob{
		SignalID:   1,
		OffsetHz:   12500,
		Bandwidth:  20000,
		OutRate:    200000,
		NumTaps:    65,
		ConfigHash: 777,
	}
	iq := makeDeterministicIQ(1000)
	results, err := r.StreamingExtractGPUHostOracle(iq, []StreamingExtractJob{job})
	if err != nil {
		t.Fatalf("unexpected host-oracle error: %v", err)
	}
	if len(results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(results))
	}
	state := r.streamState[1]
	if state == nil {
		t.Fatalf("expected state to be initialized")
	}
	// Exact-zero check: a non-zero OffsetHz over 1000 samples must move phase.
	if state.NCOPhase == 0 {
		t.Fatalf("expected phase to advance")
	}
	if len(state.ShiftedHistory) == 0 {
		t.Fatalf("expected shifted history to be updated")
	}
	if results[0].NOut == 0 {
		t.Fatalf("expected non-zero output count from host oracle path")
	}
}

+ 213
- 0
internal/demod/gpudemod/streaming_gpu_validation_helpers_test.go Просмотреть файл

@@ -0,0 +1,213 @@
package gpudemod

import (
"math"
"testing"
)

// streamingValidationStep is one step of a scripted streaming sequence: the
// NEW IQ samples fed during the step and the jobs active while it runs.
type streamingValidationStep struct {
	name string // label used in failure messages
	iq   []complex64
	jobs []StreamingExtractJob
}

// streamingPreparedExecutor abstracts a prepared-invocation execution backend
// so the same validation sequences can drive different implementations.
type streamingPreparedExecutor func(*BatchRunner, []StreamingGPUInvocation) ([]StreamingGPUExecutionResult, error)

func makeToneNoiseIQ(n int, phaseInc float64) []complex64 {
out := make([]complex64, n)
phase := 0.0
for i := 0; i < n; i++ {
tone := complex(math.Cos(phase), math.Sin(phase))
noiseI := 0.17*math.Cos(0.113*float64(i)+0.31) + 0.07*math.Sin(0.071*float64(i))
noiseQ := 0.13*math.Sin(0.097*float64(i)+0.11) - 0.05*math.Cos(0.043*float64(i))
out[i] = complex64(0.85*tone + 0.15*complex(noiseI, noiseQ))
phase += phaseInc
}
return out
}

// makeStreamingValidationSteps slices iq into consecutive chunks of the given
// sizes (clamped to the available samples; negative sizes count as zero),
// producing one validation step per chunk plus a final "remainder" step for
// any unconsumed tail. Each step carries its own copies of the samples and
// jobs. The original's unused range index (worked around with `_ = idx`) is
// removed.
func makeStreamingValidationSteps(iq []complex64, chunkSizes []int, jobs []StreamingExtractJob) []streamingValidationStep {
	steps := make([]streamingValidationStep, 0, len(chunkSizes)+1)
	pos := 0
	for _, n := range chunkSizes {
		if n < 0 {
			n = 0
		}
		end := pos + n
		if end > len(iq) {
			end = len(iq)
		}
		steps = append(steps, streamingValidationStep{
			name: "chunk",
			iq:   append([]complex64(nil), iq[pos:end]...),
			jobs: append([]StreamingExtractJob(nil), jobs...),
		})
		pos = end
	}
	if pos < len(iq) {
		steps = append(steps, streamingValidationStep{
			name: "remainder",
			iq:   append([]complex64(nil), iq[pos:]...),
			jobs: append([]StreamingExtractJob(nil), jobs...),
		})
	}
	return steps
}

func requirePhaseClose(t *testing.T, got float64, want float64, tol float64) {
t.Helper()
diff := got - want
for diff > math.Pi {
diff -= 2 * math.Pi
}
for diff < -math.Pi {
diff += 2 * math.Pi
}
if math.Abs(diff) > tol {
t.Fatalf("phase mismatch: got=%0.12f want=%0.12f diff=%0.12f tol=%0.12f", got, want, diff, tol)
}
}

// requireStreamingExtractResultMatchesOracle fails the test when any scalar
// field of got differs from the oracle result want.
func requireStreamingExtractResultMatchesOracle(t *testing.T, got StreamingExtractResult, want StreamingExtractResult) {
	t.Helper()
	if got.SignalID != want.SignalID {
		t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID)
	}
	for _, c := range []struct {
		label string
		got   int
		want  int
	}{
		{"rate", got.Rate, want.Rate},
		{"n_out", got.NOut, want.NOut},
		{"phase count", got.PhaseCount, want.PhaseCount},
		{"history len", got.HistoryLen, want.HistoryLen},
	} {
		if c.got != c.want {
			t.Fatalf("%s mismatch for signal %d: got=%d want=%d", c.label, got.SignalID, c.got, c.want)
		}
	}
}

// requirePreparedExecutionResultMatchesOracle asserts that a prepared-path
// execution result matches both the oracle's returned result and the
// oracle's post-step internal state (phase carry and retained history).
func requirePreparedExecutionResultMatchesOracle(t *testing.T, got StreamingGPUExecutionResult, want StreamingExtractResult, oracleState *CPUOracleState, sampleTol float64, phaseTol float64) {
	t.Helper()
	if oracleState == nil {
		t.Fatalf("missing oracle state for signal %d", got.SignalID)
	}
	if got.SignalID != want.SignalID {
		t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID)
	}
	for _, c := range []struct {
		label string
		got   int
		want  int
	}{
		{"rate", got.Rate, want.Rate},
		{"n_out", got.NOut, want.NOut},
		{"phase count", got.PhaseCountOut, oracleState.PhaseCount},
	} {
		if c.got != c.want {
			t.Fatalf("%s mismatch for signal %d: got=%d want=%d", c.label, got.SignalID, c.got, c.want)
		}
	}
	requirePhaseClose(t, got.NCOPhaseOut, oracleState.NCOPhase, phaseTol)
	if got.HistoryLenOut != len(oracleState.ShiftedHistory) {
		t.Fatalf("history len mismatch for signal %d: got=%d want=%d", got.SignalID, got.HistoryLenOut, len(oracleState.ShiftedHistory))
	}
	requireComplexSlicesClose(t, got.IQ, want.IQ, sampleTol)
	requireComplexSlicesClose(t, got.HistoryOut, oracleState.ShiftedHistory, sampleTol)
}

// requireExtractStateMatchesOracle asserts that a runner's per-signal stream
// state mirrors the oracle state, including NCO phase and history samples.
func requireExtractStateMatchesOracle(t *testing.T, got *ExtractStreamState, want *CPUOracleState, phaseTol float64, sampleTol float64) {
	t.Helper()
	if got == nil || want == nil {
		t.Fatalf("state mismatch: got nil=%t want nil=%t", got == nil, want == nil)
	}
	switch {
	case got.SignalID != want.SignalID:
		t.Fatalf("signal id mismatch: got=%d want=%d", got.SignalID, want.SignalID)
	case got.ConfigHash != want.ConfigHash:
		t.Fatalf("config hash mismatch for signal %d: got=%d want=%d", got.SignalID, got.ConfigHash, want.ConfigHash)
	case got.Decim != want.Decim:
		t.Fatalf("decim mismatch for signal %d: got=%d want=%d", got.SignalID, got.Decim, want.Decim)
	case got.NumTaps != want.NumTaps:
		t.Fatalf("num taps mismatch for signal %d: got=%d want=%d", got.SignalID, got.NumTaps, want.NumTaps)
	case got.PhaseCount != want.PhaseCount:
		t.Fatalf("phase count mismatch for signal %d: got=%d want=%d", got.SignalID, got.PhaseCount, want.PhaseCount)
	}
	requirePhaseClose(t, got.NCOPhase, want.NCOPhase, phaseTol)
	requireComplexSlicesClose(t, got.ShiftedHistory, want.ShiftedHistory, sampleTol)
}

// requireStateKeysMatchOracle asserts that the runner and the oracle track
// exactly the same set of active signal IDs, each with a non-nil state.
func requireStateKeysMatchOracle(t *testing.T, got map[int64]*ExtractStreamState, want map[int64]*CPUOracleState) {
	t.Helper()
	if len(got) != len(want) {
		t.Fatalf("active state count mismatch: got=%d want=%d", len(got), len(want))
	}
	for id := range want {
		if st := got[id]; st == nil {
			t.Fatalf("missing active state for signal %d", id)
		}
	}
	for id := range got {
		if st := want[id]; st == nil {
			t.Fatalf("unexpected active state for signal %d", id)
		}
	}
}

// runStreamingExecSequenceAgainstOracle drives the runner's single-call
// streaming exec path through a sequence of steps, validating the results
// and per-signal state against a fresh CPU oracle after every step.
func runStreamingExecSequenceAgainstOracle(t *testing.T, runner *BatchRunner, steps []streamingValidationStep, sampleTol float64, phaseTol float64) {
	t.Helper()
	oracle := NewCPUOracleRunner(runner.eng.sampleRate)
	for stepIdx, step := range steps {
		got, execErr := runner.StreamingExtractGPUExec(step.iq, step.jobs)
		if execErr != nil {
			t.Fatalf("step %d (%s): exec failed: %v", stepIdx, step.name, execErr)
		}
		want, oracleErr := oracle.StreamingExtract(step.iq, step.jobs)
		if oracleErr != nil {
			t.Fatalf("step %d (%s): oracle failed: %v", stepIdx, step.name, oracleErr)
		}
		if len(got) != len(want) {
			t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", stepIdx, step.name, len(got), len(want))
		}
		for jobIdx, job := range step.jobs {
			requireStreamingExtractResultMatchesOracle(t, got[jobIdx], want[jobIdx])
			requireComplexSlicesClose(t, got[jobIdx].IQ, want[jobIdx].IQ, sampleTol)
			requireExtractStateMatchesOracle(t, runner.streamState[job.SignalID], oracle.States[job.SignalID], phaseTol, sampleTol)
		}
		requireStateKeysMatchOracle(t, runner.streamState, oracle.States)
	}
}

// runPreparedSequenceAgainstOracle drives the two-stage prepared path
// (build invocations, execute via exec, apply results) through a sequence
// of steps, checking every stage's output against a fresh CPU oracle.
func runPreparedSequenceAgainstOracle(t *testing.T, runner *BatchRunner, exec streamingPreparedExecutor, steps []streamingValidationStep, sampleTol float64, phaseTol float64) {
	t.Helper()
	oracle := NewCPUOracleRunner(runner.eng.sampleRate)
	for stepIdx, step := range steps {
		invocations, buildErr := runner.buildStreamingGPUInvocations(step.iq, step.jobs)
		if buildErr != nil {
			t.Fatalf("step %d (%s): build invocations failed: %v", stepIdx, step.name, buildErr)
		}
		got, execErr := exec(runner, invocations)
		if execErr != nil {
			t.Fatalf("step %d (%s): prepared exec failed: %v", stepIdx, step.name, execErr)
		}
		want, oracleErr := oracle.StreamingExtract(step.iq, step.jobs)
		if oracleErr != nil {
			t.Fatalf("step %d (%s): oracle failed: %v", stepIdx, step.name, oracleErr)
		}
		if len(got) != len(want) {
			t.Fatalf("step %d (%s): result count mismatch: got=%d want=%d", stepIdx, step.name, len(got), len(want))
		}
		applied := runner.applyStreamingGPUExecutionResults(got)
		if len(applied) != len(want) {
			t.Fatalf("step %d (%s): applied result count mismatch: got=%d want=%d", stepIdx, step.name, len(applied), len(want))
		}
		for jobIdx, job := range step.jobs {
			oracleState := oracle.States[job.SignalID]
			requirePreparedExecutionResultMatchesOracle(t, got[jobIdx], want[jobIdx], oracleState, sampleTol, phaseTol)
			requireStreamingExtractResultMatchesOracle(t, applied[jobIdx], want[jobIdx])
			requireComplexSlicesClose(t, applied[jobIdx].IQ, want[jobIdx].IQ, sampleTol)
			requireExtractStateMatchesOracle(t, runner.streamState[job.SignalID], oracleState, phaseTol, sampleTol)
		}
		requireStateKeysMatchOracle(t, runner.streamState, oracle.States)
	}
}

+ 64
- 0
internal/demod/gpudemod/streaming_host_core.go Просмотреть файл

@@ -0,0 +1,64 @@
package gpudemod

import "math"

// runStreamingPolyphaseHostCore is the host-side (CPU) reference core for the
// streaming extractor: each new input sample is mixed with an NCO derived
// from offsetHz, appended to the filter history, and one polyphase FIR
// output is emitted for every stateDecim mixed samples.
//
// It takes the new IQ block plus the carried-over stream state (NCO phase,
// decimation phase counter, shifted-sample history) and returns the
// decimated output block, the final NCO phase, the final phase counter, and
// a copy of the retained history tail (at most stateNumTaps-1 samples).
// The caller's stateHistory slice is never mutated.
func runStreamingPolyphaseHostCore(
	iqNew []complex64,
	sampleRate int,
	offsetHz float64,
	stateNCOPhase float64,
	statePhaseCount int,
	stateNumTaps int,
	stateDecim int,
	stateHistory []complex64,
	polyphaseTaps []float32,
) ([]complex64, float64, int, []complex64) {
	// Roughly one output per stateDecim inputs; +2 slack avoids regrowth.
	out := make([]complex64, 0, len(iqNew)/maxInt(1, stateDecim)+2)
	phase := stateNCOPhase
	phaseCount := statePhaseCount
	// Private copy of the history so the caller's slice stays untouched.
	hist := append([]complex64(nil), stateHistory...)
	// Phase length from the tap count truncated to a multiple of decim;
	// fall back to the untruncated length when truncation yields zero.
	phaseLen := PolyphasePhaseLen(len(polyphaseTaps)/maxInt(1, stateDecim)*maxInt(1, stateDecim), stateDecim)
	if phaseLen == 0 {
		phaseLen = PolyphasePhaseLen(len(polyphaseTaps), stateDecim)
	}
	// Negative sign: a signal at +offsetHz is shifted down to baseband.
	phaseInc := -2.0 * math.Pi * offsetHz / float64(sampleRate)
	for _, x := range iqNew {
		// NCO mix (complex rotation) of the incoming sample.
		rot := complex64(complex(math.Cos(phase), math.Sin(phase)))
		s := x * rot
		hist = append(hist, s)
		phaseCount++
		if phaseCount == stateDecim {
			// Polyphase MAC anchored at the newest sample: phase p, tap k
			// reads the sample p+k*stateDecim positions back from the end.
			var y complex64
			for p := 0; p < stateDecim; p++ {
				for k := 0; k < phaseLen; k++ {
					idxTap := p*phaseLen + k
					if idxTap >= len(polyphaseTaps) {
						continue
					}
					tap := polyphaseTaps[idxTap]
					if tap == 0 {
						continue
					}
					srcBack := p + k*stateDecim
					idx := len(hist) - 1 - srcBack
					if idx < 0 {
						// Not enough history yet (stream start): treat as zero.
						continue
					}
					y += complex(tap, 0) * hist[idx]
				}
			}
			out = append(out, y)
			phaseCount = 0
		}
		// Keep only the most recent stateNumTaps-1 history samples.
		if len(hist) > stateNumTaps-1 {
			hist = hist[len(hist)-(stateNumTaps-1):]
		}
		phase += phaseInc
		// Wrap the NCO phase into [-pi, pi) to bound float error growth.
		if phase >= math.Pi {
			phase -= 2 * math.Pi
		} else if phase < -math.Pi {
			phase += 2 * math.Pi
		}
	}
	return out, phase, phaseCount, append([]complex64(nil), hist...)
}

+ 40
- 0
internal/demod/gpudemod/streaming_host_core_test.go Просмотреть файл

@@ -0,0 +1,40 @@
package gpudemod

import "testing"

// TestRunStreamingPolyphaseHostCoreMatchesCPUOraclePolyphase verifies the
// host-side streaming core against the reference CPU oracle polyphase path
// on a deterministic input, and sanity-checks the returned stream state.
func TestRunStreamingPolyphaseHostCoreMatchesCPUOraclePolyphase(t *testing.T) {
	cfg := OracleHarnessConfig{
		SignalID:   1,
		ConfigHash: 123,
		NCOPhase:   0,
		Decim:      20,
		NumTaps:    65,
		PhaseInc:   0.017,
	}
	iq := MakeDeterministicIQ(12000)
	oracleOut := CPUOracleExtractPolyphase(iq, MakeCPUOracleState(cfg), cfg.PhaseInc)

	fresh := MakeCPUOracleState(cfg)
	out, phase, phaseCount, hist := runStreamingPolyphaseHostCore(
		iq,
		4000000,
		-cfg.PhaseInc*4000000/(2*3.141592653589793),
		fresh.NCOPhase,
		fresh.PhaseCount,
		fresh.NumTaps,
		fresh.Decim,
		fresh.ShiftedHistory,
		fresh.PolyphaseTaps,
	)
	requireComplexSlicesClose(t, oracleOut, out, 1e-5)
	if phase == 0 && len(iq) > 0 {
		t.Fatalf("expected phase to advance")
	}
	if phaseCount < 0 || phaseCount >= fresh.Decim {
		t.Fatalf("unexpected phaseCount: %d", phaseCount)
	}
	if len(hist) == 0 {
		t.Fatalf("expected history to be retained")
	}
}

+ 111
- 0
internal/demod/gpudemod/streaming_oracle_extract.go Просмотреть файл

@@ -0,0 +1,111 @@
package gpudemod

import (
"fmt"

"sdr-wideband-suite/internal/dsp"
)

// CPUOracleRunner is a pure-CPU reference implementation of the streaming
// extractor used to validate the GPU paths. It keeps one oracle state per
// active signal ID.
type CPUOracleRunner struct {
	SampleRate int                       // wideband input sample rate
	States     map[int64]*CPUOracleState // per-signal streaming state, keyed by SignalID
}

// ResetAllStates drops every tracked per-signal oracle state, leaving the
// runner with a fresh (empty) state map. Safe to call on a nil receiver.
func (r *CPUOracleRunner) ResetAllStates() {
	if r != nil {
		r.States = make(map[int64]*CPUOracleState)
	}
}

// NewCPUOracleRunner constructs a CPU oracle runner for the given input
// sample rate with an empty per-signal state map.
func NewCPUOracleRunner(sampleRate int) *CPUOracleRunner {
	runner := &CPUOracleRunner{SampleRate: sampleRate}
	runner.States = make(map[int64]*CPUOracleState)
	return runner
}

// ResetSignalState forgets the tracked oracle state for one signal, if any.
// Safe to call on a nil receiver or before the state map exists.
func (r *CPUOracleRunner) ResetSignalState(signalID int64) {
	if r != nil && r.States != nil {
		delete(r.States, signalID)
	}
}

// getOrInitState returns the oracle state for the job's signal, creating it
// on first use. Decimation, tap count, and FIR taps are (re)derived from the
// job on every call so the oracle mirrors a per-frame rebuild; only a
// ConfigHash change resets the dynamic state (phase, counters, history).
func (r *CPUOracleRunner) getOrInitState(job StreamingExtractJob) (*CPUOracleState, error) {
	if r == nil {
		return nil, fmt.Errorf("nil CPUOracleRunner")
	}
	if r.States == nil {
		r.States = make(map[int64]*CPUOracleState)
	}
	// The output rate must divide the input rate by an exact integer factor.
	decim, err := ExactIntegerDecimation(r.SampleRate, job.OutRate)
	if err != nil {
		return nil, err
	}
	state := r.States[job.SignalID]
	if state == nil {
		state = &CPUOracleState{SignalID: job.SignalID}
		r.States[job.SignalID] = state
	}
	ResetCPUOracleStateIfConfigChanged(state, job.ConfigHash)
	state.Decim = decim
	state.NumTaps = job.NumTaps
	if state.NumTaps <= 0 {
		state.NumTaps = 101 // default tap count when the job does not specify one
	}
	// Lowpass cutoff at half the signal bandwidth, floored at 200
	// (presumably Hz — confirm against dsp.LowpassFIR's units).
	cutoff := job.Bandwidth / 2
	if cutoff < 200 {
		cutoff = 200
	}
	// Taps are rebuilt unconditionally on every call; this is why
	// StreamingConfigHash deliberately excludes bandwidth.
	base := dsp.LowpassFIR(cutoff, r.SampleRate, state.NumTaps)
	state.BaseTaps = make([]float32, len(base))
	for i, v := range base {
		state.BaseTaps[i] = float32(v)
	}
	state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(state.BaseTaps, state.Decim)
	if state.ShiftedHistory == nil {
		// Lazily allocate the history with room for numTaps-1 samples.
		state.ShiftedHistory = make([]complex64, 0, maxInt(0, state.NumTaps-1))
	}
	return state, nil
}

// StreamingExtract runs the CPU oracle over one input block for every job,
// updating each signal's persistent state and pruning states for signals
// absent from this batch. Results are returned in job order.
func (r *CPUOracleRunner) StreamingExtract(iqNew []complex64, jobs []StreamingExtractJob) ([]StreamingExtractResult, error) {
	results := make([]StreamingExtractResult, len(jobs))
	active := make(map[int64]struct{}, len(jobs))
	for i, job := range jobs {
		active[job.SignalID] = struct{}{}
		state, err := r.getOrInitState(job)
		if err != nil {
			return nil, err
		}
		// Shift + filter + decimate this block, continuing from saved state.
		out, phase, phaseCount, hist := runStreamingPolyphaseHostCore(
			iqNew,
			r.SampleRate,
			job.OffsetHz,
			state.NCOPhase,
			state.PhaseCount,
			state.NumTaps,
			state.Decim,
			state.ShiftedHistory,
			state.PolyphaseTaps,
		)
		// Persist the post-block stream state for the next call.
		state.NCOPhase = phase
		state.PhaseCount = phaseCount
		state.ShiftedHistory = append(state.ShiftedHistory[:0], hist...)
		results[i] = StreamingExtractResult{
			SignalID:   job.SignalID,
			IQ:         out,
			Rate:       job.OutRate,
			NOut:       len(out),
			PhaseCount: state.PhaseCount,
			HistoryLen: len(state.ShiftedHistory),
		}
	}
	// Drop states for signals that were not part of this batch.
	for signalID := range r.States {
		if _, ok := active[signalID]; !ok {
			delete(r.States, signalID)
		}
	}
	return results, nil
}

+ 64
- 0
internal/demod/gpudemod/streaming_types.go Просмотреть файл

@@ -0,0 +1,64 @@
package gpudemod

import (
"fmt"
"hash/fnv"
)

// StreamingExtractJob describes one per-signal extraction request applied to
// a shared wideband input block.
type StreamingExtractJob struct {
	SignalID   int64   // stable identifier of the detected signal
	OffsetHz   float64 // NCO shift relative to the wideband center
	Bandwidth  float64 // signal bandwidth; drives the lowpass cutoff
	OutRate    int     // requested output rate; must divide the input rate exactly
	NumTaps    int     // FIR tap count; <=0 selects a default
	ConfigHash uint64  // structural-config hash; a change forces a state reset
}

// StreamingExtractResult is the per-job output of a streaming extract call,
// including post-block state summaries.
type StreamingExtractResult struct {
	SignalID   int64
	IQ         []complex64 // decimated, frequency-shifted baseband samples
	Rate       int         // output sample rate (copied from the job's OutRate)
	NOut       int         // len(IQ)
	PhaseCount int         // decimation phase counter after this block
	HistoryLen int         // retained filter-history length after this block
}

// ExtractStreamState is the persistent per-signal state carried between
// streaming extract calls.
type ExtractStreamState struct {
	SignalID       int64
	ConfigHash     uint64      // hash of the structural config this state was built for
	NCOPhase       float64     // current NCO phase (radians)
	Decim          int         // integer decimation factor
	PhaseCount     int         // input samples accumulated toward the next output
	NumTaps        int         // FIR length; history keeps at most NumTaps-1 samples
	ShiftedHistory []complex64 // tail of NCO-shifted input samples
	BaseTaps       []float32   // prototype lowpass FIR taps
	PolyphaseTaps  []float32   // BaseTaps rearranged phase-major for decimation
	Initialized    bool
}

// ResetExtractStreamState clears the mutable streaming state (NCO phase,
// polyphase phase counter, history) and records the new config hash. The
// history backing array is kept for reuse. No-op on a nil state.
func ResetExtractStreamState(state *ExtractStreamState, cfgHash uint64) {
	if state == nil {
		return
	}
	state.ConfigHash = cfgHash
	state.Initialized = false
	state.NCOPhase = 0
	state.PhaseCount = 0
	state.ShiftedHistory = state.ShiftedHistory[:0]
}

// StreamingConfigHash derives a stable identifier for the structural
// extraction geometry of one signal. Only parameters that change the
// FIR/decimation layout participate:
//   - offsetHz is excluded because the NCO phase_inc tracks it smoothly
//     each frame;
//   - bandwidth is excluded because taps are rebuilt every frame in
//     getOrInitExtractState.
//
// A state reset (zeroed NCO phase, history, phase count) is only needed
// when the decimation factor, tap count, or sample rate changes. A previous
// version hashed offset and bandwidth at %.9f, which reset the state every
// frame as the detector's smoothing nudged CenterHz by sub-Hz fractions.
func StreamingConfigHash(signalID int64, offsetHz float64, bandwidth float64, outRate int, numTaps int, sampleRate int) uint64 {
	hasher := fnv.New64a()
	_, _ = fmt.Fprintf(hasher, "sig=%d|out=%d|taps=%d|sr=%d", signalID, outRate, numTaps, sampleRate)
	return hasher.Sum64()
}

+ 78
- 0
internal/demod/gpudemod/test_harness.go Просмотреть файл

@@ -0,0 +1,78 @@
package gpudemod

import (
"math"
)

// OracleHarnessConfig bundles the parameters used to build deterministic
// CPU-oracle states for tests.
type OracleHarnessConfig struct {
	SignalID   int64
	ConfigHash uint64
	NCOPhase   float64 // starting NCO phase
	Decim      int     // decimation factor
	NumTaps    int     // FIR tap count
	PhaseInc   float64 // per-sample phase increment fed to the oracle
}

// MakeDeterministicIQ synthesizes n fully deterministic IQ samples as the
// sum of two fixed-frequency tones, giving varied but reproducible input.
func MakeDeterministicIQ(n int) []complex64 {
	samples := make([]complex64, n)
	for i := range samples {
		a := 0.017 * float64(i)
		b := 0.031 * float64(i)
		re := math.Cos(a) + 0.2*math.Cos(b)
		im := math.Sin(a) + 0.15*math.Sin(b)
		samples[i] = complex64(complex(re, im))
	}
	return samples
}

// MakeToneIQ synthesizes n samples of a unit-amplitude complex tone whose
// phase advances by phaseInc radians per sample, starting at phase zero.
func MakeToneIQ(n int, phaseInc float64) []complex64 {
	tone := make([]complex64, n)
	var phase float64
	for i := range tone {
		tone[i] = complex64(complex(math.Cos(phase), math.Sin(phase)))
		phase += phaseInc
	}
	return tone
}

// MakeLowpassTaps returns n identical taps of value 1/n (a simple
// moving-average kernel used by the test harness).
func MakeLowpassTaps(n int) []float32 {
	taps := make([]float32, n)
	w := 1.0 / float32(n)
	for i := range taps {
		taps[i] = w
	}
	return taps
}

// MakeCPUOracleState builds a fresh oracle state from the harness config,
// using uniform (moving-average) taps and their phase-major polyphase layout.
func MakeCPUOracleState(cfg OracleHarnessConfig) *CPUOracleState {
	base := MakeLowpassTaps(cfg.NumTaps)
	state := &CPUOracleState{
		SignalID:   cfg.SignalID,
		ConfigHash: cfg.ConfigHash,
		NCOPhase:   cfg.NCOPhase,
		Decim:      cfg.Decim,
		NumTaps:    cfg.NumTaps,
		BaseTaps:   base,
	}
	state.PhaseCount = 0
	state.ShiftedHistory = make([]complex64, 0, maxInt(0, cfg.NumTaps-1))
	state.PolyphaseTaps = BuildPolyphaseTapsPhaseMajor(base, cfg.Decim)
	return state
}

// RunChunkedCPUOraclePolyphase feeds all through the oracle in the given
// chunk sizes (plus one final remainder chunk) and concatenates the outputs,
// sharing a single state across chunks to exercise streaming continuity.
func RunChunkedCPUOraclePolyphase(all []complex64, chunkSizes []int, mkState func() *CPUOracleState, phaseInc float64) []complex64 {
	state := mkState()
	out := make([]complex64, 0)
	pos := 0
	for _, size := range chunkSizes {
		if pos >= len(all) {
			break
		}
		end := pos + size
		if end > len(all) {
			end = len(all)
		}
		out = append(out, CPUOracleExtractPolyphase(all[pos:end], state, phaseInc)...)
		pos = end
	}
	if pos < len(all) {
		out = append(out, CPUOracleExtractPolyphase(all[pos:], state, phaseInc)...)
	}
	return out
}

+ 39
- 0
internal/demod/gpudemod/test_harness_test.go Просмотреть файл

@@ -0,0 +1,39 @@
package gpudemod

import (
	"math/cmplx"
	"testing"
)

// requireComplexSlicesCloseHarness fails the test unless a and b have equal
// length and every pair of samples differs by at most tol in absolute value.
func requireComplexSlicesCloseHarness(t *testing.T, a []complex64, b []complex64, tol float64) {
	t.Helper()
	if len(a) != len(b) {
		t.Fatalf("length mismatch: %d vs %d", len(a), len(b))
	}
	for i := range a {
		// Compare samples directly instead of routing each one through
		// CompareComplexSlices with a pair of single-element slices, which
		// allocated two slices per sample for the same |a[i]-b[i]| result.
		// This also matches how the dsp tests compare complex samples.
		if cmplx.Abs(complex128(a[i]-b[i])) > tol {
			t.Fatalf("slice mismatch at %d: %v vs %v (tol=%f)", i, a[i], b[i], tol)
		}
	}
}

// TestHarnessChunkedCPUOraclePolyphase verifies that chunked oracle
// processing matches a single monolithic pass over the same input.
func TestHarnessChunkedCPUOraclePolyphase(t *testing.T) {
	cfg := OracleHarnessConfig{
		SignalID:   1,
		ConfigHash: 123,
		NCOPhase:   0,
		Decim:      20,
		NumTaps:    65,
		PhaseInc:   0.017,
	}
	iq := MakeDeterministicIQ(150000)
	newState := func() *CPUOracleState { return MakeCPUOracleState(cfg) }
	mono := CPUOracleExtractPolyphase(iq, newState(), cfg.PhaseInc)
	chunked := RunChunkedCPUOraclePolyphase(iq, []int{4096, 5000, 8192, 27307}, newState, cfg.PhaseInc)
	requireComplexSlicesCloseHarness(t, mono, chunked, 1e-5)
}

// TestHarnessToneIQ sanity-checks the tone generator's output length.
func TestHarnessToneIQ(t *testing.T) {
	tone := MakeToneIQ(1024, 0.05)
	if got := len(tone); got != 1024 {
		t.Fatalf("unexpected tone iq length: %d", got)
	}
}

+ 49
- 11
internal/demod/gpudemod/windows_bridge.go Просмотреть файл

@@ -4,7 +4,7 @@ package gpudemod

/*
#cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
#cgo windows LDFLAGS: -lcudart64_13 -lkernel32
#cgo windows LDFLAGS: -L"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/bin/x64" -l:cudart64_13.dll -lkernel32
#include <windows.h>
#include <stdlib.h>
#include <cuda_runtime.h>
@@ -26,6 +26,8 @@ typedef int (__stdcall *gpud_launch_decimate_stream_fn)(const gpud_float2* in, g
typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n);
typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
typedef int (__stdcall *gpud_launch_streaming_polyphase_prepare_fn)(const gpud_float2* in_new, int n_new, const gpud_float2* history_in, int history_len, const float* polyphase_taps, int polyphase_len, int decim, int num_taps, int phase_count_in, double phase_start, double phase_inc, gpud_float2* out, int* n_out, int* phase_count_out, double* phase_end_out, gpud_float2* history_out);
typedef int (__stdcall *gpud_launch_streaming_polyphase_stateful_fn)(const gpud_float2* in_new, int n_new, gpud_float2* shifted_new_tmp, const float* polyphase_taps, int polyphase_len, int decim, int num_taps, gpud_float2* history_state, gpud_float2* history_scratch, int history_cap, int* history_len_io, int* phase_count_state, double* phase_state, double phase_inc, gpud_float2* out, int out_cap, int* n_out);

static HMODULE gpud_mod = NULL;
static gpud_stream_create_fn gpud_p_stream_create = NULL;
@@ -42,6 +44,8 @@ static gpud_launch_decimate_stream_fn gpud_p_launch_decimate_stream = NULL;
static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL;
static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL;
static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL;
static gpud_launch_streaming_polyphase_prepare_fn gpud_p_launch_streaming_polyphase_prepare = NULL;
static gpud_launch_streaming_polyphase_stateful_fn gpud_p_launch_streaming_polyphase_stateful = NULL;

static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); }
static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); }
@@ -67,6 +71,8 @@ static int gpud_load_library(const char* path) {
gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda");
gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda");
gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda");
gpud_p_launch_streaming_polyphase_prepare = (gpud_launch_streaming_polyphase_prepare_fn)GetProcAddress(gpud_mod, "gpud_launch_streaming_polyphase_prepare_cuda");
gpud_p_launch_streaming_polyphase_stateful = (gpud_launch_streaming_polyphase_stateful_fn)GetProcAddress(gpud_mod, "gpud_launch_streaming_polyphase_stateful_cuda");
if (!gpud_p_stream_create || !gpud_p_stream_destroy || !gpud_p_stream_sync || !gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift_stream || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir_stream || !gpud_p_launch_fir || !gpud_p_launch_decimate_stream || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) {
FreeLibrary(gpud_mod);
gpud_mod = NULL;
@@ -89,6 +95,8 @@ static int gpud_launch_decimate_stream(gpud_float2 *in, gpud_float2 *out, int n_
static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) { if (!gpud_p_launch_decimate) return -1; return gpud_p_launch_decimate(in, out, n_out, factor); }
static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) { if (!gpud_p_launch_am_envelope) return -1; return gpud_p_launch_am_envelope(in, out, n); }
static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) { if (!gpud_p_launch_ssb_product) return -1; return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start); }
static int gpud_launch_streaming_polyphase_prepare(gpud_float2 *in_new, int n_new, gpud_float2 *history_in, int history_len, float *polyphase_taps, int polyphase_len, int decim, int num_taps, int phase_count_in, double phase_start, double phase_inc, gpud_float2 *out, int *n_out, int *phase_count_out, double *phase_end_out, gpud_float2 *history_out) { if (!gpud_p_launch_streaming_polyphase_prepare) return -1; return gpud_p_launch_streaming_polyphase_prepare(in_new, n_new, history_in, history_len, polyphase_taps, polyphase_len, decim, num_taps, phase_count_in, phase_start, phase_inc, out, n_out, phase_count_out, phase_end_out, history_out); }
static int gpud_launch_streaming_polyphase_stateful(gpud_float2 *in_new, int n_new, gpud_float2 *shifted_new_tmp, float *polyphase_taps, int polyphase_len, int decim, int num_taps, gpud_float2 *history_state, gpud_float2 *history_scratch, int history_cap, int *history_len_io, int *phase_count_state, double *phase_state, double phase_inc, gpud_float2 *out, int out_cap, int *n_out) { if (!gpud_p_launch_streaming_polyphase_stateful) return -1; return gpud_p_launch_streaming_polyphase_stateful(in_new, n_new, shifted_new_tmp, polyphase_taps, polyphase_len, decim, num_taps, history_state, history_scratch, history_cap, history_len_io, phase_count_state, phase_state, phase_inc, out, out_cap, n_out); }
*/
import "C"

@@ -103,38 +111,68 @@ func bridgeLoadLibrary(path string) int {
defer C.free(unsafe.Pointer(cp))
return int(C.gpud_load_library(cp))
}
func bridgeCudaMalloc(ptr *unsafe.Pointer, bytes uintptr) int { return int(C.gpud_cuda_malloc(ptr, C.size_t(bytes))) }
func bridgeCudaMalloc(ptr *unsafe.Pointer, bytes uintptr) int {
return int(C.gpud_cuda_malloc(ptr, C.size_t(bytes)))
}
func bridgeCudaFree(ptr unsafe.Pointer) int { return int(C.gpud_cuda_free(ptr)) }
func bridgeMemcpyH2D(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_h2d(dst, src, C.size_t(bytes))) }
func bridgeMemcpyD2H(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_d2h(dst, src, C.size_t(bytes))) }
func bridgeMemcpyH2D(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int {
return int(C.gpud_memcpy_h2d(dst, src, C.size_t(bytes)))
}
func bridgeMemcpyD2H(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int {
return int(C.gpud_memcpy_d2h(dst, src, C.size_t(bytes)))
}
func bridgeDeviceSync() int { return int(C.gpud_device_sync()) }
func bridgeUploadFIRTaps(taps *C.float, n int) int { return int(C.gpud_upload_fir_taps(taps, C.int(n))) }
func bridgeUploadFIRTaps(taps *C.float, n int) int {
return int(C.gpud_upload_fir_taps(taps, C.int(n)))
}
func bridgeLaunchFreqShift(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64) int {
return int(C.gpud_launch_freq_shift(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart)))
}
func bridgeLaunchFreqShiftStream(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64, stream streamHandle) int {
return int(C.gpud_launch_freq_shift_stream(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart), C.gpud_stream_handle(stream)))
}
func bridgeLaunchFIR(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int) int { return int(C.gpud_launch_fir(in, out, C.int(n), C.int(numTaps))) }
func bridgeLaunchFIR(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int) int {
return int(C.gpud_launch_fir(in, out, C.int(n), C.int(numTaps)))
}
func bridgeLaunchFIRStream(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int, stream streamHandle) int {
return int(C.gpud_launch_fir_stream(in, out, C.int(n), C.int(numTaps), C.gpud_stream_handle(stream)))
}
func bridgeLaunchFIRv2Stream(in *C.gpud_float2, out *C.gpud_float2, taps *C.float, n int, numTaps int, stream streamHandle) int {
return int(C.gpud_launch_fir_v2_stream(in, out, taps, C.int(n), C.int(numTaps), C.gpud_stream_handle(stream)))
}
func bridgeLaunchDecimate(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int) int { return int(C.gpud_launch_decimate(in, out, C.int(nOut), C.int(factor))) }
func bridgeLaunchDecimate(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int) int {
return int(C.gpud_launch_decimate(in, out, C.int(nOut), C.int(factor)))
}
func bridgeLaunchDecimateStream(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int, stream streamHandle) int {
return int(C.gpud_launch_decimate_stream(in, out, C.int(nOut), C.int(factor), C.gpud_stream_handle(stream)))
}
func bridgeLaunchFMDiscrim(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_fm_discrim(in, out, C.int(n))) }
func bridgeLaunchAMEnvelope(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_am_envelope(in, out, C.int(n))) }
func bridgeLaunchFMDiscrim(in *C.gpud_float2, out *C.float, n int) int {
return int(C.gpud_launch_fm_discrim(in, out, C.int(n)))
}
func bridgeLaunchAMEnvelope(in *C.gpud_float2, out *C.float, n int) int {
return int(C.gpud_launch_am_envelope(in, out, C.int(n)))
}
func bridgeLaunchSSBProduct(in *C.gpud_float2, out *C.float, n int, phaseInc float64, phaseStart float64) int {
return int(C.gpud_launch_ssb_product(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart)))
}

// bridgeLaunchStreamingPolyphasePrepare is a transitional bridge for the
// legacy single-call prepare path. The stateful native path uses
// bridgeLaunchStreamingPolyphaseStateful.
func bridgeLaunchStreamingPolyphasePrepare(inNew *C.gpud_float2, nNew int, historyIn *C.gpud_float2, historyLen int, polyphaseTaps *C.float, polyphaseLen int, decim int, numTaps int, phaseCountIn int, phaseStart float64, phaseInc float64, out *C.gpud_float2, nOut *C.int, phaseCountOut *C.int, phaseEndOut *C.double, historyOut *C.gpud_float2) int {
return int(C.gpud_launch_streaming_polyphase_prepare(inNew, C.int(nNew), historyIn, C.int(historyLen), polyphaseTaps, C.int(polyphaseLen), C.int(decim), C.int(numTaps), C.int(phaseCountIn), C.double(phaseStart), C.double(phaseInc), out, nOut, phaseCountOut, phaseEndOut, historyOut))
}
func bridgeLaunchStreamingPolyphaseStateful(inNew *C.gpud_float2, nNew int, shiftedNewTmp *C.gpud_float2, polyphaseTaps *C.float, polyphaseLen int, decim int, numTaps int, historyState *C.gpud_float2, historyScratch *C.gpud_float2, historyCap int, historyLenIO *C.int, phaseCountState *C.int, phaseState *C.double, phaseInc float64, out *C.gpud_float2, outCap int, nOut *C.int) int {
return int(C.gpud_launch_streaming_polyphase_stateful(inNew, C.int(nNew), shiftedNewTmp, polyphaseTaps, C.int(polyphaseLen), C.int(decim), C.int(numTaps), historyState, historyScratch, C.int(historyCap), historyLenIO, phaseCountState, phaseState, C.double(phaseInc), out, C.int(outCap), nOut))
}
func bridgeStreamCreate() (streamHandle, int) {
var s C.gpud_stream_handle
res := int(C.gpud_stream_create(&s))
return streamHandle(s), res
}
func bridgeStreamDestroy(stream streamHandle) int { return int(C.gpud_stream_destroy(C.gpud_stream_handle(stream))) }
func bridgeStreamSync(stream streamHandle) int { return int(C.gpud_stream_sync(C.gpud_stream_handle(stream))) }
func bridgeStreamDestroy(stream streamHandle) int {
return int(C.gpud_stream_destroy(C.gpud_stream_handle(stream)))
}
func bridgeStreamSync(stream streamHandle) int {
return int(C.gpud_stream_sync(C.gpud_stream_handle(stream)))
}

+ 95
- 0
internal/dsp/decimating_fir.go Просмотреть файл

@@ -0,0 +1,95 @@
package dsp

// StatefulDecimatingFIRComplex combines FIR filtering and decimation into a
// single stateful stage. This avoids exposing FIR settling/transient output
// as ordinary block-leading samples before decimation, and keeps the delay
// line continuous across chunked Process calls.
type StatefulDecimatingFIRComplex struct {
	taps   []float64 // FIR coefficients (private copy)
	delayR []float64 // real delay line, newest sample at index 0
	delayI []float64 // imaginary delay line, newest sample at index 0
	factor int       // decimation factor (>= 1)
	phase  int       // number of input samples until next output sample (0 => emit now)
}

// Phase reports the current decimation phase counter. Safe on nil.
func (f *StatefulDecimatingFIRComplex) Phase() int {
	if f == nil {
		return 0
	}
	return f.phase
}

// TapsLen reports the number of FIR taps. Safe on nil.
func (f *StatefulDecimatingFIRComplex) TapsLen() int {
	if f == nil {
		return 0
	}
	return len(f.taps)
}

// NewStatefulDecimatingFIRComplex builds a stage with a private copy of taps
// and the given decimation factor; factors below 1 are clamped to 1.
func NewStatefulDecimatingFIRComplex(taps []float64, factor int) *StatefulDecimatingFIRComplex {
	if factor < 1 {
		factor = 1
	}
	t := make([]float64, len(taps))
	copy(t, taps)
	return &StatefulDecimatingFIRComplex{
		taps:   t,
		delayR: make([]float64, len(taps)),
		delayI: make([]float64, len(taps)),
		factor: factor,
		phase:  0,
	}
}

// Reset zeroes the delay line and the decimation phase. Safe on nil
// (consistent with Phase and TapsLen).
func (f *StatefulDecimatingFIRComplex) Reset() {
	if f == nil {
		return
	}
	for i := range f.delayR {
		f.delayR[i] = 0
		f.delayI[i] = 0
	}
	f.phase = 0
}

// push shifts the delay line by one and inserts s at the front.
func (f *StatefulDecimatingFIRComplex) push(s complex64) {
	n := len(f.taps)
	copy(f.delayR[1:], f.delayR[:n-1])
	copy(f.delayI[1:], f.delayI[:n-1])
	f.delayR[0] = float64(real(s))
	f.delayI[0] = float64(imag(s))
}

// mac computes the FIR output for the current delay-line contents.
func (f *StatefulDecimatingFIRComplex) mac() complex64 {
	var accR, accI float64
	for k, w := range f.taps {
		accR += f.delayR[k] * w
		accI += f.delayI[k] * w
	}
	return complex(float32(accR), float32(accI))
}

// Process filters iq through the delay line and emits one output for every
// factor-th input, carrying the phase across calls. Returns nil for empty
// input, empty taps, or a nil receiver (consistent with the nil-safe
// accessors above).
func (f *StatefulDecimatingFIRComplex) Process(iq []complex64) []complex64 {
	if f == nil || len(iq) == 0 || len(f.taps) == 0 {
		return nil
	}
	if f.factor <= 1 {
		// Pure FIR path: one output per input, no decimation bookkeeping.
		out := make([]complex64, len(iq))
		for i := range iq {
			f.push(iq[i])
			out[i] = f.mac()
		}
		return out
	}
	out := make([]complex64, 0, len(iq)/f.factor+1)
	for i := range iq {
		f.push(iq[i])
		if f.phase == 0 {
			out = append(out, f.mac())
			f.phase = f.factor - 1
		} else {
			f.phase--
		}
	}
	return out
}

+ 57
- 0
internal/dsp/decimating_fir_test.go Просмотреть файл

@@ -0,0 +1,57 @@
package dsp

import (
"math/cmplx"
"testing"
)

// TestStatefulDecimatingFIRComplexStreamContinuity checks that chunked
// processing through one shared state produces exactly the same stream as a
// single whole-buffer pass.
func TestStatefulDecimatingFIRComplexStreamContinuity(t *testing.T) {
	taps := LowpassFIR(90000, 512000, 101)
	const factor = 2

	input := make([]complex64, 8192)
	for i := range input {
		input[i] = complex(float32((i%17)-8)/8.0, float32((i%11)-5)/8.0)
	}

	whole := NewStatefulDecimatingFIRComplex(taps, factor).Process(input)

	chunkedProc := NewStatefulDecimatingFIRComplex(taps, factor)
	var chunked []complex64
	for start := 0; start < len(input); start += 733 {
		end := start + 733
		if end > len(input) {
			end = len(input)
		}
		chunked = append(chunked, chunkedProc.Process(input[start:end])...)
	}

	if len(whole) != len(chunked) {
		t.Fatalf("length mismatch whole=%d chunked=%d", len(whole), len(chunked))
	}
	for i := range whole {
		if cmplx.Abs(complex128(whole[i]-chunked[i])) > 1e-5 {
			t.Fatalf("sample %d mismatch whole=%v chunked=%v", i, whole[i], chunked[i])
		}
	}
}

// TestStatefulDecimatingFIRComplexMatchesBlockPipelineLength checks that the
// stateful stage emits the same number of samples as the equivalent block
// pipeline (ApplyFIR followed by Decimate).
func TestStatefulDecimatingFIRComplexMatchesBlockPipelineLength(t *testing.T) {
	taps := LowpassFIR(90000, 512000, 101)
	const factor = 2
	input := make([]complex64, 48640)
	for i := range input {
		input[i] = complex(float32((i%13)-6)/8.0, float32((i%7)-3)/8.0)
	}

	got := NewStatefulDecimatingFIRComplex(taps, factor).Process(input)
	want := Decimate(ApplyFIR(input, taps), factor)

	if len(got) != len(want) {
		t.Fatalf("unexpected output len got=%d want=%d", len(got), len(want))
	}
}

+ 28
- 2
internal/recorder/recorder.go Просмотреть файл

@@ -12,6 +12,7 @@ import (

"sdr-wideband-suite/internal/demod/gpudemod"
"sdr-wideband-suite/internal/detector"
"sdr-wideband-suite/internal/telemetry"
)

type Policy struct {
@@ -54,9 +55,10 @@ type Manager struct {
streamer *Streamer
streamedIDs map[int64]bool // signal IDs that were streamed (skip retroactive recording)
streamedMu sync.Mutex
telemetry *telemetry.Collector
}

func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeCommands map[string]string) *Manager {
func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeCommands map[string]string, coll *telemetry.Collector) *Manager {
if policy.OutputDir == "" {
policy.OutputDir = "data/recordings"
}
@@ -71,8 +73,9 @@ func New(sampleRate int, blockSize int, policy Policy, centerHz float64, decodeC
centerHz: centerHz,
decodeCommands: decodeCommands,
queue: make(chan detector.Event, 64),
streamer: newStreamer(policy, centerHz),
streamer: newStreamer(policy, centerHz, coll),
streamedIDs: make(map[int64]bool),
telemetry: coll,
}
m.initGPUDemod(sampleRate, blockSize)
m.workerWG.Add(1)
@@ -103,6 +106,13 @@ func (m *Manager) Update(sampleRate int, blockSize int, policy Policy, centerHz
if m.streamer != nil {
m.streamer.updatePolicy(policy, centerHz)
}
if m.telemetry != nil {
m.telemetry.Event("recorder_update", "info", "recorder policy updated", nil, map[string]any{
"sample_rate": sampleRate,
"block_size": blockSize,
"enabled": policy.Enabled,
})
}
}

func (m *Manager) Ingest(t0 time.Time, samples []complex64) {
@@ -116,6 +126,9 @@ func (m *Manager) Ingest(t0 time.Time, samples []complex64) {
return
}
ring.Push(t0, samples)
if m.telemetry != nil {
m.telemetry.SetGauge("recorder.ring.push_samples", float64(len(samples)), nil)
}
}

func (m *Manager) OnEvents(events []detector.Event) {
@@ -134,8 +147,14 @@ func (m *Manager) OnEvents(events []detector.Event) {
case m.queue <- ev:
default:
// drop if queue full
if m.telemetry != nil {
m.telemetry.IncCounter("recorder.event_queue.drop", 1, nil)
}
}
}
if m.telemetry != nil {
m.telemetry.SetGauge("recorder.event_queue.len", float64(len(m.queue)), nil)
}
}

func (m *Manager) worker() {
@@ -357,6 +376,13 @@ func (m *Manager) StreamerRef() *Streamer {
return m.streamer
}

func (m *Manager) ResetStreams() {
if m == nil || m.streamer == nil {
return
}
m.streamer.ResetStreams()
}

func (m *Manager) RuntimeInfoBySignalID() map[int64]RuntimeSignalInfo {
if m == nil || m.streamer == nil {
return nil


+ 659
- 33
internal/recorder/streamer.go
Разница между файлами не показана из-за своего большого размера
Просмотреть файл


+ 966
- 0
internal/telemetry/telemetry.go Просмотреть файл

@@ -0,0 +1,966 @@
package telemetry

import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
)

// Config controls how the telemetry collector samples, retains, and
// persists metrics and events. Non-positive or empty tuning fields are
// replaced with defaults by sanitizeConfig.
type Config struct {
	Enabled           bool          `json:"enabled"`             // master switch; when false all recording is a no-op
	HeavyEnabled      bool          `json:"heavy_enabled"`       // gates the expensive sampling path (ShouldSampleHeavy)
	HeavySampleEvery  int           `json:"heavy_sample_every"`  // keep roughly 1 of every N heavy samples
	MetricSampleEvery int           `json:"metric_sample_every"` // keep roughly 1 of every N non-counter metric points in history
	MetricHistoryMax  int           `json:"metric_history_max"`  // cap on in-memory metric points
	EventHistoryMax   int           `json:"event_history_max"`   // cap on in-memory events
	Retention         time.Duration `json:"retention"`           // drop in-memory entries older than this
	PersistEnabled    bool          `json:"persist_enabled"`     // mirror stored entries to JSONL files
	PersistDir        string        `json:"persist_dir"`         // directory for telemetry-*.jsonl files
	RotateMB          int           `json:"rotate_mb"`           // rotate the JSONL file past this size
	KeepFiles         int           `json:"keep_files"`          // prune rotated files beyond this count
}

// DefaultConfig returns the collector defaults: telemetry on, heavy
// sampling off, bounded in-memory history, a 15 minute retention window,
// and persistence disabled.
func DefaultConfig() Config {
	cfg := Config{
		Enabled:           true,
		HeavySampleEvery:  12,
		MetricSampleEvery: 2,
		MetricHistoryMax:  12_000,
		EventHistoryMax:   4_000,
		Retention:         15 * time.Minute,
		PersistDir:        "debug/telemetry",
		RotateMB:          16,
		KeepFiles:         8,
	}
	// HeavyEnabled and PersistEnabled stay at their zero value (false).
	return cfg
}

type Tags map[string]string

// MetricPoint is one sampled metric observation kept in the in-memory
// history and optionally persisted as a JSONL line.
type MetricPoint struct {
	Timestamp time.Time `json:"ts"`
	Name      string    `json:"name"`
	Type      string    `json:"type"` // "counter", "gauge" or "distribution"
	Value     float64   `json:"value"`
	Tags      Tags      `json:"tags,omitempty"`
}

// Event is one structured telemetry event. IDs increase monotonically per
// collector; Level is normalised to lowercase and defaults to "info".
type Event struct {
	ID        uint64         `json:"id"`
	Timestamp time.Time      `json:"ts"`
	Name      string         `json:"name"`
	Level     string         `json:"level"`
	Message   string         `json:"message,omitempty"`
	Tags      Tags           `json:"tags,omitempty"`
	Fields    map[string]any `json:"fields,omitempty"`
}

// SeriesValue is the current value of one counter or gauge series as
// reported in a LiveSnapshot.
type SeriesValue struct {
	Name  string  `json:"name"`
	Value float64 `json:"value"`
	Tags  Tags    `json:"tags,omitempty"`
}

// DistValue summarises one distribution series for a LiveSnapshot.
// P95 is estimated from a bounded ring of recent samples (see p95FromDist),
// not from the full population.
type DistValue struct {
	Count int64   `json:"count"`
	Name  string  `json:"name"`
	Min   float64 `json:"min"`
	Max   float64 `json:"max"`
	Mean  float64 `json:"mean"`
	Last  float64 `json:"last"`
	P95   float64 `json:"p95"`
	Tags  Tags    `json:"tags,omitempty"`
}

// LiveSnapshot is a point-in-time view of the collector: all live series,
// the most recent events, and the free-form status map.
type LiveSnapshot struct {
	Now           time.Time      `json:"now"`
	StartedAt     time.Time      `json:"started_at"`
	UptimeMs      int64          `json:"uptime_ms"`
	Config        Config         `json:"config"`
	Counters      []SeriesValue  `json:"counters"`
	Gauges        []SeriesValue  `json:"gauges"`
	Distributions []DistValue    `json:"distributions"`
	RecentEvents  []Event        `json:"recent_events"` // tail of event history, at most 40
	Status        map[string]any `json:"status,omitempty"`
}

// Query filters metric and event lookups. Zero-value fields mean
// "no filter"; Limit is clamped by normalizeQuery.
type Query struct {
	From             time.Time // inclusive lower time bound (zero = unbounded)
	To               time.Time // inclusive upper time bound (zero = unbounded)
	Limit            int       // max results, newest kept; clamped to (0,5000], default 500
	Name             string    // exact name match
	NamePrefix       string    // name prefix match
	Level            string    // events only: case-insensitive level match
	Tags             Tags      // every listed tag must match exactly
	IncludePersisted bool      // also scan persisted JSONL files
}

// collectorMetric is the live value of one counter or gauge series,
// keyed in the collector maps by metricKey(name, tags).
type collectorMetric struct {
	name  string
	tags  Tags
	value float64
}

// distMetric is the running aggregate of one distribution series plus a
// fixed-size ring of recent samples used for the p95 estimate.
type distMetric struct {
	name    string
	tags    Tags
	count   int64
	sum     float64
	min     float64
	max     float64
	last    float64
	samples []float64 // ring buffer of recent values (64 slots)
	next    int       // next write position in the ring
	full    bool      // set once the ring has wrapped
}

// persistedEnvelope is one JSONL line on disk; exactly one of Metric or
// Event is non-nil, discriminated by Kind ("metric" or "event").
type persistedEnvelope struct {
	Kind   string       `json:"kind"`
	Metric *MetricPoint `json:"metric,omitempty"`
	Event  *Event       `json:"event,omitempty"`
}

// Collector is the central telemetry sink: it aggregates counters, gauges
// and distributions, keeps bounded in-memory histories of metric points
// and events, and optionally mirrors stored entries to JSONL files.
// All methods tolerate concurrent use; most also tolerate a nil receiver.
type Collector struct {
	mu         sync.RWMutex
	cfg        Config
	startedAt  time.Time
	counterSeq uint64 // metric-history sampling sequence (accessed atomically)
	heavySeq   uint64 // heavy-sampling sequence (accessed atomically)
	eventSeq   uint64 // monotonically increasing event ID source (accessed atomically)

	counters       map[string]*collectorMetric // live counter series, keyed by metricKey
	gauges         map[string]*collectorMetric // live gauge series
	dists          map[string]*distMetric      // live distribution series
	metricsHistory []MetricPoint               // sampled points, bounded by MetricHistoryMax/Retention
	events         []Event                     // recent events, bounded by EventHistoryMax/Retention
	status         map[string]any              // free-form status surfaced in LiveSnapshot

	writer *jsonlWriter // non-nil only while persistence is enabled
}

// New builds a Collector from cfg (after sanitizing it) and, when
// persistence is enabled, opens the JSONL writer immediately so a bad
// persist directory fails fast.
func New(cfg Config) (*Collector, error) {
	cfg = sanitizeConfig(cfg)
	col := &Collector{
		cfg:            cfg,
		startedAt:      time.Now().UTC(),
		counters:       make(map[string]*collectorMetric),
		gauges:         make(map[string]*collectorMetric),
		dists:          make(map[string]*distMetric),
		metricsHistory: make([]MetricPoint, 0, cfg.MetricHistoryMax),
		events:         make([]Event, 0, cfg.EventHistoryMax),
		status:         make(map[string]any),
	}
	if !cfg.PersistEnabled {
		return col, nil
	}
	w, err := newJSONLWriter(cfg)
	if err != nil {
		return nil, err
	}
	col.writer = w
	return col, nil
}

// Close releases the persistence writer, if any. Safe on a nil receiver
// and idempotent: subsequent closes see a nil writer and return nil.
func (c *Collector) Close() error {
	if c == nil {
		return nil
	}
	c.mu.Lock()
	w := c.writer
	c.writer = nil
	c.mu.Unlock()
	if w == nil {
		return nil
	}
	return w.Close()
}

// Configure swaps in a sanitized cfg. A replacement JSONL writer is opened
// before the swap so a failure leaves the old state intact; the previous
// writer (if any) is closed after the lock is released. History is trimmed
// immediately to honour the new caps.
func (c *Collector) Configure(cfg Config) error {
	if c == nil {
		return nil
	}
	cfg = sanitizeConfig(cfg)
	var next *jsonlWriter
	if cfg.PersistEnabled {
		w, err := newJSONLWriter(cfg)
		if err != nil {
			return err
		}
		next = w
	}
	c.mu.Lock()
	prev := c.writer
	c.cfg = cfg
	c.writer = next
	c.trimLocked(time.Now().UTC())
	c.mu.Unlock()
	if prev != nil {
		_ = prev.Close()
	}
	return nil
}

// Config returns the collector's current (sanitized) configuration.
// Fix: every other accessor on Collector guards a nil receiver, but this
// one panicked on nil. A nil collector now reports the zero Config, which
// is consistent with Enabled() treating nil as disabled.
func (c *Collector) Config() Config {
	if c == nil {
		return Config{}
	}
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.cfg
}

// Enabled reports whether telemetry recording is currently on.
// A nil collector is always disabled.
func (c *Collector) Enabled() bool {
	if c == nil {
		return false
	}
	c.mu.RLock()
	on := c.cfg.Enabled
	c.mu.RUnlock()
	return on
}

// ShouldSampleHeavy reports whether an expensive sampling path should run
// now. Both enable flags must be set; beyond that, roughly 1 in
// HeavySampleEvery calls is kept, driven by an atomic sequence counter.
func (c *Collector) ShouldSampleHeavy() bool {
	if c == nil {
		return false
	}
	c.mu.RLock()
	cfg := c.cfg
	c.mu.RUnlock()
	switch {
	case !cfg.Enabled, !cfg.HeavyEnabled:
		return false
	case cfg.HeavySampleEvery <= 1:
		return true
	}
	seq := atomic.AddUint64(&c.heavySeq, 1)
	return seq%uint64(cfg.HeavySampleEvery) == 0
}

// SetStatus stores an arbitrary status value under key; the status map is
// surfaced via LiveSnapshot. Safe on a nil receiver.
func (c *Collector) SetStatus(key string, value any) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	c.status[key] = value
}

// IncCounter adds delta to the counter series identified by name+tags.
func (c *Collector) IncCounter(name string, delta float64, tags Tags) {
	c.recordMetric("counter", name, delta, tags, true)
}

// SetGauge replaces the current value of the gauge series name+tags.
func (c *Collector) SetGauge(name string, value float64, tags Tags) {
	c.recordMetric("gauge", name, value, tags, false)
}

// Observe folds value into the distribution series name+tags
// (count/sum/min/max/last plus the p95 sample ring).
func (c *Collector) Observe(name string, value float64, tags Tags) {
	c.recordMetric("distribution", name, value, tags, false)
}

// Event records a structured event. Level is normalised to lowercase and
// defaults to "info". The event is appended to the in-memory history
// under the lock; the JSONL write (when persistence is on) happens after
// the lock is released, best-effort.
func (c *Collector) Event(name string, level string, message string, tags Tags, fields map[string]any) {
	if c == nil {
		return
	}
	now := time.Now().UTC()
	lvl := strings.ToLower(strings.TrimSpace(level))
	if lvl == "" {
		lvl = "info"
	}
	c.mu.Lock()
	if !c.cfg.Enabled {
		c.mu.Unlock()
		return
	}
	ev := Event{
		ID:        atomic.AddUint64(&c.eventSeq, 1),
		Timestamp: now,
		Name:      name,
		Level:     lvl,
		Message:   message,
		Tags:      cloneTags(tags),
		Fields:    cloneFields(fields),
	}
	c.events = append(c.events, ev)
	c.trimLocked(now)
	w := c.writer
	c.mu.Unlock()
	if w != nil {
		_ = w.Write(persistedEnvelope{Kind: "event", Event: &ev})
	}
}

// recordMetric is the shared implementation behind IncCounter (kind
// "counter", add=true), SetGauge ("gauge") and Observe ("distribution").
// It updates the live aggregate for the series, appends a (sampled)
// MetricPoint to the in-memory history, and hands stored points to the
// JSONL writer after the lock is released.
func (c *Collector) recordMetric(kind string, name string, value float64, tags Tags, add bool) {
	// Nameless metrics and nil collectors are silently ignored.
	if c == nil || strings.TrimSpace(name) == "" {
		return
	}
	now := time.Now().UTC()
	c.mu.Lock()
	if !c.cfg.Enabled {
		c.mu.Unlock()
		return
	}
	// A series is identified by its name plus the sorted tag set.
	key := metricKey(name, tags)
	switch kind {
	case "counter":
		m := c.counters[key]
		if m == nil {
			m = &collectorMetric{name: name, tags: cloneTags(tags)}
			c.counters[key] = m
		}
		if add {
			m.value += value
		} else {
			m.value = value
		}
	case "gauge":
		m := c.gauges[key]
		if m == nil {
			m = &collectorMetric{name: name, tags: cloneTags(tags)}
			c.gauges[key] = m
		}
		m.value = value
	case "distribution":
		d := c.dists[key]
		if d == nil {
			// 64-slot ring keeps recent samples for the p95 estimate.
			d = &distMetric{
				name:    name,
				tags:    cloneTags(tags),
				min:     value,
				max:     value,
				samples: make([]float64, 64),
			}
			c.dists[key] = d
		}
		d.count++
		d.sum += value
		d.last = value
		if d.count == 1 || value < d.min {
			d.min = value
		}
		if d.count == 1 || value > d.max {
			d.max = value
		}
		if len(d.samples) > 0 {
			d.samples[d.next] = value
			d.next++
			if d.next >= len(d.samples) {
				d.next = 0
				d.full = true
			}
		}
	}
	// History sampling: counters and IQ-boundary series are always stored;
	// everything else keeps roughly 1 in MetricSampleEvery points, driven
	// by a sequence counter shared across all series.
	sampleN := c.cfg.MetricSampleEvery
	seq := atomic.AddUint64(&c.counterSeq, 1)
	forceStore := strings.HasPrefix(name, "iq.extract.raw.boundary.") || strings.HasPrefix(name, "iq.extract.trimmed.boundary.")
	shouldStore := forceStore || sampleN <= 1 || seq%uint64(sampleN) == 0 || kind == "counter"
	var mp MetricPoint
	if shouldStore {
		mp = MetricPoint{
			Timestamp: now,
			Name:      name,
			Type:      kind,
			Value:     value,
			Tags:      cloneTags(tags),
		}
		c.metricsHistory = append(c.metricsHistory, mp)
	}
	c.trimLocked(now)
	writer := c.writer
	c.mu.Unlock()

	// Persist outside the lock; write errors are deliberately best-effort.
	if writer != nil && shouldStore {
		_ = writer.Write(persistedEnvelope{Kind: "metric", Metric: &mp})
	}
}

// LiveSnapshot returns a point-in-time view of all live series, the most
// recent events (up to 40), and the status map. All data is deep-copied
// under the read lock; name-sorting happens after the lock is released.
func (c *Collector) LiveSnapshot() LiveSnapshot {
	now := time.Now().UTC()
	c.mu.RLock()
	cfg := c.cfg
	out := LiveSnapshot{
		Now:           now,
		StartedAt:     c.startedAt,
		UptimeMs:      now.Sub(c.startedAt).Milliseconds(),
		Config:        cfg,
		Counters:      make([]SeriesValue, 0, len(c.counters)),
		Gauges:        make([]SeriesValue, 0, len(c.gauges)),
		Distributions: make([]DistValue, 0, len(c.dists)),
		RecentEvents:  make([]Event, 0, min(40, len(c.events))),
		Status:        cloneFields(c.status),
	}
	for _, m := range c.counters {
		out.Counters = append(out.Counters, SeriesValue{Name: m.name, Value: m.value, Tags: cloneTags(m.tags)})
	}
	for _, m := range c.gauges {
		out.Gauges = append(out.Gauges, SeriesValue{Name: m.name, Value: m.value, Tags: cloneTags(m.tags)})
	}
	for _, d := range c.dists {
		mean := 0.0
		if d.count > 0 {
			mean = d.sum / float64(d.count)
		}
		out.Distributions = append(out.Distributions, DistValue{
			Name:  d.name,
			Count: d.count,
			Min:   d.min,
			Max:   d.max,
			Mean:  mean,
			Last:  d.last,
			P95:   p95FromDist(d),
			Tags:  cloneTags(d.tags),
		})
	}
	// Tail of the event history; cap(RecentEvents) is the 40-or-fewer
	// budget chosen when the slice was made above.
	start := len(c.events) - cap(out.RecentEvents)
	if start < 0 {
		start = 0
	}
	for _, ev := range c.events[start:] {
		out.RecentEvents = append(out.RecentEvents, copyEvent(ev))
	}
	c.mu.RUnlock()
	sort.Slice(out.Counters, func(i, j int) bool { return out.Counters[i].Name < out.Counters[j].Name })
	sort.Slice(out.Gauges, func(i, j int) bool { return out.Gauges[i].Name < out.Gauges[j].Name })
	sort.Slice(out.Distributions, func(i, j int) bool { return out.Distributions[i].Name < out.Distributions[j].Name })
	return out
}

// QueryMetrics returns metric points matching q, oldest first, drawn from
// the in-memory history and — when requested and persistence is on — from
// the JSONL files. Missing persist files are not an error. When more than
// q.Limit points match, the newest are kept.
func (c *Collector) QueryMetrics(q Query) ([]MetricPoint, error) {
	if c == nil {
		return nil, nil
	}
	q = normalizeQuery(q)
	c.mu.RLock()
	out := make([]MetricPoint, 0, len(c.metricsHistory))
	for _, mp := range c.metricsHistory {
		if !metricMatch(mp, q) {
			continue
		}
		out = append(out, copyMetric(mp))
	}
	cfg := c.cfg
	c.mu.RUnlock()
	if q.IncludePersisted && cfg.PersistEnabled {
		stored, err := readPersistedMetrics(cfg, q)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		out = append(out, stored...)
	}
	sort.Slice(out, func(i, j int) bool { return out[i].Timestamp.Before(out[j].Timestamp) })
	if q.Limit > 0 && len(out) > q.Limit {
		out = out[len(out)-q.Limit:]
	}
	return out, nil
}

// QueryEvents returns events matching q, oldest first, from the in-memory
// history and — when requested and persistence is on — from the JSONL
// files. Missing persist files are not an error. When more than q.Limit
// events match, the newest are kept.
func (c *Collector) QueryEvents(q Query) ([]Event, error) {
	if c == nil {
		return nil, nil
	}
	q = normalizeQuery(q)
	c.mu.RLock()
	out := make([]Event, 0, len(c.events))
	for _, ev := range c.events {
		if !eventMatch(ev, q) {
			continue
		}
		out = append(out, copyEvent(ev))
	}
	cfg := c.cfg
	c.mu.RUnlock()
	if q.IncludePersisted && cfg.PersistEnabled {
		stored, err := readPersistedEvents(cfg, q)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		out = append(out, stored...)
	}
	sort.Slice(out, func(i, j int) bool { return out[i].Timestamp.Before(out[j].Timestamp) })
	if q.Limit > 0 && len(out) > q.Limit {
		out = out[len(out)-q.Limit:]
	}
	return out, nil
}

// trimLocked enforces the history caps and the retention window. The
// caller must hold c.mu.
//
// Fix: the original copied the surviving entries into a freshly allocated
// slice on every trim. trimLocked runs on every recorded metric/event, so
// once a history reached its cap each new entry reallocated and copied the
// entire history (an O(history) allocation per insert). Reslicing in place
// is O(1) here; the backing array's capacity is recycled (and eventually
// replaced) by the append in the caller, so memory stays bounded.
func (c *Collector) trimLocked(now time.Time) {
	if limit := c.cfg.MetricHistoryMax; limit > 0 && len(c.metricsHistory) > limit {
		c.metricsHistory = c.metricsHistory[len(c.metricsHistory)-limit:]
	}
	if limit := c.cfg.EventHistoryMax; limit > 0 && len(c.events) > limit {
		c.events = c.events[len(c.events)-limit:]
	}
	ret := c.cfg.Retention
	if ret <= 0 {
		return
	}
	cut := now.Add(-ret)
	mStart := 0
	for mStart < len(c.metricsHistory) && c.metricsHistory[mStart].Timestamp.Before(cut) {
		mStart++
	}
	if mStart > 0 {
		c.metricsHistory = c.metricsHistory[mStart:]
	}
	eStart := 0
	for eStart < len(c.events) && c.events[eStart].Timestamp.Before(cut) {
		eStart++
	}
	if eStart > 0 {
		c.events = c.events[eStart:]
	}
}

// sanitizeConfig replaces non-positive or empty tuning fields with their
// defaults; the boolean switches are kept exactly as given.
func sanitizeConfig(cfg Config) Config {
	def := DefaultConfig()
	fix := func(dst *int, fallback int) {
		if *dst <= 0 {
			*dst = fallback
		}
	}
	fix(&cfg.HeavySampleEvery, def.HeavySampleEvery)
	fix(&cfg.MetricSampleEvery, def.MetricSampleEvery)
	fix(&cfg.MetricHistoryMax, def.MetricHistoryMax)
	fix(&cfg.EventHistoryMax, def.EventHistoryMax)
	fix(&cfg.RotateMB, def.RotateMB)
	fix(&cfg.KeepFiles, def.KeepFiles)
	if cfg.Retention <= 0 {
		cfg.Retention = def.Retention
	}
	if strings.TrimSpace(cfg.PersistDir) == "" {
		cfg.PersistDir = def.PersistDir
	}
	return cfg
}

// normalizeQuery clamps Limit into (0, 5000] — out-of-range values become
// the default of 500 — and guarantees a non-nil Tags map.
func normalizeQuery(q Query) Query {
	switch {
	case q.Limit <= 0, q.Limit > 5000:
		q.Limit = 500
	}
	if q.Tags == nil {
		q.Tags = Tags{}
	}
	return q
}

// metricMatch reports whether m satisfies every filter set on q: the
// inclusive time window, exact name, name prefix, and all requested tags.
func metricMatch(m MetricPoint, q Query) bool {
	switch {
	case !q.From.IsZero() && m.Timestamp.Before(q.From):
		return false
	case !q.To.IsZero() && m.Timestamp.After(q.To):
		return false
	case q.Name != "" && m.Name != q.Name:
		return false
	case q.NamePrefix != "" && !strings.HasPrefix(m.Name, q.NamePrefix):
		return false
	}
	for k, want := range q.Tags {
		if m.Tags[k] != want {
			return false
		}
	}
	return true
}

// eventMatch reports whether ev satisfies every filter set on q: the
// inclusive time window, exact name, name prefix, case-insensitive level,
// and all requested tags.
func eventMatch(ev Event, q Query) bool {
	switch {
	case !q.From.IsZero() && ev.Timestamp.Before(q.From):
		return false
	case !q.To.IsZero() && ev.Timestamp.After(q.To):
		return false
	case q.Name != "" && ev.Name != q.Name:
		return false
	case q.NamePrefix != "" && !strings.HasPrefix(ev.Name, q.NamePrefix):
		return false
	case q.Level != "" && !strings.EqualFold(q.Level, ev.Level):
		return false
	}
	for k, want := range q.Tags {
		if ev.Tags[k] != want {
			return false
		}
	}
	return true
}

// metricKey builds a stable series key from the metric name and its tags.
// Tag keys are sorted so equal tag sets always map to the same series;
// the result is name|k1=v1|k2=v2|...
func metricKey(name string, tags Tags) string {
	if len(tags) == 0 {
		return name
	}
	keys := make([]string, 0, len(tags))
	for k := range tags {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	parts := make([]string, 0, len(keys)+1)
	parts = append(parts, name)
	for _, k := range keys {
		parts = append(parts, k+"="+tags[k])
	}
	return strings.Join(parts, "|")
}

// cloneTags returns an independent copy of tags. Empty or nil input maps
// to nil so empty tag sets stay allocation-free (and serialize as absent).
func cloneTags(tags Tags) Tags {
	if len(tags) == 0 {
		return nil
	}
	dup := make(Tags, len(tags))
	for key, val := range tags {
		dup[key] = val
	}
	return dup
}

// cloneFields returns a shallow copy of fields (values are shared), or
// nil when the input is empty or nil.
func cloneFields(fields map[string]any) map[string]any {
	if len(fields) == 0 {
		return nil
	}
	dup := make(map[string]any, len(fields))
	for key, val := range fields {
		dup[key] = val
	}
	return dup
}

// copyMetric returns m with its Tags deep-copied, so callers can hold the
// result without sharing mutable state with the history buffer.
func copyMetric(m MetricPoint) MetricPoint {
	out := m
	out.Tags = cloneTags(m.Tags)
	return out
}

// copyEvent returns ev with its Tags and Fields deep-copied (field values
// themselves are shared), detaching the result from the history buffer.
func copyEvent(ev Event) Event {
	out := ev
	out.Tags = cloneTags(ev.Tags)
	out.Fields = cloneFields(ev.Fields)
	return out
}

// p95FromDist estimates the 95th percentile from the ring of recent
// samples (nearest-rank on a sorted copy). It returns 0 for a nil or
// empty distribution and falls back to the last value when the ring
// holds no samples.
func p95FromDist(d *distMetric) float64 {
	if d == nil || d.count == 0 {
		return 0
	}
	n := d.next
	if d.full {
		n = len(d.samples)
	}
	if n <= 0 {
		return d.last
	}
	sorted := append([]float64(nil), d.samples[:n]...)
	sort.Float64s(sorted)
	idx := int(float64(n-1) * 0.95)
	switch {
	case idx < 0:
		idx = 0
	case idx >= n:
		idx = n - 1
	}
	return sorted[idx]
}

// jsonlWriter appends telemetry envelopes to size-rotated JSONL files in
// a single directory, pruning old files down to cfg.KeepFiles.
type jsonlWriter struct {
	cfg         Config
	mu          sync.Mutex
	dir         string        // cleaned persistence directory
	f           *os.File      // current output file; nil after Close
	w           *bufio.Writer // buffers f; flushed after every Write
	currentPath string        // path of the current file
	currentSize int64         // bytes written to the current file so far
	seq         int64         // rotation sequence embedded in filenames
}

// newJSONLWriter creates the persistence directory (if needed) and opens
// the first rotated output file.
func newJSONLWriter(cfg Config) (*jsonlWriter, error) {
	dir := filepath.Clean(cfg.PersistDir)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return nil, err
	}
	jw := &jsonlWriter{cfg: cfg, dir: dir}
	// No other goroutine can see jw yet, so calling the locked helper
	// without the mutex is safe here.
	if err := jw.rotateLocked(); err != nil {
		return nil, err
	}
	return jw, nil
}

// Write appends one envelope as a JSON line, rotating first when the line
// would push the current file past the configured size limit. The buffer
// is flushed after every line so readers always see complete records.
// A closed writer silently drops input.
func (w *jsonlWriter) Write(v persistedEnvelope) error {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.f == nil || w.w == nil {
		return nil
	}
	payload, err := json.Marshal(v)
	if err != nil {
		return err
	}
	payload = append(payload, '\n')
	limit := int64(w.cfg.RotateMB) * 1024 * 1024
	if w.currentSize+int64(len(payload)) > limit {
		if rerr := w.rotateLocked(); rerr != nil {
			return rerr
		}
	}
	n, werr := w.w.Write(payload)
	w.currentSize += int64(n)
	if werr != nil {
		return werr
	}
	return w.w.Flush()
}

// Close flushes any buffered output and closes the current file.
// Idempotent: a second Close finds no file and returns nil.
func (w *jsonlWriter) Close() error {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.w != nil {
		_ = w.w.Flush()
	}
	if w.f == nil {
		return nil
	}
	err := w.f.Close()
	w.f = nil
	w.w = nil
	return err
}

// rotateLocked flushes and closes any current file, opens a fresh
// timestamped+sequenced JSONL file, and prunes old files down to
// KeepFiles. The caller must hold w.mu (or be the constructor, before
// the writer is shared).
func (w *jsonlWriter) rotateLocked() error {
	if w.w != nil {
		_ = w.w.Flush()
	}
	if w.f != nil {
		_ = w.f.Close()
	}
	w.seq++
	stamp := time.Now().UTC().Format("20060102-150405")
	path := filepath.Join(w.dir, fmt.Sprintf("telemetry-%s-%04d.jsonl", stamp, w.seq))
	f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	// Appending to a pre-existing file starts from its current size.
	var size int64
	if info, statErr := f.Stat(); statErr == nil && info != nil {
		size = info.Size()
	}
	w.f = f
	w.w = bufio.NewWriterSize(f, 64*1024)
	w.currentPath = path
	w.currentSize = size
	_ = pruneFiles(w.dir, w.cfg.KeepFiles)
	return nil
}

func pruneFiles(dir string, keep int) error {
if keep <= 0 {
return nil
}
ents, err := os.ReadDir(dir)
if err != nil {
return err
}
files := make([]string, 0, len(ents))
for _, ent := range ents {
if ent.IsDir() {
continue
}
name := ent.Name()
if !strings.HasPrefix(name, "telemetry-") || !strings.HasSuffix(name, ".jsonl") {
continue
}
files = append(files, filepath.Join(dir, name))
}
if len(files) <= keep {
return nil
}
sort.Strings(files)
for _, path := range files[:len(files)-keep] {
_ = os.Remove(path)
}
return nil
}

// readPersistedMetrics scans every persisted JSONL file and returns the
// metric points that match q. Files that fail to parse are skipped
// best-effort rather than aborting the query.
func readPersistedMetrics(cfg Config, q Query) ([]MetricPoint, error) {
	files, err := listPersistedFiles(cfg.PersistDir)
	if err != nil {
		return nil, err
	}
	out := make([]MetricPoint, 0, 256)
	for _, path := range files {
		parsed, perr := parsePersistedFile(path, q)
		if perr != nil {
			continue
		}
		for _, mp := range parsed.metrics {
			if metricMatch(mp, q) {
				out = append(out, mp)
			}
		}
	}
	return out, nil
}

// readPersistedEvents scans every persisted JSONL file and returns the
// events that match q. Files that fail to parse are skipped best-effort
// rather than aborting the query.
func readPersistedEvents(cfg Config, q Query) ([]Event, error) {
	files, err := listPersistedFiles(cfg.PersistDir)
	if err != nil {
		return nil, err
	}
	out := make([]Event, 0, 128)
	for _, path := range files {
		parsed, perr := parsePersistedFile(path, q)
		if perr != nil {
			continue
		}
		for _, ev := range parsed.events {
			if eventMatch(ev, q) {
				out = append(out, ev)
			}
		}
	}
	return out, nil
}

// parsedFile holds the decoded contents of one persisted JSONL file.
type parsedFile struct {
	metrics []MetricPoint
	events  []Event
}

// parsePersistedFile decodes one JSONL file into metrics and events.
// Blank and malformed lines are skipped. While scanning, the combined
// slice sizes are kept near 2*q.Limit (newest entries retained) so a huge
// file cannot exhaust memory; final filtering and limiting happen in the
// callers.
func parsePersistedFile(path string, q Query) (parsedFile, error) {
	f, err := os.Open(path)
	if err != nil {
		return parsedFile{}, err
	}
	defer f.Close()
	out := parsedFile{
		metrics: make([]MetricPoint, 0, 64),
		events:  make([]Event, 0, 32),
	}
	s := bufio.NewScanner(f)
	// Allow lines up to 1 MiB (the scanner's default cap is 64 KiB).
	s.Buffer(make([]byte, 0, 32*1024), 1024*1024)
	for s.Scan() {
		line := s.Bytes()
		if len(line) == 0 {
			continue
		}
		var env persistedEnvelope
		if err := json.Unmarshal(line, &env); err != nil {
			continue
		}
		if env.Metric != nil {
			out.metrics = append(out.metrics, *env.Metric)
		} else if env.Event != nil {
			out.events = append(out.events, *env.Event)
		}
		if q.Limit > 0 && len(out.metrics)+len(out.events) > q.Limit*2 {
			// keep bounded while scanning
			if len(out.metrics) > q.Limit {
				out.metrics = out.metrics[len(out.metrics)-q.Limit:]
			}
			if len(out.events) > q.Limit {
				out.events = out.events[len(out.events)-q.Limit:]
			}
		}
	}
	return out, s.Err()
}

func listPersistedFiles(dir string) ([]string, error) {
ents, err := os.ReadDir(dir)
if err != nil {
return nil, err
}
files := make([]string, 0, len(ents))
for _, ent := range ents {
if ent.IsDir() {
continue
}
name := ent.Name()
if strings.HasPrefix(name, "telemetry-") && strings.HasSuffix(name, ".jsonl") {
files = append(files, filepath.Join(dir, name))
}
}
sort.Strings(files)
return files, nil
}

func ParseTimeQuery(raw string) (time.Time, error) {
raw = strings.TrimSpace(raw)
if raw == "" {
return time.Time{}, nil
}
if ms, err := strconv.ParseInt(raw, 10, 64); err == nil {
if ms > 1e12 {
return time.UnixMilli(ms).UTC(), nil
}
return time.Unix(ms, 0).UTC(), nil
}
if t, err := time.Parse(time.RFC3339Nano, raw); err == nil {
return t.UTC(), nil
}
if t, err := time.Parse(time.RFC3339, raw); err == nil {
return t.UTC(), nil
}
return time.Time{}, errors.New("invalid time query")
}

// TagsWith returns a copy of base with key set to value's fmt.Sprint
// rendering; base itself is never modified.
func TagsWith(base Tags, key string, value any) Tags {
	merged := cloneTags(base)
	if merged == nil {
		merged = make(Tags, 1)
	}
	merged[key] = fmt.Sprint(value)
	return merged
}

// TagsFromPairs builds Tags from alternating key/value arguments.
// Keys that are blank after trimming are dropped, a trailing unpaired
// value is ignored, and nil is returned when no usable pairs remain.
func TagsFromPairs(kv ...string) Tags {
	if len(kv) < 2 {
		return nil
	}
	out := Tags{}
	for i := 0; i+1 < len(kv); i += 2 {
		key := strings.TrimSpace(kv[i])
		if key == "" {
			continue
		}
		out[key] = kv[i+1]
	}
	if len(out) == 0 {
		return nil
	}
	return out
}

// min returns the smaller of a and b (local helper kept for builds that
// predate the Go 1.21 built-in).
func min(a int, b int) int {
	if b < a {
		return b
	}
	return a
}

Загрузка…
Отмена
Сохранить