From 5b0e368ed16a326d4e201dc198886a0d200bcc90 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Wed, 25 Mar 2026 07:55:03 +0100 Subject: [PATCH] debug: add gpu kernel probes and update notes --- cmd/sdrd/helpers.go | 22 +++++ docs/audio-click-debug-notes-2026-03-24.md | 94 ++++++++++++++++++++-- 2 files changed, 110 insertions(+), 6 deletions(-) diff --git a/cmd/sdrd/helpers.go b/cmd/sdrd/helpers.go index 930630f..73dd250 100644 --- a/cmd/sdrd/helpers.go +++ b/cmd/sdrd/helpers.go @@ -379,6 +379,18 @@ func extractForStreaming( runner = extractMgr.get(len(gpuIQ), sampleRate) } if runner != nil { + if coll != nil && len(gpuIQ) > 0 { + inputProbe := probeHead(gpuIQ, 16, 1e-6) + coll.Event("gpu_kernel_input_head_probe", "info", "gpu kernel input head probe", nil, map[string]any{ + "mags": inputProbe.mags, + "zero_count": inputProbe.zeroCount, + "first_nonzero_index": inputProbe.firstNonZeroIndex, + "head_max_step": inputProbe.maxStep, + "gpuIQ_len": len(gpuIQ), + "sample_rate": sampleRate, + "signals": len(signals), + }) + } results, err := runner.ShiftFilterDecimateBatchWithPhase(gpuIQ, jobs) if err == nil && len(results) == len(signals) { for i, res := range results { @@ -417,6 +429,16 @@ func extractForStreaming( } if coll != nil { tags := telemetry.TagsFromPairs("signal_id", fmt.Sprintf("%d", signals[i].ID), "path", "gpu") + kernelProbe := probeHead(res.IQ, 16, 1e-6) + coll.Event("gpu_kernel_output_head_probe", "info", "gpu kernel output head probe", tags, map[string]any{ + "mags": kernelProbe.mags, + "zero_count": kernelProbe.zeroCount, + "first_nonzero_index": kernelProbe.firstNonZeroIndex, + "head_max_step": kernelProbe.maxStep, + "raw_len": rawLen, + "out_rate": outRate, + "trim_samples": trimSamples, + }) stats := computeIQHeadStats(iq, 64) coll.SetGauge("iq.extract.output.length", float64(len(iq)), tags) coll.Observe("iq.extract.output.head_mean_mag", stats.meanMag, tags) diff --git a/docs/audio-click-debug-notes-2026-03-24.md b/docs/audio-click-debug-notes-2026-03-24.md index d022f9e..d8f2341 100644 --- a/docs/audio-click-debug-notes-2026-03-24.md +++ b/docs/audio-click-debug-notes-2026-03-24.md @@ -429,6 +429,8 @@ Used heavily once compact per-block event probes were added, because events were This ended up being especially useful for: - raw extractor head probes - trimmed extractor head probes +- extractor input head probes +- GPU kernel input/output head probes - boundary snapshots ### Important telemetry families added/used @@ -483,6 +485,20 @@ Purpose: Purpose: - answer the key question: is the corruption already present in the raw extractor output head, or created by trimming/overlap logic afterward? +#### Additional extractor input / GPU-kernel probe telemetry +- `iq.extract.input_head.zero_count` +- `iq.extract.input_head.first_nonzero_index` +- `iq.extract.input_head.max_step` +- event `extract_input_head_probe` +- event `gpu_kernel_input_head_probe` +- event `gpu_kernel_output_head_probe` + +Purpose: +- split the remaining uncertainty between: + - signal-specific input already being bad + - GPU extractor kernel/start semantics producing the bad raw head + - later output assembly after the kernel + #### Pre-demod / audio-stage metrics - `iq.pre_demod.head_mean_mag` - `iq.pre_demod.head_min_mag` @@ -701,22 +717,88 @@ Interpretation: - trimming cleans up the visibly bad raw head region - trimming still does **not** explain the deeper output-boundary continuity issue -### Refined strongest current conclusion after the 2026-03-25 telemetry pass +### Further refinement after direct extractor-input and GPU-kernel probes + +A final telemetry round added: +- `extract_input_head_probe` +- `gpu_kernel_input_head_probe` +- `gpu_kernel_output_head_probe` + +These probes further sharpened the likely fault location. + +#### Signal-specific extractor input head looked sane +Representative values: +- `iq.extract.input_head.zero_count = 0` +- `iq.extract.input_head.first_nonzero_index = 0` + +Interpretation: +- at the observed signal-specific input probe point, the GPU extractor is **not** receiving a dead/null head + +#### Raw GPU output head remained systematically broken +Representative repeated values: +- `iq.extract.raw.head_mag = 0` +- `iq.extract.raw.head_zero_count = 1` +- `iq.extract.raw.head_max_step` repeatedly around: + - `3.141592653589793` + - `3.122847934305907` + - `3.101915352902961` + - `3.080672178550904` + - `3.062425574273907` + - `2.9785041567778427` + - `2.7508533785793476` + +Representative repeated examples from strong channels: +- signal 2: `head_mag = 0`, `head_zero_count = 1` +- signal 3: `head_mag = 0`, `head_zero_count = 1` +- signal 1/4 showed the same qualitative head-zero pattern as well + +Interpretation: +- the raw extractor output head is still repeatedly born broken +- the problem is therefore after the currently probed input head and before/during raw output creation + +#### Trimmed head still looked healthier +Representative values: +- `iq.extract.trimmed.head_zero_count = 0` +- signal 1 `iq.extract.trimmed.head_mag` repeatedly around: + - `0.2868` + - `0.2907` + - `0.3036` + - `0.3116` + - `0.2838` + - `0.2760` +- signal 2 examples: + - `0.3461` + - `0.3182` + +Representative `iq.extract.trimmed.head_max_step` values for strong channels were much lower than raw, often around: +- `0.11` +- `0.13` +- `0.21` +- `0.30` +- `0.44` +- `0.69` +- `0.86` + +Interpretation: +- trimming still removes the most visibly broken head region +- but trimming does not explain the deeper output-boundary continuity issue + +### Refined strongest current conclusion after the full 2026-03-25 telemetry pass The strongest current reading is now: -> The click root cause is very likely **not** that the signal-specific extractor input already starts dead/null. Instead, the bad raw head appears to be introduced **inside the GPU extractor path or at its immediate start/output semantics**, before final trimming. +> The click root cause is very likely **not** that the signal-specific extractor input already starts dead/null. Instead, the bad raw head appears to be introduced **inside the GPU extractor path itself** (or at its immediate start/output semantics) before final trimming. More specifically: - signal-specific extractor input head looks non-zero and sane at the probe point -- all signals still show a systematically bad raw extractor head +- raw GPU output head still repeatedly starts with an exact zero sample and a short bad settling region - the trimmed head usually looks healthier - yet the final extractor output still exhibits significant complex boundary discontinuity from block to block -This points away from a simple "shared global input head is already zero" theory and toward one of these narrower causes: -1. GPU extractor start semantics / kernel warmup / first-output handling +This now points away from a simple "shared global input head is already zero" theory and toward one of these narrower causes: +1. GPU extractor kernel start semantics / warmup / first-output handling 2. phase-start or alignment handling at extractor block start -3. output assembly semantics inside the raw GPU extractor path +3. raw GPU output assembly semantics within the extractor path ### What should not be forgotten from this stage