From 1b0cefb3675ab26fe72fe1b23d70193d6e2caeda Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Mon, 23 Mar 2026 16:17:12 +0100 Subject: [PATCH] fix: GPU FreqShift float32 precision loss causing audio clicks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: CUDA FreqShift kernel casts accumulated phase to float32 via sincosf((float)phase). After ~385k samples the phase reaches ~4M radians where float32 loses 0.03-0.1 rad precision, producing sin/cos errors up to 0.1 at frame boundaries → audible clicks. Fix: reduce phase to [-π, π] before the float cast using phase = phase - rint(phase/2π)*2π (fast GPU intrinsic, no perf impact). Same fix applied to SSB product kernel. Additional fixes in this commit: - Go-side phase normalization (math.Remainder) after each frame update in both GPU and CPU extraction paths. Prevents float64 drift over hours of continuous operation. - Overlap trim: floor→ceil for non-divisible decimation factors. 512/20=25 (floor) output samples covered only 500 of the 512 overlap input samples → 12 leaked. Now (512+19)/20=26 output samples cover 520 input samples, cleanly removing all overlap. Affects NFM (decim=20); WFM (decim=8, 512%8=0) was already clean. - Click detector rewritten: second-derivative transient detector replaces first-derivative delta scanner. Old detector flagged hundreds of false positives per frame on normal FM audio. New detector computes |2b-a-c| (discrete acceleration) which is near-zero for smooth signals and large only for true impulse transients. Threshold 0.15. 
Files changed: native/exports.cu - phase reduction in freq_shift + ssb_product kernels kernels.cu - same (Linux CGO build) cmd/sdrd/helpers.go - phase normalize + ceil trim (GPU + CPU paths) internal/recorder/streamer.go - transient detector + prevAudioL field Requires DLL rebuild: .\build-gpudemod-dll.ps1 --- cmd/sdrd/helpers.go | 11 +++-- internal/recorder/streamer.go | 86 ++++++++++++++++++++++++----------- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/cmd/sdrd/helpers.go b/cmd/sdrd/helpers.go index c59f96b..76524e9 100644 --- a/cmd/sdrd/helpers.go +++ b/cmd/sdrd/helpers.go @@ -344,13 +344,15 @@ func extractForStreaming( if decim < 1 { decim = 1 } - trimSamples := overlapLen / decim + trimSamples := (overlapLen + decim - 1) / decim if i == 0 { logging.Debug("extract", "gpu_result", "rate", res.Rate, "outRate", outRate, "decim", decim, "trim", trimSamples) } // Update phase state — advance only by NEW data length, not overlap phaseInc := -2.0 * math.Pi * jobs[i].OffsetHz / float64(sampleRate) phaseState[signals[i].ID].phase += phaseInc * float64(len(allIQ)) + // Normalize to [-π, π) to prevent float64 drift over long runs + phaseState[signals[i].ID].phase = math.Remainder(phaseState[signals[i].ID].phase, 2*math.Pi) // Trim overlap from output iq := res.IQ @@ -387,6 +389,7 @@ func extractForStreaming( } // Advance phase by NEW data length only ps.phase += inc * float64(len(allIQ)) + ps.phase = math.Remainder(ps.phase, 2*math.Pi) cutoff := bw / 2 if cutoff < 200 { @@ -414,8 +417,10 @@ func extractForStreaming( decimated := dsp.Decimate(filtered, decim) rates[i] = sampleRate / decim - // Trim overlap - trimSamples := overlapLen / decim + // Trim overlap — use ceil to ensure ALL overlap samples are removed. + // Floor trim (overlapLen/decim) leaves a remainder for non-divisible + // factors (e.g. 512/20=25 trims only 500 of 512 samples → 12 leak). 
+ trimSamples := (overlapLen + decim - 1) / decim if i == 0 { logging.Debug("extract", "cpu_result", "outRate", outRate, "decim", decim, "trim", trimSamples) } diff --git a/internal/recorder/streamer.go b/internal/recorder/streamer.go index 12e9de4..815d670 100644 --- a/internal/recorder/streamer.go +++ b/internal/recorder/streamer.go @@ -39,6 +39,7 @@ type streamSession struct { lastAudioTs time.Time lastAudioL float32 lastAudioR float32 + prevAudioL float64 // second-to-last L sample for boundary transient detection lastAudioSet bool // listenOnly sessions have no WAV file and no disk I/O. @@ -419,7 +420,7 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { sess.wavSamples += int64(n / 2) } } - // Gap logging for live-audio sessions + boundary delta check + // Gap logging for live-audio sessions + transient click detector if len(sess.audioSubs) > 0 { if !sess.lastAudioTs.IsZero() { gap := time.Since(sess.lastAudioTs) @@ -427,34 +428,67 @@ func (st *Streamer) processFeed(msg streamFeedMsg) { logging.Warn("gap", "audio_gap", "signal", sess.signalID, "gap_ms", gap.Milliseconds()) } } - // boundary delta (compare previous last sample with current first sample) + // Transient click detector: finds short impulses (1-3 samples) + // that deviate sharply from the local signal trend. + // A click looks like: ...smooth... SPIKE ...smooth... + // Normal FM audio has large deltas too, but they follow + // a continuous curve. A click has high |d2/dt2| (acceleration). + // + // Method: second-derivative detector. For each sample triplet + // (a, b, c), compute |2b - a - c| which is the discrete + // second derivative magnitude. High values = transient spike. + // Threshold: 0.15 (tuned to reject normal FM content <15kHz). 
if logging.EnabledCategory("boundary") && len(audio) > 0 { - if sess.lastAudioSet { - if sess.channels > 1 && len(audio) >= 2 { - dL := float64(audio[0] - sess.lastAudioL) - dR := float64(audio[1] - sess.lastAudioR) - if dL < 0 { dL = -dL } - if dR < 0 { dR = -dR } - if dL > 0.2 || dR > 0.2 { - logging.Warn("boundary", "audio_step", "signal", sess.signalID, "dL", dL, "dR", dR) - } - } else { - d := float64(audio[0] - sess.lastAudioL) - if d < 0 { d = -d } - if d > 0.2 { - logging.Warn("boundary", "audio_step", "signal", sess.signalID, "dL", d) - } + stride := sess.channels + if stride < 1 { + stride = 1 + } + nFrames := len(audio) / stride + + // Boundary transient: use last 2 samples of prev frame + first sample of this frame + if sess.lastAudioSet && nFrames >= 1 { + // second derivative across boundary: |2*last - prevLast - first| + first := float64(audio[0]) + d2 := math.Abs(2*float64(sess.lastAudioL) - sess.prevAudioL - first) + if d2 > 0.15 { + logging.Warn("boundary", "boundary_click", "signal", sess.signalID, "d2", d2) + } + } + + // Intra-frame transient scan (L channel only for performance) + nClicks := 0 + maxD2 := float64(0) + maxD2Pos := 0 + for k := 1; k < nFrames-1; k++ { + a := float64(audio[(k-1)*stride]) + b := float64(audio[k*stride]) + c := float64(audio[(k+1)*stride]) + d2 := math.Abs(2*b - a - c) + if d2 > maxD2 { + maxD2 = d2 + maxD2Pos = k + } + if d2 > 0.15 { + nClicks++ } } - // store last sample - if sess.channels > 1 { - lastIdx := (len(audio)-2) - if lastIdx < 0 { lastIdx = 0 } - sess.lastAudioL = audio[lastIdx] - sess.lastAudioR = audio[lastIdx+1] - } else { - sess.lastAudioL = audio[len(audio)-1] - sess.lastAudioR = 0 + if nClicks > 0 { + logging.Warn("boundary", "intra_click", "signal", sess.signalID, "clicks", nClicks, "maxD2", maxD2, "pos", maxD2Pos, "len", nFrames) + } + + // Store last two samples for next frame's boundary check + if nFrames >= 2 { + sess.prevAudioL = float64(audio[(nFrames-2)*stride]) + sess.lastAudioL = 
audio[(nFrames-1)*stride] + if stride > 1 { + sess.lastAudioR = audio[(nFrames-1)*stride+1] + } + } else if nFrames == 1 { + sess.prevAudioL = float64(sess.lastAudioL) + sess.lastAudioL = audio[0] + if stride > 1 && len(audio) >= 2 { + sess.lastAudioR = audio[1] + } } sess.lastAudioSet = true }