From 216dd7fcfdc274569aaa6d4e9378c48513937a4f Mon Sep 17 00:00:00 2001
From: Jan Svabenik
Date: Thu, 19 Mar 2026 12:12:50 +0100
Subject: [PATCH] Add GPU shift-filter-decimate path for signal extraction

---
 cmd/sdrd/helpers.go                         | 19 ++++-
 internal/demod/gpudemod/gpudemod.go         | 35 +++++++++
 internal/demod/gpudemod/gpudemod_stub.go    |  6 ++
 internal/demod/gpudemod/gpudemod_windows.go | 78 +++++++++++++++++++++
 4 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/cmd/sdrd/helpers.go b/cmd/sdrd/helpers.go
index 5f27563..de3a74c 100644
--- a/cmd/sdrd/helpers.go
+++ b/cmd/sdrd/helpers.go
@@ -6,6 +6,7 @@ import (
 	"time"
 
 	"sdr-visual-suite/internal/config"
+	"sdr-visual-suite/internal/demod/gpudemod"
 	"sdr-visual-suite/internal/dsp"
 )
 
@@ -57,6 +58,21 @@ func extractSignalIQ(iq []complex64, sampleRate int, centerHz float64, sigHz flo
 		return nil
 	}
 	offset := sigHz - centerHz
+	// Target output sample rate, shared by the GPU and CPU paths.
+	const decimTarget = 200000
+	// Try the GPU pipeline first. Errors are deliberately dropped: any failure
+	// falls through to the CPU path below.
+	// NOTE(review): this creates and closes an engine on every call, and the
+	// engine decimates by round(sampleRate/decimTarget) while the CPU path uses
+	// integer division, so the two paths can yield different output rates.
+	if gpudemod.Available() {
+		if eng, err := gpudemod.New(len(iq), sampleRate); err == nil {
+			defer eng.Close()
+			if out, _, err := eng.ShiftFilterDecimate(iq, offset, bwHz, decimTarget); err == nil && len(out) > 0 {
+				return out
+			}
+		}
+	}
 	shifted := dsp.FreqShift(iq, sampleRate, offset)
 	cutoff := bwHz / 2
 	if cutoff < 200 {
@@ -67,7 +83,7 @@ func extractSignalIQ(iq []complex64, sampleRate int, centerHz float64, sigHz flo
 	}
 	taps := dsp.LowpassFIR(cutoff, sampleRate, 101)
 	filtered := dsp.ApplyFIR(shifted, taps)
-	decim := sampleRate / 200000
+	decim := sampleRate / decimTarget
 	if decim < 1 {
 		decim = 1
 	}
@@ -89,4 +105,3 @@ func parseSince(raw string) (time.Time, error) {
 	}
 	return time.Parse(time.RFC3339, raw)
 }
-
diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go
index 926762a..015918c 100644
--- a/internal/demod/gpudemod/gpudemod.go
+++ b/internal/demod/gpudemod/gpudemod.go
@@ -322,6 +322,41 @@ func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float3
 	return out, true
 }
 
+// ShiftFilterDecimate runs iq through dsp.FreqShift (by offsetHz), a low-pass
+// from dsp.LowpassFIR applied with dsp.ApplyFIR, and dsp.Decimate, returning
+// the decimated samples and the resulting sample rate, e.sampleRate / decim.
+//
+// The cutoff is bw/2 clamped to [200, e.sampleRate/2-1] Hz. The decimation
+// factor is round(e.sampleRate / outRate), but at least 1, so the returned
+// rate can differ from outRate. A nil engine or a non-positive outRate is an
+// error; empty input yields (nil, 0, nil).
+func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
+	if e == nil {
+		return nil, 0, errors.New("nil CUDA demod engine")
+	}
+	if len(iq) == 0 {
+		return nil, 0, nil
+	}
+	if outRate <= 0 {
+		return nil, 0, errors.New("invalid output sample rate")
+	}
+	shifted := dsp.FreqShift(iq, e.sampleRate, offsetHz)
+	cutoff := bw / 2
+	if cutoff < 200 {
+		cutoff = 200
+	}
+	if cutoff > float64(e.sampleRate)/2-1 {
+		cutoff = float64(e.sampleRate)/2 - 1
+	}
+	ftaps := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
+	filtered := dsp.ApplyFIR(shifted, ftaps)
+	decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
+	if decim < 1 {
+		decim = 1
+	}
+	return dsp.Decimate(filtered, decim), e.sampleRate / decim, nil
+}
+
 func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	return e.Demod(iq, offsetHz, bw, mode)
 }
diff --git a/internal/demod/gpudemod/gpudemod_stub.go b/internal/demod/gpudemod/gpudemod_stub.go
index 773ac25..4a95c57 100644
--- a/internal/demod/gpudemod/gpudemod_stub.go
+++ b/internal/demod/gpudemod/gpudemod_stub.go
@@ -32,6 +32,12 @@ func (e *Engine) SetFIR(taps []float32) {}
 func (e *Engine) LastShiftUsedGPU() bool { return false }
 func (e *Engine) LastDemodUsedGPU() bool { return false }
 
+// ShiftFilterDecimate is the stub used when CUDA support is not built in; it
+// always returns a nil slice, a zero rate and an error.
+func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
+	return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled")
+}
+
 func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled")
 }
diff --git a/internal/demod/gpudemod/gpudemod_windows.go b/internal/demod/gpudemod/gpudemod_windows.go
index 24e1c35..ac6d16c 100644
--- a/internal/demod/gpudemod/gpudemod_windows.go
+++ b/internal/demod/gpudemod/gpudemod_windows.go
@@ -393,6 +393,107 @@ func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float3
 	return out, true
 }
 
+// ShiftFilterDecimate frequency-shifts iq by -offsetHz, low-pass filters it
+// and decimates it through the gpud_* CUDA launchers, returning the decimated
+// samples and the resulting sample rate, e.sampleRate / decim.
+//
+// The decimation factor is round(e.sampleRate / outRate), but at least 1, so
+// the returned rate can differ from outRate. When the engine has no taps yet,
+// a low-pass is designed with dsp.LowpassFIR for cutoff bw/2, clamped to
+// [200, e.sampleRate/2-1] Hz, and installed with SetFIR.
+//
+// NOTE(review): once taps are installed (by SetFIR or by an earlier call) bw
+// is no longer consulted; confirm that this is intended.
+//
+// An error is returned when the engine is nil or not initialized, iq exceeds
+// the engine capacity, outRate is not positive, no taps are available, fewer
+// than decim samples were supplied, or a CUDA call fails. Empty input yields
+// (nil, 0, nil).
+func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
+	if e == nil {
+		return nil, 0, errors.New("nil CUDA demod engine")
+	}
+	if !e.cudaReady {
+		return nil, 0, errors.New("cuda demod engine is not initialized")
+	}
+	if len(iq) == 0 {
+		return nil, 0, nil
+	}
+	if len(iq) > e.maxSamples {
+		return nil, 0, errors.New("sample count exceeds engine capacity")
+	}
+	if outRate <= 0 {
+		return nil, 0, errors.New("invalid output sample rate")
+	}
+	// Report "not run on the GPU" until the whole pipeline has succeeded.
+	e.lastShiftUsedGPU = false
+	e.lastFIRUsedGPU = false
+	e.lastDecimUsedGPU = false
+	e.lastDemodUsedGPU = false
+
+	taps := e.firTaps
+	if len(taps) == 0 {
+		// Keep the cutoff below Nyquist, as the gpudemod.go implementation does.
+		cutoff := bw / 2
+		if cutoff < 200 {
+			cutoff = 200
+		}
+		if nyquist := float64(e.sampleRate)/2 - 1; cutoff > nyquist {
+			cutoff = nyquist
+		}
+		coeffs := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
+		taps = make([]float32, len(coeffs))
+		for i, v := range coeffs {
+			taps[i] = float32(v)
+		}
+		e.SetFIR(taps)
+	}
+	if len(taps) == 0 {
+		return nil, 0, errors.New("no FIR taps configured")
+	}
+
+	decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
+	if decim < 1 {
+		decim = 1
+	}
+	n := len(iq)
+	nOut := n / decim
+	if nOut <= 0 {
+		return nil, 0, errors.New("not enough output samples after decimation")
+	}
+
+	bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
+	if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
+		return nil, 0, errors.New("cudaMemcpy H2D failed")
+	}
+
+	phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
+	if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
+		return nil, 0, errors.New("gpu freq shift failed")
+	}
+	if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
+		return nil, 0, errors.New("gpu FIR failed")
+	}
+	if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
+		return nil, 0, errors.New("gpu decimate failed")
+	}
+	if C.gpud_device_sync() != C.cudaSuccess {
+		return nil, 0, errors.New("cudaDeviceSynchronize failed")
+	}
+	out := make([]complex64, nOut)
+	outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
+	if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
+		return nil, 0, errors.New("cudaMemcpy D2H failed")
+	}
+	// Carry the oscillator phase into the next call, wrapped to one turn so it
+	// does not grow without bound and lose precision on long-running streams.
+	e.phase = math.Mod(e.phase+phaseInc*float64(n), 2*math.Pi)
+	e.lastShiftUsedGPU = true
+	e.lastFIRUsedGPU = true
+	e.lastDecimUsedGPU = true
+	return out, e.sampleRate / decim, nil
+}
+
 func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	if e == nil {
 		return nil, 0, errors.New("nil CUDA demod engine")