Add GPU shift-filter-decimate path for signal extraction

пре 1 месец · 216dd7fcfd
--- a/cmd/sdrd/helpers.go
+++ b/cmd/sdrd/helpers.go
@@ -6,6 +6,7 @@ import (
 	"time"

 	"sdr-visual-suite/internal/config"
 	"sdr-visual-suite/internal/demod/gpudemod"
 	"sdr-visual-suite/internal/dsp"
 )

@@ -57,6 +58,18 @@ func extractSignalIQ(iq []complex64, sampleRate int, centerHz float64, sigHz flo
 		return nil
 	}
 	offset := sigHz - centerHz
 	decimTarget := 200000
 	if decimTarget <= 0 {
 		decimTarget = sampleRate
 	}
 	if gpudemod.Available() {
 		if eng, err := gpudemod.New(len(iq), sampleRate); err == nil {
 			defer eng.Close()
 			if out, _, err := eng.ShiftFilterDecimate(iq, offset, bwHz, decimTarget); err == nil && len(out) > 0 {
 				return out
 			}
 		}
 	}
 	shifted := dsp.FreqShift(iq, sampleRate, offset)
 	cutoff := bwHz / 2
 	if cutoff < 200 {
@@ -67,7 +80,7 @@ func extractSignalIQ(iq []complex64, sampleRate int, centerHz float64, sigHz flo
 	}
 	taps := dsp.LowpassFIR(cutoff, sampleRate, 101)
 	filtered := dsp.ApplyFIR(shifted, taps)
 	decim := sampleRate / 200000
 	decim := sampleRate / decimTarget
 	if decim < 1 {
 		decim = 1
 	}
@@ -89,4 +102,3 @@ func parseSince(raw string) (time.Time, error) {
 	}
 	return time.Parse(time.RFC3339, raw)
 }

--- a/internal/demod/gpudemod/gpudemod.go
+++ b/internal/demod/gpudemod/gpudemod.go
@@ -322,6 +322,33 @@ func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float3
 	return out, true
 }

 // ShiftFilterDecimate mixes iq by offsetHz, low-pass filters the result and
 // decimates it towards outRate using the dsp package helpers.
 //
 // The filter is a 101-tap low-pass whose cutoff is bw/2, raised to at least
 // 200 and then capped at sampleRate/2 - 1. The decimation factor is the
 // engine sample rate divided by outRate, rounded to the nearest integer and
 // never less than 1.
 //
 // It returns the decimated samples together with the resulting sample rate
 // (engine sample rate divided by the decimation factor). A nil engine or a
 // non-positive outRate yields an error; an empty iq yields (nil, 0, nil).
 func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
 	switch {
 	case e == nil:
 		return nil, 0, errors.New("nil CUDA demod engine")
 	case len(iq) == 0:
 		return nil, 0, nil
 	case outRate <= 0:
 		return nil, 0, errors.New("invalid output sample rate")
 	}

 	rate := float64(e.sampleRate)
 	// Floor first, then cap: the cap wins when the two limits conflict.
 	passband := math.Min(math.Max(bw/2, 200), rate/2-1)

 	mixed := dsp.FreqShift(iq, e.sampleRate, offsetHz)
 	lowpassed := dsp.ApplyFIR(mixed, dsp.LowpassFIR(passband, e.sampleRate, 101))

 	factor := int(math.Round(rate / float64(outRate)))
 	if factor < 1 {
 		factor = 1
 	}
 	return dsp.Decimate(lowpassed, factor), e.sampleRate / factor, nil
 }

 // DemodFused forwards its arguments unchanged to Demod and returns whatever
 // Demod returns.
 func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	audio, rate, err := e.Demod(iq, offsetHz, bw, mode)
 	return audio, rate, err
 }
--- a/internal/demod/gpudemod/gpudemod_stub.go
+++ b/internal/demod/gpudemod/gpudemod_stub.go
@@ -32,6 +32,10 @@ func (e *Engine) SetFIR(taps []float32) {}
 // LastShiftUsedGPU unconditionally returns false.
 func (e *Engine) LastShiftUsedGPU() bool {
 	return false
 }
 // LastDemodUsedGPU unconditionally returns false.
 func (e *Engine) LastDemodUsedGPU() bool {
 	return false
 }

 // ShiftFilterDecimate performs no processing: it ignores its arguments and
 // always returns a nil slice, a zero rate and an error stating that CUDA
 // demodulation is not available.
 func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
 	unavailable := errors.New("CUDA demod not available: cufft build tag not enabled")
 	return nil, 0, unavailable
 }

 // Demod performs no processing: it ignores its arguments and always returns
 // a nil slice, a zero rate and an error stating that CUDA demodulation is not
 // available.
 func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	unavailable := errors.New("CUDA demod not available: cufft build tag not enabled")
 	return nil, 0, unavailable
 }
--- a/internal/demod/gpudemod/gpudemod_windows.go
+++ b/internal/demod/gpudemod/gpudemod_windows.go
@@ -393,6 +393,84 @@ func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float3
 	return out, true
 }

 // ShiftFilterDecimate copies iq into the engine's device input buffer, then
 // launches the frequency-shift, FIR and decimation kernels in sequence and
 // copies the decimated samples back to the host.
 //
 // offsetHz is the frequency offset to mix out, bw is the signal bandwidth
 // used to derive the low-pass cutoff (bw/2, at least 200 and below Nyquist),
 // and outRate is the desired output sample rate. The decimation factor is the
 // engine sample rate divided by outRate, rounded to the nearest integer and
 // never less than 1.
 //
 // It returns the decimated samples and the resulting sample rate (engine
 // sample rate divided by the decimation factor). An empty iq yields
 // (nil, 0, nil). An error is returned when the engine is nil or not
 // initialized, when iq exceeds the engine capacity, when outRate is not
 // positive, when no output sample would remain after decimation, or when any
 // device call reports failure.
 //
 // The lastShiftUsedGPU, lastFIRUsedGPU and lastDecimUsedGPU flags are cleared
 // on entry and set only after every device step has succeeded;
 // lastDemodUsedGPU is cleared and left false. The engine's running phase is
 // advanced only on success.
 func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
 	if e == nil {
 		return nil, 0, errors.New("nil CUDA demod engine")
 	}
 	if !e.cudaReady {
 		return nil, 0, errors.New("cuda demod engine is not initialized")
 	}
 	if len(iq) == 0 {
 		return nil, 0, nil
 	}
 	if len(iq) > e.maxSamples {
 		return nil, 0, errors.New("sample count exceeds engine capacity")
 	}
 	if outRate <= 0 {
 		return nil, 0, errors.New("invalid output sample rate")
 	}
 	e.lastShiftUsedGPU = false
 	e.lastFIRUsedGPU = false
 	e.lastDecimUsedGPU = false
 	e.lastDemodUsedGPU = false

 	cutoff := bw / 2
 	if cutoff < 200 {
 		cutoff = 200
 	}
 	// Keep the cutoff strictly below Nyquist so the filter design stays valid
 	// for wide bandwidths; this mirrors the clamp applied by the dsp-based
 	// ShiftFilterDecimate implementation of this package.
 	if nyquist := float64(e.sampleRate)/2 - 1; cutoff > nyquist {
 		cutoff = nyquist
 	}
 	// NOTE(review): when e.firTaps is already populated the cached taps are
 	// reused and the cutoff computed above is not applied. Confirm callers
 	// expect the previously configured filter to win over bw.
 	taps := e.firTaps
 	if len(taps) == 0 {
 		designed := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
 		taps = make([]float32, len(designed))
 		for i, v := range designed {
 			taps[i] = float32(v)
 		}
 		e.SetFIR(taps)
 	}
 	if len(taps) == 0 {
 		return nil, 0, errors.New("no FIR taps configured")
 	}

 	decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
 	if decim < 1 {
 		decim = 1
 	}
 	n := len(iq)
 	nOut := n / decim
 	if nOut <= 0 {
 		return nil, 0, errors.New("not enough output samples after decimation")
 	}

 	bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
 	if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
 		return nil, 0, errors.New("cudaMemcpy H2D failed")
 	}

 	phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
 	if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
 		return nil, 0, errors.New("gpu freq shift failed")
 	}
 	if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
 		return nil, 0, errors.New("gpu FIR failed")
 	}
 	if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
 		return nil, 0, errors.New("gpu decimate failed")
 	}
 	if C.gpud_device_sync() != C.cudaSuccess {
 		return nil, 0, errors.New("cudaDeviceSynchronize failed")
 	}
 	out := make([]complex64, nOut)
 	outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
 	if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
 		return nil, 0, errors.New("cudaMemcpy D2H failed")
 	}
 	// Wrap the running phase to within one turn: the oscillator is 2π-periodic,
 	// so this is equivalent while keeping the accumulator small enough that
 	// floating-point precision does not degrade over long runs.
 	e.phase = math.Mod(e.phase+phaseInc*float64(n), 2*math.Pi)
 	e.lastShiftUsedGPU = true
 	e.lastFIRUsedGPU = true
 	e.lastDecimUsedGPU = true
 	return out, e.sampleRate / decim, nil
 }

 func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	if e == nil {
 		return nil, 0, errors.New("nil CUDA demod engine")