From f5ab4148479769f9ffa2e5608a5c10d5346a1c58 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 07:52:10 +0100 Subject: [PATCH] feat: prepare CUDA demod launch boundary --- internal/demod/gpudemod/gpudemod.go | 128 +++++++++++++++++- .../demod/gpudemod/gpudemod_cufft_test.go | 11 ++ internal/demod/gpudemod/gpudemod_test.go | 2 + 3 files changed, 134 insertions(+), 7 deletions(-) create mode 100644 internal/demod/gpudemod/gpudemod_cufft_test.go diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index a5e6d72..8e63f5a 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -6,12 +6,49 @@ package gpudemod #cgo windows LDFLAGS: -lcufft64_12 -lcudart64_13 #include #include + +typedef struct { float x; float y; } gpud_float2; + +static int gpud_cuda_malloc(void **ptr, size_t bytes) { + return (int)cudaMalloc(ptr, bytes); +} + +static int gpud_cuda_free(void *ptr) { + return (int)cudaFree(ptr); +} + +static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { + return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); +} + +static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { + return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); +} + +static int gpud_device_sync() { + return (int)cudaDeviceSynchronize(); +} + +static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { + // TODO(phase2): replace with real CUDA kernel launch. + // Phase 1b keeps the launch boundary in place without pretending acceleration. + (void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start; + return -1; +} + +static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { + // TODO(phase2): replace with real CUDA kernel launch. + (void)in; (void)out; (void)n; + return -1; +} */ import "C" import ( "errors" + "fmt" "math" + "unsafe" "sdr-visual-suite/internal/demod" "sdr-visual-suite/internal/dsp" @@ -34,9 +71,21 @@ type Engine struct { phase float64 bfoPhase float64 firTaps []float32 + cudaReady bool + dIQIn *C.gpud_float2 + dShifted *C.gpud_float2 + dAudio *C.float + iqBytes C.size_t + audioBytes C.size_t } -func Available() bool { return true } +func Available() bool { + var count C.int + if C.cudaGetDeviceCount(&count) != C.cudaSuccess { + return false + } + return count > 0 +} func New(maxSamples int, sampleRate int) (*Engine, error) { if maxSamples <= 0 { @@ -45,7 +94,35 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { if sampleRate <= 0 { return nil, errors.New("invalid sampleRate") } - return &Engine{maxSamples: maxSamples, sampleRate: sampleRate}, nil + if !Available() { + return nil, errors.New("cuda device not available") + } + e := &Engine{ + maxSamples: maxSamples, + sampleRate: sampleRate, + cudaReady: true, + iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})), + audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))), + } + var ptr unsafe.Pointer + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dIQIn failed") + } + e.dIQIn = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dShifted failed") + } + e.dShifted = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dAudio failed") + } + e.dAudio = (*C.float)(ptr) + return e, nil } func (e *Engine) SetFIR(taps []float32) { @@ -56,10 +133,35 @@ func (e *Engine) SetFIR(taps []float32) { e.firTaps = append(e.firTaps[:0], taps...) } +func phaseStatus() string { + return "phase1b-launch-boundary" +} + +func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool { + if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { + return false + } + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess { + return false + } + phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate) + if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 { + return false + } + if C.gpud_device_sync() != C.cudaSuccess { + return false + } + e.phase += phaseInc * float64(len(iq)) + return true +} + func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { if e == nil { return nil, 0, errors.New("nil CUDA demod engine") } + if !e.cudaReady { + return nil, 0, errors.New("cuda demod engine is not initialized") + } if len(iq) == 0 { return nil, 0, nil } @@ -70,12 +172,11 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only") } - // Phase 1b note: - // This package now performs real CUDA availability gating and keeps the - // runtime/CGO boundary in place, but still intentionally falls back to the - // existing CPU DSP math for signal processing. The next phase should replace - // the FreqShift + FM discriminator sections below with actual kernel launches. + // Real CUDA boundary is now present. If the launch wrappers are not yet backed + // by actual kernels, we fall back to the existing CPU DSP path below. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz) + _ = e.tryCUDAFreqShift(iq, offsetHz) + shifted := dsp.FreqShift(iq, e.sampleRate, offsetHz) cutoff := bw / 2 if cutoff < 200 { @@ -102,5 +203,18 @@ func (e *Engine) Close() { if e == nil { return } + if e.dIQIn != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn)) + e.dIQIn = nil + } + if e.dShifted != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) + e.dShifted = nil + } + if e.dAudio != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio)) + e.dAudio = nil + } e.firTaps = nil + e.cudaReady = false } diff --git a/internal/demod/gpudemod/gpudemod_cufft_test.go b/internal/demod/gpudemod/gpudemod_cufft_test.go new file mode 100644 index 0000000..ce8f4fb --- /dev/null +++ b/internal/demod/gpudemod/gpudemod_cufft_test.go @@ -0,0 +1,11 @@ +//go:build cufft + +package gpudemod + +import "testing" + +func TestDemodTypeConstantsExist(t *testing.T) { + if DemodNFM != 0 { + t.Fatal("expected DemodNFM constant to be defined") + } +} diff --git a/internal/demod/gpudemod/gpudemod_test.go b/internal/demod/gpudemod/gpudemod_test.go index dd90763..05a945a 100644 --- a/internal/demod/gpudemod/gpudemod_test.go +++ b/internal/demod/gpudemod/gpudemod_test.go @@ -1,3 +1,5 @@ +//go:build !cufft + package gpudemod import "testing"