diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj index 2037977..ecf18a0 100644 Binary files a/internal/demod/gpudemod/build/kernels.obj and b/internal/demod/gpudemod/build/kernels.obj differ diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index 0664ab5..3687fb5 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -30,11 +30,10 @@ static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); } +extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start); + static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { - // TODO(phase2): replace with real CUDA kernel launch. - // Phase 1b keeps the launch boundary in place without pretending acceleration. - (void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start; - return -1; + return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start); } static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { @@ -138,22 +137,27 @@ func phaseStatus() string { return "phase1b-launch-boundary" } -func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool { +func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) { if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { - return false + return nil, false } - if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess { - return false + bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess { + return nil, false } phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate) if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 { - return false + return nil, false } if C.gpud_device_sync() != C.cudaSuccess { - return false + return nil, false + } + out := make([]complex64, len(iq)) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess { + return nil, false } e.phase += phaseInc * float64(len(iq)) - return true + return out, true } func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu index eeef3ce..2625a77 100644 --- a/internal/demod/gpudemod/kernels.cu +++ b/internal/demod/gpudemod/kernels.cu @@ -19,3 +19,17 @@ extern "C" __global__ void gpud_freq_shift_kernel( out[idx].x = v.x * co - v.y * si; out[idx].y = v.x * si + v.y * co; } + +extern "C" int gpud_launch_freq_shift_cuda( + const float2* in, + float2* out, + int n, + double phase_inc, + double phase_start +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_freq_shift_kernel<<>>(in, out, n, phase_inc, phase_start); + return (int)cudaGetLastError(); +} diff --git a/internal/demod/gpudemod/validation.go b/internal/demod/gpudemod/validation.go new file mode 100644 index 0000000..841d029 --- /dev/null +++ b/internal/demod/gpudemod/validation.go @@ -0,0 +1,25 @@ +//go:build cufft + +package gpudemod + +import ( + "math/cmplx" + + "sdr-visual-suite/internal/dsp" +) + +// ValidateFreqShift compares a candidate shifted IQ stream against the CPU DSP +// reference. This is intended for bring-up while the first real CUDA launch path +// is being wired in. +func ValidateFreqShift(iq []complex64, sampleRate int, offsetHz float64, shifted []complex64, tol float64) bool { + if len(iq) != len(shifted) { + return false + } + ref := dsp.FreqShift(iq, sampleRate, offsetHz) + for i := range ref { + if cmplx.Abs(complex128(ref[i]-shifted[i])) > tol { + return false + } + } + return true +}