diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj index 808846a..22daf7a 100644 Binary files a/internal/demod/gpudemod/build/kernels.obj and b/internal/demod/gpudemod/build/kernels.obj differ diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index cb0b084..6703784 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -190,6 +190,52 @@ func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) { return out, true } +func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) { + if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { + return nil, false + } + iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]float32, len(shifted)) + outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) { + if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { + return nil, false + } + iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate) + if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]float32, len(shifted)) + outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { + return nil, false + } + e.bfoPhase += phaseInc * float64(len(shifted)) + return out, true +} + func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { if e == nil { return nil, 0, errors.New("nil CUDA demod engine") @@ -240,6 +286,7 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT decim = 1 } dec := dsp.Decimate(filtered, decim) + e.lastDecimUsedGPU = false inputRate := e.sampleRate / decim switch mode { diff --git a/internal/demod/gpudemod/gpudemod_stub.go b/internal/demod/gpudemod/gpudemod_stub.go index 86ca9b1..b201f1a 100644 --- a/internal/demod/gpudemod/gpudemod_stub.go +++ b/internal/demod/gpudemod/gpudemod_stub.go @@ -30,6 +30,7 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { func (e *Engine) SetFIR(taps []float32) {} func (e *Engine) LastShiftUsedGPU() bool { return false } +func (e *Engine) LastDemodUsedGPU() bool { return false } func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled") diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu index d29ed54..f308b8d 100644 --- a/internal/demod/gpudemod/kernels.cu +++ b/internal/demod/gpudemod/kernels.cu @@ -127,3 +127,56 @@ extern "C" int gpud_launch_decimate_cuda( gpud_decimate_kernel<<>>(in, out, n_out, factor); return (int)cudaGetLastError(); } + +extern "C" __global__ void gpud_am_envelope_kernel( + const float2* __restrict__ in, + float* __restrict__ out, + int n +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + float2 v = in[idx]; + out[idx] = sqrtf(v.x * v.x + v.y * v.y); +} + +extern "C" int gpud_launch_am_envelope_cuda( + const float2* in, + float* out, + int n +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_am_envelope_kernel<<>>(in, out, n); + return (int)cudaGetLastError(); +} + +extern "C" __global__ void gpud_ssb_product_kernel( + const float2* __restrict__ in, + float* __restrict__ out, + int n, + double phase_inc, + double phase_start +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + double phase = phase_start + phase_inc * (double)idx; + float si, co; + sincosf((float)phase, &si, &co); + float2 v = in[idx]; + out[idx] = v.x * co - v.y * si; +} + +extern "C" int gpud_launch_ssb_product_cuda( + const float2* in, + float* out, + int n, + double phase_inc, + double phase_start +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_ssb_product_kernel<<>>(in, out, n, phase_inc, phase_start); + return (int)cudaGetLastError(); +} diff --git a/internal/demod/gpudemod/validation_extra.go b/internal/demod/gpudemod/validation_extra.go new file mode 100644 index 0000000..5c164f8 --- /dev/null +++ b/internal/demod/gpudemod/validation_extra.go @@ -0,0 +1,42 @@ +//go:build cufft + +package gpudemod + +import ( + "math/cmplx" + + "sdr-visual-suite/internal/dsp" +) + +func ValidateFIR(iq []complex64, taps []float32, filtered []complex64, tol float64) bool { + if len(iq) != len(filtered) { + return false + } + ftaps := make([]float64, len(taps)) + for i, v := range taps { + ftaps[i] = float64(v) + } + ref := dsp.ApplyFIR(iq, ftaps) + for i := range ref { + if cmplx.Abs(complex128(ref[i]-filtered[i])) > tol { + return false + } + } + return true +} + +func ValidateDecimate(iq []complex64, factor int, decimated []complex64, tol float64) bool { + if factor <= 0 { + return false + } + ref := dsp.Decimate(iq, factor) + if len(ref) != len(decimated) { + return false + } + for i := range ref { + if cmplx.Abs(complex128(ref[i]-decimated[i])) > tol { + return false + } + } + return true +} diff --git a/internal/demod/gpudemod/validation_extra_test.go b/internal/demod/gpudemod/validation_extra_test.go new file mode 100644 index 0000000..e192acd --- /dev/null +++ b/internal/demod/gpudemod/validation_extra_test.go @@ -0,0 +1,22 @@ +//go:build cufft + +package gpudemod + +import "testing" + +func TestValidateDecimateRejectsBadLength(t *testing.T) { + iq := []complex64{1 + 0i, 2 + 0i, 3 + 0i, 4 + 0i} + out := []complex64{1 + 0i} + if ValidateDecimate(iq, 2, out, 1e-6) { + t.Fatal("expected decimate validation to fail on bad length") + } +} + +func TestValidateFIRRejectsBadLength(t *testing.T) { + iq := []complex64{1 + 0i, 2 + 0i} + taps := []float32{1} + out := []complex64{1 + 0i} + if ValidateFIR(iq, taps, out, 1e-6) { + t.Fatal("expected FIR validation to fail on bad length") + } +} diff --git a/internal/recorder/demod.go b/internal/recorder/demod.go index b3caaa6..c937eb8 100644 --- a/internal/recorder/demod.go +++ b/internal/recorder/demod.go @@ -54,6 +54,9 @@ func (m *Manager) demodAndWrite(dir string, ev detector.Event, iq []complex64, f if m.gpuDemod.LastShiftUsedGPU() { log.Printf("gpudemod: validated GPU freq-shift used for event %d (%s)", ev.ID, name) } + if m.gpuDemod.LastDemodUsedGPU() { + log.Printf("gpudemod: GPU demod stage used for event %d (%s)", ev.ID, name) + } } } }