| @@ -190,6 +190,52 @@ func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) { | |||
| return out, true | |||
| } | |||
| func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) { | |||
| if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { | |||
| return nil, false | |||
| } | |||
| iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) | |||
| if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 { | |||
| return nil, false | |||
| } | |||
| if C.gpud_device_sync() != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| out := make([]float32, len(shifted)) | |||
| outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) | |||
| if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| return out, true | |||
| } | |||
| func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) { | |||
| if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { | |||
| return nil, false | |||
| } | |||
| iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) | |||
| if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate) | |||
| if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 { | |||
| return nil, false | |||
| } | |||
| if C.gpud_device_sync() != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| out := make([]float32, len(shifted)) | |||
| outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) | |||
| if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { | |||
| return nil, false | |||
| } | |||
| e.bfoPhase += phaseInc * float64(len(shifted)) | |||
| return out, true | |||
| } | |||
| func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | |||
| if e == nil { | |||
| return nil, 0, errors.New("nil CUDA demod engine") | |||
| @@ -240,6 +286,7 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT | |||
| decim = 1 | |||
| } | |||
| dec := dsp.Decimate(filtered, decim) | |||
| e.lastDecimUsedGPU = false | |||
| inputRate := e.sampleRate / decim | |||
| switch mode { | |||
| @@ -30,6 +30,7 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { | |||
| func (e *Engine) SetFIR(taps []float32) {} | |||
| func (e *Engine) LastShiftUsedGPU() bool { return false } | |||
| func (e *Engine) LastDemodUsedGPU() bool { return false } | |||
| func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | |||
| return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled") | |||
| @@ -127,3 +127,56 @@ extern "C" int gpud_launch_decimate_cuda( | |||
| gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor); | |||
| return (int)cudaGetLastError(); | |||
| } | |||
| extern "C" __global__ void gpud_am_envelope_kernel( | |||
| const float2* __restrict__ in, | |||
| float* __restrict__ out, | |||
| int n | |||
| ) { | |||
| int idx = blockIdx.x * blockDim.x + threadIdx.x; | |||
| if (idx >= n) return; | |||
| float2 v = in[idx]; | |||
| out[idx] = sqrtf(v.x * v.x + v.y * v.y); | |||
| } | |||
| extern "C" int gpud_launch_am_envelope_cuda( | |||
| const float2* in, | |||
| float* out, | |||
| int n | |||
| ) { | |||
| if (n <= 0) return 0; | |||
| const int block = 256; | |||
| const int grid = (n + block - 1) / block; | |||
| gpud_am_envelope_kernel<<<grid, block>>>(in, out, n); | |||
| return (int)cudaGetLastError(); | |||
| } | |||
| extern "C" __global__ void gpud_ssb_product_kernel( | |||
| const float2* __restrict__ in, | |||
| float* __restrict__ out, | |||
| int n, | |||
| double phase_inc, | |||
| double phase_start | |||
| ) { | |||
| int idx = blockIdx.x * blockDim.x + threadIdx.x; | |||
| if (idx >= n) return; | |||
| double phase = phase_start + phase_inc * (double)idx; | |||
| float si, co; | |||
| sincosf((float)phase, &si, &co); | |||
| float2 v = in[idx]; | |||
| out[idx] = v.x * co - v.y * si; | |||
| } | |||
| extern "C" int gpud_launch_ssb_product_cuda( | |||
| const float2* in, | |||
| float* out, | |||
| int n, | |||
| double phase_inc, | |||
| double phase_start | |||
| ) { | |||
| if (n <= 0) return 0; | |||
| const int block = 256; | |||
| const int grid = (n + block - 1) / block; | |||
| gpud_ssb_product_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start); | |||
| return (int)cudaGetLastError(); | |||
| } | |||
| @@ -0,0 +1,42 @@ | |||
| //go:build cufft | |||
| package gpudemod | |||
| import ( | |||
| "math/cmplx" | |||
| "sdr-visual-suite/internal/dsp" | |||
| ) | |||
| func ValidateFIR(iq []complex64, taps []float32, filtered []complex64, tol float64) bool { | |||
| if len(iq) != len(filtered) { | |||
| return false | |||
| } | |||
| ftaps := make([]float64, len(taps)) | |||
| for i, v := range taps { | |||
| ftaps[i] = float64(v) | |||
| } | |||
| ref := dsp.ApplyFIR(iq, ftaps) | |||
| for i := range ref { | |||
| if cmplx.Abs(complex128(ref[i]-filtered[i])) > tol { | |||
| return false | |||
| } | |||
| } | |||
| return true | |||
| } | |||
| func ValidateDecimate(iq []complex64, factor int, decimated []complex64, tol float64) bool { | |||
| if factor <= 0 { | |||
| return false | |||
| } | |||
| ref := dsp.Decimate(iq, factor) | |||
| if len(ref) != len(decimated) { | |||
| return false | |||
| } | |||
| for i := range ref { | |||
| if cmplx.Abs(complex128(ref[i]-decimated[i])) > tol { | |||
| return false | |||
| } | |||
| } | |||
| return true | |||
| } | |||
| @@ -0,0 +1,22 @@ | |||
| //go:build cufft | |||
| package gpudemod | |||
| import "testing" | |||
| func TestValidateDecimateRejectsBadLength(t *testing.T) { | |||
| iq := []complex64{1 + 0i, 2 + 0i, 3 + 0i, 4 + 0i} | |||
| out := []complex64{1 + 0i} | |||
| if ValidateDecimate(iq, 2, out, 1e-6) { | |||
| t.Fatal("expected decimate validation to fail on bad length") | |||
| } | |||
| } | |||
| func TestValidateFIRRejectsBadLength(t *testing.T) { | |||
| iq := []complex64{1 + 0i, 2 + 0i} | |||
| taps := []float32{1} | |||
| out := []complex64{1 + 0i} | |||
| if ValidateFIR(iq, taps, out, 1e-6) { | |||
| t.Fatal("expected FIR validation to fail on bad length") | |||
| } | |||
| } | |||
| @@ -54,6 +54,9 @@ func (m *Manager) demodAndWrite(dir string, ev detector.Event, iq []complex64, f | |||
| if m.gpuDemod.LastShiftUsedGPU() { | |||
| log.Printf("gpudemod: validated GPU freq-shift used for event %d (%s)", ev.ID, name) | |||
| } | |||
| if m.gpuDemod.LastDemodUsedGPU() { | |||
| log.Printf("gpudemod: GPU demod stage used for event %d (%s)", ev.ID, name) | |||
| } | |||
| } | |||
| } | |||
| } | |||