| @@ -190,6 +190,52 @@ func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) { | |||||
| return out, true | return out, true | ||||
| } | } | ||||
| func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) { | |||||
| if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { | |||||
| return nil, false | |||||
| } | |||||
| iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) | |||||
| if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 { | |||||
| return nil, false | |||||
| } | |||||
| if C.gpud_device_sync() != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| out := make([]float32, len(shifted)) | |||||
| outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) | |||||
| if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| return out, true | |||||
| } | |||||
| func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) { | |||||
| if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { | |||||
| return nil, false | |||||
| } | |||||
| iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) | |||||
| if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate) | |||||
| if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 { | |||||
| return nil, false | |||||
| } | |||||
| if C.gpud_device_sync() != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| out := make([]float32, len(shifted)) | |||||
| outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) | |||||
| if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { | |||||
| return nil, false | |||||
| } | |||||
| e.bfoPhase += phaseInc * float64(len(shifted)) | |||||
| return out, true | |||||
| } | |||||
| func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | ||||
| if e == nil { | if e == nil { | ||||
| return nil, 0, errors.New("nil CUDA demod engine") | return nil, 0, errors.New("nil CUDA demod engine") | ||||
| @@ -240,6 +286,7 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT | |||||
| decim = 1 | decim = 1 | ||||
| } | } | ||||
| dec := dsp.Decimate(filtered, decim) | dec := dsp.Decimate(filtered, decim) | ||||
| e.lastDecimUsedGPU = false | |||||
| inputRate := e.sampleRate / decim | inputRate := e.sampleRate / decim | ||||
| switch mode { | switch mode { | ||||
| @@ -30,6 +30,7 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { | |||||
| func (e *Engine) SetFIR(taps []float32) {} | func (e *Engine) SetFIR(taps []float32) {} | ||||
| func (e *Engine) LastShiftUsedGPU() bool { return false } | func (e *Engine) LastShiftUsedGPU() bool { return false } | ||||
| func (e *Engine) LastDemodUsedGPU() bool { return false } | |||||
| func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { | ||||
| return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled") | return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled") | ||||
| @@ -127,3 +127,56 @@ extern "C" int gpud_launch_decimate_cuda( | |||||
| gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor); | gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor); | ||||
| return (int)cudaGetLastError(); | return (int)cudaGetLastError(); | ||||
| } | } | ||||
| extern "C" __global__ void gpud_am_envelope_kernel( | |||||
| const float2* __restrict__ in, | |||||
| float* __restrict__ out, | |||||
| int n | |||||
| ) { | |||||
| int idx = blockIdx.x * blockDim.x + threadIdx.x; | |||||
| if (idx >= n) return; | |||||
| float2 v = in[idx]; | |||||
| out[idx] = sqrtf(v.x * v.x + v.y * v.y); | |||||
| } | |||||
| extern "C" int gpud_launch_am_envelope_cuda( | |||||
| const float2* in, | |||||
| float* out, | |||||
| int n | |||||
| ) { | |||||
| if (n <= 0) return 0; | |||||
| const int block = 256; | |||||
| const int grid = (n + block - 1) / block; | |||||
| gpud_am_envelope_kernel<<<grid, block>>>(in, out, n); | |||||
| return (int)cudaGetLastError(); | |||||
| } | |||||
| extern "C" __global__ void gpud_ssb_product_kernel( | |||||
| const float2* __restrict__ in, | |||||
| float* __restrict__ out, | |||||
| int n, | |||||
| double phase_inc, | |||||
| double phase_start | |||||
| ) { | |||||
| int idx = blockIdx.x * blockDim.x + threadIdx.x; | |||||
| if (idx >= n) return; | |||||
| double phase = phase_start + phase_inc * (double)idx; | |||||
| float si, co; | |||||
| sincosf((float)phase, &si, &co); | |||||
| float2 v = in[idx]; | |||||
| out[idx] = v.x * co - v.y * si; | |||||
| } | |||||
| extern "C" int gpud_launch_ssb_product_cuda( | |||||
| const float2* in, | |||||
| float* out, | |||||
| int n, | |||||
| double phase_inc, | |||||
| double phase_start | |||||
| ) { | |||||
| if (n <= 0) return 0; | |||||
| const int block = 256; | |||||
| const int grid = (n + block - 1) / block; | |||||
| gpud_ssb_product_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start); | |||||
| return (int)cudaGetLastError(); | |||||
| } | |||||
| @@ -0,0 +1,42 @@ | |||||
| //go:build cufft | |||||
| package gpudemod | |||||
| import ( | |||||
| "math/cmplx" | |||||
| "sdr-visual-suite/internal/dsp" | |||||
| ) | |||||
| func ValidateFIR(iq []complex64, taps []float32, filtered []complex64, tol float64) bool { | |||||
| if len(iq) != len(filtered) { | |||||
| return false | |||||
| } | |||||
| ftaps := make([]float64, len(taps)) | |||||
| for i, v := range taps { | |||||
| ftaps[i] = float64(v) | |||||
| } | |||||
| ref := dsp.ApplyFIR(iq, ftaps) | |||||
| for i := range ref { | |||||
| if cmplx.Abs(complex128(ref[i]-filtered[i])) > tol { | |||||
| return false | |||||
| } | |||||
| } | |||||
| return true | |||||
| } | |||||
| func ValidateDecimate(iq []complex64, factor int, decimated []complex64, tol float64) bool { | |||||
| if factor <= 0 { | |||||
| return false | |||||
| } | |||||
| ref := dsp.Decimate(iq, factor) | |||||
| if len(ref) != len(decimated) { | |||||
| return false | |||||
| } | |||||
| for i := range ref { | |||||
| if cmplx.Abs(complex128(ref[i]-decimated[i])) > tol { | |||||
| return false | |||||
| } | |||||
| } | |||||
| return true | |||||
| } | |||||
| @@ -0,0 +1,22 @@ | |||||
| //go:build cufft | |||||
| package gpudemod | |||||
| import "testing" | |||||
| func TestValidateDecimateRejectsBadLength(t *testing.T) { | |||||
| iq := []complex64{1 + 0i, 2 + 0i, 3 + 0i, 4 + 0i} | |||||
| out := []complex64{1 + 0i} | |||||
| if ValidateDecimate(iq, 2, out, 1e-6) { | |||||
| t.Fatal("expected decimate validation to fail on bad length") | |||||
| } | |||||
| } | |||||
| func TestValidateFIRRejectsBadLength(t *testing.T) { | |||||
| iq := []complex64{1 + 0i, 2 + 0i} | |||||
| taps := []float32{1} | |||||
| out := []complex64{1 + 0i} | |||||
| if ValidateFIR(iq, taps, out, 1e-6) { | |||||
| t.Fatal("expected FIR validation to fail on bad length") | |||||
| } | |||||
| } | |||||
| @@ -54,6 +54,9 @@ func (m *Manager) demodAndWrite(dir string, ev detector.Event, iq []complex64, f | |||||
| if m.gpuDemod.LastShiftUsedGPU() { | if m.gpuDemod.LastShiftUsedGPU() { | ||||
| log.Printf("gpudemod: validated GPU freq-shift used for event %d (%s)", ev.ID, name) | log.Printf("gpudemod: validated GPU freq-shift used for event %d (%s)", ev.ID, name) | ||||
| } | } | ||||
| if m.gpuDemod.LastDemodUsedGPU() { | |||||
| log.Printf("gpudemod: GPU demod stage used for event %d (%s)", ev.ID, name) | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||