diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj
index f4b6d9c..062d18f 100644
Binary files a/internal/demod/gpudemod/build/kernels.obj and b/internal/demod/gpudemod/build/kernels.obj differ
diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go
index ed4749b..0b68ba6 100644
--- a/internal/demod/gpudemod/gpudemod.go
+++ b/internal/demod/gpudemod/gpudemod.go
@@ -274,6 +274,10 @@ func (e *Engine) Close() {
 		_ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
 		e.dShifted = nil
 	}
+	if e.dDecimated != nil {
+		_ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
+		e.dDecimated = nil
+	}
 	if e.dAudio != nil {
 		_ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
 		e.dAudio = nil
diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu
index c7c6288..5250a91 100644
--- a/internal/demod/gpudemod/kernels.cu
+++ b/internal/demod/gpudemod/kernels.cu
@@ -60,3 +60,37 @@ extern "C" int gpud_launch_fm_discrim_cuda(
     gpud_fm_discrim_kernel<<<grid, block>>>(in, out, n);
     return (int)cudaGetLastError();
 }
+
+// Decimation kernel: each thread copies one output sample, taking every
+// `factor`-th element of `in`. No filtering is applied here.
+// Launch layout: 1-D grid of 1-D blocks covering at least n_out threads.
+// Precondition: `in` must hold at least (n_out - 1) * factor + 1 elements;
+// the kernel has no way to check the input length.
+extern "C" __global__ void gpud_decimate_kernel(
+    const float2* __restrict__ in,
+    float2* __restrict__ out,
+    int n_out,
+    int factor
+) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx >= n_out) return;  // tail threads of the last block do nothing
+    // Widen before multiplying so idx * factor cannot overflow a 32-bit int.
+    out[idx] = in[(size_t)idx * (size_t)factor];
+}
+
+// Host-side launcher for gpud_decimate_kernel on the default stream.
+// Returns 0 without launching when n_out or factor is non-positive;
+// otherwise returns the cudaGetLastError() code cast to int (this reports
+// launch errors only; it does not synchronize with the kernel).
+extern "C" int gpud_launch_decimate_cuda(
+    const float2* in,
+    float2* out,
+    int n_out,
+    int factor
+) {
+    if (n_out <= 0 || factor <= 0) return 0;
+    const int block = 256;
+    const int grid = (n_out + block - 1) / block;
+    gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor);
+    return (int)cudaGetLastError();
+}