Преглед изворни кода

feat: add CUDA FIR preparation path

master
Jan Svabenik пре 2 дана
родитељ
комит
8033e14238
3 измењених фајлова са 80 додато и 1 уклоњено
  1. BIN
      internal/demod/gpudemod/build/kernels.obj
  2. +37
    -1
      internal/demod/gpudemod/gpudemod.go
  3. +43
    -0
      internal/demod/gpudemod/kernels.cu

BIN
internal/demod/gpudemod/build/kernels.obj Прегледај датотеку


+ 37
- 1
internal/demod/gpudemod/gpudemod.go Прегледај датотеку

@@ -229,8 +229,12 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT
if len(taps) == 0 {
base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
taps = append(make([]float32, 0, len(base)), base...)
e.SetFIR(taps)
}
filtered, ok := e.tryCUDAFIR(shifted, len(taps))
if !ok {
filtered = dsp.ApplyFIR(shifted, taps)
}
filtered := dsp.ApplyFIR(shifted, taps)
decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
if decim < 1 {
decim = 1
@@ -285,3 +289,35 @@ func (e *Engine) Close() {
e.firTaps = nil
e.cudaReady = false
}
odLSB:
return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
case DemodCW:
return demod.CW{}.Demod(dec, inputRate), inputRate, nil
default:
return nil, 0, errors.New("unsupported demod type")
}
}

func (e *Engine) Close() {
if e == nil {
return
}
if e.dIQIn != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
e.dIQIn = nil
}
if e.dShifted != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
e.dShifted = nil
}
if e.dDecimated != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
e.dDecimated = nil
}
if e.dAudio != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
e.dAudio = nil
}
e.firTaps = nil
e.cudaReady = false
}

+ 43
- 0
internal/demod/gpudemod/kernels.cu Прегледај датотеку

@@ -72,6 +72,49 @@ extern "C" __global__ void gpud_decimate_kernel(
out[idx] = in[idx * factor];
}

extern "C" __constant__ float gpud_fir_taps[256];

extern "C" __global__ void gpud_fir_kernel(
const float2* __restrict__ in,
float2* __restrict__ out,
int n,
int num_taps
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= n) return;

float acc_r = 0.0f;
float acc_i = 0.0f;
for (int k = 0; k < num_taps; ++k) {
int src = idx - k;
if (src < 0) break;
float2 v = in[src];
float t = gpud_fir_taps[k];
acc_r += v.x * t;
acc_i += v.y * t;
}
out[idx] = make_float2(acc_r, acc_i);
}

extern "C" int gpud_upload_fir_taps_cuda(const float* taps, int n) {
if (!taps || n <= 0 || n > 256) return -1;
cudaError_t err = cudaMemcpyToSymbol(gpud_fir_taps, taps, (size_t)n * sizeof(float));
return (int)err;
}

extern "C" int gpud_launch_fir_cuda(
const float2* in,
float2* out,
int n,
int num_taps
) {
if (n <= 0 || num_taps <= 0 || num_taps > 256) return 0;
const int block = 256;
const int grid = (n + block - 1) / block;
gpud_fir_kernel<<<grid, block>>>(in, out, n, num_taps);
return (int)cudaGetLastError();
}

extern "C" int gpud_launch_decimate_cuda(
const float2* in,
float2* out,


Loading…
Откажи
Сачувај