ソースを参照

feat: extend CUDA demod telemetry and FM kernel prep

master
Jan Svabenik 2日前
コミット
123207f977
5個のファイルの変更61行の追加17行の削除
  1. バイナリ
      internal/demod/gpudemod/build/kernels.obj
  2. +25
    -15
      internal/demod/gpudemod/gpudemod.go
  3. +5
    -2
      internal/demod/gpudemod/gpudemod_stub.go
  4. +27
    -0
      internal/demod/gpudemod/kernels.cu
  5. +4
    -0
      internal/recorder/demod.go

バイナリ
internal/demod/gpudemod/build/kernels.obj ファイルの表示


+ 25
- 15
internal/demod/gpudemod/gpudemod.go ファイルの表示

@@ -36,10 +36,10 @@ static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, doub
return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
}

extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);

static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
// TODO(phase2): replace with real CUDA kernel launch.
(void)in; (void)out; (void)n;
return -1;
return gpud_launch_fm_discrim_cuda(in, out, n);
}
*/
import "C"
@@ -66,17 +66,18 @@ const (
)

type Engine struct {
maxSamples int
sampleRate int
phase float64
bfoPhase float64
firTaps []float32
cudaReady bool
dIQIn *C.gpud_float2
dShifted *C.gpud_float2
dAudio *C.float
iqBytes C.size_t
audioBytes C.size_t
maxSamples int
sampleRate int
phase float64
bfoPhase float64
firTaps []float32
cudaReady bool
lastShiftUsedGPU bool
dIQIn *C.gpud_float2
dShifted *C.gpud_float2
dAudio *C.float
iqBytes C.size_t
audioBytes C.size_t
}

func Available() bool {
@@ -134,7 +135,14 @@ func (e *Engine) SetFIR(taps []float32) {
}

func phaseStatus() string {
return "phase1b-launch-boundary"
return "phase1c-validated-shift"
}

func (e *Engine) LastShiftUsedGPU() bool {
if e == nil {
return false
}
return e.lastShiftUsedGPU
}

func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
@@ -224,3 +232,5 @@ func (e *Engine) Close() {
e.firTaps = nil
e.cudaReady = false
}
aReady = false
}

+ 5
- 2
internal/demod/gpudemod/gpudemod_stub.go ファイルの表示

@@ -16,8 +16,9 @@ const (
)

type Engine struct {
maxSamples int
sampleRate int
maxSamples int
sampleRate int
lastShiftUsedGPU bool
}

func Available() bool { return false }
@@ -28,6 +29,8 @@ func New(maxSamples int, sampleRate int) (*Engine, error) {

func (e *Engine) SetFIR(taps []float32) {}

func (e *Engine) LastShiftUsedGPU() bool { return false }

func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled")
}


+ 27
- 0
internal/demod/gpudemod/kernels.cu ファイルの表示

@@ -33,3 +33,30 @@ extern "C" int gpud_launch_freq_shift_cuda(
gpud_freq_shift_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
return (int)cudaGetLastError();
}

extern "C" __global__ void gpud_fm_discrim_kernel(
const float2* __restrict__ in,
float* __restrict__ out,
int n
) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= n - 1) return;

float2 prev = in[idx];
float2 curr = in[idx + 1];
float re = prev.x * curr.x + prev.y * curr.y;
float im = prev.x * curr.y - prev.y * curr.x;
out[idx] = atan2f(im, re);
}

extern "C" int gpud_launch_fm_discrim_cuda(
const float2* in,
float* out,
int n
) {
if (n <= 1) return 0;
const int block = 256;
const int grid = (n + block - 1) / block;
gpud_fm_discrim_kernel<<<grid, block>>>(in, out, n);
return (int)cudaGetLastError();
}

+ 4
- 0
internal/recorder/demod.go ファイルの表示

@@ -2,6 +2,7 @@ package recorder

import (
"errors"
"log"
"math"
"path/filepath"

@@ -33,6 +34,9 @@ func (m *Manager) demodAndWrite(dir string, ev detector.Event, iq []complex64, f
if gpuAudio, gpuRate, err := m.gpuDemod.Demod(iq, offset, bw, gpudemod.DemodNFM); err == nil {
audio = gpuAudio
inputRate = gpuRate
if m.gpuDemod.LastShiftUsedGPU() {
log.Printf("gpudemod: validated GPU freq-shift used for event %d", ev.ID)
}
}
}
if audio == nil {


読み込み中…
キャンセル
保存