feat: extend CUDA demod telemetry and FM kernel prep

1ヶ月前 · 123207f977
--- a/internal/demod/gpudemod/build/kernels.obj
+++ b/internal/demod/gpudemod/build/kernels.obj
--- a/internal/demod/gpudemod/gpudemod.go
+++ b/internal/demod/gpudemod/gpudemod.go
@@ -36,10 +36,10 @@ static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, doub
 	return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
 }

 extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);

 static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
 	// TODO(phase2): replace with real CUDA kernel launch.
 	(void)in; (void)out; (void)n;
 	return -1;
 	return gpud_launch_fm_discrim_cuda(in, out, n);
 }
 */
 import "C"
@@ -66,17 +66,18 @@ const (
 )

 type Engine struct {
 	maxSamples int
 	sampleRate int
 	phase      float64
 	bfoPhase   float64
 	firTaps    []float32
 	cudaReady  bool
 	dIQIn      *C.gpud_float2
 	dShifted   *C.gpud_float2
 	dAudio     *C.float
 	iqBytes    C.size_t
 	audioBytes C.size_t
 	maxSamples       int
 	sampleRate       int
 	phase            float64
 	bfoPhase         float64
 	firTaps          []float32
 	cudaReady        bool
 	lastShiftUsedGPU bool
 	dIQIn            *C.gpud_float2
 	dShifted         *C.gpud_float2
 	dAudio           *C.float
 	iqBytes          C.size_t
 	audioBytes       C.size_t
 }

 func Available() bool {
@@ -134,7 +135,14 @@ func (e *Engine) SetFIR(taps []float32) {
 }

 func phaseStatus() string {
 	return "phase1b-launch-boundary"
 	return "phase1c-validated-shift"
 }

 func (e *Engine) LastShiftUsedGPU() bool {
 	if e == nil {
 		return false
 	}
 	return e.lastShiftUsedGPU
 }

 func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
@@ -224,3 +232,5 @@ func (e *Engine) Close() {
 	e.firTaps = nil
 	e.cudaReady = false
 }
 aReady = false
 }
--- a/internal/demod/gpudemod/gpudemod_stub.go
+++ b/internal/demod/gpudemod/gpudemod_stub.go
@@ -16,8 +16,9 @@ const (
 )

 type Engine struct {
 	maxSamples int
 	sampleRate int
 	maxSamples       int
 	sampleRate       int
 	lastShiftUsedGPU bool
 }

 func Available() bool { return false }
@@ -28,6 +29,8 @@ func New(maxSamples int, sampleRate int) (*Engine, error) {

 func (e *Engine) SetFIR(taps []float32) {}

 func (e *Engine) LastShiftUsedGPU() bool { return false }

 func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
 	return nil, 0, errors.New("CUDA demod not available: cufft build tag not enabled")
 }
--- a/internal/demod/gpudemod/kernels.cu
+++ b/internal/demod/gpudemod/kernels.cu
@@ -33,3 +33,30 @@ extern "C" int gpud_launch_freq_shift_cuda(
    gpud_freq_shift_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
    return (int)cudaGetLastError();
 }

 extern "C" __global__ void gpud_fm_discrim_kernel(
    const float2* __restrict__ in,
    float* __restrict__ out,
    int n
 ) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n - 1) return;

    float2 prev = in[idx];
    float2 curr = in[idx + 1];
    float re = prev.x * curr.x + prev.y * curr.y;
    float im = prev.x * curr.y - prev.y * curr.x;
    out[idx] = atan2f(im, re);
 }

 extern "C" int gpud_launch_fm_discrim_cuda(
    const float2* in,
    float* out,
    int n
 ) {
    if (n <= 1) return 0;
    const int block = 256;
    const int grid = (n + block - 1) / block;
    gpud_fm_discrim_kernel<<<grid, block>>>(in, out, n);
    return (int)cudaGetLastError();
 }
--- a/internal/recorder/demod.go
+++ b/internal/recorder/demod.go
@@ -2,6 +2,7 @@ package recorder

 import (
 	"errors"
 	"log"
 	"math"
 	"path/filepath"

@@ -33,6 +34,9 @@ func (m *Manager) demodAndWrite(dir string, ev detector.Event, iq []complex64, f
 		if gpuAudio, gpuRate, err := m.gpuDemod.Demod(iq, offset, bw, gpudemod.DemodNFM); err == nil {
 			audio = gpuAudio
 			inputRate = gpuRate
 			if m.gpuDemod.LastShiftUsedGPU() {
 				log.Printf("gpudemod: validated GPU freq-shift used for event %d", ev.ID)
 			}
 		}
 	}
 	if audio == nil {