Bläddra i källkod

feat: prepare CUDA demod launch boundary

master
Jan Svabenik 3 dagar sedan
förälder
incheckning
f5ab414847
3 ändrade filer med 134 tillägg och 7 borttagningar
  1. +121
    -7
      internal/demod/gpudemod/gpudemod.go
  2. +11
    -0
      internal/demod/gpudemod/gpudemod_cufft_test.go
  3. +2
    -0
      internal/demod/gpudemod/gpudemod_test.go

+ 121
- 7
internal/demod/gpudemod/gpudemod.go Visa fil

@@ -6,12 +6,49 @@ package gpudemod
#cgo windows LDFLAGS: -lcufft64_12 -lcudart64_13
#include <cuda_runtime.h>
#include <cufft.h>

typedef struct { float x; float y; } gpud_float2;

static int gpud_cuda_malloc(void **ptr, size_t bytes) {
return (int)cudaMalloc(ptr, bytes);
}

static int gpud_cuda_free(void *ptr) {
return (int)cudaFree(ptr);
}

static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
}

static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
}

static int gpud_device_sync() {
return (int)cudaDeviceSynchronize();
}

static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
// TODO(phase2): replace with real CUDA kernel launch.
// Phase 1b keeps the launch boundary in place without pretending acceleration.
(void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start;
return -1;
}

static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
// TODO(phase2): replace with real CUDA kernel launch.
(void)in; (void)out; (void)n;
return -1;
}
*/
import "C"

import (
"errors"
"fmt"
"math"
"unsafe"

"sdr-visual-suite/internal/demod"
"sdr-visual-suite/internal/dsp"
@@ -34,9 +71,21 @@ type Engine struct {
phase float64
bfoPhase float64
firTaps []float32
cudaReady bool
dIQIn *C.gpud_float2
dShifted *C.gpud_float2
dAudio *C.float
iqBytes C.size_t
audioBytes C.size_t
}

func Available() bool { return true }
func Available() bool {
var count C.int
if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
return false
}
return count > 0
}

func New(maxSamples int, sampleRate int) (*Engine, error) {
if maxSamples <= 0 {
@@ -45,7 +94,35 @@ func New(maxSamples int, sampleRate int) (*Engine, error) {
if sampleRate <= 0 {
return nil, errors.New("invalid sampleRate")
}
return &Engine{maxSamples: maxSamples, sampleRate: sampleRate}, nil
if !Available() {
return nil, errors.New("cuda device not available")
}
e := &Engine{
maxSamples: maxSamples,
sampleRate: sampleRate,
cudaReady: true,
iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
}
var ptr unsafe.Pointer
if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
e.Close()
return nil, errors.New("cudaMalloc dIQIn failed")
}
e.dIQIn = (*C.gpud_float2)(ptr)
ptr = nil
if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
e.Close()
return nil, errors.New("cudaMalloc dShifted failed")
}
e.dShifted = (*C.gpud_float2)(ptr)
ptr = nil
if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
e.Close()
return nil, errors.New("cudaMalloc dAudio failed")
}
e.dAudio = (*C.float)(ptr)
return e, nil
}

func (e *Engine) SetFIR(taps []float32) {
@@ -56,10 +133,35 @@ func (e *Engine) SetFIR(taps []float32) {
e.firTaps = append(e.firTaps[:0], taps...)
}

func phaseStatus() string {
return "phase1b-launch-boundary"
}

func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool {
if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
return false
}
if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess {
return false
}
phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
return false
}
if C.gpud_device_sync() != C.cudaSuccess {
return false
}
e.phase += phaseInc * float64(len(iq))
return true
}

func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
if e == nil {
return nil, 0, errors.New("nil CUDA demod engine")
}
if !e.cudaReady {
return nil, 0, errors.New("cuda demod engine is not initialized")
}
if len(iq) == 0 {
return nil, 0, nil
}
@@ -70,12 +172,11 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT
return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only")
}

// Phase 1b note:
// This package now performs real CUDA availability gating and keeps the
// runtime/CGO boundary in place, but still intentionally falls back to the
// existing CPU DSP math for signal processing. The next phase should replace
// the FreqShift + FM discriminator sections below with actual kernel launches.
// Real CUDA boundary is now present. If the launch wrappers are not yet backed
// by actual kernels, we fall back to the existing CPU DSP path below.
_ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
_ = e.tryCUDAFreqShift(iq, offsetHz)

shifted := dsp.FreqShift(iq, e.sampleRate, offsetHz)
cutoff := bw / 2
if cutoff < 200 {
@@ -102,5 +203,18 @@ func (e *Engine) Close() {
if e == nil {
return
}
if e.dIQIn != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
e.dIQIn = nil
}
if e.dShifted != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
e.dShifted = nil
}
if e.dAudio != nil {
_ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
e.dAudio = nil
}
e.firTaps = nil
e.cudaReady = false
}

+ 11
- 0
internal/demod/gpudemod/gpudemod_cufft_test.go Visa fil

@@ -0,0 +1,11 @@
//go:build cufft

package gpudemod

import "testing"

func TestDemodTypeConstantsExist(t *testing.T) {
if DemodNFM != 0 {
t.Fatal("expected DemodNFM constant to be defined")
}
}

+ 2
- 0
internal/demod/gpudemod/gpudemod_test.go Visa fil

@@ -1,3 +1,5 @@
//go:build !cufft

package gpudemod

import "testing"


Laddar…
Avbryt
Spara