|
|
|
@@ -6,12 +6,49 @@ package gpudemod |
|
|
|
#cgo windows LDFLAGS: -lcufft64_12 -lcudart64_13 |
|
|
|
#include <cuda_runtime.h> |
|
|
|
#include <cufft.h> |
|
|
|
|
|
|
|
typedef struct { float x; float y; } gpud_float2; |
|
|
|
|
|
|
|
static int gpud_cuda_malloc(void **ptr, size_t bytes) { |
|
|
|
return (int)cudaMalloc(ptr, bytes); |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_cuda_free(void *ptr) { |
|
|
|
return (int)cudaFree(ptr); |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { |
|
|
|
return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { |
|
|
|
return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_device_sync() { |
|
|
|
return (int)cudaDeviceSynchronize(); |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { |
|
|
|
// TODO(phase2): replace with real CUDA kernel launch. |
|
|
|
// Phase 1b keeps the launch boundary in place without pretending acceleration. |
|
|
|
(void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start; |
|
|
|
return -1; |
|
|
|
} |
|
|
|
|
|
|
|
static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { |
|
|
|
// TODO(phase2): replace with real CUDA kernel launch. |
|
|
|
(void)in; (void)out; (void)n; |
|
|
|
return -1; |
|
|
|
} |
|
|
|
*/ |
|
|
|
import "C" |
|
|
|
|
|
|
|
import ( |
|
|
|
"errors" |
|
|
|
"fmt" |
|
|
|
"math" |
|
|
|
"unsafe" |
|
|
|
|
|
|
|
"sdr-visual-suite/internal/demod" |
|
|
|
"sdr-visual-suite/internal/dsp" |
|
|
|
@@ -34,9 +71,21 @@ type Engine struct { |
|
|
|
phase float64 |
|
|
|
bfoPhase float64 |
|
|
|
firTaps []float32 |
|
|
|
cudaReady bool |
|
|
|
dIQIn *C.gpud_float2 |
|
|
|
dShifted *C.gpud_float2 |
|
|
|
dAudio *C.float |
|
|
|
iqBytes C.size_t |
|
|
|
audioBytes C.size_t |
|
|
|
} |
|
|
|
|
|
|
|
func Available() bool { return true } |
|
|
|
func Available() bool { |
|
|
|
var count C.int |
|
|
|
if C.cudaGetDeviceCount(&count) != C.cudaSuccess { |
|
|
|
return false |
|
|
|
} |
|
|
|
return count > 0 |
|
|
|
} |
|
|
|
|
|
|
|
func New(maxSamples int, sampleRate int) (*Engine, error) { |
|
|
|
if maxSamples <= 0 { |
|
|
|
@@ -45,7 +94,35 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { |
|
|
|
if sampleRate <= 0 { |
|
|
|
return nil, errors.New("invalid sampleRate") |
|
|
|
} |
|
|
|
return &Engine{maxSamples: maxSamples, sampleRate: sampleRate}, nil |
|
|
|
if !Available() { |
|
|
|
return nil, errors.New("cuda device not available") |
|
|
|
} |
|
|
|
e := &Engine{ |
|
|
|
maxSamples: maxSamples, |
|
|
|
sampleRate: sampleRate, |
|
|
|
cudaReady: true, |
|
|
|
iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})), |
|
|
|
audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))), |
|
|
|
} |
|
|
|
var ptr unsafe.Pointer |
|
|
|
if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { |
|
|
|
e.Close() |
|
|
|
return nil, errors.New("cudaMalloc dIQIn failed") |
|
|
|
} |
|
|
|
e.dIQIn = (*C.gpud_float2)(ptr) |
|
|
|
ptr = nil |
|
|
|
if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { |
|
|
|
e.Close() |
|
|
|
return nil, errors.New("cudaMalloc dShifted failed") |
|
|
|
} |
|
|
|
e.dShifted = (*C.gpud_float2)(ptr) |
|
|
|
ptr = nil |
|
|
|
if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess { |
|
|
|
e.Close() |
|
|
|
return nil, errors.New("cudaMalloc dAudio failed") |
|
|
|
} |
|
|
|
e.dAudio = (*C.float)(ptr) |
|
|
|
return e, nil |
|
|
|
} |
|
|
|
|
|
|
|
func (e *Engine) SetFIR(taps []float32) { |
|
|
|
@@ -56,10 +133,35 @@ func (e *Engine) SetFIR(taps []float32) { |
|
|
|
e.firTaps = append(e.firTaps[:0], taps...) |
|
|
|
} |
|
|
|
|
|
|
|
func phaseStatus() string { |
|
|
|
return "phase1b-launch-boundary" |
|
|
|
} |
|
|
|
|
|
|
|
func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool { |
|
|
|
if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { |
|
|
|
return false |
|
|
|
} |
|
|
|
if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess { |
|
|
|
return false |
|
|
|
} |
|
|
|
phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate) |
|
|
|
if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 { |
|
|
|
return false |
|
|
|
} |
|
|
|
if C.gpud_device_sync() != C.cudaSuccess { |
|
|
|
return false |
|
|
|
} |
|
|
|
e.phase += phaseInc * float64(len(iq)) |
|
|
|
return true |
|
|
|
} |
|
|
|
|
|
|
|
func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { |
|
|
|
if e == nil { |
|
|
|
return nil, 0, errors.New("nil CUDA demod engine") |
|
|
|
} |
|
|
|
if !e.cudaReady { |
|
|
|
return nil, 0, errors.New("cuda demod engine is not initialized") |
|
|
|
} |
|
|
|
if len(iq) == 0 { |
|
|
|
return nil, 0, nil |
|
|
|
} |
|
|
|
@@ -70,12 +172,11 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT |
|
|
|
return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only") |
|
|
|
} |
|
|
|
|
|
|
|
// Phase 1b note: |
|
|
|
// This package now performs real CUDA availability gating and keeps the |
|
|
|
// runtime/CGO boundary in place, but still intentionally falls back to the |
|
|
|
// existing CPU DSP math for signal processing. The next phase should replace |
|
|
|
// the FreqShift + FM discriminator sections below with actual kernel launches. |
|
|
|
// Real CUDA boundary is now present. If the launch wrappers are not yet backed |
|
|
|
// by actual kernels, we fall back to the existing CPU DSP path below. |
|
|
|
_ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz) |
|
|
|
_ = e.tryCUDAFreqShift(iq, offsetHz) |
|
|
|
|
|
|
|
shifted := dsp.FreqShift(iq, e.sampleRate, offsetHz) |
|
|
|
cutoff := bw / 2 |
|
|
|
if cutoff < 200 { |
|
|
|
@@ -102,5 +203,18 @@ func (e *Engine) Close() { |
|
|
|
if e == nil { |
|
|
|
return |
|
|
|
} |
|
|
|
if e.dIQIn != nil { |
|
|
|
_ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn)) |
|
|
|
e.dIQIn = nil |
|
|
|
} |
|
|
|
if e.dShifted != nil { |
|
|
|
_ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) |
|
|
|
e.dShifted = nil |
|
|
|
} |
|
|
|
if e.dAudio != nil { |
|
|
|
_ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio)) |
|
|
|
e.dAudio = nil |
|
|
|
} |
|
|
|
e.firTaps = nil |
|
|
|
e.cudaReady = false |
|
|
|
} |