From 6dcedf67446c00f439f743768586585079d53a75 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Tue, 17 Mar 2026 19:23:23 +0100 Subject: [PATCH] Add optional cuFFT GPU FFT path --- README.md | 15 +++++- cmd/sdrd/main.go | 93 +++++++++++++++++++++++++++++++++++-- config.yaml | 1 + internal/config/config.go | 2 + internal/fft/fft.go | 18 +++++++ internal/fft/gpufft/gpu.go | 88 +++++++++++++++++++++++++++++++++++ internal/fft/gpufft/stub.go | 19 ++++++++ internal/runtime/runtime.go | 4 ++ web/app.js | 24 +++++++++- web/index.html | 4 ++ 10 files changed, 263 insertions(+), 5 deletions(-) create mode 100644 internal/fft/gpufft/gpu.go create mode 100644 internal/fft/gpufft/stub.go diff --git a/README.md b/README.md index 19bf7d5..b3aac1d 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Go-based SDRplay RSP1b live spectrum + waterfall visualizer with a minimal event - Events API (`/api/events?limit=...&since=...`) - Runtime UI controls for center frequency, span, sample rate, tuner bandwidth, FFT size, gain, AGC, DC block, IQ balance, detector threshold - Display controls: averaging + max-hold +- Optional GPU FFT (cuFFT) with toggle + `/api/gpu` - Recorded clips list placeholder (metadata only for now) - Windows + Linux support - Mock mode for testing without hardware @@ -34,6 +35,16 @@ go build -tags sdrplay ./cmd/sdrd .\sdrd.exe -config config.yaml ``` +#### Windows (GPU FFT / cuFFT) +Requires the NVIDIA CUDA Toolkit installed (cuFFT + cudart). Ensure CUDA `bin` and `lib/x64` are on PATH/LIB. +```powershell +$env:CGO_CFLAGS='-IC:\Program Files\SDRplay\API\inc -IC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include' +$env:CGO_LDFLAGS='-LC:\Program Files\SDRplay\API\x64 -lsdrplay_api -LC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\lib\x64 -lcufft -lcudart' + +go build -tags "sdrplay,cufft" ./cmd/sdrd +.\sdrd.exe -config config.yaml +``` + ### Linux ```bash export CGO_CFLAGS='-I/opt/sdrplay_api/include' @@ -51,6 +62,7 @@ Edit `config.yaml`: - `fft_size`: FFT size - `gain_db`: device gain (gain reduction) - `tuner_bw_khz`: tuner bandwidth (200/300/600/1536/5000/6000/7000/8000) +- `use_gpu_fft`: enable GPU FFT (requires CUDA + cufft build tag) - `agc`: enable automatic gain control - `dc_block`: enable DC blocking filter - `iq_balance`: enable basic IQ imbalance correction @@ -69,8 +81,9 @@ Use the right-side controls to adjust center frequency, span (zoom), sample rate ### Config API - `GET /api/config`: returns the current runtime configuration. -- `POST /api/config`: updates `center_hz`, `sample_rate`, `fft_size`, `gain_db`, and `detector.threshold_db` at runtime. +- `POST /api/config`: updates `center_hz`, `sample_rate`, `fft_size`, `gain_db`, `tuner_bw_khz`, `use_gpu_fft`, and `detector.threshold_db` at runtime. - `POST /api/sdr/settings`: updates `agc`, `dc_block`, and `iq_balance` at runtime. +- `GET /api/gpu`: reports GPU FFT availability/active status. ### Events API `/api/events` reads from the JSONL event log and returns the most recent events: diff --git a/cmd/sdrd/main.go b/cmd/sdrd/main.go index 16b56f0..64a9580 100644 --- a/cmd/sdrd/main.go +++ b/cmd/sdrd/main.go @@ -21,6 +21,7 @@ import ( "sdr-visual-suite/internal/dsp" "sdr-visual-suite/internal/events" fftutil "sdr-visual-suite/internal/fft" + "sdr-visual-suite/internal/fft/gpufft" "sdr-visual-suite/internal/mock" "sdr-visual-suite/internal/runtime" "sdr-visual-suite/internal/sdr" @@ -41,6 +42,30 @@ type hub struct { clients map[*websocket.Conn]struct{} } +type gpuStatus struct { + mu sync.RWMutex + Available bool `json:"available"` + Active bool `json:"active"` + Error string `json:"error"` +} + +func (g *gpuStatus) set(active bool, err error) { + g.mu.Lock() + defer g.mu.Unlock() + g.Active = active + if err != nil { + g.Error = err.Error() + } else { + g.Error = "" + } +} + +func (g *gpuStatus) snapshot() gpuStatus { + g.mu.RLock() + defer g.mu.RUnlock() + return gpuStatus{Available: g.Available, Active: g.Active, Error: g.Error} +} + func newHub() *hub { return &hub{clients: map[*websocket.Conn]struct{}{}} } @@ -126,6 +151,7 @@ type dspUpdate struct { window []float64 dcBlock bool iqBalance bool + useGPUFFT bool } func pushDSPUpdate(ch chan dspUpdate, update dspUpdate) { @@ -153,6 +179,7 @@ func main() { } cfgManager := runtime.New(cfg) + gpuState := &gpuStatus{Available: gpufft.Available()} newSource := func(cfg config.Config) (sdr.Source, error) { if mockFlag { @@ -203,7 +230,7 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, dspUpdates) + go runDSP(ctx, srcMgr, cfg, det, window, h, eventFile, dspUpdates, gpuState) upgrader := websocket.Upgrader{CheckOrigin: func(r *http.Request) bool { return true }} http.HandleFunc("/ws", func(w http.ResponseWriter, r *http.Request) { @@ -271,6 +298,7 @@ func main() { window: newWindow, dcBlock: next.DCBlock, iqBalance: next.IQBalance, + useGPUFFT: next.UseGPUFFT, }) _ = json.NewEncoder(w).Encode(next) default: @@ -321,6 +349,11 @@ func main() { _ = json.NewEncoder(w).Encode(sdr.SourceStats{}) }) + http.HandleFunc("/api/gpu", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(gpuState.snapshot()) + }) + http.HandleFunc("/api/events", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") limit := 200 @@ -364,13 +397,26 @@ func main() { _ = server.Shutdown(ctxTimeout) } -func runDSP(ctx context.Context, src sdr.Source, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, updates <-chan dspUpdate) { +func runDSP(ctx context.Context, src sdr.Source, cfg config.Config, det *detector.Detector, window []float64, h *hub, eventFile *os.File, updates <-chan dspUpdate, gpuState *gpuStatus) { ticker := time.NewTicker(cfg.FrameInterval()) defer ticker.Stop() enc := json.NewEncoder(eventFile) dcBlocker := dsp.NewDCBlocker(0.995) dcEnabled := cfg.DCBlock iqEnabled := cfg.IQBalance + useGPU := cfg.UseGPUFFT + var gpuEngine *gpufft.Engine + if useGPU && gpuState != nil && gpuState.Available { + if eng, err := gpufft.New(cfg.FFTSize); err == nil { + gpuEngine = eng + gpuState.set(true, nil) + } else { + gpuState.set(false, err) + useGPU = false + } + } else if gpuState != nil { + gpuState.set(false, nil) + } gotSamples := false for { @@ -378,6 +424,8 @@ func runDSP(ctx context.Context, src sdr.Source, cfg config.Config, det *detecto case <-ctx.Done(): return case upd := <-updates: + prevFFT := cfg.FFTSize + prevUseGPU := useGPU cfg = upd.cfg if upd.det != nil { det = upd.det @@ -387,6 +435,24 @@ func runDSP(ctx context.Context, src sdr.Source, cfg config.Config, det *detecto } dcEnabled = upd.dcBlock iqEnabled = upd.iqBalance + if cfg.FFTSize != prevFFT || cfg.UseGPUFFT != prevUseGPU { + if gpuEngine != nil { + gpuEngine.Close() + gpuEngine = nil + } + useGPU = cfg.UseGPUFFT + if useGPU && gpuState != nil && gpuState.Available { + if eng, err := gpufft.New(cfg.FFTSize); err == nil { + gpuEngine = eng + gpuState.set(true, nil) + } else { + gpuState.set(false, err) + useGPU = false + } + } else if gpuState != nil { + gpuState.set(false, nil) + } + } dcBlocker.Reset() ticker.Reset(cfg.FrameInterval()) case <-ticker.C: @@ -405,7 +471,28 @@ func runDSP(ctx context.Context, src sdr.Source, cfg config.Config, det *detecto if iqEnabled { dsp.IQBalance(iq) } - spectrum := fftutil.Spectrum(iq, window) + var spectrum []float64 + if useGPU && gpuEngine != nil { + if len(window) == len(iq) { + for i := 0; i < len(iq); i++ { + v := iq[i] + w := float32(window[i]) + iq[i] = complex(real(v)*w, imag(v)*w) + } + } + out, err := gpuEngine.Exec(iq) + if err != nil { + if gpuState != nil { + gpuState.set(false, err) + } + useGPU = false + spectrum = fftutil.Spectrum(iq, window) + } else { + spectrum = fftutil.SpectrumFromFFT(out) + } + } else { + spectrum = fftutil.Spectrum(iq, window) + } now := time.Now() finished, signals := det.Process(now, spectrum, cfg.CenterHz) for _, ev := range finished { diff --git a/config.yaml b/config.yaml index b37bd4b..265f774 100644 --- a/config.yaml +++ b/config.yaml @@ -7,6 +7,7 @@ sample_rate: 2048000 fft_size: 2048 gain_db: 30 tuner_bw_khz: 1536 +use_gpu_fft: false agc: false dc_block: false iq_balance: false diff --git a/internal/config/config.go b/internal/config/config.go index b1c8d6b..916cf35 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -26,6 +26,7 @@ type Config struct { FFTSize int `yaml:"fft_size" json:"fft_size"` GainDb float64 `yaml:"gain_db" json:"gain_db"` TunerBwKHz int `yaml:"tuner_bw_khz" json:"tuner_bw_khz"` + UseGPUFFT bool `yaml:"use_gpu_fft" json:"use_gpu_fft"` AGC bool `yaml:"agc" json:"agc"` DCBlock bool `yaml:"dc_block" json:"dc_block"` IQBalance bool `yaml:"iq_balance" json:"iq_balance"` @@ -47,6 +48,7 @@ func Default() Config { FFTSize: 2048, GainDb: 30, TunerBwKHz: 1536, + UseGPUFFT: false, AGC: false, DCBlock: false, IQBalance: false, diff --git a/internal/fft/fft.go b/internal/fft/fft.go index 50f2850..36ab34c 100644 --- a/internal/fft/fft.go +++ b/internal/fft/fft.go @@ -50,6 +50,24 @@ func Spectrum(iq []complex64, window []float64) []float64 { return power } +func SpectrumFromFFT(out []complex64) []float64 { + n := len(out) + if n == 0 { + return nil + } + power := make([]float64, n) + eps := 1e-12 + invN := 1.0 / float64(n) + for i := 0; i < n; i++ { + idx := (i + n/2) % n + v := out[idx] + mag := math.Hypot(float64(real(v)), float64(imag(v))) * invN + p := 20 * math.Log10(mag+eps) + power[i] = p + } + return power +} + func cmplxAbs(v complex128) float64 { return math.Hypot(real(v), imag(v)) } diff --git a/internal/fft/gpufft/gpu.go b/internal/fft/gpufft/gpu.go new file mode 100644 index 0000000..c2b3335 --- /dev/null +++ b/internal/fft/gpufft/gpu.go @@ -0,0 +1,88 @@ +//go:build cufft + +package gpufft + +/* +#cgo windows LDFLAGS: -lcufft -lcudart +#include +#include +*/ +import "C" + +import ( + "errors" + "fmt" + "unsafe" +) + +type Engine struct { + plan C.cufftHandle + n int + data *C.cufftComplex + bytes C.size_t +} + +func Available() bool { + var count C.int + if C.cudaGetDeviceCount(&count) != C.cudaSuccess { + return false + } + return count > 0 +} + +func New(n int) (*Engine, error) { + if n <= 0 { + return nil, errors.New("invalid fft size") + } + if !Available() { + return nil, errors.New("cuda device not available") + } + var plan C.cufftHandle + if C.cufftPlan1d(&plan, C.int(n), C.CUFFT_C2C, 1) != C.CUFFT_SUCCESS { + return nil, errors.New("cufftPlan1d failed") + } + var ptr unsafe.Pointer + bytes := C.size_t(n) * C.size_t(unsafe.Sizeof(C.cufftComplex{})) + if C.cudaMalloc(&ptr, bytes) != C.cudaSuccess { + C.cufftDestroy(plan) + return nil, errors.New("cudaMalloc failed") + } + return &Engine{plan: plan, n: n, data: (*C.cufftComplex)(ptr), bytes: bytes}, nil +} + +func (e *Engine) Close() { + if e == nil { + return + } + if e.plan != 0 { + _ = C.cufftDestroy(e.plan) + e.plan = 0 + } + if e.data != nil { + _ = C.cudaFree(unsafe.Pointer(e.data)) + e.data = nil + } +} + +func (e *Engine) Exec(in []complex64) ([]complex64, error) { + if e == nil { + return nil, errors.New("gpu fft not initialized") + } + if len(in) != e.n { + return nil, fmt.Errorf("expected %d samples, got %d", e.n, len(in)) + } + if len(in) == 0 { + return nil, nil + } + if C.cudaMemcpy(unsafe.Pointer(e.data), unsafe.Pointer(&in[0]), e.bytes, C.cudaMemcpyHostToDevice) != C.cudaSuccess { + return nil, errors.New("cudaMemcpy H2D failed") + } + if C.cufftExecC2C(e.plan, e.data, e.data, C.CUFFT_FORWARD) != C.CUFFT_SUCCESS { + return nil, errors.New("cufftExecC2C failed") + } + if C.cudaMemcpy(unsafe.Pointer(&in[0]), unsafe.Pointer(e.data), e.bytes, C.cudaMemcpyDeviceToHost) != C.cudaSuccess { + return nil, errors.New("cudaMemcpy D2H failed") + } + _ = C.cudaDeviceSynchronize() + return in, nil +} diff --git a/internal/fft/gpufft/stub.go b/internal/fft/gpufft/stub.go new file mode 100644 index 0000000..077e944 --- /dev/null +++ b/internal/fft/gpufft/stub.go @@ -0,0 +1,19 @@ +//go:build !cufft + +package gpufft + +import "errors" + +type Engine struct{} + +func Available() bool { return false } + +func New(n int) (*Engine, error) { + return nil, errors.New("cufft build tag not enabled") +} + +func (e *Engine) Close() {} + +func (e *Engine) Exec(in []complex64) ([]complex64, error) { + return nil, errors.New("cufft build tag not enabled") +} diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index 364185c..85b902c 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -13,6 +13,7 @@ type ConfigUpdate struct { FFTSize *int `json:"fft_size"` GainDb *float64 `json:"gain_db"` TunerBwKHz *int `json:"tuner_bw_khz"` + UseGPUFFT *bool `json:"use_gpu_fft"` Detector *DetectorUpdate `json:"detector"` } @@ -78,6 +79,9 @@ func (m *Manager) ApplyConfig(update ConfigUpdate) (config.Config, error) { } next.TunerBwKHz = *update.TunerBwKHz } + if update.UseGPUFFT != nil { + next.UseGPUFFT = *update.UseGPUFFT + } if update.Detector != nil { if update.Detector.ThresholdDb != nil { next.Detector.ThresholdDb = *update.Detector.ThresholdDb diff --git a/web/app.js b/web/app.js index 5a51d20..242c9b0 100644 --- a/web/app.js +++ b/web/app.js @@ -29,6 +29,7 @@ const iqToggle = document.getElementById('iqToggle'); const avgSelect = document.getElementById('avgSelect'); const maxHoldToggle = document.getElementById('maxHoldToggle'); const maxHoldReset = document.getElementById('maxHoldReset'); +const gpuToggle = document.getElementById('gpuToggle'); const presetButtons = Array.from(document.querySelectorAll('.preset-btn')); let latest = null; @@ -52,6 +53,7 @@ let maxHold = false; let maxSpectrum = null; let lastFFTSize = null; let stats = { buffer_samples: 0, dropped: 0, resets: 0 }; +let gpuInfo = { available: false, active: false, error: '' }; const events = []; const eventsById = new Map(); @@ -121,6 +123,7 @@ function applyConfigToUI(cfg) { agcToggle.checked = !!cfg.agc; dcToggle.checked = !!cfg.dc_block; iqToggle.checked = !!cfg.iq_balance; + if (gpuToggle) gpuToggle.checked = !!cfg.use_gpu_fft; isSyncingConfig = false; } @@ -151,6 +154,17 @@ async function loadStats() { } } +async function loadGPU() { + try { + const res = await fetch('/api/gpu'); + if (!res.ok) return; + const data = await res.json(); + gpuInfo = data || gpuInfo; + } catch (err) { + // ignore + } +} + function queueConfigUpdate(partial) { if (isSyncingConfig) return; pendingConfigUpdate = { ...(pendingConfigUpdate || {}), ...partial }; @@ -338,7 +352,8 @@ function renderSpectrum() { } const binHz = sample_rate / n; - metaEl.textContent = `Center ${(center_hz/1e6).toFixed(3)} MHz | Span ${(span/1e6).toFixed(3)} MHz | Res ${binHz.toFixed(1)} Hz/bin | Buf ${stats.buffer_samples} Drop ${stats.dropped} Reset ${stats.resets}`; + const gpuState = gpuInfo.active ? 'GPU:ON' : (gpuInfo.available ? 'GPU:OFF' : 'GPU:N/A'); + metaEl.textContent = `Center ${(center_hz/1e6).toFixed(3)} MHz | Span ${(span/1e6).toFixed(3)} MHz | Res ${binHz.toFixed(1)} Hz/bin | Buf ${stats.buffer_samples} Drop ${stats.dropped} Reset ${stats.resets} | ${gpuState}`; } function renderWaterfall() { @@ -607,6 +622,12 @@ if (maxHoldReset) { }); } +if (gpuToggle) { + gpuToggle.addEventListener('change', () => { + queueConfigUpdate({ use_gpu_fft: gpuToggle.checked }); + }); +} + fftSelect.addEventListener('change', () => { const size = parseInt(fftSelect.value, 10); if (Number.isFinite(size)) { @@ -771,3 +792,4 @@ requestAnimationFrame(tick); fetchEvents(true); setInterval(() => fetchEvents(false), 2000); setInterval(loadStats, 1000); +setInterval(loadGPU, 1000); diff --git a/web/index.html b/web/index.html index d2153d3..0cb8f11 100644 --- a/web/index.html +++ b/web/index.html @@ -117,6 +117,10 @@ Max Hold +