Wideband autonomous SDR analysis engine forked from sdr-visual-suite
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 lines
5.5KB

  1. //go:build cufft
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -lcufft64_12 -lcudart64_13 ${SRCDIR}/build/kernels.obj
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  25. // TODO(phase2): replace with real CUDA kernel launch.
  26. // Phase 1b keeps the launch boundary in place without pretending acceleration.
  27. (void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start;
  28. return -1;
  29. }
  30. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  31. // TODO(phase2): replace with real CUDA kernel launch.
  32. (void)in; (void)out; (void)n;
  33. return -1;
  34. }
  35. */
  36. import "C"
  37. import (
  38. "errors"
  39. "fmt"
  40. "math"
  41. "unsafe"
  42. "sdr-visual-suite/internal/demod"
  43. "sdr-visual-suite/internal/dsp"
  44. )
  45. type DemodType int
  46. const (
  47. DemodNFM DemodType = iota
  48. DemodWFM
  49. DemodAM
  50. DemodUSB
  51. DemodLSB
  52. DemodCW
  53. )
  54. type Engine struct {
  55. maxSamples int
  56. sampleRate int
  57. phase float64
  58. bfoPhase float64
  59. firTaps []float32
  60. cudaReady bool
  61. dIQIn *C.gpud_float2
  62. dShifted *C.gpud_float2
  63. dAudio *C.float
  64. iqBytes C.size_t
  65. audioBytes C.size_t
  66. }
  67. func Available() bool {
  68. var count C.int
  69. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  70. return false
  71. }
  72. return count > 0
  73. }
  74. func New(maxSamples int, sampleRate int) (*Engine, error) {
  75. if maxSamples <= 0 {
  76. return nil, errors.New("invalid maxSamples")
  77. }
  78. if sampleRate <= 0 {
  79. return nil, errors.New("invalid sampleRate")
  80. }
  81. if !Available() {
  82. return nil, errors.New("cuda device not available")
  83. }
  84. e := &Engine{
  85. maxSamples: maxSamples,
  86. sampleRate: sampleRate,
  87. cudaReady: true,
  88. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  89. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  90. }
  91. var ptr unsafe.Pointer
  92. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  93. e.Close()
  94. return nil, errors.New("cudaMalloc dIQIn failed")
  95. }
  96. e.dIQIn = (*C.gpud_float2)(ptr)
  97. ptr = nil
  98. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  99. e.Close()
  100. return nil, errors.New("cudaMalloc dShifted failed")
  101. }
  102. e.dShifted = (*C.gpud_float2)(ptr)
  103. ptr = nil
  104. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  105. e.Close()
  106. return nil, errors.New("cudaMalloc dAudio failed")
  107. }
  108. e.dAudio = (*C.float)(ptr)
  109. return e, nil
  110. }
  111. func (e *Engine) SetFIR(taps []float32) {
  112. if len(taps) == 0 {
  113. e.firTaps = nil
  114. return
  115. }
  116. e.firTaps = append(e.firTaps[:0], taps...)
  117. }
  118. func phaseStatus() string {
  119. return "phase1b-launch-boundary"
  120. }
  121. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool {
  122. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  123. return false
  124. }
  125. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess {
  126. return false
  127. }
  128. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  129. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  130. return false
  131. }
  132. if C.gpud_device_sync() != C.cudaSuccess {
  133. return false
  134. }
  135. e.phase += phaseInc * float64(len(iq))
  136. return true
  137. }
  138. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  139. if e == nil {
  140. return nil, 0, errors.New("nil CUDA demod engine")
  141. }
  142. if !e.cudaReady {
  143. return nil, 0, errors.New("cuda demod engine is not initialized")
  144. }
  145. if len(iq) == 0 {
  146. return nil, 0, nil
  147. }
  148. if len(iq) > e.maxSamples {
  149. return nil, 0, errors.New("sample count exceeds engine capacity")
  150. }
  151. if mode != DemodNFM {
  152. return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only")
  153. }
  154. // Real CUDA boundary is now present. If the launch wrappers are not yet backed
  155. // by actual kernels, we fall back to the existing CPU DSP path below.
  156. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  157. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  158. if !ok {
  159. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  160. }
  161. cutoff := bw / 2
  162. if cutoff < 200 {
  163. cutoff = 200
  164. }
  165. taps := e.firTaps
  166. if len(taps) == 0 {
  167. base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  168. taps = append(make([]float32, 0, len(base)), base...)
  169. }
  170. filtered := dsp.ApplyFIR(shifted, taps)
  171. outRate := demod.NFM{}.OutputSampleRate()
  172. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  173. if decim < 1 {
  174. decim = 1
  175. }
  176. dec := dsp.Decimate(filtered, decim)
  177. inputRate := e.sampleRate / decim
  178. audio := demod.NFM{}.Demod(dec, inputRate)
  179. return audio, inputRate, nil
  180. }
  181. func (e *Engine) Close() {
  182. if e == nil {
  183. return
  184. }
  185. if e.dIQIn != nil {
  186. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  187. e.dIQIn = nil
  188. }
  189. if e.dShifted != nil {
  190. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  191. e.dShifted = nil
  192. }
  193. if e.dAudio != nil {
  194. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  195. e.dAudio = nil
  196. }
  197. e.firTaps = nil
  198. e.cudaReady = false
  199. }