Wideband autonomous SDR analysis engine forked from sdr-visual-suite
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

222 lines
5.4KB

  1. //go:build cufft
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -lcufft64_12 -lcudart64_13
  5. #include <cuda_runtime.h>
  6. #include <cufft.h>
  7. typedef struct { float x; float y; } gpud_float2;
  8. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  9. return (int)cudaMalloc(ptr, bytes);
  10. }
  11. static int gpud_cuda_free(void *ptr) {
  12. return (int)cudaFree(ptr);
  13. }
  14. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  15. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  16. }
  17. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  18. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  19. }
  20. static int gpud_device_sync() {
  21. return (int)cudaDeviceSynchronize();
  22. }
  23. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  24. // TODO(phase2): replace with real CUDA kernel launch.
  25. // Phase 1b keeps the launch boundary in place without pretending acceleration.
  26. (void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start;
  27. return -1;
  28. }
  29. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  30. // TODO(phase2): replace with real CUDA kernel launch.
  31. (void)in; (void)out; (void)n;
  32. return -1;
  33. }
  34. */
  35. import "C"
  36. import (
  37. "errors"
  38. "fmt"
  39. "math"
  40. "unsafe"
  41. "sdr-visual-suite/internal/demod"
  42. "sdr-visual-suite/internal/dsp"
  43. )
  44. type DemodType int
  45. const (
  46. DemodNFM DemodType = iota
  47. DemodWFM
  48. DemodAM
  49. DemodUSB
  50. DemodLSB
  51. DemodCW
  52. )
  53. type Engine struct {
  54. maxSamples int
  55. sampleRate int
  56. phase float64
  57. bfoPhase float64
  58. firTaps []float32
  59. cudaReady bool
  60. dIQIn *C.gpud_float2
  61. dShifted *C.gpud_float2
  62. dAudio *C.float
  63. iqBytes C.size_t
  64. audioBytes C.size_t
  65. }
  66. func Available() bool {
  67. var count C.int
  68. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  69. return false
  70. }
  71. return count > 0
  72. }
  73. func New(maxSamples int, sampleRate int) (*Engine, error) {
  74. if maxSamples <= 0 {
  75. return nil, errors.New("invalid maxSamples")
  76. }
  77. if sampleRate <= 0 {
  78. return nil, errors.New("invalid sampleRate")
  79. }
  80. if !Available() {
  81. return nil, errors.New("cuda device not available")
  82. }
  83. e := &Engine{
  84. maxSamples: maxSamples,
  85. sampleRate: sampleRate,
  86. cudaReady: true,
  87. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  88. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  89. }
  90. var ptr unsafe.Pointer
  91. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  92. e.Close()
  93. return nil, errors.New("cudaMalloc dIQIn failed")
  94. }
  95. e.dIQIn = (*C.gpud_float2)(ptr)
  96. ptr = nil
  97. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  98. e.Close()
  99. return nil, errors.New("cudaMalloc dShifted failed")
  100. }
  101. e.dShifted = (*C.gpud_float2)(ptr)
  102. ptr = nil
  103. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  104. e.Close()
  105. return nil, errors.New("cudaMalloc dAudio failed")
  106. }
  107. e.dAudio = (*C.float)(ptr)
  108. return e, nil
  109. }
  110. func (e *Engine) SetFIR(taps []float32) {
  111. if len(taps) == 0 {
  112. e.firTaps = nil
  113. return
  114. }
  115. e.firTaps = append(e.firTaps[:0], taps...)
  116. }
  117. func phaseStatus() string {
  118. return "phase1b-launch-boundary"
  119. }
  120. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool {
  121. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  122. return false
  123. }
  124. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess {
  125. return false
  126. }
  127. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  128. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  129. return false
  130. }
  131. if C.gpud_device_sync() != C.cudaSuccess {
  132. return false
  133. }
  134. e.phase += phaseInc * float64(len(iq))
  135. return true
  136. }
  137. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  138. if e == nil {
  139. return nil, 0, errors.New("nil CUDA demod engine")
  140. }
  141. if !e.cudaReady {
  142. return nil, 0, errors.New("cuda demod engine is not initialized")
  143. }
  144. if len(iq) == 0 {
  145. return nil, 0, nil
  146. }
  147. if len(iq) > e.maxSamples {
  148. return nil, 0, errors.New("sample count exceeds engine capacity")
  149. }
  150. if mode != DemodNFM {
  151. return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only")
  152. }
  153. // Real CUDA boundary is now present. If the launch wrappers are not yet backed
  154. // by actual kernels, we fall back to the existing CPU DSP path below.
  155. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  156. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  157. if !ok {
  158. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  159. }
  160. cutoff := bw / 2
  161. if cutoff < 200 {
  162. cutoff = 200
  163. }
  164. taps := e.firTaps
  165. if len(taps) == 0 {
  166. base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  167. taps = append(make([]float32, 0, len(base)), base...)
  168. }
  169. filtered := dsp.ApplyFIR(shifted, taps)
  170. outRate := demod.NFM{}.OutputSampleRate()
  171. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  172. if decim < 1 {
  173. decim = 1
  174. }
  175. dec := dsp.Decimate(filtered, decim)
  176. inputRate := e.sampleRate / decim
  177. audio := demod.NFM{}.Demod(dec, inputRate)
  178. return audio, inputRate, nil
  179. }
  180. func (e *Engine) Close() {
  181. if e == nil {
  182. return
  183. }
  184. if e.dIQIn != nil {
  185. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  186. e.dIQIn = nil
  187. }
  188. if e.dShifted != nil {
  189. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  190. e.dShifted = nil
  191. }
  192. if e.dAudio != nil {
  193. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  194. e.dAudio = nil
  195. }
  196. e.firTaps = nil
  197. e.cudaReady = false
  198. }