Wideband autonomous SDR analysis engine forked from sdr-visual-suite
25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

288 lines
7.5KB

  1. //go:build cufft
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -lcufft64_12 -lcudart64_13 ${SRCDIR}/build/kernels.obj
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  25. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  26. return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
  27. }
  28. extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);
  29. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  30. return gpud_launch_fm_discrim_cuda(in, out, n);
  31. }
  32. */
  33. import "C"
  34. import (
  35. "errors"
  36. "fmt"
  37. "math"
  38. "unsafe"
  39. "sdr-visual-suite/internal/demod"
  40. "sdr-visual-suite/internal/dsp"
  41. )
  42. type DemodType int
  43. const (
  44. DemodNFM DemodType = iota
  45. DemodWFM
  46. DemodAM
  47. DemodUSB
  48. DemodLSB
  49. DemodCW
  50. )
  51. type Engine struct {
  52. maxSamples int
  53. sampleRate int
  54. phase float64
  55. bfoPhase float64
  56. firTaps []float32
  57. cudaReady bool
  58. lastShiftUsedGPU bool
  59. dIQIn *C.gpud_float2
  60. dShifted *C.gpud_float2
  61. dAudio *C.float
  62. iqBytes C.size_t
  63. audioBytes C.size_t
  64. }
  65. func Available() bool {
  66. var count C.int
  67. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  68. return false
  69. }
  70. return count > 0
  71. }
  72. func New(maxSamples int, sampleRate int) (*Engine, error) {
  73. if maxSamples <= 0 {
  74. return nil, errors.New("invalid maxSamples")
  75. }
  76. if sampleRate <= 0 {
  77. return nil, errors.New("invalid sampleRate")
  78. }
  79. if !Available() {
  80. return nil, errors.New("cuda device not available")
  81. }
  82. e := &Engine{
  83. maxSamples: maxSamples,
  84. sampleRate: sampleRate,
  85. cudaReady: true,
  86. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  87. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  88. }
  89. var ptr unsafe.Pointer
  90. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  91. e.Close()
  92. return nil, errors.New("cudaMalloc dIQIn failed")
  93. }
  94. e.dIQIn = (*C.gpud_float2)(ptr)
  95. ptr = nil
  96. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  97. e.Close()
  98. return nil, errors.New("cudaMalloc dShifted failed")
  99. }
  100. e.dShifted = (*C.gpud_float2)(ptr)
  101. ptr = nil
  102. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  103. e.Close()
  104. return nil, errors.New("cudaMalloc dAudio failed")
  105. }
  106. e.dAudio = (*C.float)(ptr)
  107. return e, nil
  108. }
  109. func (e *Engine) SetFIR(taps []float32) {
  110. if len(taps) == 0 {
  111. e.firTaps = nil
  112. return
  113. }
  114. e.firTaps = append(e.firTaps[:0], taps...)
  115. }
  116. func phaseStatus() string {
  117. return "phase1c-validated-shift"
  118. }
  119. func (e *Engine) LastShiftUsedGPU() bool {
  120. if e == nil {
  121. return false
  122. }
  123. return e.lastShiftUsedGPU
  124. }
  125. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  126. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  127. return nil, false
  128. }
  129. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  130. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  131. return nil, false
  132. }
  133. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  134. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  135. return nil, false
  136. }
  137. if C.gpud_device_sync() != C.cudaSuccess {
  138. return nil, false
  139. }
  140. out := make([]complex64, len(iq))
  141. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  142. return nil, false
  143. }
  144. e.phase += phaseInc * float64(len(iq))
  145. return out, true
  146. }
  147. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  148. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  149. return nil, false
  150. }
  151. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  152. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  153. return nil, false
  154. }
  155. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  156. return nil, false
  157. }
  158. if C.gpud_device_sync() != C.cudaSuccess {
  159. return nil, false
  160. }
  161. out := make([]float32, len(shifted)-1)
  162. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  163. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  164. return nil, false
  165. }
  166. return out, true
  167. }
  168. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  169. if e == nil {
  170. return nil, 0, errors.New("nil CUDA demod engine")
  171. }
  172. if !e.cudaReady {
  173. return nil, 0, errors.New("cuda demod engine is not initialized")
  174. }
  175. if len(iq) == 0 {
  176. return nil, 0, nil
  177. }
  178. if len(iq) > e.maxSamples {
  179. return nil, 0, errors.New("sample count exceeds engine capacity")
  180. }
  181. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  182. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  183. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  184. if !e.lastShiftUsedGPU {
  185. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  186. }
  187. var outRate int
  188. switch mode {
  189. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  190. outRate = 48000
  191. case DemodWFM:
  192. outRate = 192000
  193. default:
  194. return nil, 0, errors.New("unsupported demod type")
  195. }
  196. cutoff := bw / 2
  197. if cutoff < 200 {
  198. cutoff = 200
  199. }
  200. taps := e.firTaps
  201. if len(taps) == 0 {
  202. base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  203. taps = append(make([]float32, 0, len(base)), base...)
  204. }
  205. filtered := dsp.ApplyFIR(shifted, taps)
  206. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  207. if decim < 1 {
  208. decim = 1
  209. }
  210. dec := dsp.Decimate(filtered, decim)
  211. inputRate := e.sampleRate / decim
  212. switch mode {
  213. case DemodNFM:
  214. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  215. return gpuAudio, inputRate, nil
  216. }
  217. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  218. case DemodWFM:
  219. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  220. return gpuAudio, inputRate, nil
  221. }
  222. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  223. case DemodAM:
  224. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  225. case DemodUSB:
  226. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  227. case DemodLSB:
  228. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  229. case DemodCW:
  230. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  231. default:
  232. return nil, 0, errors.New("unsupported demod type")
  233. }
  234. }
  235. func (e *Engine) Close() {
  236. if e == nil {
  237. return
  238. }
  239. if e.dIQIn != nil {
  240. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  241. e.dIQIn = nil
  242. }
  243. if e.dShifted != nil {
  244. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  245. e.dShifted = nil
  246. }
  247. if e.dDecimated != nil {
  248. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  249. e.dDecimated = nil
  250. }
  251. if e.dAudio != nil {
  252. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  253. e.dAudio = nil
  254. }
  255. e.firTaps = nil
  256. e.cudaReady = false
  257. }