Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

463 рядки
14KB

  1. //go:build cufft && !windows
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -L${SRCDIR}/build -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  25. extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);
  26. extern int gpud_upload_fir_taps_cuda(const float* taps, int n);
  27. extern int gpud_launch_fir_cuda(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  28. extern int gpud_launch_decimate_cuda(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  29. extern int gpud_launch_am_envelope_cuda(const gpud_float2* in, float* out, int n);
  30. extern int gpud_launch_ssb_product_cuda(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  31. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  32. return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
  33. }
  34. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  35. return gpud_launch_fm_discrim_cuda(in, out, n);
  36. }
  37. static int gpud_upload_fir_taps(const float* taps, int n) {
  38. return gpud_upload_fir_taps_cuda(taps, n);
  39. }
  40. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  41. return gpud_launch_fir_cuda(in, out, n, num_taps);
  42. }
  43. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  44. return gpud_launch_decimate_cuda(in, out, n_out, factor);
  45. }
  46. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  47. return gpud_launch_am_envelope_cuda(in, out, n);
  48. }
  49. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  50. return gpud_launch_ssb_product_cuda(in, out, n, phase_inc, phase_start);
  51. }
  52. */
  53. import "C"
  54. import (
  55. "errors"
  56. "fmt"
  57. "math"
  58. "unsafe"
  59. "sdr-visual-suite/internal/demod"
  60. "sdr-visual-suite/internal/dsp"
  61. )
  62. type DemodType int
  63. const (
  64. DemodNFM DemodType = iota
  65. DemodWFM
  66. DemodAM
  67. DemodUSB
  68. DemodLSB
  69. DemodCW
  70. )
  71. type Engine struct {
  72. maxSamples int
  73. sampleRate int
  74. phase float64
  75. bfoPhase float64
  76. firTaps []float32
  77. cudaReady bool
  78. lastShiftUsedGPU bool
  79. lastFIRUsedGPU bool
  80. lastDecimUsedGPU bool
  81. lastDemodUsedGPU bool
  82. dIQIn *C.gpud_float2
  83. dShifted *C.gpud_float2
  84. dFiltered *C.gpud_float2
  85. dDecimated *C.gpud_float2
  86. dAudio *C.float
  87. iqBytes C.size_t
  88. audioBytes C.size_t
  89. }
  90. func Available() bool {
  91. var count C.int
  92. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  93. return false
  94. }
  95. return count > 0
  96. }
  97. func New(maxSamples int, sampleRate int) (*Engine, error) {
  98. if maxSamples <= 0 {
  99. return nil, errors.New("invalid maxSamples")
  100. }
  101. if sampleRate <= 0 {
  102. return nil, errors.New("invalid sampleRate")
  103. }
  104. if !Available() {
  105. return nil, errors.New("cuda device not available")
  106. }
  107. e := &Engine{
  108. maxSamples: maxSamples,
  109. sampleRate: sampleRate,
  110. cudaReady: true,
  111. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  112. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  113. }
  114. var ptr unsafe.Pointer
  115. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  116. e.Close()
  117. return nil, errors.New("cudaMalloc dIQIn failed")
  118. }
  119. e.dIQIn = (*C.gpud_float2)(ptr)
  120. ptr = nil
  121. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  122. e.Close()
  123. return nil, errors.New("cudaMalloc dShifted failed")
  124. }
  125. e.dShifted = (*C.gpud_float2)(ptr)
  126. ptr = nil
  127. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  128. e.Close()
  129. return nil, errors.New("cudaMalloc dFiltered failed")
  130. }
  131. e.dFiltered = (*C.gpud_float2)(ptr)
  132. ptr = nil
  133. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  134. e.Close()
  135. return nil, errors.New("cudaMalloc dDecimated failed")
  136. }
  137. e.dDecimated = (*C.gpud_float2)(ptr)
  138. ptr = nil
  139. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  140. e.Close()
  141. return nil, errors.New("cudaMalloc dAudio failed")
  142. }
  143. e.dAudio = (*C.float)(ptr)
  144. return e, nil
  145. }
  146. func (e *Engine) SetFIR(taps []float32) {
  147. if len(taps) == 0 {
  148. e.firTaps = nil
  149. return
  150. }
  151. if len(taps) > 256 {
  152. taps = taps[:256]
  153. }
  154. e.firTaps = append(e.firTaps[:0], taps...)
  155. if e.cudaReady {
  156. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  157. }
  158. }
  159. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  160. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  161. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  162. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  163. return nil, false
  164. }
  165. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  166. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  167. return nil, false
  168. }
  169. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  170. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  171. return nil, false
  172. }
  173. if C.gpud_device_sync() != C.cudaSuccess {
  174. return nil, false
  175. }
  176. out := make([]complex64, len(iq))
  177. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  178. return nil, false
  179. }
  180. e.phase += phaseInc * float64(len(iq))
  181. return out, true
  182. }
  183. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  184. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  185. return nil, false
  186. }
  187. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  188. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  189. return nil, false
  190. }
  191. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  192. return nil, false
  193. }
  194. if C.gpud_device_sync() != C.cudaSuccess {
  195. return nil, false
  196. }
  197. out := make([]complex64, len(iq))
  198. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  199. return nil, false
  200. }
  201. return out, true
  202. }
  203. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  204. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  205. return nil, false
  206. }
  207. nOut := len(filtered) / factor
  208. if nOut <= 0 {
  209. return nil, false
  210. }
  211. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  212. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  213. return nil, false
  214. }
  215. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  216. return nil, false
  217. }
  218. if C.gpud_device_sync() != C.cudaSuccess {
  219. return nil, false
  220. }
  221. out := make([]complex64, nOut)
  222. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  223. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  224. return nil, false
  225. }
  226. return out, true
  227. }
  228. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  229. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  230. return nil, false
  231. }
  232. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  233. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  234. return nil, false
  235. }
  236. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  237. return nil, false
  238. }
  239. if C.gpud_device_sync() != C.cudaSuccess {
  240. return nil, false
  241. }
  242. out := make([]float32, len(shifted)-1)
  243. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  244. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  245. return nil, false
  246. }
  247. return out, true
  248. }
  249. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  250. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  251. return nil, false
  252. }
  253. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  254. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  255. return nil, false
  256. }
  257. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  258. return nil, false
  259. }
  260. if C.gpud_device_sync() != C.cudaSuccess {
  261. return nil, false
  262. }
  263. out := make([]float32, len(shifted))
  264. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  265. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  266. return nil, false
  267. }
  268. return out, true
  269. }
  270. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  271. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  272. return nil, false
  273. }
  274. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  275. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  276. return nil, false
  277. }
  278. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  279. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  280. return nil, false
  281. }
  282. if C.gpud_device_sync() != C.cudaSuccess {
  283. return nil, false
  284. }
  285. out := make([]float32, len(shifted))
  286. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  287. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  288. return nil, false
  289. }
  290. e.bfoPhase += phaseInc * float64(len(shifted))
  291. return out, true
  292. }
  293. func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  294. return e.Demod(iq, offsetHz, bw, mode)
  295. }
  296. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  297. if e == nil {
  298. return nil, 0, errors.New("nil CUDA demod engine")
  299. }
  300. if !e.cudaReady {
  301. return nil, 0, errors.New("cuda demod engine is not initialized")
  302. }
  303. if len(iq) == 0 {
  304. return nil, 0, nil
  305. }
  306. if len(iq) > e.maxSamples {
  307. return nil, 0, errors.New("sample count exceeds engine capacity")
  308. }
  309. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  310. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  311. if !e.lastShiftUsedGPU {
  312. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  313. }
  314. var outRate int
  315. switch mode {
  316. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  317. outRate = 48000
  318. case DemodWFM:
  319. outRate = 192000
  320. default:
  321. return nil, 0, errors.New("unsupported demod type")
  322. }
  323. cutoff := bw / 2
  324. if cutoff < 200 {
  325. cutoff = 200
  326. }
  327. taps := e.firTaps
  328. if len(taps) == 0 {
  329. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  330. taps = make([]float32, len(base64))
  331. for i, v := range base64 {
  332. taps[i] = float32(v)
  333. }
  334. e.SetFIR(taps)
  335. }
  336. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  337. e.lastFIRUsedGPU = ok && ValidateFIR(shifted, taps, filtered, 1e-3)
  338. if !e.lastFIRUsedGPU {
  339. ftaps := make([]float64, len(taps))
  340. for i, v := range taps {
  341. ftaps[i] = float64(v)
  342. }
  343. filtered = dsp.ApplyFIR(shifted, ftaps)
  344. }
  345. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  346. if decim < 1 {
  347. decim = 1
  348. }
  349. dec, ok := e.tryCUDADecimate(filtered, decim)
  350. e.lastDecimUsedGPU = ok && ValidateDecimate(filtered, decim, dec, 1e-3)
  351. if !e.lastDecimUsedGPU {
  352. dec = dsp.Decimate(filtered, decim)
  353. }
  354. inputRate := e.sampleRate / decim
  355. e.lastDemodUsedGPU = false
  356. switch mode {
  357. case DemodNFM:
  358. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  359. e.lastDemodUsedGPU = true
  360. return gpuAudio, inputRate, nil
  361. }
  362. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  363. case DemodWFM:
  364. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  365. e.lastDemodUsedGPU = true
  366. return gpuAudio, inputRate, nil
  367. }
  368. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  369. case DemodAM:
  370. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  371. e.lastDemodUsedGPU = true
  372. return gpuAudio, inputRate, nil
  373. }
  374. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  375. case DemodUSB:
  376. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  377. e.lastDemodUsedGPU = true
  378. return gpuAudio, inputRate, nil
  379. }
  380. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  381. case DemodLSB:
  382. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  383. e.lastDemodUsedGPU = true
  384. return gpuAudio, inputRate, nil
  385. }
  386. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  387. case DemodCW:
  388. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  389. e.lastDemodUsedGPU = true
  390. return gpuAudio, inputRate, nil
  391. }
  392. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  393. default:
  394. return nil, 0, errors.New("unsupported demod type")
  395. }
  396. }
  397. func (e *Engine) Close() {
  398. if e == nil {
  399. return
  400. }
  401. if e.dIQIn != nil {
  402. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  403. e.dIQIn = nil
  404. }
  405. if e.dShifted != nil {
  406. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  407. e.dShifted = nil
  408. }
  409. if e.dFiltered != nil {
  410. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  411. e.dFiltered = nil
  412. }
  413. if e.dDecimated != nil {
  414. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  415. e.dDecimated = nil
  416. }
  417. if e.dAudio != nil {
  418. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  419. e.dAudio = nil
  420. }
  421. e.firTaps = nil
  422. e.cudaReady = false
  423. }