Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

461 рядки
14KB

  1. //go:build cufft
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -L${SRCDIR}/build -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  25. extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);
  26. extern int gpud_upload_fir_taps_cuda(const float* taps, int n);
  27. extern int gpud_launch_fir_cuda(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  28. extern int gpud_launch_decimate_cuda(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  29. extern int gpud_launch_am_envelope_cuda(const gpud_float2* in, float* out, int n);
  30. extern int gpud_launch_ssb_product_cuda(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  31. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  32. return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
  33. }
  34. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  35. return gpud_launch_fm_discrim_cuda(in, out, n);
  36. }
  37. static int gpud_upload_fir_taps(const float* taps, int n) {
  38. return gpud_upload_fir_taps_cuda(taps, n);
  39. }
  40. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  41. return gpud_launch_fir_cuda(in, out, n, num_taps);
  42. }
  43. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  44. return gpud_launch_decimate_cuda(in, out, n_out, factor);
  45. }
  46. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  47. return gpud_launch_am_envelope_cuda(in, out, n);
  48. }
  49. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  50. return gpud_launch_ssb_product_cuda(in, out, n, phase_inc, phase_start);
  51. }
  52. */
  53. import "C"
  54. import (
  55. "errors"
  56. "fmt"
  57. "math"
  58. "unsafe"
  59. "sdr-visual-suite/internal/demod"
  60. "sdr-visual-suite/internal/dsp"
  61. )
  62. type DemodType int
  63. const (
  64. DemodNFM DemodType = iota
  65. DemodWFM
  66. DemodAM
  67. DemodUSB
  68. DemodLSB
  69. DemodCW
  70. )
  71. type Engine struct {
  72. maxSamples int
  73. sampleRate int
  74. phase float64
  75. bfoPhase float64
  76. firTaps []float32
  77. cudaReady bool
  78. lastShiftUsedGPU bool
  79. lastFIRUsedGPU bool
  80. lastDecimUsedGPU bool
  81. lastDemodUsedGPU bool
  82. dIQIn *C.gpud_float2
  83. dShifted *C.gpud_float2
  84. dFiltered *C.gpud_float2
  85. dDecimated *C.gpud_float2
  86. dAudio *C.float
  87. iqBytes C.size_t
  88. audioBytes C.size_t
  89. }
  90. func Available() bool {
  91. var count C.int
  92. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  93. return false
  94. }
  95. return count > 0
  96. }
  97. func New(maxSamples int, sampleRate int) (*Engine, error) {
  98. if maxSamples <= 0 {
  99. return nil, errors.New("invalid maxSamples")
  100. }
  101. if sampleRate <= 0 {
  102. return nil, errors.New("invalid sampleRate")
  103. }
  104. if !Available() {
  105. return nil, errors.New("cuda device not available")
  106. }
  107. e := &Engine{
  108. maxSamples: maxSamples,
  109. sampleRate: sampleRate,
  110. cudaReady: true,
  111. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  112. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  113. }
  114. var ptr unsafe.Pointer
  115. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  116. e.Close()
  117. return nil, errors.New("cudaMalloc dIQIn failed")
  118. }
  119. e.dIQIn = (*C.gpud_float2)(ptr)
  120. ptr = nil
  121. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  122. e.Close()
  123. return nil, errors.New("cudaMalloc dShifted failed")
  124. }
  125. e.dShifted = (*C.gpud_float2)(ptr)
  126. ptr = nil
  127. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  128. e.Close()
  129. return nil, errors.New("cudaMalloc dFiltered failed")
  130. }
  131. e.dFiltered = (*C.gpud_float2)(ptr)
  132. ptr = nil
  133. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  134. e.Close()
  135. return nil, errors.New("cudaMalloc dDecimated failed")
  136. }
  137. e.dDecimated = (*C.gpud_float2)(ptr)
  138. ptr = nil
  139. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  140. e.Close()
  141. return nil, errors.New("cudaMalloc dAudio failed")
  142. }
  143. e.dAudio = (*C.float)(ptr)
  144. return e, nil
  145. }
  146. func (e *Engine) SetFIR(taps []float32) {
  147. if len(taps) == 0 {
  148. e.firTaps = nil
  149. return
  150. }
  151. if len(taps) > 256 {
  152. taps = taps[:256]
  153. }
  154. e.firTaps = append(e.firTaps[:0], taps...)
  155. if e.cudaReady {
  156. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  157. }
  158. }
  159. func phaseStatus() string { return "phase1c-validated-shift" }
  160. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  161. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  162. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  163. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  164. return nil, false
  165. }
  166. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  167. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  168. return nil, false
  169. }
  170. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  171. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  172. return nil, false
  173. }
  174. if C.gpud_device_sync() != C.cudaSuccess {
  175. return nil, false
  176. }
  177. out := make([]complex64, len(iq))
  178. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  179. return nil, false
  180. }
  181. e.phase += phaseInc * float64(len(iq))
  182. return out, true
  183. }
  184. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  185. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  186. return nil, false
  187. }
  188. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  189. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  190. return nil, false
  191. }
  192. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  193. return nil, false
  194. }
  195. if C.gpud_device_sync() != C.cudaSuccess {
  196. return nil, false
  197. }
  198. out := make([]complex64, len(iq))
  199. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  200. return nil, false
  201. }
  202. return out, true
  203. }
  204. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  205. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  206. return nil, false
  207. }
  208. nOut := len(filtered) / factor
  209. if nOut <= 0 {
  210. return nil, false
  211. }
  212. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  213. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  214. return nil, false
  215. }
  216. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  217. return nil, false
  218. }
  219. if C.gpud_device_sync() != C.cudaSuccess {
  220. return nil, false
  221. }
  222. out := make([]complex64, nOut)
  223. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  224. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  225. return nil, false
  226. }
  227. return out, true
  228. }
  229. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  230. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  231. return nil, false
  232. }
  233. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  234. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  235. return nil, false
  236. }
  237. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  238. return nil, false
  239. }
  240. if C.gpud_device_sync() != C.cudaSuccess {
  241. return nil, false
  242. }
  243. out := make([]float32, len(shifted)-1)
  244. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  245. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  246. return nil, false
  247. }
  248. return out, true
  249. }
  250. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  251. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  252. return nil, false
  253. }
  254. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  255. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  256. return nil, false
  257. }
  258. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  259. return nil, false
  260. }
  261. if C.gpud_device_sync() != C.cudaSuccess {
  262. return nil, false
  263. }
  264. out := make([]float32, len(shifted))
  265. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  266. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  267. return nil, false
  268. }
  269. return out, true
  270. }
  271. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  272. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  273. return nil, false
  274. }
  275. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  276. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  277. return nil, false
  278. }
  279. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  280. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  281. return nil, false
  282. }
  283. if C.gpud_device_sync() != C.cudaSuccess {
  284. return nil, false
  285. }
  286. out := make([]float32, len(shifted))
  287. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  288. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  289. return nil, false
  290. }
  291. e.bfoPhase += phaseInc * float64(len(shifted))
  292. return out, true
  293. }
  294. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  295. if e == nil {
  296. return nil, 0, errors.New("nil CUDA demod engine")
  297. }
  298. if !e.cudaReady {
  299. return nil, 0, errors.New("cuda demod engine is not initialized")
  300. }
  301. if len(iq) == 0 {
  302. return nil, 0, nil
  303. }
  304. if len(iq) > e.maxSamples {
  305. return nil, 0, errors.New("sample count exceeds engine capacity")
  306. }
  307. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  308. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  309. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  310. if !e.lastShiftUsedGPU {
  311. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  312. }
  313. var outRate int
  314. switch mode {
  315. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  316. outRate = 48000
  317. case DemodWFM:
  318. outRate = 192000
  319. default:
  320. return nil, 0, errors.New("unsupported demod type")
  321. }
  322. cutoff := bw / 2
  323. if cutoff < 200 {
  324. cutoff = 200
  325. }
  326. taps := e.firTaps
  327. if len(taps) == 0 {
  328. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  329. taps = make([]float32, len(base64))
  330. for i, v := range base64 {
  331. taps[i] = float32(v)
  332. }
  333. e.SetFIR(taps)
  334. }
  335. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  336. e.lastFIRUsedGPU = ok && ValidateFIR(shifted, taps, filtered, 1e-3)
  337. if !e.lastFIRUsedGPU {
  338. ftaps := make([]float64, len(taps))
  339. for i, v := range taps {
  340. ftaps[i] = float64(v)
  341. }
  342. filtered = dsp.ApplyFIR(shifted, ftaps)
  343. }
  344. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  345. if decim < 1 {
  346. decim = 1
  347. }
  348. dec, ok := e.tryCUDADecimate(filtered, decim)
  349. e.lastDecimUsedGPU = ok && ValidateDecimate(filtered, decim, dec, 1e-3)
  350. if !e.lastDecimUsedGPU {
  351. dec = dsp.Decimate(filtered, decim)
  352. }
  353. inputRate := e.sampleRate / decim
  354. e.lastDemodUsedGPU = false
  355. switch mode {
  356. case DemodNFM:
  357. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  358. e.lastDemodUsedGPU = true
  359. return gpuAudio, inputRate, nil
  360. }
  361. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  362. case DemodWFM:
  363. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  364. e.lastDemodUsedGPU = true
  365. return gpuAudio, inputRate, nil
  366. }
  367. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  368. case DemodAM:
  369. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  370. e.lastDemodUsedGPU = true
  371. return gpuAudio, inputRate, nil
  372. }
  373. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  374. case DemodUSB:
  375. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  376. e.lastDemodUsedGPU = true
  377. return gpuAudio, inputRate, nil
  378. }
  379. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  380. case DemodLSB:
  381. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  382. e.lastDemodUsedGPU = true
  383. return gpuAudio, inputRate, nil
  384. }
  385. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  386. case DemodCW:
  387. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  388. e.lastDemodUsedGPU = true
  389. return gpuAudio, inputRate, nil
  390. }
  391. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  392. default:
  393. return nil, 0, errors.New("unsupported demod type")
  394. }
  395. }
  396. func (e *Engine) Close() {
  397. if e == nil {
  398. return
  399. }
  400. if e.dIQIn != nil {
  401. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  402. e.dIQIn = nil
  403. }
  404. if e.dShifted != nil {
  405. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  406. e.dShifted = nil
  407. }
  408. if e.dFiltered != nil {
  409. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  410. e.dFiltered = nil
  411. }
  412. if e.dDecimated != nil {
  413. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  414. e.dDecimated = nil
  415. }
  416. if e.dAudio != nil {
  417. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  418. e.dAudio = nil
  419. }
  420. e.firTaps = nil
  421. e.cudaReady = false
  422. }