25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

465 satır
14KB

  1. //go:build cufft && !windows
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -L${SRCDIR}/build -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  25. extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n);
  26. extern int gpud_upload_fir_taps_cuda(const float* taps, int n);
  27. extern int gpud_launch_fir_cuda(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  28. extern int gpud_launch_decimate_cuda(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  29. extern int gpud_launch_am_envelope_cuda(const gpud_float2* in, float* out, int n);
  30. extern int gpud_launch_ssb_product_cuda(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  31. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  32. return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
  33. }
  34. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  35. return gpud_launch_fm_discrim_cuda(in, out, n);
  36. }
  37. static int gpud_upload_fir_taps(const float* taps, int n) {
  38. return gpud_upload_fir_taps_cuda(taps, n);
  39. }
  40. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  41. return gpud_launch_fir_cuda(in, out, n, num_taps);
  42. }
  43. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  44. return gpud_launch_decimate_cuda(in, out, n_out, factor);
  45. }
  46. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  47. return gpud_launch_am_envelope_cuda(in, out, n);
  48. }
  49. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  50. return gpud_launch_ssb_product_cuda(in, out, n, phase_inc, phase_start);
  51. }
  52. */
  53. import "C"
  54. import (
  55. "errors"
  56. "fmt"
  57. "math"
  58. "unsafe"
  59. "sdr-visual-suite/internal/demod"
  60. "sdr-visual-suite/internal/dsp"
  61. )
  62. type DemodType int
  63. const (
  64. DemodNFM DemodType = iota
  65. DemodWFM
  66. DemodAM
  67. DemodUSB
  68. DemodLSB
  69. DemodCW
  70. )
  71. type Engine struct {
  72. maxSamples int
  73. sampleRate int
  74. phase float64
  75. bfoPhase float64
  76. firTaps []float32
  77. cudaReady bool
  78. lastShiftUsedGPU bool
  79. lastFIRUsedGPU bool
  80. lastDecimUsedGPU bool
  81. lastDemodUsedGPU bool
  82. dIQIn *C.gpud_float2
  83. dShifted *C.gpud_float2
  84. dFiltered *C.gpud_float2
  85. dDecimated *C.gpud_float2
  86. dAudio *C.float
  87. iqBytes C.size_t
  88. audioBytes C.size_t
  89. }
  90. func Available() bool {
  91. var count C.int
  92. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  93. return false
  94. }
  95. return count > 0
  96. }
  97. func New(maxSamples int, sampleRate int) (*Engine, error) {
  98. if maxSamples <= 0 {
  99. return nil, errors.New("invalid maxSamples")
  100. }
  101. if sampleRate <= 0 {
  102. return nil, errors.New("invalid sampleRate")
  103. }
  104. if !Available() {
  105. return nil, errors.New("cuda device not available")
  106. }
  107. e := &Engine{
  108. maxSamples: maxSamples,
  109. sampleRate: sampleRate,
  110. cudaReady: true,
  111. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  112. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  113. }
  114. var ptr unsafe.Pointer
  115. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  116. e.Close()
  117. return nil, errors.New("cudaMalloc dIQIn failed")
  118. }
  119. e.dIQIn = (*C.gpud_float2)(ptr)
  120. ptr = nil
  121. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  122. e.Close()
  123. return nil, errors.New("cudaMalloc dShifted failed")
  124. }
  125. e.dShifted = (*C.gpud_float2)(ptr)
  126. ptr = nil
  127. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  128. e.Close()
  129. return nil, errors.New("cudaMalloc dFiltered failed")
  130. }
  131. e.dFiltered = (*C.gpud_float2)(ptr)
  132. ptr = nil
  133. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  134. e.Close()
  135. return nil, errors.New("cudaMalloc dDecimated failed")
  136. }
  137. e.dDecimated = (*C.gpud_float2)(ptr)
  138. ptr = nil
  139. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  140. e.Close()
  141. return nil, errors.New("cudaMalloc dAudio failed")
  142. }
  143. e.dAudio = (*C.float)(ptr)
  144. return e, nil
  145. }
  146. func (e *Engine) SetFIR(taps []float32) {
  147. if len(taps) == 0 {
  148. e.firTaps = nil
  149. return
  150. }
  151. if len(taps) > 256 {
  152. taps = taps[:256]
  153. }
  154. e.firTaps = append(e.firTaps[:0], taps...)
  155. if e.cudaReady {
  156. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  157. }
  158. }
  159. func phaseStatus() string { return "phase1c-validated-shift" }
  160. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  161. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  162. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  163. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  164. return nil, false
  165. }
  166. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  167. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  168. return nil, false
  169. }
  170. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  171. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  172. return nil, false
  173. }
  174. if C.gpud_device_sync() != C.cudaSuccess {
  175. return nil, false
  176. }
  177. out := make([]complex64, len(iq))
  178. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  179. return nil, false
  180. }
  181. e.phase += phaseInc * float64(len(iq))
  182. return out, true
  183. }
  184. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  185. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  186. return nil, false
  187. }
  188. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  189. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  190. return nil, false
  191. }
  192. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  193. return nil, false
  194. }
  195. if C.gpud_device_sync() != C.cudaSuccess {
  196. return nil, false
  197. }
  198. out := make([]complex64, len(iq))
  199. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  200. return nil, false
  201. }
  202. return out, true
  203. }
  204. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  205. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  206. return nil, false
  207. }
  208. nOut := len(filtered) / factor
  209. if nOut <= 0 {
  210. return nil, false
  211. }
  212. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  213. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  214. return nil, false
  215. }
  216. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  217. return nil, false
  218. }
  219. if C.gpud_device_sync() != C.cudaSuccess {
  220. return nil, false
  221. }
  222. out := make([]complex64, nOut)
  223. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  224. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  225. return nil, false
  226. }
  227. return out, true
  228. }
  229. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  230. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  231. return nil, false
  232. }
  233. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  234. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  235. return nil, false
  236. }
  237. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  238. return nil, false
  239. }
  240. if C.gpud_device_sync() != C.cudaSuccess {
  241. return nil, false
  242. }
  243. out := make([]float32, len(shifted)-1)
  244. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  245. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  246. return nil, false
  247. }
  248. return out, true
  249. }
  250. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  251. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  252. return nil, false
  253. }
  254. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  255. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  256. return nil, false
  257. }
  258. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  259. return nil, false
  260. }
  261. if C.gpud_device_sync() != C.cudaSuccess {
  262. return nil, false
  263. }
  264. out := make([]float32, len(shifted))
  265. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  266. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  267. return nil, false
  268. }
  269. return out, true
  270. }
  271. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  272. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  273. return nil, false
  274. }
  275. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  276. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  277. return nil, false
  278. }
  279. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  280. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  281. return nil, false
  282. }
  283. if C.gpud_device_sync() != C.cudaSuccess {
  284. return nil, false
  285. }
  286. out := make([]float32, len(shifted))
  287. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  288. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  289. return nil, false
  290. }
  291. e.bfoPhase += phaseInc * float64(len(shifted))
  292. return out, true
  293. }
  294. func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  295. return e.Demod(iq, offsetHz, bw, mode)
  296. }
  297. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  298. if e == nil {
  299. return nil, 0, errors.New("nil CUDA demod engine")
  300. }
  301. if !e.cudaReady {
  302. return nil, 0, errors.New("cuda demod engine is not initialized")
  303. }
  304. if len(iq) == 0 {
  305. return nil, 0, nil
  306. }
  307. if len(iq) > e.maxSamples {
  308. return nil, 0, errors.New("sample count exceeds engine capacity")
  309. }
  310. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  311. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  312. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  313. if !e.lastShiftUsedGPU {
  314. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  315. }
  316. var outRate int
  317. switch mode {
  318. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  319. outRate = 48000
  320. case DemodWFM:
  321. outRate = 192000
  322. default:
  323. return nil, 0, errors.New("unsupported demod type")
  324. }
  325. cutoff := bw / 2
  326. if cutoff < 200 {
  327. cutoff = 200
  328. }
  329. taps := e.firTaps
  330. if len(taps) == 0 {
  331. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  332. taps = make([]float32, len(base64))
  333. for i, v := range base64 {
  334. taps[i] = float32(v)
  335. }
  336. e.SetFIR(taps)
  337. }
  338. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  339. e.lastFIRUsedGPU = ok && ValidateFIR(shifted, taps, filtered, 1e-3)
  340. if !e.lastFIRUsedGPU {
  341. ftaps := make([]float64, len(taps))
  342. for i, v := range taps {
  343. ftaps[i] = float64(v)
  344. }
  345. filtered = dsp.ApplyFIR(shifted, ftaps)
  346. }
  347. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  348. if decim < 1 {
  349. decim = 1
  350. }
  351. dec, ok := e.tryCUDADecimate(filtered, decim)
  352. e.lastDecimUsedGPU = ok && ValidateDecimate(filtered, decim, dec, 1e-3)
  353. if !e.lastDecimUsedGPU {
  354. dec = dsp.Decimate(filtered, decim)
  355. }
  356. inputRate := e.sampleRate / decim
  357. e.lastDemodUsedGPU = false
  358. switch mode {
  359. case DemodNFM:
  360. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  361. e.lastDemodUsedGPU = true
  362. return gpuAudio, inputRate, nil
  363. }
  364. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  365. case DemodWFM:
  366. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  367. e.lastDemodUsedGPU = true
  368. return gpuAudio, inputRate, nil
  369. }
  370. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  371. case DemodAM:
  372. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  373. e.lastDemodUsedGPU = true
  374. return gpuAudio, inputRate, nil
  375. }
  376. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  377. case DemodUSB:
  378. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  379. e.lastDemodUsedGPU = true
  380. return gpuAudio, inputRate, nil
  381. }
  382. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  383. case DemodLSB:
  384. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  385. e.lastDemodUsedGPU = true
  386. return gpuAudio, inputRate, nil
  387. }
  388. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  389. case DemodCW:
  390. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  391. e.lastDemodUsedGPU = true
  392. return gpuAudio, inputRate, nil
  393. }
  394. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  395. default:
  396. return nil, 0, errors.New("unsupported demod type")
  397. }
  398. }
  399. func (e *Engine) Close() {
  400. if e == nil {
  401. return
  402. }
  403. if e.dIQIn != nil {
  404. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  405. e.dIQIn = nil
  406. }
  407. if e.dShifted != nil {
  408. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  409. e.dShifted = nil
  410. }
  411. if e.dFiltered != nil {
  412. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  413. e.dFiltered = nil
  414. }
  415. if e.dDecimated != nil {
  416. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  417. e.dDecimated = nil
  418. }
  419. if e.dAudio != nil {
  420. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  421. e.dAudio = nil
  422. }
  423. e.firTaps = nil
  424. e.cudaReady = false
  425. }