Wideband autonomous SDR analysis engine forked from sdr-visual-suite
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

678 строки
22KB

  1. //go:build cufft && windows
  2. package gpudemod
  3. /*
  4. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  5. #cgo windows LDFLAGS: -lcudart64_13 -lkernel32
  6. #include <windows.h>
  7. #include <stdlib.h>
  8. #include <cuda_runtime.h>
  9. typedef struct { float x; float y; } gpud_float2;
  10. typedef int (__stdcall *gpud_upload_fir_taps_fn)(const float* taps, int n);
  11. typedef int (__stdcall *gpud_launch_freq_shift_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  12. typedef int (__stdcall *gpud_launch_fm_discrim_fn)(const gpud_float2* in, float* out, int n);
  13. typedef int (__stdcall *gpud_launch_fir_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  14. typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  15. typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n);
  16. typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  17. static HMODULE gpud_mod = NULL;
  18. static gpud_upload_fir_taps_fn gpud_p_upload_fir_taps = NULL;
  19. static gpud_launch_freq_shift_fn gpud_p_launch_freq_shift = NULL;
  20. static gpud_launch_fm_discrim_fn gpud_p_launch_fm_discrim = NULL;
  21. static gpud_launch_fir_fn gpud_p_launch_fir = NULL;
  22. static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL;
  23. static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL;
  24. static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL;
  25. static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); }
  26. static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); }
  27. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); }
  28. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); }
  29. static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); }
  30. static int gpud_load_library(const char* path) {
  31. if (gpud_mod != NULL) return 0;
  32. gpud_mod = LoadLibraryA(path);
  33. if (gpud_mod == NULL) return -1;
  34. gpud_p_upload_fir_taps = (gpud_upload_fir_taps_fn)GetProcAddress(gpud_mod, "gpud_upload_fir_taps_cuda");
  35. gpud_p_launch_freq_shift = (gpud_launch_freq_shift_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_cuda");
  36. gpud_p_launch_fm_discrim = (gpud_launch_fm_discrim_fn)GetProcAddress(gpud_mod, "gpud_launch_fm_discrim_cuda");
  37. gpud_p_launch_fir = (gpud_launch_fir_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_cuda");
  38. gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda");
  39. gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda");
  40. gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda");
  41. if (!gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) {
  42. FreeLibrary(gpud_mod);
  43. gpud_mod = NULL;
  44. return -2;
  45. }
  46. return 0;
  47. }
  48. static int gpud_upload_fir_taps(const float* taps, int n) {
  49. if (!gpud_p_upload_fir_taps) return -1;
  50. return gpud_p_upload_fir_taps(taps, n);
  51. }
  52. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  53. if (!gpud_p_launch_freq_shift) return -1;
  54. return gpud_p_launch_freq_shift(in, out, n, phase_inc, phase_start);
  55. }
  56. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  57. if (!gpud_p_launch_fm_discrim) return -1;
  58. return gpud_p_launch_fm_discrim(in, out, n);
  59. }
  60. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  61. if (!gpud_p_launch_fir) return -1;
  62. return gpud_p_launch_fir(in, out, n, num_taps);
  63. }
  64. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  65. if (!gpud_p_launch_decimate) return -1;
  66. return gpud_p_launch_decimate(in, out, n_out, factor);
  67. }
  68. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  69. if (!gpud_p_launch_am_envelope) return -1;
  70. return gpud_p_launch_am_envelope(in, out, n);
  71. }
  72. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  73. if (!gpud_p_launch_ssb_product) return -1;
  74. return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start);
  75. }
  76. */
  77. import "C"
  78. import (
  79. "errors"
  80. "fmt"
  81. "math"
  82. "os"
  83. "path/filepath"
  84. "sync"
  85. "unsafe"
  86. "sdr-visual-suite/internal/demod"
  87. "sdr-visual-suite/internal/dsp"
  88. )
  89. type DemodType int
  90. const (
  91. DemodNFM DemodType = iota
  92. DemodWFM
  93. DemodAM
  94. DemodUSB
  95. DemodLSB
  96. DemodCW
  97. )
  98. var loadOnce sync.Once
  99. var loadErr error
  100. func ensureDLLLoaded() error {
  101. loadOnce.Do(func() {
  102. candidates := []string{}
  103. if exe, err := os.Executable(); err == nil {
  104. dir := filepath.Dir(exe)
  105. candidates = append(candidates, filepath.Join(dir, "gpudemod_kernels.dll"))
  106. }
  107. if wd, err := os.Getwd(); err == nil {
  108. candidates = append(candidates,
  109. filepath.Join(wd, "gpudemod_kernels.dll"),
  110. filepath.Join(wd, "internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"),
  111. )
  112. }
  113. seen := map[string]bool{}
  114. for _, p := range candidates {
  115. if p == "" || seen[p] {
  116. continue
  117. }
  118. seen[p] = true
  119. if _, err := os.Stat(p); err == nil {
  120. cp := C.CString(p)
  121. res := C.gpud_load_library(cp)
  122. C.free(unsafe.Pointer(cp))
  123. if res == 0 {
  124. loadErr = nil
  125. fmt.Fprintf(os.Stderr, "gpudemod: loaded DLL %s\n", p)
  126. return
  127. }
  128. loadErr = fmt.Errorf("failed to load gpudemod DLL: %s (code %d)", p, int(res))
  129. fmt.Fprintf(os.Stderr, "gpudemod: DLL load failed for %s (code %d)\n", p, int(res))
  130. }
  131. }
  132. if loadErr == nil {
  133. loadErr = errors.New("gpudemod_kernels.dll not found")
  134. fmt.Fprintln(os.Stderr, "gpudemod: gpudemod_kernels.dll not found in search paths")
  135. }
  136. })
  137. return loadErr
  138. }
  139. type Engine struct {
  140. maxSamples int
  141. sampleRate int
  142. phase float64
  143. bfoPhase float64
  144. firTaps []float32
  145. cudaReady bool
  146. lastShiftUsedGPU bool
  147. lastFIRUsedGPU bool
  148. lastDecimUsedGPU bool
  149. lastDemodUsedGPU bool
  150. dIQIn *C.gpud_float2
  151. dShifted *C.gpud_float2
  152. dFiltered *C.gpud_float2
  153. dDecimated *C.gpud_float2
  154. dAudio *C.float
  155. iqBytes C.size_t
  156. audioBytes C.size_t
  157. }
  158. func Available() bool {
  159. if ensureDLLLoaded() != nil {
  160. return false
  161. }
  162. var count C.int
  163. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  164. return false
  165. }
  166. return count > 0
  167. }
  168. func New(maxSamples int, sampleRate int) (*Engine, error) {
  169. if maxSamples <= 0 {
  170. return nil, errors.New("invalid maxSamples")
  171. }
  172. if sampleRate <= 0 {
  173. return nil, errors.New("invalid sampleRate")
  174. }
  175. if err := ensureDLLLoaded(); err != nil {
  176. return nil, err
  177. }
  178. if !Available() {
  179. return nil, errors.New("cuda device not available")
  180. }
  181. e := &Engine{
  182. maxSamples: maxSamples,
  183. sampleRate: sampleRate,
  184. cudaReady: true,
  185. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  186. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  187. }
  188. var ptr unsafe.Pointer
  189. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  190. e.Close()
  191. return nil, errors.New("cudaMalloc dIQIn failed")
  192. }
  193. e.dIQIn = (*C.gpud_float2)(ptr)
  194. ptr = nil
  195. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  196. e.Close()
  197. return nil, errors.New("cudaMalloc dShifted failed")
  198. }
  199. e.dShifted = (*C.gpud_float2)(ptr)
  200. ptr = nil
  201. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  202. e.Close()
  203. return nil, errors.New("cudaMalloc dFiltered failed")
  204. }
  205. e.dFiltered = (*C.gpud_float2)(ptr)
  206. ptr = nil
  207. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  208. e.Close()
  209. return nil, errors.New("cudaMalloc dDecimated failed")
  210. }
  211. e.dDecimated = (*C.gpud_float2)(ptr)
  212. ptr = nil
  213. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  214. e.Close()
  215. return nil, errors.New("cudaMalloc dAudio failed")
  216. }
  217. e.dAudio = (*C.float)(ptr)
  218. return e, nil
  219. }
  220. func (e *Engine) SetFIR(taps []float32) {
  221. if len(taps) == 0 {
  222. e.firTaps = nil
  223. return
  224. }
  225. if len(taps) > 256 {
  226. taps = taps[:256]
  227. }
  228. e.firTaps = append(e.firTaps[:0], taps...)
  229. if e.cudaReady {
  230. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  231. }
  232. }
  233. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  234. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  235. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  236. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  237. return nil, false
  238. }
  239. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  240. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  241. return nil, false
  242. }
  243. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  244. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  245. return nil, false
  246. }
  247. if C.gpud_device_sync() != C.cudaSuccess {
  248. return nil, false
  249. }
  250. out := make([]complex64, len(iq))
  251. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  252. return nil, false
  253. }
  254. e.phase += phaseInc * float64(len(iq))
  255. return out, true
  256. }
  257. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  258. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  259. return nil, false
  260. }
  261. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  262. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  263. return nil, false
  264. }
  265. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  266. return nil, false
  267. }
  268. if C.gpud_device_sync() != C.cudaSuccess {
  269. return nil, false
  270. }
  271. out := make([]complex64, len(iq))
  272. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  273. return nil, false
  274. }
  275. return out, true
  276. }
  277. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  278. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  279. return nil, false
  280. }
  281. nOut := len(filtered) / factor
  282. if nOut <= 0 {
  283. return nil, false
  284. }
  285. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  286. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  287. return nil, false
  288. }
  289. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  290. return nil, false
  291. }
  292. if C.gpud_device_sync() != C.cudaSuccess {
  293. return nil, false
  294. }
  295. out := make([]complex64, nOut)
  296. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  297. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  298. return nil, false
  299. }
  300. return out, true
  301. }
  302. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  303. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  304. return nil, false
  305. }
  306. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  307. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  308. return nil, false
  309. }
  310. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  311. return nil, false
  312. }
  313. if C.gpud_device_sync() != C.cudaSuccess {
  314. return nil, false
  315. }
  316. out := make([]float32, len(shifted)-1)
  317. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  318. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  319. return nil, false
  320. }
  321. return out, true
  322. }
  323. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  324. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  325. return nil, false
  326. }
  327. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  328. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  329. return nil, false
  330. }
  331. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  332. return nil, false
  333. }
  334. if C.gpud_device_sync() != C.cudaSuccess {
  335. return nil, false
  336. }
  337. out := make([]float32, len(shifted))
  338. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  339. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  340. return nil, false
  341. }
  342. return out, true
  343. }
  344. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  345. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  346. return nil, false
  347. }
  348. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  349. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  350. return nil, false
  351. }
  352. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  353. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  354. return nil, false
  355. }
  356. if C.gpud_device_sync() != C.cudaSuccess {
  357. return nil, false
  358. }
  359. out := make([]float32, len(shifted))
  360. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  361. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  362. return nil, false
  363. }
  364. e.bfoPhase += phaseInc * float64(len(shifted))
  365. return out, true
  366. }
  367. func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  368. if e == nil {
  369. return nil, 0, errors.New("nil CUDA demod engine")
  370. }
  371. if !e.cudaReady {
  372. return nil, 0, errors.New("cuda demod engine is not initialized")
  373. }
  374. if len(iq) == 0 {
  375. return nil, 0, nil
  376. }
  377. if len(iq) > e.maxSamples {
  378. return nil, 0, errors.New("sample count exceeds engine capacity")
  379. }
  380. var outRate int
  381. switch mode {
  382. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  383. outRate = 48000
  384. case DemodWFM:
  385. outRate = 192000
  386. default:
  387. return nil, 0, errors.New("unsupported demod type")
  388. }
  389. cutoff := bw / 2
  390. if cutoff < 200 {
  391. cutoff = 200
  392. }
  393. taps := e.firTaps
  394. if len(taps) == 0 {
  395. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  396. taps = make([]float32, len(base64))
  397. for i, v := range base64 {
  398. taps[i] = float32(v)
  399. }
  400. e.SetFIR(taps)
  401. }
  402. if len(taps) == 0 {
  403. return nil, 0, errors.New("no FIR taps configured")
  404. }
  405. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  406. if decim < 1 {
  407. decim = 1
  408. }
  409. n := len(iq)
  410. nOut := n / decim
  411. if nOut <= 1 {
  412. return nil, 0, errors.New("not enough output samples after decimation")
  413. }
  414. bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
  415. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
  416. return nil, 0, errors.New("cudaMemcpy H2D failed")
  417. }
  418. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  419. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
  420. return nil, 0, errors.New("gpu freq shift failed")
  421. }
  422. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
  423. return nil, 0, errors.New("gpu FIR failed")
  424. }
  425. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
  426. return nil, 0, errors.New("gpu decimate failed")
  427. }
  428. e.lastShiftUsedGPU = true
  429. e.lastFIRUsedGPU = true
  430. e.lastDecimUsedGPU = true
  431. e.lastDemodUsedGPU = false
  432. switch mode {
  433. case DemodNFM, DemodWFM:
  434. if C.gpud_launch_fm_discrim(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  435. return nil, 0, errors.New("gpu FM discrim failed")
  436. }
  437. out := make([]float32, nOut-1)
  438. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  439. if C.gpud_device_sync() != C.cudaSuccess {
  440. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  441. }
  442. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  443. return nil, 0, errors.New("cudaMemcpy D2H failed")
  444. }
  445. e.phase += phaseInc * float64(n)
  446. e.lastDemodUsedGPU = true
  447. return out, e.sampleRate / decim, nil
  448. case DemodAM:
  449. if C.gpud_launch_am_envelope(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  450. return nil, 0, errors.New("gpu AM envelope failed")
  451. }
  452. out := make([]float32, nOut)
  453. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  454. if C.gpud_device_sync() != C.cudaSuccess {
  455. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  456. }
  457. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  458. return nil, 0, errors.New("cudaMemcpy D2H failed")
  459. }
  460. e.phase += phaseInc * float64(n)
  461. e.lastDemodUsedGPU = true
  462. return out, e.sampleRate / decim, nil
  463. case DemodUSB, DemodLSB, DemodCW:
  464. bfoHz := 700.0
  465. if mode == DemodLSB {
  466. bfoHz = -700.0
  467. }
  468. phaseBFO := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  469. if C.gpud_launch_ssb_product(e.dDecimated, e.dAudio, C.int(nOut), C.double(phaseBFO), C.double(e.bfoPhase)) != 0 {
  470. return nil, 0, errors.New("gpu SSB product failed")
  471. }
  472. out := make([]float32, nOut)
  473. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  474. if C.gpud_device_sync() != C.cudaSuccess {
  475. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  476. }
  477. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  478. return nil, 0, errors.New("cudaMemcpy D2H failed")
  479. }
  480. e.phase += phaseInc * float64(n)
  481. e.bfoPhase += phaseBFO * float64(nOut)
  482. e.lastDemodUsedGPU = true
  483. return out, e.sampleRate / decim, nil
  484. default:
  485. return nil, 0, errors.New("unsupported demod type")
  486. }
  487. }
  488. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  489. if e == nil {
  490. return nil, 0, errors.New("nil CUDA demod engine")
  491. }
  492. if !e.cudaReady {
  493. return nil, 0, errors.New("cuda demod engine is not initialized")
  494. }
  495. if len(iq) == 0 {
  496. return nil, 0, nil
  497. }
  498. if len(iq) > e.maxSamples {
  499. return nil, 0, errors.New("sample count exceeds engine capacity")
  500. }
  501. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  502. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  503. if !e.lastShiftUsedGPU {
  504. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  505. }
  506. var outRate int
  507. switch mode {
  508. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  509. outRate = 48000
  510. case DemodWFM:
  511. outRate = 192000
  512. default:
  513. return nil, 0, errors.New("unsupported demod type")
  514. }
  515. cutoff := bw / 2
  516. if cutoff < 200 {
  517. cutoff = 200
  518. }
  519. taps := e.firTaps
  520. if len(taps) == 0 {
  521. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  522. taps = make([]float32, len(base64))
  523. for i, v := range base64 {
  524. taps[i] = float32(v)
  525. }
  526. e.SetFIR(taps)
  527. }
  528. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  529. if ok {
  530. if validationEnabled() {
  531. e.lastFIRUsedGPU = ValidateFIR(shifted, taps, filtered, 1e-3)
  532. if !e.lastFIRUsedGPU {
  533. ftaps := make([]float64, len(taps))
  534. for i, v := range taps {
  535. ftaps[i] = float64(v)
  536. }
  537. filtered = dsp.ApplyFIR(shifted, ftaps)
  538. }
  539. } else {
  540. e.lastFIRUsedGPU = true
  541. }
  542. }
  543. if filtered == nil {
  544. ftaps := make([]float64, len(taps))
  545. for i, v := range taps {
  546. ftaps[i] = float64(v)
  547. }
  548. filtered = dsp.ApplyFIR(shifted, ftaps)
  549. }
  550. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  551. if decim < 1 {
  552. decim = 1
  553. }
  554. dec, ok := e.tryCUDADecimate(filtered, decim)
  555. if ok {
  556. if validationEnabled() {
  557. e.lastDecimUsedGPU = ValidateDecimate(filtered, decim, dec, 1e-3)
  558. if !e.lastDecimUsedGPU {
  559. dec = dsp.Decimate(filtered, decim)
  560. }
  561. } else {
  562. e.lastDecimUsedGPU = true
  563. }
  564. }
  565. if dec == nil {
  566. dec = dsp.Decimate(filtered, decim)
  567. }
  568. inputRate := e.sampleRate / decim
  569. e.lastDemodUsedGPU = false
  570. switch mode {
  571. case DemodNFM:
  572. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  573. e.lastDemodUsedGPU = true
  574. return gpuAudio, inputRate, nil
  575. }
  576. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  577. case DemodWFM:
  578. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  579. e.lastDemodUsedGPU = true
  580. return gpuAudio, inputRate, nil
  581. }
  582. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  583. case DemodAM:
  584. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  585. e.lastDemodUsedGPU = true
  586. return gpuAudio, inputRate, nil
  587. }
  588. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  589. case DemodUSB:
  590. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  591. e.lastDemodUsedGPU = true
  592. return gpuAudio, inputRate, nil
  593. }
  594. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  595. case DemodLSB:
  596. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  597. e.lastDemodUsedGPU = true
  598. return gpuAudio, inputRate, nil
  599. }
  600. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  601. case DemodCW:
  602. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  603. e.lastDemodUsedGPU = true
  604. return gpuAudio, inputRate, nil
  605. }
  606. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  607. default:
  608. return nil, 0, errors.New("unsupported demod type")
  609. }
  610. }
  611. func (e *Engine) Close() {
  612. if e == nil {
  613. return
  614. }
  615. if e.dIQIn != nil {
  616. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  617. e.dIQIn = nil
  618. }
  619. if e.dShifted != nil {
  620. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  621. e.dShifted = nil
  622. }
  623. if e.dFiltered != nil {
  624. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  625. e.dFiltered = nil
  626. }
  627. if e.dDecimated != nil {
  628. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  629. e.dDecimated = nil
  630. }
  631. if e.dAudio != nil {
  632. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  633. e.dAudio = nil
  634. }
  635. e.firTaps = nil
  636. e.cudaReady = false
  637. }