Wideband autonomous SDR analysis engine forked from sdr-visual-suite
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

682 wiersze
22KB

  1. //go:build cufft && windows
  2. package gpudemod
  3. /*
  4. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  5. #cgo windows LDFLAGS: -lcudart64_13 -lkernel32
  6. #include <windows.h>
  7. #include <stdlib.h>
  8. #include <cuda_runtime.h>
  9. typedef struct { float x; float y; } gpud_float2;
  10. typedef int (__stdcall *gpud_upload_fir_taps_fn)(const float* taps, int n);
  11. typedef int (__stdcall *gpud_launch_freq_shift_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  12. typedef int (__stdcall *gpud_launch_fm_discrim_fn)(const gpud_float2* in, float* out, int n);
  13. typedef int (__stdcall *gpud_launch_fir_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  14. typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  15. typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n);
  16. typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  17. static HMODULE gpud_mod = NULL;
  18. static gpud_upload_fir_taps_fn gpud_p_upload_fir_taps = NULL;
  19. static gpud_launch_freq_shift_fn gpud_p_launch_freq_shift = NULL;
  20. static gpud_launch_fm_discrim_fn gpud_p_launch_fm_discrim = NULL;
  21. static gpud_launch_fir_fn gpud_p_launch_fir = NULL;
  22. static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL;
  23. static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL;
  24. static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL;
  25. static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); }
  26. static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); }
  27. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); }
  28. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); }
  29. static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); }
  30. static int gpud_load_library(const char* path) {
  31. if (gpud_mod != NULL) return 0;
  32. gpud_mod = LoadLibraryA(path);
  33. if (gpud_mod == NULL) return -1;
  34. gpud_p_upload_fir_taps = (gpud_upload_fir_taps_fn)GetProcAddress(gpud_mod, "gpud_upload_fir_taps_cuda");
  35. gpud_p_launch_freq_shift = (gpud_launch_freq_shift_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_cuda");
  36. gpud_p_launch_fm_discrim = (gpud_launch_fm_discrim_fn)GetProcAddress(gpud_mod, "gpud_launch_fm_discrim_cuda");
  37. gpud_p_launch_fir = (gpud_launch_fir_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_cuda");
  38. gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda");
  39. gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda");
  40. gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda");
  41. if (!gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) {
  42. FreeLibrary(gpud_mod);
  43. gpud_mod = NULL;
  44. return -2;
  45. }
  46. return 0;
  47. }
  48. static int gpud_upload_fir_taps(const float* taps, int n) {
  49. if (!gpud_p_upload_fir_taps) return -1;
  50. return gpud_p_upload_fir_taps(taps, n);
  51. }
  52. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  53. if (!gpud_p_launch_freq_shift) return -1;
  54. return gpud_p_launch_freq_shift(in, out, n, phase_inc, phase_start);
  55. }
  56. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  57. if (!gpud_p_launch_fm_discrim) return -1;
  58. return gpud_p_launch_fm_discrim(in, out, n);
  59. }
  60. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  61. if (!gpud_p_launch_fir) return -1;
  62. return gpud_p_launch_fir(in, out, n, num_taps);
  63. }
  64. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  65. if (!gpud_p_launch_decimate) return -1;
  66. return gpud_p_launch_decimate(in, out, n_out, factor);
  67. }
  68. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  69. if (!gpud_p_launch_am_envelope) return -1;
  70. return gpud_p_launch_am_envelope(in, out, n);
  71. }
  72. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  73. if (!gpud_p_launch_ssb_product) return -1;
  74. return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start);
  75. }
  76. */
  77. import "C"
  78. import (
  79. "errors"
  80. "fmt"
  81. "math"
  82. "os"
  83. "path/filepath"
  84. "sync"
  85. "unsafe"
  86. "sdr-visual-suite/internal/demod"
  87. "sdr-visual-suite/internal/dsp"
  88. )
  89. type DemodType int
  90. const (
  91. DemodNFM DemodType = iota
  92. DemodWFM
  93. DemodAM
  94. DemodUSB
  95. DemodLSB
  96. DemodCW
  97. )
  98. var loadOnce sync.Once
  99. var loadErr error
  100. func ensureDLLLoaded() error {
  101. loadOnce.Do(func() {
  102. candidates := []string{}
  103. if exe, err := os.Executable(); err == nil {
  104. dir := filepath.Dir(exe)
  105. candidates = append(candidates, filepath.Join(dir, "gpudemod_kernels.dll"))
  106. }
  107. if wd, err := os.Getwd(); err == nil {
  108. candidates = append(candidates,
  109. filepath.Join(wd, "gpudemod_kernels.dll"),
  110. filepath.Join(wd, "internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"),
  111. )
  112. }
  113. seen := map[string]bool{}
  114. for _, p := range candidates {
  115. if p == "" || seen[p] {
  116. continue
  117. }
  118. seen[p] = true
  119. if _, err := os.Stat(p); err == nil {
  120. cp := C.CString(p)
  121. res := C.gpud_load_library(cp)
  122. C.free(unsafe.Pointer(cp))
  123. if res == 0 {
  124. loadErr = nil
  125. fmt.Fprintf(os.Stderr, "gpudemod: loaded DLL %s\n", p)
  126. return
  127. }
  128. loadErr = fmt.Errorf("failed to load gpudemod DLL: %s (code %d)", p, int(res))
  129. fmt.Fprintf(os.Stderr, "gpudemod: DLL load failed for %s (code %d)\n", p, int(res))
  130. }
  131. }
  132. if loadErr == nil {
  133. loadErr = errors.New("gpudemod_kernels.dll not found")
  134. fmt.Fprintln(os.Stderr, "gpudemod: gpudemod_kernels.dll not found in search paths")
  135. }
  136. })
  137. return loadErr
  138. }
  139. type Engine struct {
  140. maxSamples int
  141. sampleRate int
  142. phase float64
  143. bfoPhase float64
  144. firTaps []float32
  145. cudaReady bool
  146. lastShiftUsedGPU bool
  147. lastFIRUsedGPU bool
  148. lastDecimUsedGPU bool
  149. lastDemodUsedGPU bool
  150. dIQIn *C.gpud_float2
  151. dShifted *C.gpud_float2
  152. dFiltered *C.gpud_float2
  153. dDecimated *C.gpud_float2
  154. dAudio *C.float
  155. iqBytes C.size_t
  156. audioBytes C.size_t
  157. }
  158. func Available() bool {
  159. if ensureDLLLoaded() != nil {
  160. return false
  161. }
  162. var count C.int
  163. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  164. return false
  165. }
  166. return count > 0
  167. }
  168. func New(maxSamples int, sampleRate int) (*Engine, error) {
  169. if maxSamples <= 0 {
  170. return nil, errors.New("invalid maxSamples")
  171. }
  172. if sampleRate <= 0 {
  173. return nil, errors.New("invalid sampleRate")
  174. }
  175. if err := ensureDLLLoaded(); err != nil {
  176. return nil, err
  177. }
  178. if !Available() {
  179. return nil, errors.New("cuda device not available")
  180. }
  181. e := &Engine{
  182. maxSamples: maxSamples,
  183. sampleRate: sampleRate,
  184. cudaReady: true,
  185. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  186. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  187. }
  188. var ptr unsafe.Pointer
  189. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  190. e.Close()
  191. return nil, errors.New("cudaMalloc dIQIn failed")
  192. }
  193. e.dIQIn = (*C.gpud_float2)(ptr)
  194. ptr = nil
  195. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  196. e.Close()
  197. return nil, errors.New("cudaMalloc dShifted failed")
  198. }
  199. e.dShifted = (*C.gpud_float2)(ptr)
  200. ptr = nil
  201. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  202. e.Close()
  203. return nil, errors.New("cudaMalloc dFiltered failed")
  204. }
  205. e.dFiltered = (*C.gpud_float2)(ptr)
  206. ptr = nil
  207. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  208. e.Close()
  209. return nil, errors.New("cudaMalloc dDecimated failed")
  210. }
  211. e.dDecimated = (*C.gpud_float2)(ptr)
  212. ptr = nil
  213. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  214. e.Close()
  215. return nil, errors.New("cudaMalloc dAudio failed")
  216. }
  217. e.dAudio = (*C.float)(ptr)
  218. return e, nil
  219. }
  220. func (e *Engine) SetFIR(taps []float32) {
  221. if len(taps) == 0 {
  222. e.firTaps = nil
  223. return
  224. }
  225. if len(taps) > 256 {
  226. taps = taps[:256]
  227. }
  228. e.firTaps = append(e.firTaps[:0], taps...)
  229. if e.cudaReady {
  230. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  231. }
  232. }
  233. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  234. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  235. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  236. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  237. return nil, false
  238. }
  239. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  240. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  241. return nil, false
  242. }
  243. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  244. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  245. return nil, false
  246. }
  247. if C.gpud_device_sync() != C.cudaSuccess {
  248. return nil, false
  249. }
  250. out := make([]complex64, len(iq))
  251. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  252. return nil, false
  253. }
  254. e.phase += phaseInc * float64(len(iq))
  255. return out, true
  256. }
  257. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  258. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  259. return nil, false
  260. }
  261. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  262. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  263. return nil, false
  264. }
  265. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  266. return nil, false
  267. }
  268. if C.gpud_device_sync() != C.cudaSuccess {
  269. return nil, false
  270. }
  271. out := make([]complex64, len(iq))
  272. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  273. return nil, false
  274. }
  275. return out, true
  276. }
  277. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  278. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  279. return nil, false
  280. }
  281. nOut := len(filtered) / factor
  282. if nOut <= 0 {
  283. return nil, false
  284. }
  285. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  286. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  287. return nil, false
  288. }
  289. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  290. return nil, false
  291. }
  292. if C.gpud_device_sync() != C.cudaSuccess {
  293. return nil, false
  294. }
  295. out := make([]complex64, nOut)
  296. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  297. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  298. return nil, false
  299. }
  300. return out, true
  301. }
  302. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  303. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  304. return nil, false
  305. }
  306. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  307. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  308. return nil, false
  309. }
  310. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  311. return nil, false
  312. }
  313. if C.gpud_device_sync() != C.cudaSuccess {
  314. return nil, false
  315. }
  316. out := make([]float32, len(shifted)-1)
  317. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  318. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  319. return nil, false
  320. }
  321. return out, true
  322. }
  323. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  324. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  325. return nil, false
  326. }
  327. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  328. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  329. return nil, false
  330. }
  331. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  332. return nil, false
  333. }
  334. if C.gpud_device_sync() != C.cudaSuccess {
  335. return nil, false
  336. }
  337. out := make([]float32, len(shifted))
  338. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  339. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  340. return nil, false
  341. }
  342. return out, true
  343. }
  344. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  345. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  346. return nil, false
  347. }
  348. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  349. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  350. return nil, false
  351. }
  352. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  353. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  354. return nil, false
  355. }
  356. if C.gpud_device_sync() != C.cudaSuccess {
  357. return nil, false
  358. }
  359. out := make([]float32, len(shifted))
  360. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  361. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  362. return nil, false
  363. }
  364. e.bfoPhase += phaseInc * float64(len(shifted))
  365. return out, true
  366. }
  367. func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  368. if e == nil {
  369. return nil, 0, errors.New("nil CUDA demod engine")
  370. }
  371. if !e.cudaReady {
  372. return nil, 0, errors.New("cuda demod engine is not initialized")
  373. }
  374. if len(iq) == 0 {
  375. return nil, 0, nil
  376. }
  377. e.lastShiftUsedGPU = false
  378. e.lastFIRUsedGPU = false
  379. e.lastDecimUsedGPU = false
  380. e.lastDemodUsedGPU = false
  381. if len(iq) > e.maxSamples {
  382. return nil, 0, errors.New("sample count exceeds engine capacity")
  383. }
  384. var outRate int
  385. switch mode {
  386. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  387. outRate = 48000
  388. case DemodWFM:
  389. outRate = 192000
  390. default:
  391. return nil, 0, errors.New("unsupported demod type")
  392. }
  393. cutoff := bw / 2
  394. if cutoff < 200 {
  395. cutoff = 200
  396. }
  397. taps := e.firTaps
  398. if len(taps) == 0 {
  399. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  400. taps = make([]float32, len(base64))
  401. for i, v := range base64 {
  402. taps[i] = float32(v)
  403. }
  404. e.SetFIR(taps)
  405. }
  406. if len(taps) == 0 {
  407. return nil, 0, errors.New("no FIR taps configured")
  408. }
  409. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  410. if decim < 1 {
  411. decim = 1
  412. }
  413. n := len(iq)
  414. nOut := n / decim
  415. if nOut <= 1 {
  416. return nil, 0, errors.New("not enough output samples after decimation")
  417. }
  418. bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
  419. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
  420. return nil, 0, errors.New("cudaMemcpy H2D failed")
  421. }
  422. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  423. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
  424. return nil, 0, errors.New("gpu freq shift failed")
  425. }
  426. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
  427. return nil, 0, errors.New("gpu FIR failed")
  428. }
  429. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
  430. return nil, 0, errors.New("gpu decimate failed")
  431. }
  432. e.lastShiftUsedGPU = true
  433. e.lastFIRUsedGPU = true
  434. e.lastDecimUsedGPU = true
  435. e.lastDemodUsedGPU = false
  436. switch mode {
  437. case DemodNFM, DemodWFM:
  438. if C.gpud_launch_fm_discrim(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  439. return nil, 0, errors.New("gpu FM discrim failed")
  440. }
  441. out := make([]float32, nOut-1)
  442. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  443. if C.gpud_device_sync() != C.cudaSuccess {
  444. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  445. }
  446. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  447. return nil, 0, errors.New("cudaMemcpy D2H failed")
  448. }
  449. e.phase += phaseInc * float64(n)
  450. e.lastDemodUsedGPU = true
  451. return out, e.sampleRate / decim, nil
  452. case DemodAM:
  453. if C.gpud_launch_am_envelope(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  454. return nil, 0, errors.New("gpu AM envelope failed")
  455. }
  456. out := make([]float32, nOut)
  457. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  458. if C.gpud_device_sync() != C.cudaSuccess {
  459. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  460. }
  461. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  462. return nil, 0, errors.New("cudaMemcpy D2H failed")
  463. }
  464. e.phase += phaseInc * float64(n)
  465. e.lastDemodUsedGPU = true
  466. return out, e.sampleRate / decim, nil
  467. case DemodUSB, DemodLSB, DemodCW:
  468. bfoHz := 700.0
  469. if mode == DemodLSB {
  470. bfoHz = -700.0
  471. }
  472. phaseBFO := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  473. if C.gpud_launch_ssb_product(e.dDecimated, e.dAudio, C.int(nOut), C.double(phaseBFO), C.double(e.bfoPhase)) != 0 {
  474. return nil, 0, errors.New("gpu SSB product failed")
  475. }
  476. out := make([]float32, nOut)
  477. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  478. if C.gpud_device_sync() != C.cudaSuccess {
  479. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  480. }
  481. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  482. return nil, 0, errors.New("cudaMemcpy D2H failed")
  483. }
  484. e.phase += phaseInc * float64(n)
  485. e.bfoPhase += phaseBFO * float64(nOut)
  486. e.lastDemodUsedGPU = true
  487. return out, e.sampleRate / decim, nil
  488. default:
  489. return nil, 0, errors.New("unsupported demod type")
  490. }
  491. }
  492. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  493. if e == nil {
  494. return nil, 0, errors.New("nil CUDA demod engine")
  495. }
  496. if !e.cudaReady {
  497. return nil, 0, errors.New("cuda demod engine is not initialized")
  498. }
  499. if len(iq) == 0 {
  500. return nil, 0, nil
  501. }
  502. if len(iq) > e.maxSamples {
  503. return nil, 0, errors.New("sample count exceeds engine capacity")
  504. }
  505. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  506. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  507. if !e.lastShiftUsedGPU {
  508. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  509. }
  510. var outRate int
  511. switch mode {
  512. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  513. outRate = 48000
  514. case DemodWFM:
  515. outRate = 192000
  516. default:
  517. return nil, 0, errors.New("unsupported demod type")
  518. }
  519. cutoff := bw / 2
  520. if cutoff < 200 {
  521. cutoff = 200
  522. }
  523. taps := e.firTaps
  524. if len(taps) == 0 {
  525. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  526. taps = make([]float32, len(base64))
  527. for i, v := range base64 {
  528. taps[i] = float32(v)
  529. }
  530. e.SetFIR(taps)
  531. }
  532. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  533. if ok {
  534. if validationEnabled() {
  535. e.lastFIRUsedGPU = ValidateFIR(shifted, taps, filtered, 1e-3)
  536. if !e.lastFIRUsedGPU {
  537. ftaps := make([]float64, len(taps))
  538. for i, v := range taps {
  539. ftaps[i] = float64(v)
  540. }
  541. filtered = dsp.ApplyFIR(shifted, ftaps)
  542. }
  543. } else {
  544. e.lastFIRUsedGPU = true
  545. }
  546. }
  547. if filtered == nil {
  548. ftaps := make([]float64, len(taps))
  549. for i, v := range taps {
  550. ftaps[i] = float64(v)
  551. }
  552. filtered = dsp.ApplyFIR(shifted, ftaps)
  553. }
  554. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  555. if decim < 1 {
  556. decim = 1
  557. }
  558. dec, ok := e.tryCUDADecimate(filtered, decim)
  559. if ok {
  560. if validationEnabled() {
  561. e.lastDecimUsedGPU = ValidateDecimate(filtered, decim, dec, 1e-3)
  562. if !e.lastDecimUsedGPU {
  563. dec = dsp.Decimate(filtered, decim)
  564. }
  565. } else {
  566. e.lastDecimUsedGPU = true
  567. }
  568. }
  569. if dec == nil {
  570. dec = dsp.Decimate(filtered, decim)
  571. }
  572. inputRate := e.sampleRate / decim
  573. e.lastDemodUsedGPU = false
  574. switch mode {
  575. case DemodNFM:
  576. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  577. e.lastDemodUsedGPU = true
  578. return gpuAudio, inputRate, nil
  579. }
  580. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  581. case DemodWFM:
  582. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  583. e.lastDemodUsedGPU = true
  584. return gpuAudio, inputRate, nil
  585. }
  586. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  587. case DemodAM:
  588. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  589. e.lastDemodUsedGPU = true
  590. return gpuAudio, inputRate, nil
  591. }
  592. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  593. case DemodUSB:
  594. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  595. e.lastDemodUsedGPU = true
  596. return gpuAudio, inputRate, nil
  597. }
  598. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  599. case DemodLSB:
  600. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  601. e.lastDemodUsedGPU = true
  602. return gpuAudio, inputRate, nil
  603. }
  604. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  605. case DemodCW:
  606. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  607. e.lastDemodUsedGPU = true
  608. return gpuAudio, inputRate, nil
  609. }
  610. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  611. default:
  612. return nil, 0, errors.New("unsupported demod type")
  613. }
  614. }
  615. func (e *Engine) Close() {
  616. if e == nil {
  617. return
  618. }
  619. if e.dIQIn != nil {
  620. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  621. e.dIQIn = nil
  622. }
  623. if e.dShifted != nil {
  624. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  625. e.dShifted = nil
  626. }
  627. if e.dFiltered != nil {
  628. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  629. e.dFiltered = nil
  630. }
  631. if e.dDecimated != nil {
  632. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  633. e.dDecimated = nil
  634. }
  635. if e.dAudio != nil {
  636. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  637. e.dAudio = nil
  638. }
  639. e.firTaps = nil
  640. e.cudaReady = false
  641. }