Wideband autonomous SDR analysis engine forked from sdr-visual-suite
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

769 satır
25KB

  1. //go:build cufft && windows
  2. package gpudemod
  3. /*
  4. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  5. #cgo windows LDFLAGS: -lcudart64_13 -lkernel32
  6. #include <windows.h>
  7. #include <stdlib.h>
  8. #include <cuda_runtime.h>
  9. typedef struct { float x; float y; } gpud_float2;
  10. typedef void* gpud_stream_handle;
  11. typedef int (__stdcall *gpud_stream_create_fn)(gpud_stream_handle* out);
  12. typedef int (__stdcall *gpud_stream_destroy_fn)(gpud_stream_handle stream);
  13. typedef int (__stdcall *gpud_stream_sync_fn)(gpud_stream_handle stream);
  14. typedef int (__stdcall *gpud_upload_fir_taps_fn)(const float* taps, int n);
  15. typedef int (__stdcall *gpud_launch_freq_shift_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  16. typedef int (__stdcall *gpud_launch_fm_discrim_fn)(const gpud_float2* in, float* out, int n);
  17. typedef int (__stdcall *gpud_launch_fir_stream_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps, gpud_stream_handle stream);
  18. typedef int (__stdcall *gpud_launch_fir_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  19. typedef int (__stdcall *gpud_launch_decimate_stream_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor, gpud_stream_handle stream);
  20. typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  21. typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n);
  22. typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  23. static HMODULE gpud_mod = NULL;
  24. static gpud_upload_fir_taps_fn gpud_p_upload_fir_taps = NULL;
  25. static gpud_launch_freq_shift_fn gpud_p_launch_freq_shift = NULL;
  26. static gpud_launch_fm_discrim_fn gpud_p_launch_fm_discrim = NULL;
  27. static gpud_launch_fir_fn gpud_p_launch_fir = NULL;
  28. static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL;
  29. static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL;
  30. static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL;
  31. static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); }
  32. static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); }
  33. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); }
  34. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); }
  35. static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); }
  36. static int gpud_load_library(const char* path) {
  37. if (gpud_mod != NULL) return 0;
  38. gpud_mod = LoadLibraryA(path);
  39. if (gpud_mod == NULL) return -1;
  40. gpud_p_upload_fir_taps = (gpud_upload_fir_taps_fn)GetProcAddress(gpud_mod, "gpud_upload_fir_taps_cuda");
  41. gpud_p_launch_freq_shift = (gpud_launch_freq_shift_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_cuda");
  42. gpud_p_launch_fm_discrim = (gpud_launch_fm_discrim_fn)GetProcAddress(gpud_mod, "gpud_launch_fm_discrim_cuda");
  43. gpud_p_launch_fir = (gpud_launch_fir_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_cuda");
  44. gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda");
  45. gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda");
  46. gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda");
  47. if (!gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) {
  48. FreeLibrary(gpud_mod);
  49. gpud_mod = NULL;
  50. return -2;
  51. }
  52. return 0;
  53. }
  54. static int gpud_upload_fir_taps(const float* taps, int n) {
  55. if (!gpud_p_upload_fir_taps) return -1;
  56. return gpud_p_upload_fir_taps(taps, n);
  57. }
  58. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  59. if (!gpud_p_launch_freq_shift) return -1;
  60. return gpud_p_launch_freq_shift(in, out, n, phase_inc, phase_start);
  61. }
  62. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  63. if (!gpud_p_launch_fm_discrim) return -1;
  64. return gpud_p_launch_fm_discrim(in, out, n);
  65. }
  66. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) {
  67. if (!gpud_p_launch_fir) return -1;
  68. return gpud_p_launch_fir(in, out, n, num_taps);
  69. }
  70. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) {
  71. if (!gpud_p_launch_decimate) return -1;
  72. return gpud_p_launch_decimate(in, out, n_out, factor);
  73. }
  74. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) {
  75. if (!gpud_p_launch_am_envelope) return -1;
  76. return gpud_p_launch_am_envelope(in, out, n);
  77. }
  78. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) {
  79. if (!gpud_p_launch_ssb_product) return -1;
  80. return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start);
  81. }
  82. */
  83. import "C"
  84. import (
  85. "errors"
  86. "fmt"
  87. "math"
  88. "os"
  89. "path/filepath"
  90. "sync"
  91. "unsafe"
  92. "sdr-visual-suite/internal/demod"
  93. "sdr-visual-suite/internal/dsp"
  94. )
  // DemodType selects the demodulator applied after shift, filter and decimate.
  type DemodType int

  // Supported demodulation modes. In this file DemodWFM targets a 192000 Hz
  // output rate and every other mode targets 48000 Hz.
  const (
      DemodNFM DemodType = iota // FM discriminator path
      DemodWFM                  // FM discriminator path, higher output rate
      DemodAM                   // envelope path
      DemodUSB                  // SSB product path, +700 Hz BFO
      DemodLSB                  // SSB product path, -700 Hz BFO
      DemodCW                   // SSB product path, +700 Hz BFO
  )
  104. var loadOnce sync.Once
  105. var loadErr error
  106. func ensureDLLLoaded() error {
  107. loadOnce.Do(func() {
  108. candidates := []string{}
  109. if exe, err := os.Executable(); err == nil {
  110. dir := filepath.Dir(exe)
  111. candidates = append(candidates, filepath.Join(dir, "gpudemod_kernels.dll"))
  112. }
  113. if wd, err := os.Getwd(); err == nil {
  114. candidates = append(candidates,
  115. filepath.Join(wd, "gpudemod_kernels.dll"),
  116. filepath.Join(wd, "internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"),
  117. )
  118. }
  119. if env := os.Getenv("GPUMOD_DLL"); env != "" {
  120. candidates = append([]string{env}, candidates...)
  121. }
  122. seen := map[string]bool{}
  123. for _, p := range candidates {
  124. if p == "" || seen[p] {
  125. continue
  126. }
  127. seen[p] = true
  128. if _, err := os.Stat(p); err == nil {
  129. cp := C.CString(p)
  130. res := C.gpud_load_library(cp)
  131. C.free(unsafe.Pointer(cp))
  132. if res == 0 {
  133. loadErr = nil
  134. fmt.Fprintf(os.Stderr, "gpudemod: loaded DLL %s\n", p)
  135. return
  136. }
  137. loadErr = fmt.Errorf("failed to load gpudemod DLL: %s (code %d)", p, int(res))
  138. fmt.Fprintf(os.Stderr, "gpudemod: DLL load failed for %s (code %d)\n", p, int(res))
  139. }
  140. }
  141. if loadErr == nil {
  142. loadErr = errors.New("gpudemod_kernels.dll not found")
  143. fmt.Fprintln(os.Stderr, "gpudemod: gpudemod_kernels.dll not found in search paths")
  144. }
  145. })
  146. return loadErr
  147. }
  148. type Engine struct {
  149. maxSamples int
  150. sampleRate int
  151. phase float64
  152. bfoPhase float64
  153. firTaps []float32
  154. cudaReady bool
  155. lastShiftUsedGPU bool
  156. lastFIRUsedGPU bool
  157. lastDecimUsedGPU bool
  158. lastDemodUsedGPU bool
  159. dIQIn *C.gpud_float2
  160. dShifted *C.gpud_float2
  161. dFiltered *C.gpud_float2
  162. dDecimated *C.gpud_float2
  163. dAudio *C.float
  164. iqBytes C.size_t
  165. audioBytes C.size_t
  166. }
  167. func Available() bool {
  168. if ensureDLLLoaded() != nil {
  169. return false
  170. }
  171. var count C.int
  172. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  173. return false
  174. }
  175. return count > 0
  176. }
  177. func New(maxSamples int, sampleRate int) (*Engine, error) {
  178. if maxSamples <= 0 {
  179. return nil, errors.New("invalid maxSamples")
  180. }
  181. if sampleRate <= 0 {
  182. return nil, errors.New("invalid sampleRate")
  183. }
  184. if err := ensureDLLLoaded(); err != nil {
  185. return nil, err
  186. }
  187. if !Available() {
  188. return nil, errors.New("cuda device not available")
  189. }
  190. e := &Engine{
  191. maxSamples: maxSamples,
  192. sampleRate: sampleRate,
  193. cudaReady: true,
  194. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  195. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  196. }
  197. var ptr unsafe.Pointer
  198. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  199. e.Close()
  200. return nil, errors.New("cudaMalloc dIQIn failed")
  201. }
  202. e.dIQIn = (*C.gpud_float2)(ptr)
  203. ptr = nil
  204. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  205. e.Close()
  206. return nil, errors.New("cudaMalloc dShifted failed")
  207. }
  208. e.dShifted = (*C.gpud_float2)(ptr)
  209. ptr = nil
  210. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  211. e.Close()
  212. return nil, errors.New("cudaMalloc dFiltered failed")
  213. }
  214. e.dFiltered = (*C.gpud_float2)(ptr)
  215. ptr = nil
  216. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  217. e.Close()
  218. return nil, errors.New("cudaMalloc dDecimated failed")
  219. }
  220. e.dDecimated = (*C.gpud_float2)(ptr)
  221. ptr = nil
  222. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  223. e.Close()
  224. return nil, errors.New("cudaMalloc dAudio failed")
  225. }
  226. e.dAudio = (*C.float)(ptr)
  227. return e, nil
  228. }
  229. func (e *Engine) SetFIR(taps []float32) {
  230. if len(taps) == 0 {
  231. e.firTaps = nil
  232. return
  233. }
  234. if len(taps) > 256 {
  235. taps = taps[:256]
  236. }
  237. e.firTaps = append(e.firTaps[:0], taps...)
  238. if e.cudaReady {
  239. _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps)))
  240. }
  241. }
  242. func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU }
  243. func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU }
  244. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  245. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  246. return nil, false
  247. }
  248. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  249. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  250. return nil, false
  251. }
  252. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  253. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  254. return nil, false
  255. }
  256. if C.gpud_device_sync() != C.cudaSuccess {
  257. return nil, false
  258. }
  259. out := make([]complex64, len(iq))
  260. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  261. return nil, false
  262. }
  263. e.phase += phaseInc * float64(len(iq))
  264. return out, true
  265. }
  266. func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) {
  267. if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil {
  268. return nil, false
  269. }
  270. iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  271. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess {
  272. return nil, false
  273. }
  274. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 {
  275. return nil, false
  276. }
  277. if C.gpud_device_sync() != C.cudaSuccess {
  278. return nil, false
  279. }
  280. out := make([]complex64, len(iq))
  281. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess {
  282. return nil, false
  283. }
  284. return out, true
  285. }
  286. func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) {
  287. if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil {
  288. return nil, false
  289. }
  290. nOut := len(filtered) / factor
  291. if nOut <= 0 {
  292. return nil, false
  293. }
  294. iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0)))
  295. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess {
  296. return nil, false
  297. }
  298. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 {
  299. return nil, false
  300. }
  301. if C.gpud_device_sync() != C.cudaSuccess {
  302. return nil, false
  303. }
  304. out := make([]complex64, nOut)
  305. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  306. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  307. return nil, false
  308. }
  309. return out, true
  310. }
  311. func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) {
  312. if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil {
  313. return nil, false
  314. }
  315. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  316. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  317. return nil, false
  318. }
  319. if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  320. return nil, false
  321. }
  322. if C.gpud_device_sync() != C.cudaSuccess {
  323. return nil, false
  324. }
  325. out := make([]float32, len(shifted)-1)
  326. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  327. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  328. return nil, false
  329. }
  330. return out, true
  331. }
  332. func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) {
  333. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  334. return nil, false
  335. }
  336. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  337. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  338. return nil, false
  339. }
  340. if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 {
  341. return nil, false
  342. }
  343. if C.gpud_device_sync() != C.cudaSuccess {
  344. return nil, false
  345. }
  346. out := make([]float32, len(shifted))
  347. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  348. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  349. return nil, false
  350. }
  351. return out, true
  352. }
  353. func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) {
  354. if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil {
  355. return nil, false
  356. }
  357. iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0)))
  358. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess {
  359. return nil, false
  360. }
  361. phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  362. if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 {
  363. return nil, false
  364. }
  365. if C.gpud_device_sync() != C.cudaSuccess {
  366. return nil, false
  367. }
  368. out := make([]float32, len(shifted))
  369. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  370. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  371. return nil, false
  372. }
  373. e.bfoPhase += phaseInc * float64(len(shifted))
  374. return out, true
  375. }
  376. func (e *Engine) ShiftFilterDecimate(iq []complex64, offsetHz float64, bw float64, outRate int) ([]complex64, int, error) {
  377. if e == nil {
  378. return nil, 0, errors.New("nil CUDA demod engine")
  379. }
  380. if !e.cudaReady {
  381. return nil, 0, errors.New("cuda demod engine is not initialized")
  382. }
  383. if len(iq) == 0 {
  384. return nil, 0, nil
  385. }
  386. if len(iq) > e.maxSamples {
  387. return nil, 0, errors.New("sample count exceeds engine capacity")
  388. }
  389. if outRate <= 0 {
  390. return nil, 0, errors.New("invalid output sample rate")
  391. }
  392. e.lastShiftUsedGPU = false
  393. e.lastFIRUsedGPU = false
  394. e.lastDecimUsedGPU = false
  395. e.lastDemodUsedGPU = false
  396. cutoff := bw / 2
  397. if cutoff < 200 {
  398. cutoff = 200
  399. }
  400. taps := e.firTaps
  401. if len(taps) == 0 {
  402. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  403. taps = make([]float32, len(base64))
  404. for i, v := range base64 {
  405. taps[i] = float32(v)
  406. }
  407. e.SetFIR(taps)
  408. }
  409. if len(taps) == 0 {
  410. return nil, 0, errors.New("no FIR taps configured")
  411. }
  412. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  413. if decim < 1 {
  414. decim = 1
  415. }
  416. n := len(iq)
  417. nOut := n / decim
  418. if nOut <= 0 {
  419. return nil, 0, errors.New("not enough output samples after decimation")
  420. }
  421. bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
  422. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
  423. return nil, 0, errors.New("cudaMemcpy H2D failed")
  424. }
  425. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  426. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
  427. return nil, 0, errors.New("gpu freq shift failed")
  428. }
  429. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
  430. return nil, 0, errors.New("gpu FIR failed")
  431. }
  432. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
  433. return nil, 0, errors.New("gpu decimate failed")
  434. }
  435. if C.gpud_device_sync() != C.cudaSuccess {
  436. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  437. }
  438. out := make([]complex64, nOut)
  439. outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0)))
  440. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess {
  441. return nil, 0, errors.New("cudaMemcpy D2H failed")
  442. }
  443. e.phase += phaseInc * float64(n)
  444. e.lastShiftUsedGPU = true
  445. e.lastFIRUsedGPU = true
  446. e.lastDecimUsedGPU = true
  447. return out, e.sampleRate / decim, nil
  448. }
  449. func (e *Engine) DemodFused(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  450. if e == nil {
  451. return nil, 0, errors.New("nil CUDA demod engine")
  452. }
  453. if !e.cudaReady {
  454. return nil, 0, errors.New("cuda demod engine is not initialized")
  455. }
  456. if len(iq) == 0 {
  457. return nil, 0, nil
  458. }
  459. e.lastShiftUsedGPU = false
  460. e.lastFIRUsedGPU = false
  461. e.lastDecimUsedGPU = false
  462. e.lastDemodUsedGPU = false
  463. if len(iq) > e.maxSamples {
  464. return nil, 0, errors.New("sample count exceeds engine capacity")
  465. }
  466. var outRate int
  467. switch mode {
  468. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  469. outRate = 48000
  470. case DemodWFM:
  471. outRate = 192000
  472. default:
  473. return nil, 0, errors.New("unsupported demod type")
  474. }
  475. cutoff := bw / 2
  476. if cutoff < 200 {
  477. cutoff = 200
  478. }
  479. taps := e.firTaps
  480. if len(taps) == 0 {
  481. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  482. taps = make([]float32, len(base64))
  483. for i, v := range base64 {
  484. taps[i] = float32(v)
  485. }
  486. e.SetFIR(taps)
  487. }
  488. if len(taps) == 0 {
  489. return nil, 0, errors.New("no FIR taps configured")
  490. }
  491. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  492. if decim < 1 {
  493. decim = 1
  494. }
  495. n := len(iq)
  496. nOut := n / decim
  497. if nOut <= 1 {
  498. return nil, 0, errors.New("not enough output samples after decimation")
  499. }
  500. bytesIn := C.size_t(n) * C.size_t(unsafe.Sizeof(complex64(0)))
  501. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != C.cudaSuccess {
  502. return nil, 0, errors.New("cudaMemcpy H2D failed")
  503. }
  504. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  505. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(n), C.double(phaseInc), C.double(e.phase)) != 0 {
  506. return nil, 0, errors.New("gpu freq shift failed")
  507. }
  508. if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(n), C.int(len(taps))) != 0 {
  509. return nil, 0, errors.New("gpu FIR failed")
  510. }
  511. if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(decim)) != 0 {
  512. return nil, 0, errors.New("gpu decimate failed")
  513. }
  514. e.lastShiftUsedGPU = true
  515. e.lastFIRUsedGPU = true
  516. e.lastDecimUsedGPU = true
  517. e.lastDemodUsedGPU = false
  518. switch mode {
  519. case DemodNFM, DemodWFM:
  520. if C.gpud_launch_fm_discrim(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  521. return nil, 0, errors.New("gpu FM discrim failed")
  522. }
  523. out := make([]float32, nOut-1)
  524. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  525. if C.gpud_device_sync() != C.cudaSuccess {
  526. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  527. }
  528. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  529. return nil, 0, errors.New("cudaMemcpy D2H failed")
  530. }
  531. e.phase += phaseInc * float64(n)
  532. e.lastDemodUsedGPU = true
  533. return out, e.sampleRate / decim, nil
  534. case DemodAM:
  535. if C.gpud_launch_am_envelope(e.dDecimated, e.dAudio, C.int(nOut)) != 0 {
  536. return nil, 0, errors.New("gpu AM envelope failed")
  537. }
  538. out := make([]float32, nOut)
  539. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  540. if C.gpud_device_sync() != C.cudaSuccess {
  541. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  542. }
  543. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  544. return nil, 0, errors.New("cudaMemcpy D2H failed")
  545. }
  546. e.phase += phaseInc * float64(n)
  547. e.lastDemodUsedGPU = true
  548. return out, e.sampleRate / decim, nil
  549. case DemodUSB, DemodLSB, DemodCW:
  550. bfoHz := 700.0
  551. if mode == DemodLSB {
  552. bfoHz = -700.0
  553. }
  554. phaseBFO := 2.0 * math.Pi * bfoHz / float64(e.sampleRate)
  555. if C.gpud_launch_ssb_product(e.dDecimated, e.dAudio, C.int(nOut), C.double(phaseBFO), C.double(e.bfoPhase)) != 0 {
  556. return nil, 0, errors.New("gpu SSB product failed")
  557. }
  558. out := make([]float32, nOut)
  559. outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0)))
  560. if C.gpud_device_sync() != C.cudaSuccess {
  561. return nil, 0, errors.New("cudaDeviceSynchronize failed")
  562. }
  563. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess {
  564. return nil, 0, errors.New("cudaMemcpy D2H failed")
  565. }
  566. e.phase += phaseInc * float64(n)
  567. e.bfoPhase += phaseBFO * float64(nOut)
  568. e.lastDemodUsedGPU = true
  569. return out, e.sampleRate / decim, nil
  570. default:
  571. return nil, 0, errors.New("unsupported demod type")
  572. }
  573. }
  574. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  575. if e == nil {
  576. return nil, 0, errors.New("nil CUDA demod engine")
  577. }
  578. if !e.cudaReady {
  579. return nil, 0, errors.New("cuda demod engine is not initialized")
  580. }
  581. if len(iq) == 0 {
  582. return nil, 0, nil
  583. }
  584. if len(iq) > e.maxSamples {
  585. return nil, 0, errors.New("sample count exceeds engine capacity")
  586. }
  587. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  588. e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3)
  589. if !e.lastShiftUsedGPU {
  590. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  591. }
  592. var outRate int
  593. switch mode {
  594. case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW:
  595. outRate = 48000
  596. case DemodWFM:
  597. outRate = 192000
  598. default:
  599. return nil, 0, errors.New("unsupported demod type")
  600. }
  601. cutoff := bw / 2
  602. if cutoff < 200 {
  603. cutoff = 200
  604. }
  605. taps := e.firTaps
  606. if len(taps) == 0 {
  607. base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  608. taps = make([]float32, len(base64))
  609. for i, v := range base64 {
  610. taps[i] = float32(v)
  611. }
  612. e.SetFIR(taps)
  613. }
  614. filtered, ok := e.tryCUDAFIR(shifted, len(taps))
  615. if ok {
  616. if validationEnabled() {
  617. e.lastFIRUsedGPU = ValidateFIR(shifted, taps, filtered, 1e-3)
  618. if !e.lastFIRUsedGPU {
  619. ftaps := make([]float64, len(taps))
  620. for i, v := range taps {
  621. ftaps[i] = float64(v)
  622. }
  623. filtered = dsp.ApplyFIR(shifted, ftaps)
  624. }
  625. } else {
  626. e.lastFIRUsedGPU = true
  627. }
  628. }
  629. if filtered == nil {
  630. ftaps := make([]float64, len(taps))
  631. for i, v := range taps {
  632. ftaps[i] = float64(v)
  633. }
  634. filtered = dsp.ApplyFIR(shifted, ftaps)
  635. }
  636. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  637. if decim < 1 {
  638. decim = 1
  639. }
  640. dec, ok := e.tryCUDADecimate(filtered, decim)
  641. if ok {
  642. if validationEnabled() {
  643. e.lastDecimUsedGPU = ValidateDecimate(filtered, decim, dec, 1e-3)
  644. if !e.lastDecimUsedGPU {
  645. dec = dsp.Decimate(filtered, decim)
  646. }
  647. } else {
  648. e.lastDecimUsedGPU = true
  649. }
  650. }
  651. if dec == nil {
  652. dec = dsp.Decimate(filtered, decim)
  653. }
  654. inputRate := e.sampleRate / decim
  655. e.lastDemodUsedGPU = false
  656. switch mode {
  657. case DemodNFM:
  658. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  659. e.lastDemodUsedGPU = true
  660. return gpuAudio, inputRate, nil
  661. }
  662. return demod.NFM{}.Demod(dec, inputRate), inputRate, nil
  663. case DemodWFM:
  664. if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok {
  665. e.lastDemodUsedGPU = true
  666. return gpuAudio, inputRate, nil
  667. }
  668. return demod.WFM{}.Demod(dec, inputRate), inputRate, nil
  669. case DemodAM:
  670. if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok {
  671. e.lastDemodUsedGPU = true
  672. return gpuAudio, inputRate, nil
  673. }
  674. return demod.AM{}.Demod(dec, inputRate), inputRate, nil
  675. case DemodUSB:
  676. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  677. e.lastDemodUsedGPU = true
  678. return gpuAudio, inputRate, nil
  679. }
  680. return demod.USB{}.Demod(dec, inputRate), inputRate, nil
  681. case DemodLSB:
  682. if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok {
  683. e.lastDemodUsedGPU = true
  684. return gpuAudio, inputRate, nil
  685. }
  686. return demod.LSB{}.Demod(dec, inputRate), inputRate, nil
  687. case DemodCW:
  688. if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok {
  689. e.lastDemodUsedGPU = true
  690. return gpuAudio, inputRate, nil
  691. }
  692. return demod.CW{}.Demod(dec, inputRate), inputRate, nil
  693. default:
  694. return nil, 0, errors.New("unsupported demod type")
  695. }
  696. }
  697. func (e *Engine) Close() {
  698. if e == nil {
  699. return
  700. }
  701. if e.dIQIn != nil {
  702. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  703. e.dIQIn = nil
  704. }
  705. if e.dShifted != nil {
  706. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  707. e.dShifted = nil
  708. }
  709. if e.dFiltered != nil {
  710. _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered))
  711. e.dFiltered = nil
  712. }
  713. if e.dDecimated != nil {
  714. _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated))
  715. e.dDecimated = nil
  716. }
  717. if e.dAudio != nil {
  718. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  719. e.dAudio = nil
  720. }
  721. e.firTaps = nil
  722. e.cudaReady = false
  723. }