Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

227 linhas
5.7KB

  1. //go:build cufft
  2. package gpudemod
  3. /*
  4. #cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -lcufft64_12 -lcudart64_13 ${SRCDIR}/build/kernels.obj
  5. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  6. #include <cuda_runtime.h>
  7. #include <cufft.h>
  8. typedef struct { float x; float y; } gpud_float2;
  9. static int gpud_cuda_malloc(void **ptr, size_t bytes) {
  10. return (int)cudaMalloc(ptr, bytes);
  11. }
  12. static int gpud_cuda_free(void *ptr) {
  13. return (int)cudaFree(ptr);
  14. }
  15. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) {
  16. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
  17. }
  18. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) {
  19. return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
  20. }
  21. static int gpud_device_sync() {
  22. return (int)cudaDeviceSynchronize();
  23. }
  24. extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  25. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
  26. return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
  27. }
  28. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
  29. // TODO(phase2): replace with real CUDA kernel launch.
  30. (void)in; (void)out; (void)n;
  31. return -1;
  32. }
  33. */
  34. import "C"
  35. import (
  36. "errors"
  37. "fmt"
  38. "math"
  39. "unsafe"
  40. "sdr-visual-suite/internal/demod"
  41. "sdr-visual-suite/internal/dsp"
  42. )
  43. type DemodType int
  44. const (
  45. DemodNFM DemodType = iota
  46. DemodWFM
  47. DemodAM
  48. DemodUSB
  49. DemodLSB
  50. DemodCW
  51. )
  52. type Engine struct {
  53. maxSamples int
  54. sampleRate int
  55. phase float64
  56. bfoPhase float64
  57. firTaps []float32
  58. cudaReady bool
  59. dIQIn *C.gpud_float2
  60. dShifted *C.gpud_float2
  61. dAudio *C.float
  62. iqBytes C.size_t
  63. audioBytes C.size_t
  64. }
  65. func Available() bool {
  66. var count C.int
  67. if C.cudaGetDeviceCount(&count) != C.cudaSuccess {
  68. return false
  69. }
  70. return count > 0
  71. }
  72. func New(maxSamples int, sampleRate int) (*Engine, error) {
  73. if maxSamples <= 0 {
  74. return nil, errors.New("invalid maxSamples")
  75. }
  76. if sampleRate <= 0 {
  77. return nil, errors.New("invalid sampleRate")
  78. }
  79. if !Available() {
  80. return nil, errors.New("cuda device not available")
  81. }
  82. e := &Engine{
  83. maxSamples: maxSamples,
  84. sampleRate: sampleRate,
  85. cudaReady: true,
  86. iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})),
  87. audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))),
  88. }
  89. var ptr unsafe.Pointer
  90. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  91. e.Close()
  92. return nil, errors.New("cudaMalloc dIQIn failed")
  93. }
  94. e.dIQIn = (*C.gpud_float2)(ptr)
  95. ptr = nil
  96. if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess {
  97. e.Close()
  98. return nil, errors.New("cudaMalloc dShifted failed")
  99. }
  100. e.dShifted = (*C.gpud_float2)(ptr)
  101. ptr = nil
  102. if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess {
  103. e.Close()
  104. return nil, errors.New("cudaMalloc dAudio failed")
  105. }
  106. e.dAudio = (*C.float)(ptr)
  107. return e, nil
  108. }
  109. func (e *Engine) SetFIR(taps []float32) {
  110. if len(taps) == 0 {
  111. e.firTaps = nil
  112. return
  113. }
  114. e.firTaps = append(e.firTaps[:0], taps...)
  115. }
  116. func phaseStatus() string {
  117. return "phase1b-launch-boundary"
  118. }
  119. func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
  120. if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
  121. return nil, false
  122. }
  123. bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
  124. if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
  125. return nil, false
  126. }
  127. phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
  128. if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
  129. return nil, false
  130. }
  131. if C.gpud_device_sync() != C.cudaSuccess {
  132. return nil, false
  133. }
  134. out := make([]complex64, len(iq))
  135. if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
  136. return nil, false
  137. }
  138. e.phase += phaseInc * float64(len(iq))
  139. return out, true
  140. }
  141. func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
  142. if e == nil {
  143. return nil, 0, errors.New("nil CUDA demod engine")
  144. }
  145. if !e.cudaReady {
  146. return nil, 0, errors.New("cuda demod engine is not initialized")
  147. }
  148. if len(iq) == 0 {
  149. return nil, 0, nil
  150. }
  151. if len(iq) > e.maxSamples {
  152. return nil, 0, errors.New("sample count exceeds engine capacity")
  153. }
  154. if mode != DemodNFM {
  155. return nil, 0, errors.New("CUDA demod phase 1 currently supports NFM only")
  156. }
  157. // Real CUDA boundary is now present. If the launch wrappers are not yet backed
  158. // by actual kernels, we fall back to the existing CPU DSP path below.
  159. _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz)
  160. shifted, ok := e.tryCUDAFreqShift(iq, offsetHz)
  161. if !ok {
  162. shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz)
  163. }
  164. cutoff := bw / 2
  165. if cutoff < 200 {
  166. cutoff = 200
  167. }
  168. taps := e.firTaps
  169. if len(taps) == 0 {
  170. base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  171. taps = append(make([]float32, 0, len(base)), base...)
  172. }
  173. filtered := dsp.ApplyFIR(shifted, taps)
  174. outRate := demod.NFM{}.OutputSampleRate()
  175. decim := int(math.Round(float64(e.sampleRate) / float64(outRate)))
  176. if decim < 1 {
  177. decim = 1
  178. }
  179. dec := dsp.Decimate(filtered, decim)
  180. inputRate := e.sampleRate / decim
  181. audio := demod.NFM{}.Demod(dec, inputRate)
  182. return audio, inputRate, nil
  183. }
  184. func (e *Engine) Close() {
  185. if e == nil {
  186. return
  187. }
  188. if e.dIQIn != nil {
  189. _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn))
  190. e.dIQIn = nil
  191. }
  192. if e.dShifted != nil {
  193. _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted))
  194. e.dShifted = nil
  195. }
  196. if e.dAudio != nil {
  197. _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio))
  198. e.dAudio = nil
  199. }
  200. e.firTaps = nil
  201. e.cudaReady = false
  202. }