Wideband autonomous SDR analysis engine forked from sdr-visual-suite
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

180 líneas
4.8KB

  1. //go:build cufft && windows
  2. package gpudemod
  3. /*
  4. #include <cuda_runtime.h>
  5. */
  6. import "C"
  7. import (
  8. "errors"
  9. "math"
  10. "unsafe"
  11. "sdr-visual-suite/internal/dsp"
  12. )
  13. type slotBuffers struct {
  14. dShifted unsafe.Pointer
  15. dFiltered unsafe.Pointer
  16. dDecimated unsafe.Pointer
  17. dTaps unsafe.Pointer
  18. stream streamHandle
  19. }
  20. func (r *BatchRunner) freeSlotBuffers() {
  21. for i := range r.slotBufs {
  22. if r.slotBufs[i].dShifted != nil {
  23. _ = bridgeCudaFree(r.slotBufs[i].dShifted)
  24. r.slotBufs[i].dShifted = nil
  25. }
  26. if r.slotBufs[i].dFiltered != nil {
  27. _ = bridgeCudaFree(r.slotBufs[i].dFiltered)
  28. r.slotBufs[i].dFiltered = nil
  29. }
  30. if r.slotBufs[i].dDecimated != nil {
  31. _ = bridgeCudaFree(r.slotBufs[i].dDecimated)
  32. r.slotBufs[i].dDecimated = nil
  33. }
  34. if r.slotBufs[i].dTaps != nil {
  35. _ = bridgeCudaFree(r.slotBufs[i].dTaps)
  36. r.slotBufs[i].dTaps = nil
  37. }
  38. if r.slotBufs[i].stream != nil {
  39. _ = bridgeStreamDestroy(r.slotBufs[i].stream)
  40. r.slotBufs[i].stream = nil
  41. }
  42. }
  43. r.slotBufs = nil
  44. }
  45. func (r *BatchRunner) allocSlotBuffers(n int) error {
  46. if len(r.slotBufs) == len(r.slots) && len(r.slotBufs) > 0 {
  47. return nil
  48. }
  49. r.freeSlotBuffers()
  50. if len(r.slots) == 0 {
  51. return nil
  52. }
  53. iqBytes := uintptr(n) * unsafe.Sizeof(complex64(0))
  54. tapsBytes := uintptr(256) * unsafe.Sizeof(float32(0))
  55. r.slotBufs = make([]slotBuffers, len(r.slots))
  56. for i := range r.slotBufs {
  57. for _, ptr := range []*unsafe.Pointer{&r.slotBufs[i].dShifted, &r.slotBufs[i].dFiltered, &r.slotBufs[i].dDecimated} {
  58. if bridgeCudaMalloc(ptr, iqBytes) != 0 {
  59. r.freeSlotBuffers()
  60. return errors.New("cudaMalloc slot buffer failed")
  61. }
  62. }
  63. if bridgeCudaMalloc(&r.slotBufs[i].dTaps, tapsBytes) != 0 {
  64. r.freeSlotBuffers()
  65. return errors.New("cudaMalloc slot taps failed")
  66. }
  67. s, res := bridgeStreamCreate()
  68. if res != 0 {
  69. r.freeSlotBuffers()
  70. return errors.New("cudaStreamCreate failed")
  71. }
  72. r.slotBufs[i].stream = s
  73. }
  74. return nil
  75. }
  76. func (r *BatchRunner) shiftFilterDecimateBatchImpl(iq []complex64) ([][]complex64, []int, error) {
  77. e := r.eng
  78. if e == nil || !e.cudaReady {
  79. return nil, nil, ErrUnavailable
  80. }
  81. outs := make([][]complex64, len(r.slots))
  82. rates := make([]int, len(r.slots))
  83. n := len(iq)
  84. if n == 0 {
  85. return outs, rates, nil
  86. }
  87. if err := r.allocSlotBuffers(n); err != nil {
  88. return nil, nil, err
  89. }
  90. bytesIn := uintptr(n) * unsafe.Sizeof(complex64(0))
  91. if bridgeMemcpyH2D(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytesIn) != 0 {
  92. return nil, nil, errors.New("cudaMemcpy H2D failed")
  93. }
  94. for i := range r.slots {
  95. if !r.slots[i].active {
  96. continue
  97. }
  98. nOut, rate, err := r.shiftFilterDecimateSlotParallel(iq, r.slots[i].job, r.slotBufs[i])
  99. if err != nil {
  100. return nil, nil, err
  101. }
  102. r.slots[i].rate = rate
  103. outs[i] = make([]complex64, nOut)
  104. rates[i] = rate
  105. }
  106. for i := range r.slots {
  107. if !r.slots[i].active {
  108. continue
  109. }
  110. buf := r.slotBufs[i]
  111. if bridgeStreamSync(buf.stream) != 0 {
  112. return nil, nil, errors.New("cuda stream sync failed")
  113. }
  114. out := outs[i]
  115. if len(out) == 0 {
  116. continue
  117. }
  118. outBytes := uintptr(len(out)) * unsafe.Sizeof(complex64(0))
  119. if bridgeMemcpyD2H(unsafe.Pointer(&out[0]), buf.dDecimated, outBytes) != 0 {
  120. return nil, nil, errors.New("cudaMemcpy D2H failed")
  121. }
  122. r.slots[i].out = out
  123. }
  124. return outs, rates, nil
  125. }
  126. func (r *BatchRunner) shiftFilterDecimateSlotParallel(iq []complex64, job ExtractJob, buf slotBuffers) (int, int, error) {
  127. e := r.eng
  128. if e == nil || !e.cudaReady {
  129. return 0, 0, ErrUnavailable
  130. }
  131. n := len(iq)
  132. if n == 0 {
  133. return 0, 0, nil
  134. }
  135. cutoff := job.BW / 2
  136. if cutoff < 200 {
  137. cutoff = 200
  138. }
  139. base := dsp.LowpassFIR(cutoff, e.sampleRate, 101)
  140. taps := make([]float32, len(base))
  141. for i, v := range base {
  142. taps[i] = float32(v)
  143. }
  144. if len(taps) == 0 {
  145. return 0, 0, errors.New("no FIR taps configured")
  146. }
  147. tapsBytes := uintptr(len(taps)) * unsafe.Sizeof(float32(0))
  148. if bridgeMemcpyH2D(buf.dTaps, unsafe.Pointer(&taps[0]), tapsBytes) != 0 {
  149. return 0, 0, errors.New("taps H2D failed")
  150. }
  151. decim := int(math.Round(float64(e.sampleRate) / float64(job.OutRate)))
  152. if decim < 1 {
  153. decim = 1
  154. }
  155. nOut := n / decim
  156. if nOut <= 0 {
  157. return 0, 0, errors.New("not enough output samples after decimation")
  158. }
  159. phaseInc := -2.0 * math.Pi * job.OffsetHz / float64(e.sampleRate)
  160. if bridgeLaunchFreqShiftStream(e.dIQIn, (*gpuFloat2)(buf.dShifted), n, phaseInc, e.phase, buf.stream) != 0 {
  161. return 0, 0, errors.New("gpu freq shift failed")
  162. }
  163. if bridgeLaunchFIRv2Stream((*gpuFloat2)(buf.dShifted), (*gpuFloat2)(buf.dFiltered), (*C.float)(buf.dTaps), n, len(taps), buf.stream) != 0 {
  164. return 0, 0, errors.New("gpu FIR v2 failed")
  165. }
  166. if bridgeLaunchDecimateStream((*gpuFloat2)(buf.dFiltered), (*gpuFloat2)(buf.dDecimated), nOut, decim, buf.stream) != 0 {
  167. return 0, 0, errors.New("gpu decimate failed")
  168. }
  169. return nOut, e.sampleRate / decim, nil
  170. }