Wideband autonomous SDR analysis engine forked from sdr-visual-suite
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

206 satır
5.2KB

  1. #include <cuda_runtime.h>
  2. #include <math.h>
  3. #if defined(_WIN32)
  4. #define GPUD_API extern "C" __declspec(dllexport)
  5. #define GPUD_CALL __stdcall
  6. #else
  7. #define GPUD_API extern "C"
  8. #define GPUD_CALL
  9. #endif
  /**
   * Frequency-shifts a complex sample stream by rotating every sample.
   *
   * Sample idx is multiplied by exp(j * (phase_start + phase_inc * idx)),
   * where float2.x is the real part and float2.y the imaginary part.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per sample. Threads
   * whose global index is >= n return without touching memory. No shared
   * memory is used.
   *
   * @param in          n input samples (read-only).
   * @param out         n rotated output samples.
   * @param n           number of samples to process.
   * @param phase_inc   phase advance per sample, in radians.
   * @param phase_start phase applied to sample 0, in radians.
   */
  __global__ void gpud_freq_shift_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      const double kTwoPi = 6.283185307179586476925286766559;
      const double kInvTwoPi = 0.15915494309189533576888376337251;

      const double phase = phase_start + phase_inc * (double)idx;

      // The phase grows with idx (and with whatever phase_start the caller has
      // accumulated). Casting a large phase straight to float throws away its
      // fractional part -- a float near 6e6 rad has a spacing of 0.5 rad -- so
      // wrap it into [-pi, pi] while still in double precision and only then
      // narrow it for the single-precision sincosf.
      const double wrapped = fma(-kTwoPi, rint(phase * kInvTwoPi), phase);

      float si, co;
      sincosf((float)wrapped, &si, &co);

      const float2 v = in[idx];
      // Complex multiply (v.x + j v.y) * (co + j si), stored as one float2.
      out[idx] = make_float2(v.x * co - v.y * si, v.x * si + v.y * co);
  }
  /**
   * Exported C entry point that launches gpud_freq_shift_kernel.
   *
   * The kernel is launched with 256-thread blocks and enough blocks to cover
   * n samples, with no stream argument (i.e. on the default stream). No
   * synchronization is performed here.
   *
   * @param in          input samples, forwarded to the kernel (must be
   *                    readable from device code).
   * @param out         output samples, forwarded to the kernel.
   * @param n           number of samples; n <= 0 is a no-op.
   * @param phase_inc   per-sample phase advance in radians.
   * @param phase_start phase of sample 0 in radians.
   * @return 0 when n <= 0, otherwise the value of cudaGetLastError() taken
   *         immediately after the launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_freq_shift_cuda(
      const float2* in,
      float2* out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      // Nothing to process: report success without launching.
      if (n < 1) {
          return 0;
      }

      const int threads_per_block = 256;
      // Ceiling division so a partial final block is still launched.
      const int num_blocks = (n + threads_per_block - 1) / threads_per_block;

      gpud_freq_shift_kernel<<<num_blocks, threads_per_block>>>(
          in, out, n, phase_inc, phase_start);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }
  /**
   * FM discriminator: phase difference between consecutive complex samples.
   *
   * For each i in [0, n - 2] writes out[i] = atan2f(im, re), where
   * re + j*im = in[i + 1] * conj(in[i]). Exactly n - 1 outputs are produced.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per output value.
   * Threads with a global index >= n - 1 do nothing. No shared memory is used.
   *
   * @param in  n complex input samples (x = real, y = imaginary).
   * @param out n - 1 output angles in radians.
   * @param n   number of input samples.
   */
  __global__ void gpud_fm_discrim_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n
  ) {
      const int i = blockIdx.x * blockDim.x + threadIdx.x;

      if (i < n - 1) {
          const float2 older = in[i];
          const float2 newer = in[i + 1];

          // Real and imaginary parts of newer * conj(older).
          const float re = older.x * newer.x + older.y * newer.y;
          const float im = older.x * newer.y - older.y * newer.x;

          out[i] = atan2f(im, re);
      }
  }
  /**
   * Exported C entry point that launches gpud_fm_discrim_kernel.
   *
   * Uses 256-thread blocks. The block count is derived from n (the input
   * length), although the kernel only produces n - 1 outputs; surplus threads
   * exit in the kernel's bounds check. Launched without a stream argument and
   * without any synchronization.
   *
   * @param in  n complex input samples, forwarded to the kernel.
   * @param out destination for n - 1 angles, forwarded to the kernel.
   * @param n   number of input samples; n <= 1 is a no-op.
   * @return 0 when n <= 1, otherwise cudaGetLastError() right after the
   *         launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_fm_discrim_cuda(
      const float2* in,
      float* out,
      int n
  ) {
      // A discriminator needs at least two samples to form one difference.
      if (n < 2) {
          return 0;
      }

      const int threads_per_block = 256;
      const int num_blocks = (n + threads_per_block - 1) / threads_per_block;

      gpud_fm_discrim_kernel<<<num_blocks, threads_per_block>>>(in, out, n);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }
  /**
   * Decimation by sample picking: out[i] = in[i * factor].
   *
   * No filtering is done here; the kernel only copies every factor-th sample.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per output sample.
   * Threads with a global index >= n_out do nothing. No shared memory is used.
   *
   * @param in     input samples; index (n_out - 1) * factor must be readable.
   * @param out    n_out output samples.
   * @param n_out  number of output samples to write.
   * @param factor stride between consecutive picked input samples.
   */
  __global__ void gpud_decimate_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n_out,
      int factor
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n_out) return;

      // idx and factor each fit in an int, but their product is an index into
      // the (factor times longer) input and can exceed INT_MAX. Form it in
      // 64-bit so the source index cannot wrap around.
      const long long src = (long long)idx * (long long)factor;
      out[idx] = in[src];
  }
  // FIR coefficients in constant memory; at most 256 taps. Written from the
  // host with cudaMemcpyToSymbol and indexed by tap number in gpud_fir_kernel.
  __device__ __constant__ float gpud_fir_taps[256];

  /**
   * Real-coefficient FIR filter over complex samples.
   *
   * Computes out[g] = sum over k in [0, num_taps) of gpud_fir_taps[k] * in[g - k],
   * treating input samples with a negative index as zero.
   *
   * Each block stages its inputs in dynamic shared memory: num_taps - 1
   * samples preceding the block followed by one sample per thread.
   *
   * Launch requirements:
   *  - 1-D grid of 1-D blocks, one thread per output sample.
   *  - Dynamic shared memory of at least
   *    (blockDim.x + num_taps - 1) * sizeof(float2) bytes.
   *  - num_taps - 1 <= blockDim.x, because only the first num_taps - 1
   *    threads of a block load the preceding samples.
   *
   * @param in       n input samples (x = real, y = imaginary).
   * @param out      n filtered output samples.
   * @param n        number of samples.
   * @param num_taps number of coefficients read from gpud_fir_taps.
   */
  __global__ void gpud_fir_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      int num_taps
  ) {
      extern __shared__ float2 window[];

      const int tid = threadIdx.x;
      const int sample = blockIdx.x * blockDim.x + tid;
      const int history = num_taps - 1;
      const float2 zero = make_float2(0.0f, 0.0f);

      // Every thread stages its own sample (zero past the end of the input).
      window[history + tid] = (sample < n) ? in[sample] : zero;

      // The first `history` threads also stage the samples that precede this
      // block; positions before the start of the stream are zero-filled.
      if (tid < history) {
          const int before = sample - history;
          if (before >= 0) {
              window[tid] = in[before];
          } else {
              window[tid] = zero;
          }
      }

      // All threads reach this barrier; out-of-range threads leave only after it.
      __syncthreads();

      if (sample < n) {
          float sum_re = 0.0f;
          float sum_im = 0.0f;
          const int newest = history + tid;
          for (int k = 0; k < num_taps; ++k) {
              const float2 x = window[newest - k];
              const float coeff = gpud_fir_taps[k];
              sum_re += x.x * coeff;
              sum_im += x.y * coeff;
          }
          out[sample] = make_float2(sum_re, sum_im);
      }
  }
  /**
   * Copies FIR coefficients into the gpud_fir_taps constant-memory array.
   *
   * The copy uses cudaMemcpyToSymbol with its default offset and direction
   * arguments and writes the first n entries of gpud_fir_taps; entries at
   * index >= n are left as they were.
   *
   * @param taps source coefficients; must not be null.
   * @param n    number of coefficients, 1..256 inclusive.
   * @return -1 when taps is null or n is outside 1..256, otherwise the
   *         cudaMemcpyToSymbol result cast to int.
   */
  GPUD_API int GPUD_CALL gpud_upload_fir_taps_cuda(const float* taps, int n) {
      // Reject anything that would not fit the 256-entry constant array.
      const bool count_ok = (n >= 1) && (n <= 256);
      if (!count_ok || !taps) {
          return -1;
      }

      const size_t byte_count = sizeof(float) * static_cast<size_t>(n);
      return static_cast<int>(cudaMemcpyToSymbol(gpud_fir_taps, taps, byte_count));
  }
  /**
   * Exported C entry point that launches gpud_fir_kernel.
   *
   * Uses 256-thread blocks, enough blocks to cover n samples, and
   * (256 + num_taps - 1) * sizeof(float2) bytes of dynamic shared memory,
   * which is the staging area the kernel indexes. Launched without a stream
   * argument and without any synchronization.
   *
   * @param in       n input samples, forwarded to the kernel.
   * @param out      n output samples, forwarded to the kernel.
   * @param n        number of samples; n <= 0 is a no-op.
   * @param num_taps number of coefficients to apply, 1..256 inclusive.
   * @return 0 when n <= 0 (nothing to do);
   *         -1 when num_taps is outside 1..256 (same code that
   *         gpud_upload_fir_taps_cuda uses for an invalid tap count);
   *         otherwise cudaGetLastError() right after the launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_fir_cuda(
      const float2* in,
      float2* out,
      int n,
      int num_taps
  ) {
      // An empty input is a legitimate no-op.
      if (n <= 0) return 0;

      // An unusable tap count is a caller error. Previously this returned 0,
      // which told the caller the filter had run while `out` was never
      // written; report it as a failure instead.
      if (num_taps <= 0 || num_taps > 256) return -1;

      const int block = 256;
      const int grid = (n + block - 1) / block;
      // One slot per thread plus num_taps - 1 preceding samples.
      const size_t sharedBytes = (size_t)(block + num_taps - 1) * sizeof(float2);

      gpud_fir_kernel<<<grid, block, sharedBytes>>>(in, out, n, num_taps);
      return (int)cudaGetLastError();
  }
  /**
   * Exported C entry point that launches gpud_decimate_kernel.
   *
   * Uses 256-thread blocks and enough blocks to cover n_out output samples.
   * Launched without a stream argument and without any synchronization.
   *
   * @param in     input samples, forwarded to the kernel.
   * @param out    n_out output samples, forwarded to the kernel.
   * @param n_out  number of output samples to produce.
   * @param factor stride between picked input samples.
   * @return 0 without launching when n_out <= 0 or factor <= 0, otherwise
   *         cudaGetLastError() right after the launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_decimate_cuda(
      const float2* in,
      float2* out,
      int n_out,
      int factor
  ) {
      // Both the output length and the stride must be positive to do any work.
      const bool has_work = (n_out > 0) && (factor > 0);
      if (!has_work) {
          return 0;
      }

      const int threads_per_block = 256;
      const int num_blocks = (n_out + threads_per_block - 1) / threads_per_block;

      gpud_decimate_kernel<<<num_blocks, threads_per_block>>>(in, out, n_out, factor);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }
  /**
   * AM envelope detector: magnitude of each complex sample.
   *
   * Writes out[i] = sqrtf(in[i].x^2 + in[i].y^2) for every i in [0, n).
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per sample. Threads
   * with a global index >= n do nothing. No shared memory is used.
   *
   * @param in  n complex input samples.
   * @param out n output magnitudes.
   * @param n   number of samples.
   */
  __global__ void gpud_am_envelope_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n
  ) {
      const int i = blockIdx.x * blockDim.x + threadIdx.x;

      if (i < n) {
          const float2 s = in[i];
          out[i] = sqrtf(s.x * s.x + s.y * s.y);
      }
  }
  /**
   * Exported C entry point that launches gpud_am_envelope_kernel.
   *
   * Uses 256-thread blocks and enough blocks to cover n samples. Launched
   * without a stream argument and without any synchronization.
   *
   * @param in  n complex input samples, forwarded to the kernel.
   * @param out n output magnitudes, forwarded to the kernel.
   * @param n   number of samples; n <= 0 is a no-op.
   * @return 0 when n <= 0, otherwise cudaGetLastError() right after the
   *         launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_am_envelope_cuda(
      const float2* in,
      float* out,
      int n
  ) {
      // Nothing to process: report success without launching.
      if (n < 1) {
          return 0;
      }

      const int threads_per_block = 256;
      const int num_blocks = (n + threads_per_block - 1) / threads_per_block;

      gpud_am_envelope_kernel<<<num_blocks, threads_per_block>>>(in, out, n);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }
  /**
   * SSB product detector: real part of each sample after a phase rotation.
   *
   * Writes out[idx] = Re(in[idx] * exp(j * (phase_start + phase_inc * idx)))
   *                 = in[idx].x * cos(phase) - in[idx].y * sin(phase).
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per sample. Threads
   * whose global index is >= n return without touching memory. No shared
   * memory is used.
   *
   * @param in          n complex input samples (x = real, y = imaginary).
   * @param out         n real output samples.
   * @param n           number of samples to process.
   * @param phase_inc   phase advance per sample, in radians.
   * @param phase_start phase applied to sample 0, in radians.
   */
  __global__ void gpud_ssb_product_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      const double kTwoPi = 6.283185307179586476925286766559;
      const double kInvTwoPi = 0.15915494309189533576888376337251;

      const double phase = phase_start + phase_inc * (double)idx;

      // Casting a large phase directly to float loses its fractional part
      // (float spacing is already 0.5 rad near 6e6 rad), which would turn the
      // double-precision phase ramp into audible phase noise. Wrap it into
      // [-pi, pi] in double first, then narrow it for sincosf.
      const double wrapped = fma(-kTwoPi, rint(phase * kInvTwoPi), phase);

      float si, co;
      sincosf((float)wrapped, &si, &co);

      const float2 v = in[idx];
      out[idx] = v.x * co - v.y * si;
  }
  /**
   * Exported C entry point that launches gpud_ssb_product_kernel.
   *
   * Uses 256-thread blocks and enough blocks to cover n samples. Launched
   * without a stream argument and without any synchronization.
   *
   * @param in          n complex input samples, forwarded to the kernel.
   * @param out         n real output samples, forwarded to the kernel.
   * @param n           number of samples; n <= 0 is a no-op.
   * @param phase_inc   per-sample phase advance in radians.
   * @param phase_start phase of sample 0 in radians.
   * @return 0 when n <= 0, otherwise cudaGetLastError() right after the
   *         launch, cast to int.
   */
  GPUD_API int GPUD_CALL gpud_launch_ssb_product_cuda(
      const float2* in,
      float* out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      // Nothing to process: report success without launching.
      if (n < 1) {
          return 0;
      }

      const int threads_per_block = 256;
      const int num_blocks = (n + threads_per_block - 1) / threads_per_block;

      gpud_ssb_product_kernel<<<num_blocks, threads_per_block>>>(
          in, out, n, phase_inc, phase_start);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }