Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

227 Zeilen
5.8KB

  1. #include <cuda_runtime.h>
  2. #include <math.h>
  3. #if defined(_WIN32)
  4. #define GPUD_API extern "C" __declspec(dllexport)
  5. #define GPUD_CALL __stdcall
  6. #else
  7. #define GPUD_API extern "C"
  8. #define GPUD_CALL
  9. #endif
  // Opaque stream handle passed across the exported C interface. The functions
  // below store a cudaStream_t in it by casting to and from void*.
  typedef void* gpud_stream_handle;

  // Creates a CUDA stream with cudaStreamCreate and stores it in *out.
  //
  // Returns -1 when `out` is null, 0 on success, and otherwise the cudaError_t
  // from cudaStreamCreate cast to int. *out is written only on success.
  GPUD_API int GPUD_CALL gpud_stream_create(gpud_stream_handle* out) {
      if (!out) {
          return -1;
      }

      cudaStream_t created;
      const cudaError_t status = cudaStreamCreate(&created);
      if (status == cudaSuccess) {
          *out = (gpud_stream_handle)created;
          return 0;
      }
      return (int)status;
  }
  // Destroys the CUDA stream carried by `stream` (cast back to cudaStream_t).
  //
  // A null handle is treated as a no-op and returns 0. Otherwise the result of
  // cudaStreamDestroy is returned, cast to int.
  GPUD_API int GPUD_CALL gpud_stream_destroy(gpud_stream_handle stream) {
      if (stream) {
          const cudaError_t status = cudaStreamDestroy((cudaStream_t)stream);
          return (int)status;
      }
      return 0;
  }
  // Blocks the calling host thread until outstanding GPU work has finished.
  //
  // With a non-null handle only that stream is waited on (cudaStreamSynchronize);
  // with a null handle the whole device is waited on (cudaDeviceSynchronize).
  // Returns the resulting cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_stream_sync(gpud_stream_handle stream) {
      const cudaError_t status = stream
          ? cudaStreamSynchronize((cudaStream_t)stream)
          : cudaDeviceSynchronize();
      return (int)status;
  }
  // Rotates every complex sample by a linearly advancing phase:
  //
  //     out[i] = in[i] * exp(j * (phase_start + phase_inc * i)),   0 <= i < n
  //
  // where float2.x is used as the real part and float2.y as the imaginary part.
  //
  // Launch layout: 1-D grid and block, one thread per sample; threads whose
  // global index is >= n exit immediately. No shared memory is used.
  //
  // Parameters:
  //   in          - n input samples (read-only).
  //   out         - n output samples; declared __restrict__, so it is expected
  //                 not to overlap `in`.
  //   n           - number of samples.
  //   phase_inc   - phase step per sample, in radians.
  //   phase_start - phase applied to sample 0, in radians.
  __global__ void gpud_freq_shift_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      const double kTwoPi    = 6.283185307179586476925286766559;
      const double kInvTwoPi = 0.15915494309189533576888376337251;

      const double phase = phase_start + phase_inc * (double)idx;

      // Wrap the phase into [-pi, pi] while it is still a double. Casting the
      // unwrapped value to float would throw away the fractional radians once
      // the phase grows large (a float near 1e6 has a spacing of 0.0625), which
      // shows up as phase noise on the rotated signal.
      const double turns   = rint(phase * kInvTwoPi);
      const double wrapped = fma(-turns, kTwoPi, phase);

      float si, co;
      sincosf((float)wrapped, &si, &co);

      const float2 v = in[idx];
      out[idx] = make_float2(v.x * co - v.y * si,
                             v.x * si + v.y * co);
  }
  // Launches gpud_freq_shift_kernel over n samples with 256-thread blocks.
  //
  // No stream argument is given in the launch configuration, so the kernel is
  // queued on the default stream. The call does not wait for the kernel; the
  // return value only reflects what cudaGetLastError reports right after the
  // launch.
  //
  // Parameters:
  //   in, out     - buffers of n float2 samples, passed straight to the kernel.
  //   n           - number of samples; n <= 0 is a no-op.
  //   phase_inc   - phase step per sample, in radians.
  //   phase_start - phase of sample 0, in radians.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null, otherwise the cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_freq_shift_cuda(
      const float2* in,
      float2* out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      if (n <= 0) return 0;
      // A null buffer would fault inside the kernel; reject it up front.
      if (!in || !out) return -1;

      const int block = 256;
      // Ceil-div written so it cannot overflow int when n is close to INT_MAX.
      const int grid = (n - 1) / block + 1;

      gpud_freq_shift_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
      return (int)cudaGetLastError();
  }
  // Phase-difference discriminator over consecutive complex samples.
  //
  // For 0 <= i < n - 1 this writes
  //
  //     out[i] = atan2f(Im(conj(in[i]) * in[i + 1]), Re(conj(in[i]) * in[i + 1]))
  //
  // i.e. the angle from sample i to sample i + 1, with float2.x as the real
  // part and float2.y as the imaginary part. Only n - 1 outputs are produced.
  //
  // Launch layout: 1-D grid and block, one thread per output element; threads
  // with a global index >= n - 1 do nothing.
  __global__ void gpud_fm_discrim_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n
  ) {
      const int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n - 1) {
          const float2 a = in[i];
          const float2 b = in[i + 1];

          // Real and imaginary parts of conj(a) * b.
          const float re = a.x * b.x + a.y * b.y;
          const float im = a.x * b.y - a.y * b.x;

          out[i] = atan2f(im, re);
      }
  }
  // Launches gpud_fm_discrim_kernel with 256-thread blocks.
  //
  // The kernel turns n input samples into n - 1 phase differences, so the grid
  // is sized for n - 1 threads. The launch uses the default stream and is not
  // waited on; the return value only reflects what cudaGetLastError reports
  // right after the launch.
  //
  // Parameters:
  //   in  - n float2 input samples.
  //   out - receives n - 1 floats.
  //   n   - number of input samples; n <= 1 is a no-op.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null, otherwise the cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_fm_discrim_cuda(
      const float2* in,
      float* out,
      int n
  ) {
      if (n <= 1) return 0;
      // A null buffer would fault inside the kernel; reject it up front.
      if (!in || !out) return -1;

      const int block = 256;
      const int n_out = n - 1;
      // One thread per output; this form of ceil-div cannot overflow int.
      const int grid = (n_out - 1) / block + 1;

      gpud_fm_discrim_kernel<<<grid, block>>>(in, out, n);
      return (int)cudaGetLastError();
  }
  // Keeps every `factor`-th complex sample: out[i] = in[i * factor] for
  // 0 <= i < n_out. No filtering is applied here.
  //
  // Launch layout: 1-D grid and block, one thread per output sample; threads
  // with a global index >= n_out do nothing.
  //
  // Parameters:
  //   in     - input samples; element (n_out - 1) * factor is the highest one read.
  //   out    - n_out output samples.
  //   n_out  - number of output samples.
  //   factor - distance, in input samples, between consecutive outputs.
  __global__ void gpud_decimate_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n_out,
      int factor
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n_out) return;

      // Form the source index in 64 bits: idx * factor can exceed INT_MAX for
      // large inputs, and a wrapped 32-bit product would read the wrong element.
      const long long src = (long long)idx * (long long)factor;
      out[idx] = in[src];
  }
  // FIR coefficients in constant memory. gpud_fir_kernel reads entries
  // [0, num_taps), so at most 256 taps are supported.
  __device__ __constant__ float gpud_fir_taps[256];

  // Real-tap FIR filter over complex samples:
  //
  //     out[i] = sum_{k = 0}^{num_taps - 1} gpud_fir_taps[k] * in[i - k]
  //
  // for 0 <= i < n, with in[j] taken as zero for j < 0. The same real tap
  // scales both float2 components.
  //
  // Launch layout: 1-D grid and block, one thread per output sample.
  // Dynamic shared memory: (blockDim.x + num_taps - 1) * sizeof(float2) bytes.
  // Each block stages its own samples plus the num_taps - 1 samples that
  // precede them (the halo), so every tap reads from shared memory.
  //
  // Parameters:
  //   in       - n input samples.
  //   out      - n output samples; declared __restrict__, so it is expected
  //              not to overlap `in`.
  //   n        - number of samples.
  //   num_taps - number of taps to apply, in 1..256.
  __global__ void gpud_fir_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      int num_taps
  ) {
      extern __shared__ float2 s_data[];

      const int lid         = threadIdx.x;
      const int block_start = blockIdx.x * blockDim.x;
      const int gid         = block_start + lid;
      const int halo        = num_taps - 1;
      const float2 zero     = make_float2(0.0f, 0.0f);

      // Body of the tile: s_data[halo + t] holds in[block_start + t].
      s_data[lid + halo] = (gid < n) ? in[gid] : zero;

      // Halo: s_data[j] holds in[block_start - halo + j] for 0 <= j < halo.
      // The strided loop fills the whole halo even when it is longer than the
      // block, so the kernel does not depend on blockDim.x >= num_taps - 1.
      for (int j = lid; j < halo; j += blockDim.x) {
          const int src = block_start - halo + j;
          s_data[j] = (src >= 0 && src < n) ? in[src] : zero;
      }

      // Every thread of the block must reach the barrier, so the bounds exit
      // comes after it.
      __syncthreads();
      if (gid >= n) return;

      float acc_r = 0.0f;
      float acc_i = 0.0f;
      for (int k = 0; k < num_taps; ++k) {
          const float2 v = s_data[lid + halo - k];
          const float t = gpud_fir_taps[k];
          acc_r += v.x * t;
          acc_i += v.y * t;
      }
      out[gid] = make_float2(acc_r, acc_i);
  }
  // Copies n FIR coefficients from `taps` to the start of the gpud_fir_taps
  // constant array using cudaMemcpyToSymbol with its default arguments.
  // Entries at index n and above are left as they were.
  //
  // Parameters:
  //   taps - source coefficients (presumably a host pointer, since no copy
  //          kind is specified -- confirm with callers).
  //   n    - number of coefficients, 1..256.
  //
  // Returns -1 when `taps` is null or n is outside 1..256, otherwise the
  // cudaError_t from cudaMemcpyToSymbol cast to int.
  GPUD_API int GPUD_CALL gpud_upload_fir_taps_cuda(const float* taps, int n) {
      const bool count_ok = (n > 0) && (n <= 256);
      if (!taps || !count_ok) {
          return -1;
      }

      const size_t bytes = (size_t)n * sizeof(float);
      return (int)cudaMemcpyToSymbol(gpud_fir_taps, taps, bytes);
  }
  // Launches gpud_fir_kernel over n samples with 256-thread blocks and
  // (256 + num_taps - 1) * sizeof(float2) bytes of dynamic shared memory.
  //
  // The coefficients are read from the gpud_fir_taps constant array, which
  // must already hold at least num_taps valid entries. The launch uses the
  // default stream and is not waited on; the return value only reflects what
  // cudaGetLastError reports right after the launch.
  //
  // Parameters:
  //   in, out  - buffers of n float2 samples.
  //   n        - number of samples; n <= 0 is a no-op.
  //   num_taps - number of taps to apply, 1..256.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null or num_taps is outside 1..256, otherwise the
  // cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_fir_cuda(
      const float2* in,
      float2* out,
      int n,
      int num_taps
  ) {
      if (n <= 0) return 0;
      // An unusable tap count or null buffer is reported as an error instead of
      // success, so the caller never treats an untouched `out` as filtered data.
      if (!in || !out || num_taps <= 0 || num_taps > 256) return -1;

      const int block = 256;
      // Ceil-div written so it cannot overflow int when n is close to INT_MAX.
      const int grid = (n - 1) / block + 1;
      // One tile per block: its own samples plus num_taps - 1 halo samples.
      const size_t sharedBytes = (size_t)(block + num_taps - 1) * sizeof(float2);

      gpud_fir_kernel<<<grid, block, sharedBytes>>>(in, out, n, num_taps);
      return (int)cudaGetLastError();
  }
  // Launches gpud_decimate_kernel to produce n_out samples with 256-thread
  // blocks.
  //
  // The launch uses the default stream and is not waited on; the return value
  // only reflects what cudaGetLastError reports right after the launch.
  //
  // Parameters:
  //   in     - input samples; must cover index (n_out - 1) * factor.
  //   out    - n_out output samples.
  //   n_out  - number of output samples; n_out <= 0 is a no-op.
  //   factor - decimation factor, must be >= 1.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null or factor is not positive, otherwise the
  // cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_decimate_cuda(
      const float2* in,
      float2* out,
      int n_out,
      int factor
  ) {
      if (n_out <= 0) return 0;
      // A non-positive factor or null buffer is reported as an error instead of
      // success, so the caller never treats an untouched `out` as valid output.
      if (!in || !out || factor <= 0) return -1;

      const int block = 256;
      // Ceil-div written so it cannot overflow int when n_out is close to INT_MAX.
      const int grid = (n_out - 1) / block + 1;

      gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor);
      return (int)cudaGetLastError();
  }
  // Magnitude of each complex sample: out[i] = sqrtf(x*x + y*y) for
  // 0 <= i < n, where x and y are the float2 components of in[i].
  //
  // Launch layout: 1-D grid and block, one thread per sample; threads with a
  // global index >= n do nothing.
  __global__ void gpud_am_envelope_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n
  ) {
      const int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) {
          const float2 s = in[i];
          out[i] = sqrtf(s.x * s.x + s.y * s.y);
      }
  }
  // Launches gpud_am_envelope_kernel over n samples with 256-thread blocks.
  //
  // The launch uses the default stream and is not waited on; the return value
  // only reflects what cudaGetLastError reports right after the launch.
  //
  // Parameters:
  //   in  - n float2 input samples.
  //   out - n float magnitudes.
  //   n   - number of samples; n <= 0 is a no-op.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null, otherwise the cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_am_envelope_cuda(
      const float2* in,
      float* out,
      int n
  ) {
      if (n <= 0) return 0;
      // A null buffer would fault inside the kernel; reject it up front.
      if (!in || !out) return -1;

      const int block = 256;
      // Ceil-div written so it cannot overflow int when n is close to INT_MAX.
      const int grid = (n - 1) / block + 1;

      gpud_am_envelope_kernel<<<grid, block>>>(in, out, n);
      return (int)cudaGetLastError();
  }
  // Product detector: rotates each complex sample by a linearly advancing
  // phase and keeps only the real part of the result:
  //
  //     out[i] = Re(in[i] * exp(j * (phase_start + phase_inc * i))),  0 <= i < n
  //
  // where float2.x is used as the real part and float2.y as the imaginary part.
  //
  // Launch layout: 1-D grid and block, one thread per sample; threads whose
  // global index is >= n exit immediately. No shared memory is used.
  //
  // Parameters:
  //   in          - n input samples (read-only).
  //   out         - n real output samples.
  //   n           - number of samples.
  //   phase_inc   - phase step per sample, in radians.
  //   phase_start - phase applied to sample 0, in radians.
  __global__ void gpud_ssb_product_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      const double kTwoPi    = 6.283185307179586476925286766559;
      const double kInvTwoPi = 0.15915494309189533576888376337251;

      const double phase = phase_start + phase_inc * (double)idx;

      // Wrap the phase into [-pi, pi] while it is still a double. Casting the
      // unwrapped value to float would throw away the fractional radians once
      // the phase grows large (a float near 1e6 has a spacing of 0.0625), which
      // distorts the demodulated output.
      const double turns   = rint(phase * kInvTwoPi);
      const double wrapped = fma(-turns, kTwoPi, phase);

      float si, co;
      sincosf((float)wrapped, &si, &co);

      const float2 v = in[idx];
      out[idx] = v.x * co - v.y * si;
  }
  // Launches gpud_ssb_product_kernel over n samples with 256-thread blocks.
  //
  // The launch uses the default stream and is not waited on; the return value
  // only reflects what cudaGetLastError reports right after the launch.
  //
  // Parameters:
  //   in          - n float2 input samples.
  //   out         - n float output samples.
  //   n           - number of samples; n <= 0 is a no-op.
  //   phase_inc   - phase step per sample, in radians.
  //   phase_start - phase of sample 0, in radians.
  //
  // Returns 0 when there is nothing to do or the launch succeeded, -1 when a
  // buffer pointer is null, otherwise the cudaError_t cast to int.
  GPUD_API int GPUD_CALL gpud_launch_ssb_product_cuda(
      const float2* in,
      float* out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      if (n <= 0) return 0;
      // A null buffer would fault inside the kernel; reject it up front.
      if (!in || !out) return -1;

      const int block = 256;
      // Ceil-div written so it cannot overflow int when n is close to INT_MAX.
      const int grid = (n - 1) / block + 1;

      gpud_ssb_product_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
      return (int)cudaGetLastError();
  }