No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

130 líneas
3.3KB

  1. #include <cuda_runtime.h>
  2. #include <math.h>
  /*
   * Frequency-shift kernel.
   *
   * Treats each float2 as a complex sample (.x real, .y imaginary) and rotates
   * sample idx by the angle
   *     phase_start + phase_inc * idx      (radians)
   * writing the rotated sample to out[idx].
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per sample. Threads whose
   * global index is >= n return without touching memory, so the grid may be
   * rounded up to a whole number of blocks.
   *
   * in           input samples, at least n elements
   * out          output samples, at least n elements
   * n            number of samples to process
   * phase_inc    phase advance per sample, in radians
   * phase_start  phase applied to sample 0, in radians
   */
  extern "C" __global__ void gpud_freq_shift_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      const double two_pi = 6.283185307179586;
      const double phase = phase_start + phase_inc * (double)idx;

      // Wrap the phase into [-pi, pi] while it is still a double. A float only
      // has about 24 bits of mantissa, so narrowing a large, unwrapped phase
      // would throw away most of the angle and the rotation would get noisier
      // the further idx is from zero.
      const float wrapped = (float)remainder(phase, two_pi);

      float si, co;
      sincosf(wrapped, &si, &co);

      // Complex multiply: (v.x + i*v.y) * (co + i*si).
      const float2 v = in[idx];
      out[idx] = make_float2(v.x * co - v.y * si, v.x * si + v.y * co);
  }
  /*
   * Host-side launcher for gpud_freq_shift_kernel.
   *
   * Launches one thread per sample in blocks of 256 threads, with no explicit
   * stream argument. The call does not synchronize: the return value is
   * whatever cudaGetLastError() reports right after the launch, cast to int.
   *
   * Returns 0 without launching anything when n <= 0.
   */
  extern "C" int gpud_launch_freq_shift_cuda(
      const float2* in,
      float2* out,
      int n,
      double phase_inc,
      double phase_start
  ) {
      if (n < 1) {
          return 0;
      }

      const int threads_per_block = 256;
      // Ceiling of n / threads_per_block (n is known to be >= 1 here).
      const int num_blocks = (n - 1) / threads_per_block + 1;

      gpud_freq_shift_kernel<<<num_blocks, threads_per_block>>>(
          in, out, n, phase_inc, phase_start);

      const cudaError_t launch_status = cudaGetLastError();
      return static_cast<int>(launch_status);
  }
  /*
   * FM discriminator kernel.
   *
   * For every pair of neighbouring complex samples (in[i], in[i + 1]) this
   * computes the angle of conj(in[i]) * in[i + 1] with atan2f and stores it in
   * out[i]. Each float2 is read as .x = real part, .y = imaginary part.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per output. Only threads
   * with index < n - 1 do any work, so exactly n - 1 outputs are written.
   *
   * in   input samples, at least n elements
   * out  output angles in radians, at least n - 1 elements
   * n    number of input samples
   */
  extern "C" __global__ void gpud_fm_discrim_kernel(
      const float2* __restrict__ in,
      float* __restrict__ out,
      int n
  ) {
      const int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n - 1) {
          const float2 a = in[i];
          const float2 b = in[i + 1];
          // Real and imaginary parts of conj(a) * b.
          const float real_part = a.x * b.x + a.y * b.y;
          const float imag_part = a.x * b.y - a.y * b.x;
          out[i] = atan2f(imag_part, real_part);
      }
  }
  /*
   * Host-side launcher for gpud_fm_discrim_kernel.
   *
   * The kernel writes one output per adjacent sample pair, i.e. n - 1 outputs,
   * so the grid is sized for n - 1 threads (blocks of 256) rather than n. This
   * avoids launching a block that would do nothing when n - 1 is an exact
   * multiple of the block size.
   *
   * The launch uses no explicit stream argument and is not synchronized; the
   * return value is cudaGetLastError() right after the launch, cast to int.
   *
   * Returns 0 without launching anything when n <= 1 (no sample pair exists).
   */
  extern "C" int gpud_launch_fm_discrim_cuda(
      const float2* in,
      float* out,
      int n
  ) {
      if (n <= 1) return 0;

      const int block = 256;
      const int num_outputs = n - 1;  // >= 1 because of the guard above
      // Ceiling division written so it cannot overflow for any positive int.
      const int grid = (num_outputs - 1) / block + 1;

      gpud_fm_discrim_kernel<<<grid, block>>>(in, out, n);
      return (int)cudaGetLastError();
  }
  /*
   * Decimation kernel: keeps every factor-th input sample.
   *
   * Output element idx is a copy of input element idx * factor. No filtering is
   * performed here.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per output sample;
   * threads with index >= n_out return without touching memory.
   *
   * in      input samples; element (n_out - 1) * factor must be readable
   * out     output samples, at least n_out elements
   * n_out   number of output samples to produce
   * factor  stride between kept input samples
   */
  extern "C" __global__ void gpud_decimate_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n_out,
      int factor
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n_out) return;

      // Form the source index in 64 bits: idx * factor can exceed INT_MAX for
      // long inputs even though idx and factor each fit in an int.
      const long long src = static_cast<long long>(idx) * factor;
      out[idx] = in[src];
  }
  // FIR coefficient table in constant memory (256 floats). It is filled from
  // the host by gpud_upload_fir_taps_cuda and read by gpud_fir_kernel.
  //
  // The braces matter: a declaration placed directly inside a brace-less
  // `extern "C"` is treated as if it were declared `extern`, i.e. it only
  // declares the variable. The braced form keeps the C-linkage name and makes
  // this a real definition.
  extern "C" {
  __constant__ float gpud_fir_taps[256];
  }
  /*
   * FIR filter kernel over complex samples.
   *
   * Output sample idx is the sum over k of gpud_fir_taps[k] * in[idx - k],
   * for k = 0 .. num_taps - 1, applied separately to the .x and .y components.
   * Terms whose source index would be negative are left out, so the first
   * num_taps - 1 outputs are built from fewer taps.
   *
   * Launch layout: 1-D grid of 1-D blocks, one thread per output sample;
   * threads with index >= n return without touching memory.
   *
   * in        input samples, at least n elements
   * out       output samples, at least n elements
   * n         number of samples
   * num_taps  number of coefficients to use from gpud_fir_taps; the kernel does
   *           not check it against the table size (256)
   */
  extern "C" __global__ void gpud_fir_kernel(
      const float2* __restrict__ in,
      float2* __restrict__ out,
      int n,
      int num_taps
  ) {
      const int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx >= n) return;

      // Only in[0 .. idx] lie at or before this sample, so at most idx + 1 taps
      // can contribute.
      const int available = idx + 1;
      const int taps_used = (num_taps < available) ? num_taps : available;

      float2 acc = make_float2(0.0f, 0.0f);
      for (int k = 0; k < taps_used; ++k) {
          const float2 sample = in[idx - k];
          const float coeff = gpud_fir_taps[k];
          acc.x += sample.x * coeff;
          acc.y += sample.y * coeff;
      }
      out[idx] = acc;
  }
  /*
   * Copies n FIR coefficients from host memory into the gpud_fir_taps
   * constant-memory table, starting at its first element. Entries past n are
   * left as they were.
   *
   * taps  host pointer to n floats
   * n     number of coefficients, 1..256
   *
   * Returns -1 if taps is null or n is outside 1..256; otherwise the
   * cudaMemcpyToSymbol status cast to int.
   */
  extern "C" int gpud_upload_fir_taps_cuda(const float* taps, int n) {
      const bool count_ok = (n >= 1) && (n <= 256);
      if (!taps || !count_ok) {
          return -1;
      }

      const size_t num_bytes = sizeof(float) * static_cast<size_t>(n);
      const cudaError_t status = cudaMemcpyToSymbol(gpud_fir_taps, taps, num_bytes);
      return static_cast<int>(status);
  }
  /*
   * Host-side launcher for gpud_fir_kernel.
   *
   * Launches one thread per sample in blocks of 256 threads, with no explicit
   * stream argument. The launch is not synchronized; on the normal path the
   * return value is cudaGetLastError() right after the launch, cast to int.
   *
   * Return value:
   *    0   n <= 0 (nothing to do), or the launch was accepted
   *   -1   num_taps is outside 1..256, the size of the gpud_fir_taps table.
   *        This is the same code gpud_upload_fir_taps_cuda uses for a bad tap
   *        count. Reporting success here would leave `out` unwritten while
   *        telling the caller the filter ran.
   *   >0   CUDA error code from the launch
   */
  extern "C" int gpud_launch_fir_cuda(
      const float2* in,
      float2* out,
      int n,
      int num_taps
  ) {
      if (n <= 0) return 0;
      if (num_taps <= 0 || num_taps > 256) return -1;

      const int block = 256;
      // Ceiling division written so it cannot overflow for any positive int.
      const int grid = (n - 1) / block + 1;

      gpud_fir_kernel<<<grid, block>>>(in, out, n, num_taps);
      return (int)cudaGetLastError();
  }
  /*
   * Host-side launcher for gpud_decimate_kernel.
   *
   * Launches one thread per output sample in blocks of 256 threads, with no
   * explicit stream argument. The launch is not synchronized; on the normal
   * path the return value is cudaGetLastError() right after the launch, cast
   * to int.
   *
   * Return value:
   *    0   n_out <= 0 (nothing to do), or the launch was accepted
   *   -1   factor <= 0. A non-positive stride is not a valid decimation, and
   *        reporting success would leave `out` unwritten. -1 is the
   *        invalid-argument code already used by gpud_upload_fir_taps_cuda.
   *   >0   CUDA error code from the launch
   */
  extern "C" int gpud_launch_decimate_cuda(
      const float2* in,
      float2* out,
      int n_out,
      int factor
  ) {
      if (n_out <= 0) return 0;
      if (factor <= 0) return -1;

      const int block = 256;
      // Ceiling division written so it cannot overflow for any positive int.
      const int grid = (n_out - 1) / block + 1;

      gpud_decimate_kernel<<<grid, block>>>(in, out, n_out, factor);
      return (int)cudaGetLastError();
  }