Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

134 linhas
10KB

  1. //go:build cufft && windows
  2. package gpudemod
  3. /*
  4. #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include"
  5. #cgo windows LDFLAGS: -lcudart64_13 -lkernel32
  6. #include <windows.h>
  7. #include <stdlib.h>
  8. #include <cuda_runtime.h>
  9. typedef struct { float x; float y; } gpud_float2;
  10. typedef void* gpud_stream_handle;
  11. typedef int (__stdcall *gpud_stream_create_fn)(gpud_stream_handle* out);
  12. typedef int (__stdcall *gpud_stream_destroy_fn)(gpud_stream_handle stream);
  13. typedef int (__stdcall *gpud_stream_sync_fn)(gpud_stream_handle stream);
  14. typedef int (__stdcall *gpud_upload_fir_taps_fn)(const float* taps, int n);
  15. typedef int (__stdcall *gpud_launch_freq_shift_stream_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start, gpud_stream_handle stream);
  16. typedef int (__stdcall *gpud_launch_freq_shift_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
  17. typedef int (__stdcall *gpud_launch_fm_discrim_fn)(const gpud_float2* in, float* out, int n);
  18. typedef int (__stdcall *gpud_launch_fir_stream_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps, gpud_stream_handle stream);
  19. typedef int (__stdcall *gpud_launch_fir_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps);
  20. typedef int (__stdcall *gpud_launch_decimate_stream_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor, gpud_stream_handle stream);
  21. typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor);
  22. typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n);
  23. typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start);
  24. static HMODULE gpud_mod = NULL;
  25. static gpud_stream_create_fn gpud_p_stream_create = NULL;
  26. static gpud_stream_destroy_fn gpud_p_stream_destroy = NULL;
  27. static gpud_stream_sync_fn gpud_p_stream_sync = NULL;
  28. static gpud_upload_fir_taps_fn gpud_p_upload_fir_taps = NULL;
  29. static gpud_launch_freq_shift_stream_fn gpud_p_launch_freq_shift_stream = NULL;
  30. static gpud_launch_freq_shift_fn gpud_p_launch_freq_shift = NULL;
  31. static gpud_launch_fm_discrim_fn gpud_p_launch_fm_discrim = NULL;
  32. static gpud_launch_fir_stream_fn gpud_p_launch_fir_stream = NULL;
  33. static gpud_launch_fir_fn gpud_p_launch_fir = NULL;
  34. static gpud_launch_decimate_stream_fn gpud_p_launch_decimate_stream = NULL;
  35. static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL;
  36. static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL;
  37. static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL;
  38. static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); }
  39. static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); }
  40. static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); }
  41. static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); }
  42. static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); }
  43. static int gpud_load_library(const char* path) {
  44. if (gpud_mod != NULL) return 0;
  45. gpud_mod = LoadLibraryA(path);
  46. if (gpud_mod == NULL) return -1;
  47. gpud_p_stream_create = (gpud_stream_create_fn)GetProcAddress(gpud_mod, "gpud_stream_create");
  48. gpud_p_stream_destroy = (gpud_stream_destroy_fn)GetProcAddress(gpud_mod, "gpud_stream_destroy");
  49. gpud_p_stream_sync = (gpud_stream_sync_fn)GetProcAddress(gpud_mod, "gpud_stream_sync");
  50. gpud_p_upload_fir_taps = (gpud_upload_fir_taps_fn)GetProcAddress(gpud_mod, "gpud_upload_fir_taps_cuda");
  51. gpud_p_launch_freq_shift_stream = (gpud_launch_freq_shift_stream_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_stream_cuda");
  52. gpud_p_launch_freq_shift = (gpud_launch_freq_shift_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_cuda");
  53. gpud_p_launch_fm_discrim = (gpud_launch_fm_discrim_fn)GetProcAddress(gpud_mod, "gpud_launch_fm_discrim_cuda");
  54. gpud_p_launch_fir_stream = (gpud_launch_fir_stream_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_stream_cuda");
  55. gpud_p_launch_fir = (gpud_launch_fir_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_cuda");
  56. gpud_p_launch_decimate_stream = (gpud_launch_decimate_stream_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_stream_cuda");
  57. gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda");
  58. gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda");
  59. gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda");
  60. if (!gpud_p_stream_create || !gpud_p_stream_destroy || !gpud_p_stream_sync || !gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift_stream || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir_stream || !gpud_p_launch_fir || !gpud_p_launch_decimate_stream || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) {
  61. FreeLibrary(gpud_mod);
  62. gpud_mod = NULL;
  63. return -2;
  64. }
  65. return 0;
  66. }
  67. static int gpud_stream_create(gpud_stream_handle* out) { if (!gpud_p_stream_create) return -1; return gpud_p_stream_create(out); }
  68. static int gpud_stream_destroy(gpud_stream_handle stream) { if (!gpud_p_stream_destroy) return -1; return gpud_p_stream_destroy(stream); }
  69. static int gpud_stream_sync(gpud_stream_handle stream) { if (!gpud_p_stream_sync) return -1; return gpud_p_stream_sync(stream); }
  70. static int gpud_upload_fir_taps(const float* taps, int n) { if (!gpud_p_upload_fir_taps) return -1; return gpud_p_upload_fir_taps(taps, n); }
  71. static int gpud_launch_freq_shift_stream(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start, gpud_stream_handle stream) { if (!gpud_p_launch_freq_shift_stream) return -1; return gpud_p_launch_freq_shift_stream(in, out, n, phase_inc, phase_start, stream); }
  72. static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { if (!gpud_p_launch_freq_shift) return -1; return gpud_p_launch_freq_shift(in, out, n, phase_inc, phase_start); }
  73. static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { if (!gpud_p_launch_fm_discrim) return -1; return gpud_p_launch_fm_discrim(in, out, n); }
  74. static int gpud_launch_fir_stream(gpud_float2 *in, gpud_float2 *out, int n, int num_taps, gpud_stream_handle stream) { if (!gpud_p_launch_fir_stream) return -1; return gpud_p_launch_fir_stream(in, out, n, num_taps, stream); }
  75. static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) { if (!gpud_p_launch_fir) return -1; return gpud_p_launch_fir(in, out, n, num_taps); }
  76. static int gpud_launch_decimate_stream(gpud_float2 *in, gpud_float2 *out, int n_out, int factor, gpud_stream_handle stream) { if (!gpud_p_launch_decimate_stream) return -1; return gpud_p_launch_decimate_stream(in, out, n_out, factor, stream); }
  77. static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) { if (!gpud_p_launch_decimate) return -1; return gpud_p_launch_decimate(in, out, n_out, factor); }
  78. static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) { if (!gpud_p_launch_am_envelope) return -1; return gpud_p_launch_am_envelope(in, out, n); }
  79. static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) { if (!gpud_p_launch_ssb_product) return -1; return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start); }
  80. */
  81. import "C"
  82. import "unsafe"
  83. type streamHandle = C.gpud_stream_handle
  84. type gpuFloat2 = C.gpud_float2
  85. func bridgeLoadLibrary(path string) int {
  86. cp := C.CString(path)
  87. defer C.free(unsafe.Pointer(cp))
  88. return int(C.gpud_load_library(cp))
  89. }
  90. func bridgeCudaMalloc(ptr *unsafe.Pointer, bytes uintptr) int { return int(C.gpud_cuda_malloc(ptr, C.size_t(bytes))) }
  91. func bridgeCudaFree(ptr unsafe.Pointer) int { return int(C.gpud_cuda_free(ptr)) }
  92. func bridgeMemcpyH2D(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_h2d(dst, src, C.size_t(bytes))) }
  93. func bridgeMemcpyD2H(dst unsafe.Pointer, src unsafe.Pointer, bytes uintptr) int { return int(C.gpud_memcpy_d2h(dst, src, C.size_t(bytes))) }
  94. func bridgeDeviceSync() int { return int(C.gpud_device_sync()) }
  95. func bridgeUploadFIRTaps(taps *C.float, n int) int { return int(C.gpud_upload_fir_taps(taps, C.int(n))) }
  96. func bridgeLaunchFreqShift(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64) int {
  97. return int(C.gpud_launch_freq_shift(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart)))
  98. }
  99. func bridgeLaunchFreqShiftStream(in *C.gpud_float2, out *C.gpud_float2, n int, phaseInc float64, phaseStart float64, stream streamHandle) int {
  100. return int(C.gpud_launch_freq_shift_stream(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart), C.gpud_stream_handle(stream)))
  101. }
  102. func bridgeLaunchFIR(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int) int { return int(C.gpud_launch_fir(in, out, C.int(n), C.int(numTaps))) }
  103. func bridgeLaunchFIRStream(in *C.gpud_float2, out *C.gpud_float2, n int, numTaps int, stream streamHandle) int {
  104. return int(C.gpud_launch_fir_stream(in, out, C.int(n), C.int(numTaps), C.gpud_stream_handle(stream)))
  105. }
  106. func bridgeLaunchDecimate(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int) int { return int(C.gpud_launch_decimate(in, out, C.int(nOut), C.int(factor))) }
  107. func bridgeLaunchDecimateStream(in *C.gpud_float2, out *C.gpud_float2, nOut int, factor int, stream streamHandle) int {
  108. return int(C.gpud_launch_decimate_stream(in, out, C.int(nOut), C.int(factor), C.gpud_stream_handle(stream)))
  109. }
  110. func bridgeLaunchFMDiscrim(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_fm_discrim(in, out, C.int(n))) }
  111. func bridgeLaunchAMEnvelope(in *C.gpud_float2, out *C.float, n int) int { return int(C.gpud_launch_am_envelope(in, out, C.int(n))) }
  112. func bridgeLaunchSSBProduct(in *C.gpud_float2, out *C.float, n int, phaseInc float64, phaseStart float64) int {
  113. return int(C.gpud_launch_ssb_product(in, out, C.int(n), C.double(phaseInc), C.double(phaseStart)))
  114. }
  115. func bridgeStreamCreate() (streamHandle, int) {
  116. var s C.gpud_stream_handle
  117. res := int(C.gpud_stream_create(&s))
  118. return streamHandle(s), res
  119. }
  120. func bridgeStreamDestroy(stream streamHandle) int { return int(C.gpud_stream_destroy(C.gpud_stream_handle(stream))) }
  121. func bridgeStreamSync(stream streamHandle) int { return int(C.gpud_stream_sync(C.gpud_stream_handle(stream))) }