From 7a37d56e8313f904f354e784e54734af8a2ee8d3 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 11:12:04 +0100 Subject: [PATCH] Add Windows gpudemod DLL build path --- README.md | 13 +- build-cuda-windows.ps1 | 21 +- build-gpudemod-dll.ps1 | 27 + build-sdrplay.ps1 | 41 +- docs/build-cuda.md | 53 +- .../demod/gpudemod/build/gpudemod_kernels.exp | Bin 0 -> 4516 bytes .../demod/gpudemod/build/gpudemod_kernels.lib | Bin 86280 -> 8238 bytes internal/demod/gpudemod/gpudemod.go | 2 +- internal/demod/gpudemod/gpudemod_windows.go | 525 ++++++++++++++++++ internal/demod/gpudemod/native/exports.cu | 190 +++++++ 10 files changed, 787 insertions(+), 85 deletions(-) create mode 100644 build-gpudemod-dll.ps1 create mode 100644 internal/demod/gpudemod/build/gpudemod_kernels.exp create mode 100644 internal/demod/gpudemod/gpudemod_windows.go create mode 100644 internal/demod/gpudemod/native/exports.cu diff --git a/README.md b/README.md index 73bd73c..9466dc5 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,14 @@ powershell -ExecutionPolicy Bypass -File .\build-sdrplay.ps1 ``` This path uses: -- MinGW GCC/G++ for the Go/CGO toolchain -- `nvcc` with MinGW `g++` as the host compiler for `gpudemod` kernels -- MinGW-compatible CUDA import libs from `cuda-mingw/` +- `nvcc` + MSVC to build `gpudemod_kernels.dll` +- MinGW GCC/G++ for the Go/CGO application build +- runtime DLL loading for the Windows `gpudemod` path Important: -- the kernel archive is generated as `internal/demod/gpudemod/build/libgpudemod_kernels.a` -- `-lstdc++` is linked explicitly for CUDA host-side C++ runtime references -- CUDA 13.x no longer supports older targets like `sm_50`/`sm_60`, so the script builds for `sm_75+` -- if `nvcc` is missing, CUDA kernel preparation will fail +- `gpudemod_kernels.dll` must be present next to `sdrd.exe` or in `internal/demod/gpudemod/build/` +- `build-sdrplay.ps1` copies the DLL to the repo root after a successful app build when available +- this avoids directly linking MSVC CUDA kernel objects into the MinGW-linked Go binary ### Linux ```bash diff --git a/build-cuda-windows.ps1 b/build-cuda-windows.ps1 index fb2f2e3..be408ef 100644 --- a/build-cuda-windows.ps1 +++ b/build-cuda-windows.ps1 @@ -1,19 +1,6 @@ $ErrorActionPreference = 'Stop' -$mingw = 'C:\msys64\mingw64\bin' -if (-not (Test-Path (Join-Path $mingw 'g++.exe'))) { - throw "MinGW g++ not found at $mingw" -} - -$cudaBin = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin' -if (-not (Test-Path (Join-Path $cudaBin 'nvcc.exe'))) { - throw "nvcc.exe not found at $cudaBin" -} - -$env:PATH = "$mingw;$cudaBin;" + $env:PATH - -Write-Host 'Preparing Windows CUDA environment for gpudemod (MinGW host compiler)...' -ForegroundColor Cyan -powershell -ExecutionPolicy Bypass -File tools\build-gpudemod-kernel.ps1 -if ($LASTEXITCODE -ne 0) { throw 'kernel build failed' } - -Write-Host 'Done. GNU-compatible gpudemod kernel library prepared.' -ForegroundColor Green +Write-Host 'Preparing Windows CUDA DLL for gpudemod (MSVC/nvcc path)...' -ForegroundColor Cyan +powershell -ExecutionPolicy Bypass -File .\build-gpudemod-dll.ps1 +if ($LASTEXITCODE -ne 0) { throw 'gpudemod DLL build failed' } +Write-Host 'Done. gpudemod_kernels.dll is ready.' -ForegroundColor Green diff --git a/build-gpudemod-dll.ps1 b/build-gpudemod-dll.ps1 new file mode 100644 index 0000000..47f021c --- /dev/null +++ b/build-gpudemod-dll.ps1 @@ -0,0 +1,27 @@ +$ErrorActionPreference = 'Stop' + +$vcvars = 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat' +$cudaRoot = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2' +$nvcc = Join-Path $cudaRoot 'bin\nvcc.exe' +$src = Join-Path $PSScriptRoot 'internal\demod\gpudemod\native\exports.cu' +$outDir = Join-Path $PSScriptRoot 'internal\demod\gpudemod\build' +$dll = Join-Path $outDir 'gpudemod_kernels.dll' +$lib = Join-Path $outDir 'gpudemod_kernels.lib' +$exp = Join-Path $outDir 'gpudemod_kernels.exp' + +if (!(Test-Path $vcvars)) { throw "vcvars64.bat not found at $vcvars" } +if (!(Test-Path $nvcc)) { throw "nvcc.exe not found at $nvcc" } +if (!(Test-Path $src)) { throw "CUDA source not found at $src" } +if (!(Test-Path $outDir)) { New-Item -ItemType Directory -Path $outDir | Out-Null } + +Remove-Item $dll,$lib,$exp -Force -ErrorAction SilentlyContinue + +$cmd = @" +call "$vcvars" && "$nvcc" -shared "$src" -o "$dll" -Xcompiler "/MD" -arch=sm_75 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90 +"@ + +Write-Host 'Building gpudemod CUDA DLL...' -ForegroundColor Cyan +cmd.exe /c $cmd +if ($LASTEXITCODE -ne 0) { throw 'gpudemod DLL build failed' } + +Write-Host "Built: $dll" -ForegroundColor Green diff --git a/build-sdrplay.ps1 b/build-sdrplay.ps1 index 6f8e56b..a731d63 100644 --- a/build-sdrplay.ps1 +++ b/build-sdrplay.ps1 @@ -15,36 +15,27 @@ $env:CXX = 'g++' $env:CGO_CFLAGS = '-IC:\PROGRA~1\SDRplay\API\inc' $env:CGO_LDFLAGS = '-LC:\PROGRA~1\SDRplay\API\x64 -lsdrplay_api' -# CUDA (cuFFT) +# CUDA runtime / cuFFT $cudaInc = 'C:\CUDA\include' $cudaBin = 'C:\CUDA\bin' -if (-not (Test-Path $cudaInc)) { - $cudaInc = 'C:\PROGRA~1\NVIDIA GPU Computing Toolkit\CUDA\v13.2\include' - $cudaBin = 'C:\PROGRA~1\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin' -} -if (Test-Path $cudaInc) { - $env:CGO_CFLAGS = "$env:CGO_CFLAGS -I$cudaInc" -} -if (Test-Path $cudaBin) { - $env:PATH = "$cudaBin;" + $env:PATH -} - +if (-not (Test-Path $cudaInc)) { $cudaInc = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\include' } +if (-not (Test-Path $cudaBin)) { $cudaBin = 'C:\PROGRA~1\NVIDIA~2\CUDA\v13.2\bin' } $cudaMingw = Join-Path $PSScriptRoot 'cuda-mingw' -$gpuDemodBuild = Join-Path $PSScriptRoot 'internal\demod\gpudemod\build' -if (Test-Path $cudaMingw) { - $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaMingw" -} -if (Test-Path $gpuDemodBuild) { - $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$gpuDemodBuild" -} -$env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++" - -Write-Host 'Building with SDRplay + cuFFT support (MinGW-host CUDA path)...' -ForegroundColor Cyan -Write-Host 'Preparing GNU-compatible CUDA kernel artifacts...' -ForegroundColor Cyan -powershell -ExecutionPolicy Bypass -File tools\build-gpudemod-kernel.ps1 -if ($LASTEXITCODE -ne 0) { throw 'kernel build failed' } +if (Test-Path $cudaInc) { $env:CGO_CFLAGS = "$env:CGO_CFLAGS -I$cudaInc" } +if (Test-Path $cudaBin) { $env:PATH = "$cudaBin;" + $env:PATH } +if (Test-Path $cudaMingw) { $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaMingw -lcudart64_13 -lcufft64_12 -lkernel32" } +Write-Host 'Building SDRplay + cuFFT app (Windows DLL path)...' -ForegroundColor Cyan go build -tags "sdrplay,cufft" ./cmd/sdrd if ($LASTEXITCODE -ne 0) { throw 'build failed' } +$dllSrc = Join-Path $PSScriptRoot 'internal\demod\gpudemod\build\gpudemod_kernels.dll' +$dllDst = Join-Path $PSScriptRoot 'gpudemod_kernels.dll' +if (Test-Path $dllSrc) { + Copy-Item $dllSrc $dllDst -Force + Write-Host "Copied DLL to $dllDst" -ForegroundColor Green +} else { + Write-Host 'WARNING: gpudemod_kernels.dll not found; build succeeded but runtime GPU demod will not load.' -ForegroundColor Yellow +} + Write-Host 'Done.' -ForegroundColor Green diff --git a/docs/build-cuda.md b/docs/build-cuda.md index 21c53d4..938bc73 100644 --- a/docs/build-cuda.md +++ b/docs/build-cuda.md @@ -1,55 +1,38 @@ # CUDA Build Strategy -## Windows: MinGW-host NVCC path +## Windows: gpudemod DLL split -The recommended Windows CUDA build path for this repository is: +The recommended Windows CUDA path is now a DLL split for `gpudemod`: -1. Compile `internal/demod/gpudemod/kernels.cu` with `nvcc` using MinGW `g++` as the host compiler -2. Archive the result as `internal/demod/gpudemod/build/libgpudemod_kernels.a` -3. Build the Go app with MinGW GCC/G++ via CGO +1. Build `internal/demod/gpudemod/native/exports.cu` into `gpudemod_kernels.dll` using `nvcc` + MSVC +2. Build the Go app with MinGW GCC/G++ via CGO +3. Load `gpudemod_kernels.dll` at runtime from Go on Windows -This keeps the CUDA demod kernel library in a GNU-compatible format so Go's MinGW CGO linker can consume it. +This avoids direct static linking of MSVC-built CUDA objects into the MinGW-linked Go binary. -### Why +## Why -The previous failing path mixed: -- `nvcc` + default MSVC host compiler (`cl.exe`) for CUDA kernels -- MinGW GCC/LD for the final Go/CGO link +The previous failing paths mixed incompatible toolchains at final link time: +- MSVC-host CUDA object/library generation +- MinGW GCC/LD for the Go executable -That produced unresolved MSVC runtime symbols such as: -- `__GSHandlerCheck` -- `__security_cookie` -- `_Init_thread_epoch` +The DLL split keeps that boundary at runtime instead of link time. -### Current Windows build flow +## Current Windows build flow ```powershell powershell -ExecutionPolicy Bypass -File .\build-cuda-windows.ps1 powershell -ExecutionPolicy Bypass -File .\build-sdrplay.ps1 ``` -### Critical details +## Runtime expectation -- CUDA kernel archive must be named `libgpudemod_kernels.a` -- `nvcc` must be invoked with `-ccbin C:\msys64\mingw64\bin\g++.exe` -- Windows CGO link uses: - - SDRplay API import lib - - MinGW CUDA import libs from `cuda-mingw/` - - `-lgpudemod_kernels` - - `-lcufft64_12` - - `-lcudart64_13` - - `-lstdc++` +`gpudemod_kernels.dll` must be available either: +- next to `sdrd.exe`, or +- in `internal/demod/gpudemod/build/` during local runs from the repo -### Caveat - -`nvcc` + MinGW on Windows is not officially supported by NVIDIA. For the kernel launcher style used here (`extern "C"` functions, limited host C++ surface), it is the most practical path. - -CUDA 13.x also drops older GPU targets such as `sm_50` and `sm_60`, so the kernel build script targets `sm_75+`. +The Windows `gpudemod` loader searches both locations. ## Linux -Linux remains the cleanest end-to-end CUDA path: - -1. Build CUDA kernels with `nvcc` + GCC -2. Link via standard CGO/GCC flow -3. Avoid Windows toolchain mismatch entirely +Linux remains the simpler direct-link path and still avoids the Windows mixed-toolchain problem entirely. diff --git a/internal/demod/gpudemod/build/gpudemod_kernels.exp b/internal/demod/gpudemod/build/gpudemod_kernels.exp new file mode 100644 index 0000000000000000000000000000000000000000..979be6468077c803a33e94efa23f7204af4bbfcd GIT binary patch literal 4516 zcmeHKJ8T?P6utI1iQ~kM9l(4-k|i;T4e_!c+p$q1Hgzj-lN!-HK^@X~VYR6-_hDO{-{Xo*%4f+O_N`&C^Pj8_rv}JXW$D-Onv7YL-?NsH>|HnE-LeE>%s;*-MMY+&Y z%R1+#xylwL@uPvyLBmZFV)Pt@=gG?+%E_xIWLpm^#SJZf5sHDKHw5cnD@EwLWicv1 zjzv{SvG{e1f^k*67NWMGijTZ_3w*umi8Ang0({2+-?V2D(M*t~5Xn*$7(m>|Y!LAX zGas>&*%?GyrNB1ger6fOqs-n$>|!>8c#PRQh-iAaVjS@}vv(1x3I)y~rkK5t*v+hf z_#Cs_h&{|MBA#IOA>tus(}*XT-9hYSHj79*P~aoPK4uGu&ojG=c$nD|;wfhL5y>SL zSVcU|>|?~onW>0$9TfNs@d;*cA`UV80z?XPcgF*&oKKM z@o8pN#9?L+5T9Yzf%q$a{y?Hb8*K!pURIYq)AiK1^on}X*{aaZ^;FMrhc_)R(CuL_ zuzXXsDn44HZmaB*D!naanCevm=6hg6};p)wx_O4se;TcBF&DZa}()&CY#TzYgR>_c07M|Y((A0k8M?M;GRr? zB^d9c9=N;B;VNcrZg4C&i=ThG{S4y&P}atO`Y^TuoaBT2UMQvQ?l^GIc+l$@JpQ5lZ}b zG`;T%#D6icVG7HOuNs3p3FR#OoKVK04P7F>Y6AKjm23u1C6p|jPbfK9 zN+@}_nNUXHgM=~)_Y=w(JV+>K;fI7W4i6K`1azY(iCy5uQ9Cd@CtGO3I&-LQg?$ zv_l5ZBei;1<#l Ti>i=f@$M1@<7&L{roHBG^W|%z literal 0 HcmV?d00001 diff --git a/internal/demod/gpudemod/build/gpudemod_kernels.lib b/internal/demod/gpudemod/build/gpudemod_kernels.lib index 1776fcda0d814c3ec6354831c22889df3ae12210..948c17bc0ab5d52e80c50302cba64ab80c6cd812 100644 GIT binary patch literal 8238 zcmcIoO>7fK6n=4_1OvtiN%#r*sYD7?;Mh)Vic}RR4Jd&mSox7s4qY6tO$>JI`Uj~T zC@0iIk3DkjC5H%ct6CMSRVBnBYAJ;l(WoeG?;|8hqvP@Olhe}h>8aNzN8{7yC#6e;ik!;jW$8+)xRA=0K$0eA z#>RL}c*m-$Do&5gOq`R32d4+IYbIAvEK2EO>Mf}>pGlWB!$5iq*i4^nqOy>jXLXVl zIiYs6kukl~IZ4ixlEqBU?2%(LeW+DKhxcfaF}>wfGLuV`Qzm~_CZnIu6iup3o6)Ki zviSr?3YE)=LP;_Z2_;NVIvVV0bh0veeH}5^$t+=dCUR10;c67%RB&V)sk}0WFuFOf&g>YD$zwvX?G|pE! z9G{XhvK))OF*qAPJvua_a$_-U49?N@Vq1vtq=!+Mpz1tp(JzVGe63qGo}$QC+vW2uoe8U8@ymIX#{R|>YS7HUx;)&uG=P5m+hU4qubSJXV6t{yQ>1# zFDnPkRmRxZ47<)7JHy&+Z!mk-uxhP*P2UK+Ep5#b!Q}>Qhq(@WrE1#dv%{VVET4E* zBrb*dlAiI!pD-AqU8%{d=%AFMke3r=b|NEx>@qZsUJr+WWv*c-7F}4 zeW;o-g~>!-Z5x9!sTm_-*>-VOOld;htuj7VqQ}<6Gyw1&3>2JHX3OqvkRbw>bI|jk3rWe)NSaYB+^hfmw zKL$Nj557CF_8?EIXsCx_kAE8qzfe@LoPkbQuC$D2lEr)}pDyU zu|*IO0<3PxKP<7Zx*`8W?hsWnpDQ5zMGk8I|0G#0UCuEmSlu8PdUnXRgap^ z0MS7eLnmTz2%pqu^dYtYhG-Mt9+2_Z-M4og`AL&wo!JzQoYXW)HhEyN|KZb56|^dJ zQ8tlrun&_>{gh9#39qM256LDHm(9zP^~^XH+&Bw1S6q%)V}EdfiC-f;J5~J~ui|jF z)khS+8mlp)byHUJBihj3F^JLCI6y^Y)qhVLjNazOc|9?H36a!4S3@BqR{_Y1mT!JC zJ@q`PjC)I+Pcgzh#c_Uw!bW~j%)kG3@~?Ts+<^NSS4X?Ad4<58eq};o`r#tC+iI+* zKl|}X3UPacxZRc1LeLwC_AB~oq^MUvYX2^as2gjcHp&XMMsQZL3&A^&xORciV?_KP z-{t#h#J)v{-Q|g(PN<_?#5=!z{LN)Vy|o5vzAQ6Bp0>S5)$V=jf;<}0fHU#_Gz!kH;txdPH@o<&aZ<6hZ!x7v%I zo2X~Yp@4Jhe7tlUWX5~+!}gV9=ykj9)u{aIIpV0m_uV6H{}JC_-M4iM5KpwzXrIvQ zF=JI*&c%9SrF{A&zi2^C;MzOMZmhQwe+25pCJ3=UuQoj+22CF3-1Di9~tle Dnc*{9 literal 86280 zcmeHw2Vhi1*Y@3HlWa&JA%FscEMbw*LK+=RA%Oq^LNy|q9tfndyMfR{6$BMf5qwb* zv4AKDq9}r)BgKMXFMucrh=`~l_~t+7%-k}2H=B#-`+e{KF-q>4GtYC*^qD(%?{3p> znfB~ach+cU6+>jlj?tYXI>f|8i~0y2jf#xw$i$2qk|Yh0q~Pyw{`>ptAT~B4C%-T{ zE^c5_e0*H*-o4|7OR?$cIl1HVvvboO&eE*(^x={1N0yZ4r03<^)5ql6i*gH6`*z4H zD9&_7r4CNd&yS0{J3cKnsdvIqri_b&Nc_;qxa4Ga)}kWFT25|ueqp9F_ir&7#WGl! zo|Es$w&xe>jOJC5%YN>&$nDv4$EG_*<>xtd_MqH3IaL?De}A_nkEU#97N+MGjms@4 zF3}hMI`hd4IviQ)CHCT+(rkT3^Qz2ezhsun%q#~Mq)#8rGLQ7@r-=x#^gihfm|aXX zj)d4Kw8Eo~{30=?O>|}!MX^-o1>zbo#5CaSEW1-p%+Ad((8P*MCOGm(ax&QxG9~6I z*Pf?RA(E|$m6h-eVX1S=N`$1sNHv-#)K4A^9hOGUq4yWtHQ2&Tp2$)QGt0E#Lb-}~ zYB6=EeyjrX>`E?Dlj5n5j+#(X%&USJ`31!zv(R8NMGJBi2EhwPW){jBz)}?y*z=2s zpWcTN3hc$DMdVc06pxIIQ1cJ=BQ-SvQB5U5Oh8gjSl$&!+>hdh#S4pbctb$RC?`d^ znf9y+Ja^1SQEr(tGtZgJtCL_Q#p5;gXU{Fn%rD9j5TF}dTFgOmy~)L9hLy~)s3SYG zAgh4)AnMoHp|bK>MfE4Yf`-Pmpc#O*EA)4WXnL??thNUWeO>J8PL~#Q+DnVnPKrG* zPojR2Csi?xFulvf%9QHd&Eu|9C0D!7cuZZ830WDHuAR8DOLH=XM8&Nx1%2z4Y6kRa zYA$qX+PO|m)N0uh#qw5K0?QOzxty6L4welXDtXd%??oazcTn~sCfJO8A50q($@E|H z-_5lihpOx5ChF!|kJZ8#wwr6cvWdC5*7M!m&9z?M58Pbq*+$^MaIJSac&P5&Atp*b zkI1M2J5#vp%`6y^lM9>I5xDsxA*(dMAZG;IZaBh=vqnqptGJ!-54a!(Nu8``{l2 z#hyJU)0xS1_%|F9MsbX+b?3JmBrbVHMzE_j82q~A?-ocp47k%Ai+Nfyv;j=dYH&Cur2=L~ zH8}LU05BU2;NAwz2?MxufC<^?xg0pc~RDHeF0!&ynIP5P`fEiW|4%;mQFcYi6VZWLMm^B7)uL0(B1Gtlbsqwn!@% zUBDz(gTr$42h1n~I6Gh#RD;8QzXULwtHHGg+z!C}ZUAR`1J*70hoP!|kM`mKQ(6rU z%R3D)%M9S21kQaCWYFGGz#Xr~UK9W*?3Eg{HwAF>4A^_YpuKf~+hV}pX9n#Z z2i%Va>;>!)nuE>bZu)Z_z%}7mPwmm&puHr(4K!e{*r2`ffSYN+-ZKX6tp?mC1NJ^L zXzvTaoibq0XQ#3Ds0p|R9P6n)IvTVW3%I@p?ByG@Hx_VH4A}dZL3_^vZk+*phYZ>~ z3b^A2>{S}H=eJ9e>cBq?-te!g{ZJ&w5KzXs8E>5dmt??Rwn2Mi05{%%y~hmNdlGP~ z4cOaj(BAui`@(>|D+cYEcEcVO{$cRc9<4csA*)``qX8Fdz}~$E?d1Y)tO0uu8?^U0 z;GQ*LZ@WQz?*i_q0eim~w09M7etTFgduos798+C;v;|ye1NQDRXfGXbV+`1vXVBgv zz&&Ze-WG%Qb^`8w1NMG2Xzy3Rnf6xS9!)r=y7p)RxM&0R1{$OE+L|ia~p` z0k_D2y>$ldy#ctL2J9U-XzvHW{c6Boop+73$8CUX!Lgp&Bgvq>et^5%fW7es?M(yR zd;|7Y8?^T-;IMwiL7)OT~Wt4UDgXc%C z^Ggj$f{zTf>lxoWrFYMMy$AP;AJQYaOK5t(!9CK4B@OBx**PL2)J=NtAxVSchYT2$ zo|u#{v{!mc(y*kI?(UF-N{gKNg}EvDS@ul(gzo)@qz`sxI`gv!}|*8J4!PP+B!<}ow*~7e;lie)UCEDOOig2>PU^H z_e$XagJ1jQXWNS%#d%I^*r1ly0VPo0{E7KRBdtA(3rq3~a_x|x)E|ewWs-uW%bA!v zN%E3RrZ~869*QwFSl}@sK9G<@yezm}rzNHI^fH;^<;l@Y+C~o;W!R@fm{dQ7UCn(Z z-#Dq0WR~KjW{3=TjB+^b&de;3IZ1pIhZIK52Z<^%L-0bw2ij!~KR+#HXP?BXH^yy@VKhK!60A6ZbGm019W^PLoz zQ)7f!lp%`|Rio^=1?Ww-?g6RRf%f$V{t|&$2UP&Hbx;MwB_Hlk?sM!8speSoR)?5V zmjrDNU6NdKrcHI9BP~MBiSmh1w-x6uRhuVw=`I=5LEPs~2UD|{r@nFGHk0ntwb^u! zaLK3}ko&B<0ctXGq=Zyy&QOM?vxUl1wM`5%(o@H+0=zH|^Zi*DvOA(QUYwb|4ow0$8`(-)%4 z;LY!G*cFOkQ#a92htOMhvtw`r(v(m?--#u3n=X{lEkf=ARZr6H+sn;Ft#2_mAlDgK zTMvKtBwf{Rlk{#zpE4!#Wsu^oy^eu&>pN5Hs#xA|S8;6#dT+@`m)e>1zB>IN(!-k% zE&)GF>Me!1XyJp1pfPFn{8Z%w!iVbc#|W?L>vII&KY7C)jcceReO*_zGyrZPyrp_N zeJSZ1>-3+1-V5%RK7cER_52#?g?4UXO3mN9NRJ=w6T?o@-zq90cAWH0boxv2LDUXoQRmrtkQrWr###J!~kETZ|FNBX834HI{h9zPN(hC0v{yrtVj zCB!YS9SigqQ~tr+}p@&fqx0pzLwu-=KitFEmx+v>s~WUT`icU8t?3o3J)C>0pxL-nUgs z-s#}Ckwr=c)iHmNZ^y~lbDppcGfZ_MPjwX^f7NFNbVwsUab?Uf)d!#T6`$S`tQnee z{?$CB!r(BH2Rw-Wr}9t%wmtO&Tpj<8I>$A!1e(}Ld2v_vpXz%TG<^@;(!7n=f5U=- z*FX978oz3upw@0K`QUU$0p~fK-<*8z#JCmy1#03r250dJ5ib}&?Gmr6iR1XisT~8V zacpzg}q$gq`1WcFb4^rIxbF&6+*1-SS8fefqamdxCSGxY6t$>>a zzgYt}Yv5)L+^m6{HSiDCz|D2`AFSNJ!L)6hsS*!6Dy4_~q?18Xg9+SdbGU}pf;hi{ zYYK5&Ey*^KrF(k;et#1w5aT1@-ZtxkxAbX{)DP|_St$3G&ICyz4t_(|$O_*$1N?F3 zO6flC??jL^)IqnIK!LLph}*{bR7&Y;dJ^QP1aB-QX0P=)FE1$r?xRr^a=R4l@MULg z!4Z(Z+a+t6*>~oZ&{#=|1A9%(0cDbJd9U|+NtnO=L6VQdk8vZd@C{M5BwvRQJ4UPv zao{&Av-o@-^zJ+KL}_0WijQi#awDE=N&A8*zB^{e3lxX^_L>J@@IqjgKlBHAohBV- z^?~W-n)C)`UX^%%?|Tc}sA74`ynXTTiRvHXGbWoQ$UFuMrP~AOMqEx0Df9Be8mp{; z`bWOq-zD7#8?3zYnw%aiqz9Loy~^=0_*fyBx z@Tw$4{_TO%%34xznZG}zNHTw6nXfOlMeMPkgD^jVb}1P1R}u>4KN(~WER%vUy(IJx zZ%8*UwEM+kbH<*WM&-kFtjm(n%L*P(+e$)zQ{vDwAb!bP+6DFjk7qI^p%*DVxGb3L z%lT<;_wvI0Q+@3pAO%AlAFWg#`_m8GFL-i*H|D4E_ttG}og48(lzh3#0v;q%TDreI82rl#T zLw}XWS$n{E!0Ls-#zWEslVqhK2>t7O>@Qwklz+@G*n|3}{y_D^>YLIv_2=bMzh10< zS^YtIy;%K1dqMr!LcJ-!7PUUSSbanNL;KmHeAIO4AGRolufg!xK3@3PLH6&J;6q(Y zlItsh;R7{zJh!3thyG^^VfHB> z%J7tq_IUY%5D)&mqI@W8O8$6x0x%uxBZ2WLj(k4eyqP~5|0UF7Z2Sk8`T0@%V0!`{ z+ShEw{9qi*C#NHywV$8Mcp<*rzo9<;So=Z!v-b5<>h~bn zr~F_iI}v1&>r>NSeysheKLFnBqrofnrD-oer9VLV8rXd*afbK7`C=E;rhbiW!uj24$8?>@QwuAN<33vQT>rV)a+s?&pv7Um5%` zn?LLpYAeXVntz}?;NMsA z4{=;SkuNohp!GLwBCLN>IcoU&=f&nDzW!P478ziM^-t;r>!+2#tbVL^3!BfSIGW#W zc9wsdFR4BW41XBj=Jc+l^7HY~d>puk=eD%|e*o))BM^%VZ?VE@?Mld^6V~^&2Rf?j zRSiFEA--Ojt#%&^+LO>f)>m1j9~z+c!1{3dRt}Eu*IDL6f3AW2l2m99NN?se`~2wl z`dd^dFdmvYeSJ>OwsiAFqmUk4=FR-k`UdMa;J2)4)t&IrKbzV8eOP%I9{hVV|Fft) z(B6w{T5ZElg5wSOFj;To{*X`W1C;^#Gp0jb#mebeUntA*EHjJuZbCg_L$?482$&g ze)N;~bG$vVKU~1&2iKnp7_SqdcJW~s&d-p)zkC1ZRpw341j#;(7k&T1;zOYRQ%koW z#Qonf0ziL+_J#iA&32@5FkS~h`F+R$uK74W1ef`u_=~(BusF=@Q$sju<-zj^*q>Ea zPP3Q9L4Sh%D(=rNKJDr*((3;h}U>oi*bpnP64 zyjc19`iteo{ia;r0J{VmZV1PIruy)j>xIvNrjdWBZ^VCq>AZeme+%wmf4hU>o0@UE zRSx9^qlkyjzQP^;FvB;qcq2ZYe8T!=fze0piR}aT7gjGbjYn30O(d(uf>p)p6YRT| zr>U1OOgJ!JOyD2u*UQh0W0!2QIIW*lrx3SVykPrT-j3`;fAO;Wu=$hse=ob21VvEB z1GWbROo#lYPLRAR@L^cxPTIe@j;H#{cmsSzkW^j#pT7Ts`?E^vUYI{aSbz7KOEVix zP`tgMeHUu_gD~GieY@VD;dsoD@z@?vf7F(|e+lCo(tXM_{-D3h`zOBN27V3JR-A8x z^$G3K{0Z|B?BA)w(RhaTQ}(<3eiQp=1>m7gsXc6VFXH?2_1??wt?oZ*y%Oh}8I#N~ zbAkn&KUc!8yUd67E94LT!~M=>Z0=*f(0&2x(}(I4{ZqQdf_;nOn*hEB+%P~m38jGY zH?#ZFew=Dy7tU9HbpJuOw9x-B|8R!}f!zPgywuxphtiMX!zd?I89|!ZLu%Dp$Q&|t7KY2+m<)yJq zvq1WHLLefb)?6cjqHE zr=QFI3R)BD!w>6+`YTOmtUg|>1^X>8U&@Pz^)gN~KCQ6dQP#_o191OE`(w7B2(mPT z{Ry@>&3U_My)q-7Hc8MwaeaXFI4J?{KcKx}|H9(%;vL)1_~QZ)2a~9Y(j9#Lg6@dt zD_#ys-tWLSC)a}g5)PO0SlYzn`HJ1*jYE&}2>qi$nK$jnrMgfEm@iCh+Xnwc* znsMyVAq3AKaD5g<9Jfn^LCO~8SI|0fH=Q^JzUSrF;43_&PT8vfEe9B!FsPB zf0oLCvC&=S{iy4FXtn#%h90W`$2-oCO8tKi`Ey$QlE;_7jb)_#Lwv*MZMtjY!EKjq z)y50Xd?uv7I=S{XZThjdQ|{Epr>t3{q(gpay`%iF`l0g$nj^4&TEaV&m=``@!hQ%Q z%*x7=1il}Fj!>dW$No9}yYsPBezaHS>vz`qNvyoz#`P~wgBRv!@W=Q?jQ=t4EgC-B zo6F;91KP*6{-OV|{)OjhF#aHp>pg5r;C_|tn=4NqxFt?*AJsqfR~SE*xxVHvf_ldS z!w2zA?X+RR{!0EVOMNRLJq}{jpJ9K4{d1aB!Tg1k`C!$R`)p?SbG0V|5$2=HV0Qm# zvHRc+01J@6+7qEC-vj-*?!(G{oYJeJP(Sa z#)S0&$`b`{S^vZFKT`2f?)YS4i= zJule>_F%rG>cDLkv=7dI7_V^|`WMDw{X=_Hf7HHybbdtpDTs%_`5pEZ`T2D2ns3^%?+4Dnc)a=i@#gbK7$0=Lg3rrtK7Ulte_;N!-F*JYo=@U_ z64palk8eJIgz<9o`6KiP*dIWDp!>xVeEtdX|M};S0WRlbu)l)yu>j?K?0?(yN3ZL7 z{%D5#NjgxitoR$AKf-v1=Z*4toANvnHtZO9oSsLOL!8b-Dl zhw~&K&GW}Vcphn1|ZqV%1W%T8wt3}rrT;4BXK<5R5>(a3VBy9!|^7;9Y2%v!l+VZ zH0|!y2uS$Oq)w_B8tAYKv1a68xLn|T*9Joy+He>mZu$HrNZFsvag872`~(@mub18M-SvMwR73x+ zeEH?>xtPd_;YcvK;YwxWOw6d5LBEy|JQJ^Kq%ArcQ<~{R>5~#i7xQnjh{!M zW4QBwt|{Yd^N%K7+Yi4d#T`GR8RKi)3r)K6@%vQV@kepKw*KAm2Z2rGyDT&C>qyK$ zyQyO154uF1a;}yy!+`9T-;N>5ao|9}3$%Y=IYu-AKlR~`UmnFk<&cg6`Oy9~%Q2z} zi>u&E?KudILqI;Xzoi@_n$R8+-1(<3Z~&bAeJ#j`_75r7a8W!_1;0F=0n71_6aw3n7KY-bZzw%Yf`)m0$nNq>{dcL1o4c9n`a=W92* z6x*vj)bFBnTKZTDejs>ua+}X*&c3)`N;1DE^?qP)aI?<>7ti>ik0qu-E1#Jw(^{rg z#BINvRKLT#&?CO%?_OiE+irOv-WFyJF$eqK*SmCM=e@_rXV(92-hkcK5bqaX3~A=~ zUh{r6+nkE8+kDqcA(3rXhCb51ZL8A<4&~JBGUmdoOV+OabHn!wH;vyv+_7=VnzcW) z81wZ0hLb;@+VyV7!|kW7jchUI&#?902d8g}w?DssLjwGU zJzjIzuOs>|xB0y_q>gX1!0chmQjY9ge_&0T?OIw)=7vTG9tbse4-EbAd}^C_AE_;+p9sq1Ge!{1pq@WVUpuQo~xu>JDw{z>O9 zyfW1CzzY8a?=j)4BTQlYLt5Y3x4ASa?2#7XB?*VNKGR`L;FY#Hr-pd1-8yPQ-6Js- z!yRwDKJ3MdqY8qXT|NKYq=2qN!w=SJcGkM<_xM${y2LhoZ}`38qhH>)@s%yxU#dBB z?T*LSEGUd>o)pz6`1hL0ztntd)!dJE-2Y<2F#Dm^0cXP-#@6*;l-9;JG}O^OIe4CP z#7CVT@-m&9S9{50p{CGRFSmLk;%L3Q=k7{q-(mi%sezZm(mp!1I5A_z4{O5G9G{2G zxc%dPhbG_e`>|Keogb#|-Ek^>${Q2TozHkSbL!F+&o4gl#M|~Rr!#hV9jeuC;}>Vo z#-CVqb@9}nI(==rc$@7VXukj*RSCb!wVR(hxNrC%2X6n$&z|%(<6ZU;6%yt@{`D+PM1j zm_XkijaNr(+CHk`wx7y2PpbLsq7m)7t!>rnx!-?C%{%YA_>YBoP4^Cu_3Qme-5;j) zKRt0_&P!vauB-cA?Q28p)w{dDmEV4M?;+{(*x0V; z&KHlj{MqF7oj;vFfBxFIt6xs)9lo&nu>QL?)W7q$->*FoG-%THgP*^BwZ5%)>wtRQ zhlJjL@!a=kW^QRz?_j~PU+;F@JLsFs-~9B-hN7LPK4>!L>ZBiD8~V+6-|l{9%eo@R zy3yO7f2HRqJ(k&Oo1fl!wZ)~UQkKlxADZ9kv-_qDbp&m=HmYH}d*7JT_>Hc!wp|$X z`>RiM9XQSZ>d$2hr+nG%)2~eVML%?Yck;m1{qq)F+W5}SCN(Gjc>cpt4X=H=_sM=c zqnp(1c>A|M*V?_wdJ7d^mCYoGzuWA1La+ahSPq;<}Xw*IeoJ>Xfzn zzdJo>@zx(2^(=k#y*|IaHM7&+IxpVRKBs;D?gfwS%};rCbipp`u*OFnZ3j=g*kp`t z+*fyAnc6w#j>+pow*ELL?VVN~e#%aI`=iG%4{7+-njdFB_ITI4p{w`i-<#cV(g11f zgru;q2DX@7d-fA?8(ui-`||c@9_TvEKfmjSm(Lv;C#of_#>de4B{*U+D=XFP`iqWCYk4i_!9jy85=LgD< z?@g)OF7HHH(no2Ff4|i5`*$2&&fdN6Ouds1pUA!b(>vVz?~b*9{o&W0x38O!wrzJ% z;-WiFG;H`qLASNhgWrB~($F@$cQ38m{EY?6Qd$f?dGudvYj$|4;mm>eefV^XgM){B z`oz)QHLRf-4d4H??~&ajDn^~z+ZKt{nDoEgW6~G+SerKsU~0j za3tuR;8`sr>Msk+ZdAGSqVvU$P5SQoV#Tme+SHT^&1Lr=o%&r_#)r1=Z|j-4cl2Xl ze1F+H=fcKketsvVvgh2*lRj_#RHLURezU_r?O^_zD~FSM*;;PNJQ_8*^Gg@+uD2-U z#3$>1PqsC$zv%b8+MPNc4$2!pU{dAE`=@=`yH!#DDJOSr`Dw{T^Va%jKfV8~wb|M6 z|E^<_UJ6_K-IL|Li^?w*E!;TEuQYh;drwJiQ-1uqou$Ux$FF{oy1#|x%!oxpPkeqy ztv^5icXO|PEm}x3#^z2*{q*?*m!xHLFPL`kEI-ya@sTNqSKWKM$@_l}|Mt6P2YNS& zThyr6(wDxo?`;xYD(Bt=DmL1pS=Q)yfUkK=S>~r`2b(r(KTvxxuu>u~D@i zd2-jqD@zLZjE()h!^zGq(@)l_|IDxfE7H$5S+Fzp^pdQrFMjt>{B!em)vIBy>~LX9 z%;Rk*=WT7?ZPwk7#m{_l<0JKQKc3z9tL-yZHXZonl}4*(K6oH|--&+poBExYn%?~1 zGj=apckVIs_Fc^z@9wa+=2r*uzf4V=X8Lp20}pQN`gF(56+7lmd4JdIo!%JRC+u{e zn${+J|JXI;k2a5<-{jbG&z{ZZD7+qOV+$uudFclnCa^y4-X%Z_4pH&&wSLQc;wFK?e895JE&ws!s^tF zhGQEf_||$Zum9TjuRLHCX=57n##XFC5$Ak&*Ft zc>VlU)Z+njO}!smeQSdb5lLVB9BTdJJxyCb{&CBSeKlf^_4e7jF*5e2LlujPI+bnv zvu^2W-)}BId;2HR10H&yv)8+~6|{IPAnK=E9)9rSl+bR+hMex&f9@lne`kKDO@ZIV zsG}vB8{bawd+2JL15M8~t~2k?L8w8 z8T00+KfU`^tF$!{v6rF_gny!*L+ZWK?*VJ;9)5Vuw{h>*34HDk`^5{T z-wr>Mv|!_)@`mlxEK5@6HGgPl-%VfmU5ow9+xl>7SIbkU&%D|Eli#L%6*2Gi=W>p2 ze&EpHODi^a-!=H?r(3>hb^HF9SKdEXduPjo>o&IAdhx=xvAaI%v?ICqZy!gdw0R-n z_^GEl_X-S7ymeSu16#WuQI|&SS^50xlzSr1-ksl~uWj_)sdbNxirnb^@i%8X{`pLF zdDi<=ew^6;(eACUwlS@1Hq9qy`|{asd^R*~aL@RFDYlb`FRX6;Nzd4Z7xq2=*rVSy z2&_G$m(Rg*?=E?)?z%_6w7+#=(Wb`po`1d7v@IoV7C0tHKjUlpxY_#quO51H<|kVk z_-C5(E}qHBD=aW&OHI1$9D5~p(ObWFK7UKgu@A%*nztQo()peTb2?nya(3U`Q*Y17 zczW9TKYr~pceU4mwRL{KYkAEro4)>a=xlq(rqh4A{}#u^x&5E)dFbVctN!g5Zrh)q z+O^A%xtYuB`JP|0dD6fm$J1WAJn-`gf$_6;N&Z{UCAWC6UQ$5#muqTH_f5~^w|&y9 zJ+gY@y^3qV`wBd_F-ghuQf!vw`HApn9^PR{I}Zxsm!R4ywqW>+h?(cYehCUrv2}vK ziMEK-h)$X5i`KnWJLQ}TneY~_%Ik+}Fb zvE}0Nh(~%0ZD6iCv0Ocvf;qjCJpTf(F)bKM0QIbt0I(SXHYxhur09xdsJzrgvJQi)SA<|MCF1}{V{0&i~3sgL>Q`Al)u>LjMY~SxxQ#P{$Hvh zw?pyoo8Q<<@_%T4bGyFYT(AFQ>veVeL0oIJQ+M)wbsBfs3etq@wmoUW#hE{OKJ7V^ zqjx3Gn`QfyO|21dk#OmNozVhX)05{d*oEsQt`U_LxLPR57MPbkBnNv=#_vJ(z{WMp zP4a^*(`>&?%f$Q&Rt>qy^XoQG!|cxw+&|xJ=}gF}1GF_Zvpbfrvb0O`gY=YHn>M?m z)Nc=L8BJ$>@FE4h4u9t|cbfH5*t0+$CEQ@7(NgL*DVdT|0MrRL&afZBO|HU$O^MGB zKY|uQ^+`*@A|~R6sFinQ31H#uswquo-u8yMm_}bj$A^z%vmwo|ES^Ha}vg()3X`^P~QLw2I~ZX8(k`#0r$$F48yGZ}-~<&+B*f{nPa*a{9TO>$iNY z`1kG|Z?50^_50@i4;-T1y#M*Hz5ik70r>a^Ph9ykBD}FM$lEM9|6xAXE7B(*F(2?nM^Yp)@Qr@v z;{}iy<0D1-10=?14sVQLqnV{vATdTZNcg1KEL{ePacve^>I)L%7J<}(a1M|d_W($} zNSEm^Gagi=*Fa+2pFrwFxJw{0Ze#d>Z#I>@BS<+Ubpxp@;d&^#WRUJ8-E@%F6Rrb% zn3c6mcaTPsE&-$wAO%Q$K-xgM43J7l%2%W!ke;X7b#k4LG!-OfZ#GDyiSYnPEy?e5 zAQh5sElAAXt028VRt|!+nxs>Tgx~Pdg}4(yV)mwj)S7g^D7wpv&H`Pw52bbmiIs8# zNURUN2@-s76tvMckXR}AfW%7qB}kJg^*cp60}?Cc1+Mdv{s4*DGu4u$am4TiiIuV` zNcdgmX8cw&Zm%s!tdz+hu~KF$(qxcWDSrit+4}<|R?6ZaS?2_a>1Kh%N(tZm$4faZ zSniYef;5eCkOdMeCQ;kVhkXR{~fyC@R1rjS!oe)`9A0(y=1BsO~10>d#zk$TY z>YpIZryO{}doiq(H9%sew1R}MMwq3RiWCkKD`gDV`ABzy#Ox)3gtLuV>H`uhWiCjI zNLK(7vo{tbR>}n+v9i6ONEINlQg*H@+q)AaR-%oHZnL7>2NEmgRghRI?QpEk=7j9FSNkmxHu|QlC?#H6XE4uID-*=`E0$z3m`9L5w{hu~Hrf=}FR^28r4G z5hPYh3%p3hO4&@2x`D(>xd|j@?=6s6DcixTLrfO~64NDu#7a3GBv#4}_2t>DJ4nw` z4iZ3OrR)O|D`f^qYbiBfk%~ZKrF3!~%swEo*>N_e65|1oSSgj zK}=<{k0Rl(2eVR60EyX~3KA>PFN*H6qO&xVOBo5$I`Y;Pq!-EZc#xo4p@&Tc391XE z*&wkzKMYbF&0GgSf=Yta!yrL+K>7?M)`H(ExF+yE47i5WJ3s<8NNqu4{kbDZ%$@@z z_#aXy!iBvAH60{2y61z$aC<>wuQ&?NKGj<3#3qz@SbgwJA?TWMq zr0$e@2&8zD4ukX@*@HhyfQeEvN%W)O4o6mciM=?dG~1c3{bBK{^i};Zc~$zne7j)R zHSYQY=8!YDKWr}R^*=+;@Kx&dKYCu(kD)U?=qv?Yr?h@f)rak8^?%vJ-KJq9p4 z31hTZd~3ksJ{e=M-ygY7{K@OYA0j+`2HbTB;|DdX7{M)o^`7j>L!aJl0JDQI#`^s` zqCGY?;%GgG&&AY`&$G^2Py#G{F2V{Qb?USem-@5t&&)n9jY~cISh;cf4>v|XLv(d^ zah+H&hhG@Ld~E>podL|31~A_kz#KM!c~^%SE^jX`1`V&E52IoE<&Bmw4gV5^V494n zsTBrJ#T8^(>^_Wulc$KssdEeAO&0hi`vedn;O&td`ogxJecLyaERe8*bS8h3TZKRt4FeEq6Z6Cdmx;8$@6}8dE;7 zP`L4I&o{WT_ksKZT{Tb(SRSK2dM+4&I0KXN}a1n3qa~bsS81Rlu{p6q@^Hrq15Fd zJw~Z36loPm-6-`%kd{#DT1CR~aVMp|2GZk{x>=Fl1}TnGD?q{n6Zkr1McM~a0;L`V zX(^?Cph!nR!iO~Q13n-vqtq`I=@>}8DfJ{sPf+S0gw3Ns%stG=NgC zfwY`bar`m~2EH_iQelCRo}|=3MS`hQ8cL}+&he-S`kx{-0x6ABtsvo{fVX5*q%e?% zQ>qEqg)B&h(4JlBZz>?z2w;*0w7W=G6=R^nU=lL+dt?k!7)(OO2Pz|hGC~J@Op>I* z3WG_=_}C+3h{9kJGQRZ47^*Osgp3m&8N(C?laO)RBjYZG!6amy_sB?77)(OOC6A1| z6$X=#@rOsoaD~AnWJoYWdP^qgY`9Ki(5@r*C=4bc!y!6anhH%x+$ zDj4@F3??BX#AB}RQy5G_MtzSy(iH}ikb&P2S9Pv36b6%!Ve`nyR2WP`Mk|kuEQP@& zWJGvmWGf6NA)|vwMvlT@5;D4ZWaKIgCLtriBO^~?FbNsSj8R3u8mTatgpB?keT-5V zOhU#G#<;~K-I4%N2vO`pfAbXvlaMjoW3EOk3??BX!y{vi!eA0I@D=N-M^u5rU=lJ4 z7(=tWwt-d-g$jd7$iV$h)jo<829uCc#u!zM!D5BMBxFowj9ThlLa3Z2xRxjkCLv>% zM;~Jq29uESfJcU1VK5093q3L%3WG_=z&&Ht5(x`VK509 zFM4E*R~Sq}#w#8fWeS5y$au{oV}in95;ETM$e5@wn1qZ9#;Bt2PEr_5LdG7CJ|-&+ zCL!Y>V^lGhOi>t2LdIc_jHwEPNyzxrBV(GvU=lLEW{i3!sa^s^Aw;kX{Vi7*OhU#< z#;9TpPFEOALdF@7j2Q}pNyxas7*&kgnF@nR$oS2pk68+XNyxb7kuh6gFbNr6>RvPf zq7bxuhB*p@NyxBxWZbVXn1qa49vO2L29uCc*CS(|!eA0I;26WhT=IazU=lK{#K6xs z_JY(f(DZ;S&M>I!334dt@LM0!xGo8Fc;wj(u9LYg(Txu6xN;Lfr}Z)Mc#pSlyR*FM zEAwk^t6SoG-CY=?9)JJAf939~)#BDmcciU+s#R>llyyT(etl+oz{E8>eYbB2 zdAxkz9Sx6v@yVhc@Bg?w=9^HT{KI`;I#4<dE4hM*#G4)@6TKJe`3^TSzp!l z)`U*sDX)5;sw!R98^y`^s;;hfnpsw#`C>)nJF?#O`pWU8P2W!H`JAkm?KZEqr*Gxz zIkn{Y-*uJ!$@XNwuGb!AM-!L4o4rWJ$yYgFcTfH3$$J{5{}^z4bMGIXymhI3m2ua5 zeL3k{X|bbl*8|QchBWY7HOKzKosFZG%z7dGtI*doXTARUtr7PQ-_v=u?X=~#T641M z6h1Tjv5)s%^ZBl@yqERP&s&B*TK|!aiDSasJUzZM z-G0^d{PtV3)2GDz@=)}M(SF}72wpV*Smxr>V>kQ%zW30U$^GqrZkRE4UHfs>#btkk zzd0B?spgv({?qX~EovM~&A)xUT)yi3UoXBodpDY1U49Pzxp?iM@zG18+f3UUGxqFf zuK4QWa(jO}KcGVui9VEX_VOBRb>>em53WyAxX`e7gpN%F1=+1{r5@E7xOu}h5R=&v;)@!2FY z!=1ThPFrCSDt%C6`l68}9Rn%s`+GXUg@>+iT?i>Yk`-A!2EohyJ%(VSFZ_cX!e8_y zh7mkU7%AMIlj+Q4DnGb*o5DHe)!BPyGretauc!`@G31#r64!Lb(r7j9BXGYhXd;tc zHBlL^n&=W&O-#9~ru|Y^O^5ZanvVNiHJy&RYC2zb(R7G_n;0R_9U`rYM%xxxpWdc! z@E60-{Dc{-zi2W3km7B^CyH2&c~|hmirA;Lm>Pt^eu0?e;vGP8#RLeL{%LQ-=`n$X z!8SlUPbCb1gLGFr_#rrNlV@M>`R{5mbedg_onQlY@ENbS3C}Yz&--V@_15PZK5boX ze(D&&)HQ&qX8?n(SzVs-L$uY^GqzN97<@uq9R^#oIt)G;uMPuUvsw&(6s$Ts_-wm6 zOcTO*Zg;BzJ533LXNuT=7F9T*kzMrQ{NY%Az8v~fO1~B0UFzpOrA`D<64PdZ$ zRaf?C!gy}?7z1{26Hr~>z$gCIVLBSXbRrDa684{CX`e!6xb~mUgz?-DyBNUW-o)GF zxv%0%>TSXr!2ImZiF#I_pY8%?O6Zj*^cei)gKEb;CYCUs%N1t;6K?>MU;xv@04C7@ zCdmM%Ct+}w#(M6uDE@AJ9rhv&&M=6{dd>^BNv?e(nK1Iax?+=83BzQESD0BQYI4g;qI82hKTp8|+Og*rjx8;gEukjYo+n2E#x<&Q(iDu$ zEG*2FVTf_+H0T4L%CmqN40(bE`j~74(h!4vLsgLm`i6@J`i5Skb~|JVX2`N&hBSg1 z(g+<>W~m)hX1Qq8UWg?`@6hSg49c3w2t|jO!r~lFMWd#qc)WsOb{u0}TQEB}zd!{c zE3crq*siEZgQhVn6i}0%p~jOChEie?7av{p3uA~A+`isl)v zX`^WH6G+jZH3K(_Qv>Y_YxQ2Doji8Az zF$~kZ51I@Rk;OFMgJy-G@$zLaOskk%L306GQPs2n4SaN3qlpL23a()d{uHtV%tp{y z;h$>f5X9o(s8H2>2byw0;{~yWaO|dH8iMASpy>!27UXgb0!=U+vLTD*r&Nb|P^WoO zr`e&?9MNgcf+j-nRSR;L!8NR&b3tQ;gE+Mmn?SPyj?`3*1Q4}>jb(&sKKm@8a&SwHF$a{YVfR5)CB7^P}MG&Iyw!WRf=|? zIb1aLbQ(M@6j^vSC~EK|P}JbLpQyomRZ)ZIb)p7O`9uw#*NGau?-Mn67AI<~I!#lZ zCRC?srqkGTn&vvq9Xd@5ohD4DX{pn+(rNI_N-P(i8i^V_vl2CULM3YOJWA9==rnka zBx3N?NYvm!6E%1uBx-Pgh#EZY5jA+WBWmy@N7UfCji~9O)8Ltmh{014Q4^=r#OpK( zI!zCqCQ+wJ5;Vsk2Fnlgjz#9Hl{P;av^HrmY_-y2aPOknp_z@XRvbg~4_mFc1}l@V zO&*ut|4D+ZPH@c+Qc>3D10r@YS>z!)v&cdt6^(_R>Rf;t%j`y zs0lAqpW^ifpX}KmdT)Gg;|zZ)8ODb2wSX{Ec!|pD1F7C7od0nhS=;HzTPiD`vj`*j z>PswG%qSczFIyg{_?p65gb{qD5KH#;RIB^esjNpii!g$(e#DY}z0Kx0iWX0ER&LWK93-@Gd9ey~bL9EI;!dZk7d<_(Q zwK}u&^InRt6P!gD!Pg+c*S8ZMX{EA$<}AVpz6KMkjS2e<+?(G2<~q3WP{~<@5qu2+ zgtrM<*p@p6?!m>JhgtzlAdKK^C?u%;Fx@fg{%%US8gUk31Yg65CHIX6_qIKxvhXX! zFc3!Ybr)zwU)>d712~H?g0D1UX?>|I{PHmjgb{qf;i-$SI~89uIEyfXui?bf`chfT zIg2oYuX_Yv*yjTjUz<3KFoLfUg0FAePWn-0?dL4Q2)^#s`KqD#`kJ!{Blx;c=SyY% z!dZk7e5LDr)l_`>VI@HzjNmH+wCec6a;dDwoJAPHS0=Hv^F}SjR}^OvM(~xT^QE$S za~5F)U)efeL5i<*&LWK9D@W%`WtDOkVFX{fI$yzxuLn4bFoLfZWyB@U_ zUqN`Q0f8`ruL97j{SfDQmDPf?2qX9^B$l=x)>VAn$ytOEd=&}4u+OWkA)G}R!B?@) zS3Sj7K4%d|@KvJorLv}R7GVTmV+CJWhqoxc9_1{;2)^usFRVkAwT`n0BlvO%zHmOg zRq^#MXAws5fBz&LWK9YmVRx`@G7^dwr>Wv= zJZBL`@HJQPB@H>fS7klKS%eXM%@cfK9fm5tR&f?#1YZvbzOW8e))vkpjNog&&Q~+V z*N2=%7{S*9oiCMjnzINa_@b!?+m&&Ra42cj3Blvn4v}!-Zd!puw zuhyJJ7{S*GWt>GA!Pg^# zFYNOz6kiW>7GVTmj|#r9&#SEGIg2oYug3&mcn=$<_K3m z5qv!^_`-6vRD7M~EW!xBmI}VGTq?`EHY7qIjNoe-Xw|-fzFH~18gdq41Yb`OOYR%! zOJzlK7GVTm|I+ztt@!H8S%eXME!X){S-G4=7{S+*I$!M+UsE}YFoLhAbiP#9Bb-GT z!PnC|Uu_g$FK`xN1Yaw3zEsvW&LWK9>lvM|wu-MKoJAPH*Gio)m35Z02qXAsjMQ-B8=c`wa!42aTZ|&UoQ*3aNdYfe0|SZgb{qbBKX3-p|aqMvM3Nn@U>3xg=FoLhw1z%GJADXSQKIAOI2)^DBd|}_{ ztoZtovj`*j+AR3OzM-p~&~!U(?J1g*G!by0lP=PbeqzTP61cKuRW?Kq1tg0C%t zFWdukReU9J7GVTmZwtO~51_KrIg2oYudRYFtixEvR~cs!M)0*w@P&1#vL56t!U(?J z(fNu~d_BWigb{pI=zOWHH#mzhg0Jm5U-62sgPcVe!PgF*FO_wIvj`*j+NtxEp!oWo zvj`*j+NJZQvI6TtA_T$+zIKCF?1w!RUslc{jNoezv9$eAWp&^z!U(?h>Us(OJyzMEW!W|KBos73tR`oKW|h>!MCt4 zmLH;M%0PonreYokO?}X)82m|tMgrzD(AWgbHPD0!nA>iJr=GuNoT?gdRZ0aMO3 zD&|ShBny}~L6a!>`VcgjUp3FCL6ah2YCxef1WaqtAVc-F5Htk><`QT|2$55-fLRBc$pYpGXqF2Y{Aq=`0;YaL_Hznq**k&;Evw}k z0h-qYJLRB3%PQt2&{PPRqo7$OU}A2A?^IDR;qV@-GYjjBe=nj5>jlG*q3E1(ICFM*W+n!2E2 zEJT-q#wK8%0gY9_ybBsRcuC<|4u>4W#_%=;%TeL_C1(+aQfgRLfN19sB&f47LwRv< zeJc|PLt-juEFdw<+Wfo++N$i>frWuELMh${ZB?w#tjLih!U(JnKp)GU#`)dMHqqgwTB$; z;Vi-ktZzK9TC`v}B#gj1MlAQ`TFP035m?8G<-S~(Ig2m?>jbgfkFUFM0*62tf%PqD zs~TT#a~5F))=6TyFIQws^u*R4_)Y`OO#2;ZtC}|+<1E4mtnZ2CzFe0%i!kU^>eh-b z^atJt$t!S{I&yPlofirH1x$GMs44lj^nIm&^((O*lPHIv-{GS#Fxr&W|H?N}O z;sz$g$H(>V-5YiAHXXwH^wC7p`=nzO3)d^xnUd*nCfV)9c2e}qEzB;NFr;|!gu<-i z0;woB)1EbfUIK&Hu`=_EAjf&|22;rdd}}8U(#xEgd64BiDHb(Z`IM{l{{7IOpy{vE z4AA7MKPB_r_9HgFZ-seRW1}=tcxOtCX%pe~DNQ22uO~*MSd`r0t7vK}zQm-7@q%jM z+Dzd~St=T8L5O5)Vr3;lgl7ON0AA55j8qfx{YOm4hpDQ#2qrr|?)&E2i*gHiJS7wOqqquYAH-;NFj*Cp zlnaSJq%QjEft}rl3er!~bdNeObE02~%DnA)sVc{8$;O z3n(Y)@S5vb`1oReo|E!Y)$gGRl*06!d`GrDzfePQ{V6u(_t3Pz3s+iFPz)0?{)`*` zOd6xd#@eu$;)EDyvnAXekpS~+KR0y|pA^p$ZMGf>{Zf+>p!`v+wwRf|FoDL!$u%Gs z72KjHRz|K=@~x&+^=ovTp-omvRsG~0Cux(3#Jf&$iN5DRx1eQ-Eqx`KcJOG=rID6~ zZ4Jd@Ytq8y$yw8+kv*{VpxGGVEtdQ!|om!&Sv`>SdL)^_kZ)XAoD zdBvs$9=c#@R(ksIs3`ZHFtu+7m}E1ZQK^H|^K)pzgu?Ys7|OKR-+RXojf_iX3l?ky z+?rLpBI{?W*}Q!0IC`7?XinAHi=b>Oy}{KQWU>l-m`ArJBip--0X1)b!CF)_S<`kt z*F0WliJ3ud$J86Fs$u#m%>vse?9WL>&V1(txyN9i&dSUl!&^Y!IHjle8k|fUEPIbp zx!Gf&pE+`~OYOi+&qhUhc5(5T{9MUaS~MO;uua%i+2{lPE;<(=-*LG>Wi1D@Z$97{ z9^px}^yW!CO^(StT*;M0{`~-QR{0yxoGHiT>J!{jaMR|NLJ4juxao6C zp#+)2H$gPWWVE1j0n%FQ0%SS`sNGE}NI|KEs*AD~tI;Z8?Q*VQ1*I0Mu^{&wVGAI) zTP)nEsC!t&uAo%x3VQ=BR^v?z)@*;ZXsud?3%g?(EGiYKcI&J_1*HPj?W+~MK1C&| z8)Fr&(P@C1g7az?w(3ePYq#vOO+hEy1f{ya(jYZT1uE~WH9cL~YoksBRJXW_P534g zoS($rE8Zk%u$o&3#jbD*A!9|QoCW-@4(3fe`dzdN7{AUz%^R;+@t#D@gm7=7rugiRsrkoi4=l9MJBo2HOUNDwG0<;s${f_RtCG= za>-~{wF0MaI?=U|%Z-{+k*+svig}kCIV}OdYX%*#TRaWJB|*uO=8jK6Yjql+=59~} z)oQh1^lVvxeQug5H9$F^2OVZGEx7iw=K20I= z69{2HAh(G)r7BG#V&xVQ!MeM6IXC(QIV-{zfcH8-tTUDx- zEFTZJoP_X#kcFxsS~L0|k=I&qld2`l4%GWuIwjCrs0yMr<8uEh)PgQeOT;4?cKauA zv}!Hf?Lm$N#0yHwTUE�w{c(WMeD0F9 zbe=setg1}tFn98DYK&WZk(<195J z?oYMUnaK4>#>in=wV78Z8CUZjkC3X%^m_nZ7aGYR0eUSX9l@b+J@uo*$yC<~`$tXXd#j z#o43y<8gRs4^Q^1crqT%A3!C=^G8Sg*$#5?p+#(DEJ)Qn&xng5D?QS{3ZQZBhsP5= zO2I8VFOG}T_yGjijY7LF6UGfq&Qa`H1LmbiVp0#x2rKhH>q~g5$I9<4EcqvWruT=! z*@g@r9Pv+jNv5Xs+)#gQ#kkHE`s+^qLeux# +#include +#include + +typedef struct { float x; float y; } gpud_float2; + +typedef int (__stdcall *gpud_upload_fir_taps_fn)(const float* taps, int n); +typedef int (__stdcall *gpud_launch_freq_shift_fn)(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start); +typedef int (__stdcall *gpud_launch_fm_discrim_fn)(const gpud_float2* in, float* out, int n); +typedef int (__stdcall *gpud_launch_fir_fn)(const gpud_float2* in, gpud_float2* out, int n, int num_taps); +typedef int (__stdcall *gpud_launch_decimate_fn)(const gpud_float2* in, gpud_float2* out, int n_out, int factor); +typedef int (__stdcall *gpud_launch_am_envelope_fn)(const gpud_float2* in, float* out, int n); +typedef int (__stdcall *gpud_launch_ssb_product_fn)(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start); + +static HMODULE gpud_mod = NULL; +static gpud_upload_fir_taps_fn gpud_p_upload_fir_taps = NULL; +static gpud_launch_freq_shift_fn gpud_p_launch_freq_shift = NULL; +static gpud_launch_fm_discrim_fn gpud_p_launch_fm_discrim = NULL; +static gpud_launch_fir_fn gpud_p_launch_fir = NULL; +static gpud_launch_decimate_fn gpud_p_launch_decimate = NULL; +static gpud_launch_am_envelope_fn gpud_p_launch_am_envelope = NULL; +static gpud_launch_ssb_product_fn gpud_p_launch_ssb_product = NULL; + +static int gpud_cuda_malloc(void **ptr, size_t bytes) { return (int)cudaMalloc(ptr, bytes); } +static int gpud_cuda_free(void *ptr) { return (int)cudaFree(ptr); } +static int gpud_memcpy_h2d(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice); } +static int gpud_memcpy_d2h(void *dst, const void *src, size_t bytes) { return (int)cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost); } +static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); } + +static int gpud_load_library(const char* path) { + if (gpud_mod != NULL) return 0; + gpud_mod = LoadLibraryA(path); + if (gpud_mod == NULL) return -1; + gpud_p_upload_fir_taps = (gpud_upload_fir_taps_fn)GetProcAddress(gpud_mod, "gpud_upload_fir_taps_cuda"); + gpud_p_launch_freq_shift = (gpud_launch_freq_shift_fn)GetProcAddress(gpud_mod, "gpud_launch_freq_shift_cuda"); + gpud_p_launch_fm_discrim = (gpud_launch_fm_discrim_fn)GetProcAddress(gpud_mod, "gpud_launch_fm_discrim_cuda"); + gpud_p_launch_fir = (gpud_launch_fir_fn)GetProcAddress(gpud_mod, "gpud_launch_fir_cuda"); + gpud_p_launch_decimate = (gpud_launch_decimate_fn)GetProcAddress(gpud_mod, "gpud_launch_decimate_cuda"); + gpud_p_launch_am_envelope = (gpud_launch_am_envelope_fn)GetProcAddress(gpud_mod, "gpud_launch_am_envelope_cuda"); + gpud_p_launch_ssb_product = (gpud_launch_ssb_product_fn)GetProcAddress(gpud_mod, "gpud_launch_ssb_product_cuda"); + if (!gpud_p_upload_fir_taps || !gpud_p_launch_freq_shift || !gpud_p_launch_fm_discrim || !gpud_p_launch_fir || !gpud_p_launch_decimate || !gpud_p_launch_am_envelope || !gpud_p_launch_ssb_product) { + FreeLibrary(gpud_mod); + gpud_mod = NULL; + return -2; + } + return 0; +} + +static int gpud_upload_fir_taps(const float* taps, int n) { + if (!gpud_p_upload_fir_taps) return -1; + return gpud_p_upload_fir_taps(taps, n); +} +static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { + if (!gpud_p_launch_freq_shift) return -1; + return gpud_p_launch_freq_shift(in, out, n, phase_inc, phase_start); +} +static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { + if (!gpud_p_launch_fm_discrim) return -1; + return gpud_p_launch_fm_discrim(in, out, n); +} +static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) { + if (!gpud_p_launch_fir) return -1; + return gpud_p_launch_fir(in, out, n, num_taps); +} +static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) { + if (!gpud_p_launch_decimate) return -1; + return gpud_p_launch_decimate(in, out, n_out, factor); +} +static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) { + if (!gpud_p_launch_am_envelope) return -1; + return gpud_p_launch_am_envelope(in, out, n); +} +static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) { + if (!gpud_p_launch_ssb_product) return -1; + return gpud_p_launch_ssb_product(in, out, n, phase_inc, phase_start); +} +*/ +import "C" + +import ( + "errors" + "fmt" + "math" + "os" + "path/filepath" + "sync" + "unsafe" + + "sdr-visual-suite/internal/demod" + "sdr-visual-suite/internal/dsp" +) + +type DemodType int + +const ( + DemodNFM DemodType = iota + DemodWFM + DemodAM + DemodUSB + DemodLSB + DemodCW +) + +var loadOnce sync.Once +var loadErr error + +func ensureDLLLoaded() error { + loadOnce.Do(func() { + candidates := []string{} + if exe, err := os.Executable(); err == nil { + dir := filepath.Dir(exe) + candidates = append(candidates, filepath.Join(dir, "gpudemod_kernels.dll")) + } + if wd, err := os.Getwd(); err == nil { + candidates = append(candidates, + filepath.Join(wd, "gpudemod_kernels.dll"), + filepath.Join(wd, "internal", "demod", "gpudemod", "build", "gpudemod_kernels.dll"), + ) + } + seen := map[string]bool{} + for _, p := range candidates { + if p == "" || seen[p] { + continue + } + seen[p] = true + if _, err := os.Stat(p); err == nil { + cp := C.CString(p) + res := C.gpud_load_library(cp) + C.free(unsafe.Pointer(cp)) + if res == 0 { + loadErr = nil + return + } + loadErr = fmt.Errorf("failed to load gpudemod DLL: %s (code %d)", p, int(res)) + } + } + if loadErr == nil { + loadErr = errors.New("gpudemod_kernels.dll not found") + } + }) + return loadErr +} + +type Engine struct { + maxSamples int + sampleRate int + phase float64 + bfoPhase float64 + firTaps []float32 + cudaReady bool + lastShiftUsedGPU bool + lastFIRUsedGPU bool + lastDecimUsedGPU bool + lastDemodUsedGPU bool + dIQIn *C.gpud_float2 + dShifted *C.gpud_float2 + dFiltered *C.gpud_float2 + dDecimated *C.gpud_float2 + dAudio *C.float + iqBytes C.size_t + audioBytes C.size_t +} + +func Available() bool { + if ensureDLLLoaded() != nil { + return false + } + var count C.int + if C.cudaGetDeviceCount(&count) != C.cudaSuccess { + return false + } + return count > 0 +} + +func New(maxSamples int, sampleRate int) (*Engine, error) { + if maxSamples <= 0 { + return nil, errors.New("invalid maxSamples") + } + if sampleRate <= 0 { + return nil, errors.New("invalid sampleRate") + } + if err := ensureDLLLoaded(); err != nil { + return nil, err + } + if !Available() { + return nil, errors.New("cuda device not available") + } + e := &Engine{ + maxSamples: maxSamples, + sampleRate: sampleRate, + cudaReady: true, + iqBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.gpud_float2{})), + audioBytes: C.size_t(maxSamples) * C.size_t(unsafe.Sizeof(C.float(0))), + } + var ptr unsafe.Pointer + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dIQIn failed") + } + e.dIQIn = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dShifted failed") + } + e.dShifted = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dFiltered failed") + } + e.dFiltered = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dDecimated failed") + } + e.dDecimated = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dAudio failed") + } + e.dAudio = (*C.float)(ptr) + return e, nil +} + +func (e *Engine) SetFIR(taps []float32) { + if len(taps) == 0 { + e.firTaps = nil + return + } + if len(taps) > 256 { + taps = taps[:256] + } + e.firTaps = append(e.firTaps[:0], taps...) + if e.cudaReady { + _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps))) + } +} + +func phaseStatus() string { return "phase1c-validated-shift" } +func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU } +func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU } + +func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) { + if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { + return nil, false + } + bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess { + return nil, false + } + phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate) + if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]complex64, len(iq)) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess { + return nil, false + } + e.phase += phaseInc * float64(len(iq)) + return out, true +} + +func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) { + if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil { + return nil, false + } + iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]complex64, len(iq)) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) { + if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil { + return nil, false + } + nOut := len(filtered) / factor + if nOut <= 0 { + return nil, false + } + iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]complex64, nOut) + outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) { + if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil { + return nil, false + } + iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_fm_discrim(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]float32, len(shifted)-1) + outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDAAMEnvelope(shifted []complex64) ([]float32, bool) { + if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { + return nil, false + } + iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_am_envelope(e.dShifted, e.dAudio, C.int(len(shifted))) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]float32, len(shifted)) + outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDASSBProduct(shifted []complex64, bfoHz float64) ([]float32, bool) { + if e == nil || !e.cudaReady || len(shifted) == 0 || e.dShifted == nil || e.dAudio == nil { + return nil, false + } + iqBytes := C.size_t(len(shifted)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&shifted[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + phaseInc := 2.0 * math.Pi * bfoHz / float64(e.sampleRate) + if C.gpud_launch_ssb_product(e.dShifted, e.dAudio, C.int(len(shifted)), C.double(phaseInc), C.double(e.bfoPhase)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]float32, len(shifted)) + outBytes := C.size_t(len(out)) * C.size_t(unsafe.Sizeof(float32(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dAudio), outBytes) != C.cudaSuccess { + return nil, false + } + e.bfoPhase += phaseInc * float64(len(shifted)) + return out, true +} + +func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { + if e == nil { + return nil, 0, errors.New("nil CUDA demod engine") + } + if !e.cudaReady { + return nil, 0, errors.New("cuda demod engine is not initialized") + } + if len(iq) == 0 { + return nil, 0, nil + } + if len(iq) > e.maxSamples { + return nil, 0, errors.New("sample count exceeds engine capacity") + } + + _ = fmt.Sprintf("%s:%0.3f", phaseStatus(), offsetHz) + shifted, ok := e.tryCUDAFreqShift(iq, offsetHz) + e.lastShiftUsedGPU = ok && ValidateFreqShift(iq, e.sampleRate, offsetHz, shifted, 1e-3) + if !e.lastShiftUsedGPU { + shifted = dsp.FreqShift(iq, e.sampleRate, offsetHz) + } + + var outRate int + switch mode { + case DemodNFM, DemodAM, DemodUSB, DemodLSB, DemodCW: + outRate = 48000 + case DemodWFM: + outRate = 192000 + default: + return nil, 0, errors.New("unsupported demod type") + } + + cutoff := bw / 2 + if cutoff < 200 { + cutoff = 200 + } + taps := e.firTaps + if len(taps) == 0 { + base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101) + taps = make([]float32, len(base64)) + for i, v := range base64 { + taps[i] = float32(v) + } + e.SetFIR(taps) + } + filtered, ok := e.tryCUDAFIR(shifted, len(taps)) + e.lastFIRUsedGPU = ok && ValidateFIR(shifted, taps, filtered, 1e-3) + if !e.lastFIRUsedGPU { + ftaps := make([]float64, len(taps)) + for i, v := range taps { + ftaps[i] = float64(v) + } + filtered = dsp.ApplyFIR(shifted, ftaps) + } + + decim := int(math.Round(float64(e.sampleRate) / float64(outRate))) + if decim < 1 { + decim = 1 + } + dec, ok := e.tryCUDADecimate(filtered, decim) + e.lastDecimUsedGPU = ok && ValidateDecimate(filtered, decim, dec, 1e-3) + if !e.lastDecimUsedGPU { + dec = dsp.Decimate(filtered, decim) + } + inputRate := e.sampleRate / decim + + e.lastDemodUsedGPU = false + switch mode { + case DemodNFM: + if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.NFM{}.Demod(dec, inputRate), inputRate, nil + case DemodWFM: + if gpuAudio, ok := e.tryCUDAFMDiscrim(dec); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.WFM{}.Demod(dec, inputRate), inputRate, nil + case DemodAM: + if gpuAudio, ok := e.tryCUDAAMEnvelope(dec); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.AM{}.Demod(dec, inputRate), inputRate, nil + case DemodUSB: + if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.USB{}.Demod(dec, inputRate), inputRate, nil + case DemodLSB: + if gpuAudio, ok := e.tryCUDASSBProduct(dec, -700.0); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.LSB{}.Demod(dec, inputRate), inputRate, nil + case DemodCW: + if gpuAudio, ok := e.tryCUDASSBProduct(dec, 700.0); ok { + e.lastDemodUsedGPU = true + return gpuAudio, inputRate, nil + } + return demod.CW{}.Demod(dec, inputRate), inputRate, nil + default: + return nil, 0, errors.New("unsupported demod type") + } +} + +func (e *Engine) Close() { + if e == nil { + return + } + if e.dIQIn != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn)) + e.dIQIn = nil + } + if e.dShifted != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) + e.dShifted = nil + } + if e.dFiltered != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered)) + e.dFiltered = nil + } + if e.dDecimated != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated)) + e.dDecimated = nil + } + if e.dAudio != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio)) + e.dAudio = nil + } + e.firTaps = nil + e.cudaReady = false +} diff --git a/internal/demod/gpudemod/native/exports.cu b/internal/demod/gpudemod/native/exports.cu new file mode 100644 index 0000000..1278e61 --- /dev/null +++ b/internal/demod/gpudemod/native/exports.cu @@ -0,0 +1,190 @@ +#include +#include + +#if defined(_WIN32) +#define GPUD_API extern "C" __declspec(dllexport) +#define GPUD_CALL __stdcall +#else +#define GPUD_API extern "C" +#define GPUD_CALL +#endif + +GPUD_API __global__ void gpud_freq_shift_kernel( + const float2* __restrict__ in, + float2* __restrict__ out, + int n, + double phase_inc, + double phase_start +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + + double phase = phase_start + phase_inc * (double)idx; + float si, co; + sincosf((float)phase, &si, &co); + + float2 v = in[idx]; + out[idx].x = v.x * co - v.y * si; + out[idx].y = v.x * si + v.y * co; +} + +GPUD_API int GPUD_CALL gpud_launch_freq_shift_cuda( + const float2* in, + float2* out, + int n, + double phase_inc, + double phase_start +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_freq_shift_kernel<<>>(in, out, n, phase_inc, phase_start); + return (int)cudaGetLastError(); +} + +GPUD_API __global__ void gpud_fm_discrim_kernel( + const float2* __restrict__ in, + float* __restrict__ out, + int n +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n - 1) return; + + float2 prev = in[idx]; + float2 curr = in[idx + 1]; + float re = prev.x * curr.x + prev.y * curr.y; + float im = prev.x * curr.y - prev.y * curr.x; + out[idx] = atan2f(im, re); +} + +GPUD_API int GPUD_CALL gpud_launch_fm_discrim_cuda( + const float2* in, + float* out, + int n +) { + if (n <= 1) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_fm_discrim_kernel<<>>(in, out, n); + return (int)cudaGetLastError(); +} + +GPUD_API __global__ void gpud_decimate_kernel( + const float2* __restrict__ in, + float2* __restrict__ out, + int n_out, + int factor +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n_out) return; + out[idx] = in[idx * factor]; +} + +__device__ __constant__ float gpud_fir_taps[256]; + +GPUD_API __global__ void gpud_fir_kernel( + const float2* __restrict__ in, + float2* __restrict__ out, + int n, + int num_taps +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + + float acc_r = 0.0f; + float acc_i = 0.0f; + for (int k = 0; k < num_taps; ++k) { + int src = idx - k; + if (src < 0) break; + float2 v = in[src]; + float t = gpud_fir_taps[k]; + acc_r += v.x * t; + acc_i += v.y * t; + } + out[idx] = make_float2(acc_r, acc_i); +} + +GPUD_API int GPUD_CALL gpud_upload_fir_taps_cuda(const float* taps, int n) { + if (!taps || n <= 0 || n > 256) return -1; + cudaError_t err = cudaMemcpyToSymbol(gpud_fir_taps, taps, (size_t)n * sizeof(float)); + return (int)err; +} + +GPUD_API int GPUD_CALL gpud_launch_fir_cuda( + const float2* in, + float2* out, + int n, + int num_taps +) { + if (n <= 0 || num_taps <= 0 || num_taps > 256) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_fir_kernel<<>>(in, out, n, num_taps); + return (int)cudaGetLastError(); +} + +GPUD_API int GPUD_CALL gpud_launch_decimate_cuda( + const float2* in, + float2* out, + int n_out, + int factor +) { + if (n_out <= 0 || factor <= 0) return 0; + const int block = 256; + const int grid = (n_out + block - 1) / block; + gpud_decimate_kernel<<>>(in, out, n_out, factor); + return (int)cudaGetLastError(); +} + +GPUD_API __global__ void gpud_am_envelope_kernel( + const float2* __restrict__ in, + float* __restrict__ out, + int n +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + float2 v = in[idx]; + out[idx] = sqrtf(v.x * v.x + v.y * v.y); +} + +GPUD_API int GPUD_CALL gpud_launch_am_envelope_cuda( + const float2* in, + float* out, + int n +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_am_envelope_kernel<<>>(in, out, n); + return (int)cudaGetLastError(); +} + +GPUD_API __global__ void gpud_ssb_product_kernel( + const float2* __restrict__ in, + float* __restrict__ out, + int n, + double phase_inc, + double phase_start +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + double phase = phase_start + phase_inc * (double)idx; + float si, co; + sincosf((float)phase, &si, &co); + float2 v = in[idx]; + out[idx] = v.x * co - v.y * si; +} + +GPUD_API int GPUD_CALL gpud_launch_ssb_product_cuda( + const float2* in, + float* out, + int n, + double phase_inc, + double phase_start +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_ssb_product_kernel<<>>(in, out, n, phase_inc, phase_start); + return (int)cudaGetLastError(); +}