From 15220ab597b2ae2d5c4b8193dece06272ebe1fa9 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 08:49:44 +0100 Subject: [PATCH] build: fix gpudemod kernel archive integration --- build-sdrplay.ps1 | 8 + internal/demod/gpudemod/README.md | 5 +- .../demod/gpudemod/build/gpudemod_kernels.lib | Bin 0 -> 86280 bytes internal/demod/gpudemod/build/kernels.obj | Bin 81894 -> 81894 bytes internal/demod/gpudemod/gpudemod.go | 195 ++++++++++++------ tools/build-gpudemod-kernel.ps1 | 36 +++- 6 files changed, 181 insertions(+), 63 deletions(-) create mode 100644 internal/demod/gpudemod/build/gpudemod_kernels.lib diff --git a/build-sdrplay.ps1 b/build-sdrplay.ps1 index 2f46e1f..d4f41e0 100644 --- a/build-sdrplay.ps1 +++ b/build-sdrplay.ps1 @@ -39,6 +39,14 @@ if (Test-Path $cudaMingw) { Write-Host "Building with SDRplay + cuFFT support..." -ForegroundColor Cyan +$gccHost = Join-Path $gcc 'g++.exe' +if (!(Test-Path $gccHost)) { + throw "g++.exe not found at $gccHost" +} + +powershell -ExecutionPolicy Bypass -File tools\build-gpudemod-kernel.ps1 -HostCompiler $gccHost +if ($LASTEXITCODE -ne 0) { throw "kernel build failed" } + go build -tags "sdrplay,cufft" ./cmd/sdrd if ($LASTEXITCODE -ne 0) { throw "build failed" } diff --git a/internal/demod/gpudemod/README.md b/internal/demod/gpudemod/README.md index e51c0ed..c69ee9c 100644 --- a/internal/demod/gpudemod/README.md +++ b/internal/demod/gpudemod/README.md @@ -20,9 +20,10 @@ This is **not compiled automatically yet** in the current environment because th On a CUDA-capable dev machine with toolchain installed: -1. Compile `kernels.cu` into an object file +1. Compile `kernels.cu` into an object file and archive it into a linkable library - helper script: `tools/build-gpudemod-kernel.ps1` -2. Link it into the `cufft` build +2. For MinGW/CGO builds, prefer building the archive with MinGW host compiler + `ar.exe` +3. Link `gpudemod_kernels.lib` into the `cufft` build 3. Replace `gpud_launch_freq_shift(...)` stub body with the real kernel launch 4. Validate copied-back shifted IQ against `dsp.FreqShift` 5. Only then move the next stage (FM discriminator) onto the GPU diff --git a/internal/demod/gpudemod/build/gpudemod_kernels.lib b/internal/demod/gpudemod/build/gpudemod_kernels.lib new file mode 100644 index 0000000000000000000000000000000000000000..1776fcda0d814c3ec6354831c22889df3ae12210 GIT binary patch literal 86280 zcmeHw2Vhi1*Y@3HlWa&JA%FscEMbw*LK+=RA%Oq^LNy|q9tfndyMfR{6$BMf5qwb* zv4AKDq9}r)BgKMXFMucrh=`~l_~t+7%-k}2H=B#-`+e{KF-q>4GtYC*^qD(%?{3p> znfB~ach+cU6+>jlj?tYXI>f|8i~0y2jf#xw$i$2qk|Yh0q~Pyw{`>ptAT~B4C%-T{ zE^c5_e0*H*-o4|7OR?$cIl1HVvvboO&eE*(^x={1N0yZ4r03<^)5ql6i*gH6`*z4H zD9&_7r4CNd&yS0{J3cKnsdvIqri_b&Nc_;qxa4Ga)}kWFT25|ueqp9F_ir&7#WGl! zo|Es$w&xe>jOJC5%YN>&$nDv4$EG_*<>xtd_MqH3IaL?De}A_nkEU#97N+MGjms@4 zF3}hMI`hd4IviQ)CHCT+(rkT3^Qz2ezhsun%q#~Mq)#8rGLQ7@r-=x#^gihfm|aXX zj)d4Kw8Eo~{30=?O>|}!MX^-o1>zbo#5CaSEW1-p%+Ad((8P*MCOGm(ax&QxG9~6I z*Pf?RA(E|$m6h-eVX1S=N`$1sNHv-#)K4A^9hOGUq4yWtHQ2&Tp2$)QGt0E#Lb-}~ zYB6=EeyjrX>`E?Dlj5n5j+#(X%&USJ`31!zv(R8NMGJBi2EhwPW){jBz)}?y*z=2s zpWcTN3hc$DMdVc06pxIIQ1cJ=BQ-SvQB5U5Oh8gjSl$&!+>hdh#S4pbctb$RC?`d^ znf9y+Ja^1SQEr(tGtZgJtCL_Q#p5;gXU{Fn%rD9j5TF}dTFgOmy~)L9hLy~)s3SYG zAgh4)AnMoHp|bK>MfE4Yf`-Pmpc#O*EA)4WXnL??thNUWeO>J8PL~#Q+DnVnPKrG* zPojR2Csi?xFulvf%9QHd&Eu|9C0D!7cuZZ830WDHuAR8DOLH=XM8&Nx1%2z4Y6kRa zYA$qX+PO|m)N0uh#qw5K0?QOzxty6L4welXDtXd%??oazcTn~sCfJO8A50q($@E|H z-_5lihpOx5ChF!|kJZ8#wwr6cvWdC5*7M!m&9z?M58Pbq*+$^MaIJSac&P5&Atp*b zkI1M2J5#vp%`6y^lM9>I5xDsxA*(dMAZG;IZaBh=vqnqptGJ!-54a!(Nu8``{l2 z#hyJU)0xS1_%|F9MsbX+b?3JmBrbVHMzE_j82q~A?-ocp47k%Ai+Nfyv;j=dYH&Cur2=L~ zH8}LU05BU2;NAwz2?MxufC<^?xg0pc~RDHeF0!&ynIP5P`fEiW|4%;mQFcYi6VZWLMm^B7)uL0(B1Gtlbsqwn!@% zUBDz(gTr$42h1n~I6Gh#RD;8QzXULwtHHGg+z!C}ZUAR`1J*70hoP!|kM`mKQ(6rU z%R3D)%M9S21kQaCWYFGGz#Xr~UK9W*?3Eg{HwAF>4A^_YpuKf~+hV}pX9n#Z z2i%Va>;>!)nuE>bZu)Z_z%}7mPwmm&puHr(4K!e{*r2`ffSYN+-ZKX6tp?mC1NJ^L zXzvTaoibq0XQ#3Ds0p|R9P6n)IvTVW3%I@p?ByG@Hx_VH4A}dZL3_^vZk+*phYZ>~ z3b^A2>{S}H=eJ9e>cBq?-te!g{ZJ&w5KzXs8E>5dmt??Rwn2Mi05{%%y~hmNdlGP~ z4cOaj(BAui`@(>|D+cYEcEcVO{$cRc9<4csA*)``qX8Fdz}~$E?d1Y)tO0uu8?^U0 z;GQ*LZ@WQz?*i_q0eim~w09M7etTFgduos798+C;v;|ye1NQDRXfGXbV+`1vXVBgv zz&&Ze-WG%Qb^`8w1NMG2Xzy3Rnf6xS9!)r=y7p)RxM&0R1{$OE+L|ia~p` z0k_D2y>$ldy#ctL2J9U-XzvHW{c6Boop+73$8CUX!Lgp&Bgvq>et^5%fW7es?M(yR zd;|7Y8?^T-;IMwiL7)OT~Wt4UDgXc%C z^Ggj$f{zTf>lxoWrFYMMy$AP;AJQYaOK5t(!9CK4B@OBx**PL2)J=NtAxVSchYT2$ zo|u#{v{!mc(y*kI?(UF-N{gKNg}EvDS@ul(gzo)@qz`sxI`gv!}|*8J4!PP+B!<}ow*~7e;lie)UCEDOOig2>PU^H z_e$XagJ1jQXWNS%#d%I^*r1ly0VPo0{E7KRBdtA(3rq3~a_x|x)E|ewWs-uW%bA!v zN%E3RrZ~869*QwFSl}@sK9G<@yezm}rzNHI^fH;^<;l@Y+C~o;W!R@fm{dQ7UCn(Z z-#Dq0WR~KjW{3=TjB+^b&de;3IZ1pIhZIK52Z<^%L-0bw2ij!~KR+#HXP?BXH^yy@VKhK!60A6ZbGm019W^PLoz zQ)7f!lp%`|Rio^=1?Ww-?g6RRf%f$V{t|&$2UP&Hbx;MwB_Hlk?sM!8speSoR)?5V zmjrDNU6NdKrcHI9BP~MBiSmh1w-x6uRhuVw=`I=5LEPs~2UD|{r@nFGHk0ntwb^u! zaLK3}ko&B<0ctXGq=Zyy&QOM?vxUl1wM`5%(o@H+0=zH|^Zi*DvOA(QUYwb|4ow0$8`(-)%4 z;LY!G*cFOkQ#a92htOMhvtw`r(v(m?--#u3n=X{lEkf=ARZr6H+sn;Ft#2_mAlDgK zTMvKtBwf{Rlk{#zpE4!#Wsu^oy^eu&>pN5Hs#xA|S8;6#dT+@`m)e>1zB>IN(!-k% zE&)GF>Me!1XyJp1pfPFn{8Z%w!iVbc#|W?L>vII&KY7C)jcceReO*_zGyrZPyrp_N zeJSZ1>-3+1-V5%RK7cER_52#?g?4UXO3mN9NRJ=w6T?o@-zq90cAWH0boxv2LDUXoQRmrtkQrWr###J!~kETZ|FNBX834HI{h9zPN(hC0v{yrtVj zCB!YS9SigqQ~tr+}p@&fqx0pzLwu-=KitFEmx+v>s~WUT`icU8t?3o3J)C>0pxL-nUgs z-s#}Ckwr=c)iHmNZ^y~lbDppcGfZ_MPjwX^f7NFNbVwsUab?Uf)d!#T6`$S`tQnee z{?$CB!r(BH2Rw-Wr}9t%wmtO&Tpj<8I>$A!1e(}Ld2v_vpXz%TG<^@;(!7n=f5U=- z*FX978oz3upw@0K`QUU$0p~fK-<*8z#JCmy1#03r250dJ5ib}&?Gmr6iR1XisT~8V zacpzg}q$gq`1WcFb4^rIxbF&6+*1-SS8fefqamdxCSGxY6t$>>a zzgYt}Yv5)L+^m6{HSiDCz|D2`AFSNJ!L)6hsS*!6Dy4_~q?18Xg9+SdbGU}pf;hi{ zYYK5&Ey*^KrF(k;et#1w5aT1@-ZtxkxAbX{)DP|_St$3G&ICyz4t_(|$O_*$1N?F3 zO6flC??jL^)IqnIK!LLph}*{bR7&Y;dJ^QP1aB-QX0P=)FE1$r?xRr^a=R4l@MULg z!4Z(Z+a+t6*>~oZ&{#=|1A9%(0cDbJd9U|+NtnO=L6VQdk8vZd@C{M5BwvRQJ4UPv zao{&Av-o@-^zJ+KL}_0WijQi#awDE=N&A8*zB^{e3lxX^_L>J@@IqjgKlBHAohBV- z^?~W-n)C)`UX^%%?|Tc}sA74`ynXTTiRvHXGbWoQ$UFuMrP~AOMqEx0Df9Be8mp{; z`bWOq-zD7#8?3zYnw%aiqz9Loy~^=0_*fyBx z@Tw$4{_TO%%34xznZG}zNHTw6nXfOlMeMPkgD^jVb}1P1R}u>4KN(~WER%vUy(IJx zZ%8*UwEM+kbH<*WM&-kFtjm(n%L*P(+e$)zQ{vDwAb!bP+6DFjk7qI^p%*DVxGb3L z%lT<;_wvI0Q+@3pAO%AlAFWg#`_m8GFL-i*H|D4E_ttG}og48(lzh3#0v;q%TDreI82rl#T zLw}XWS$n{E!0Ls-#zWEslVqhK2>t7O>@Qwklz+@G*n|3}{y_D^>YLIv_2=bMzh10< zS^YtIy;%K1dqMr!LcJ-!7PUUSSbanNL;KmHeAIO4AGRolufg!xK3@3PLH6&J;6q(Y zlItsh;R7{zJh!3thyG^^VfHB> z%J7tq_IUY%5D)&mqI@W8O8$6x0x%uxBZ2WLj(k4eyqP~5|0UF7Z2Sk8`T0@%V0!`{ z+ShEw{9qi*C#NHywV$8Mcp<*rzo9<;So=Z!v-b5<>h~bn zr~F_iI}v1&>r>NSeysheKLFnBqrofnrD-oer9VLV8rXd*afbK7`C=E;rhbiW!uj24$8?>@QwuAN<33vQT>rV)a+s?&pv7Um5%` zn?LLpYAeXVntz}?;NMsA z4{=;SkuNohp!GLwBCLN>IcoU&=f&nDzW!P478ziM^-t;r>!+2#tbVL^3!BfSIGW#W zc9wsdFR4BW41XBj=Jc+l^7HY~d>puk=eD%|e*o))BM^%VZ?VE@?Mld^6V~^&2Rf?j zRSiFEA--Ojt#%&^+LO>f)>m1j9~z+c!1{3dRt}Eu*IDL6f3AW2l2m99NN?se`~2wl z`dd^dFdmvYeSJ>OwsiAFqmUk4=FR-k`UdMa;J2)4)t&IrKbzV8eOP%I9{hVV|Fft) z(B6w{T5ZElg5wSOFj;To{*X`W1C;^#Gp0jb#mebeUntA*EHjJuZbCg_L$?482$&g ze)N;~bG$vVKU~1&2iKnp7_SqdcJW~s&d-p)zkC1ZRpw341j#;(7k&T1;zOYRQ%koW z#Qonf0ziL+_J#iA&32@5FkS~h`F+R$uK74W1ef`u_=~(BusF=@Q$sju<-zj^*q>Ea zPP3Q9L4Sh%D(=rNKJDr*((3;h}U>oi*bpnP64 zyjc19`iteo{ia;r0J{VmZV1PIruy)j>xIvNrjdWBZ^VCq>AZeme+%wmf4hU>o0@UE zRSx9^qlkyjzQP^;FvB;qcq2ZYe8T!=fze0piR}aT7gjGbjYn30O(d(uf>p)p6YRT| zr>U1OOgJ!JOyD2u*UQh0W0!2QIIW*lrx3SVykPrT-j3`;fAO;Wu=$hse=ob21VvEB z1GWbROo#lYPLRAR@L^cxPTIe@j;H#{cmsSzkW^j#pT7Ts`?E^vUYI{aSbz7KOEVix zP`tgMeHUu_gD~GieY@VD;dsoD@z@?vf7F(|e+lCo(tXM_{-D3h`zOBN27V3JR-A8x z^$G3K{0Z|B?BA)w(RhaTQ}(<3eiQp=1>m7gsXc6VFXH?2_1??wt?oZ*y%Oh}8I#N~ zbAkn&KUc!8yUd67E94LT!~M=>Z0=*f(0&2x(}(I4{ZqQdf_;nOn*hEB+%P~m38jGY zH?#ZFew=Dy7tU9HbpJuOw9x-B|8R!}f!zPgywuxphtiMX!zd?I89|!ZLu%Dp$Q&|t7KY2+m<)yJq zvq1WHLLefb)?6cjqHE zr=QFI3R)BD!w>6+`YTOmtUg|>1^X>8U&@Pz^)gN~KCQ6dQP#_o191OE`(w7B2(mPT z{Ry@>&3U_My)q-7Hc8MwaeaXFI4J?{KcKx}|H9(%;vL)1_~QZ)2a~9Y(j9#Lg6@dt zD_#ys-tWLSC)a}g5)PO0SlYzn`HJ1*jYE&}2>qi$nK$jnrMgfEm@iCh+Xnwc* znsMyVAq3AKaD5g<9Jfn^LCO~8SI|0fH=Q^JzUSrF;43_&PT8vfEe9B!FsPB zf0oLCvC&=S{iy4FXtn#%h90W`$2-oCO8tKi`Ey$QlE;_7jb)_#Lwv*MZMtjY!EKjq z)y50Xd?uv7I=S{XZThjdQ|{Epr>t3{q(gpay`%iF`l0g$nj^4&TEaV&m=``@!hQ%Q z%*x7=1il}Fj!>dW$No9}yYsPBezaHS>vz`qNvyoz#`P~wgBRv!@W=Q?jQ=t4EgC-B zo6F;91KP*6{-OV|{)OjhF#aHp>pg5r;C_|tn=4NqxFt?*AJsqfR~SE*xxVHvf_ldS z!w2zA?X+RR{!0EVOMNRLJq}{jpJ9K4{d1aB!Tg1k`C!$R`)p?SbG0V|5$2=HV0Qm# zvHRc+01J@6+7qEC-vj-*?!(G{oYJeJP(Sa z#)S0&$`b`{S^vZFKT`2f?)YS4i= zJule>_F%rG>cDLkv=7dI7_V^|`WMDw{X=_Hf7HHybbdtpDTs%_`5pEZ`T2D2ns3^%?+4Dnc)a=i@#gbK7$0=Lg3rrtK7Ulte_;N!-F*JYo=@U_ z64palk8eJIgz<9o`6KiP*dIWDp!>xVeEtdX|M};S0WRlbu)l)yu>j?K?0?(yN3ZL7 z{%D5#NjgxitoR$AKf-v1=Z*4toANvnHtZO9oSsLOL!8b-Dl zhw~&K&GW}Vcphn1|ZqV%1W%T8wt3}rrT;4BXK<5R5>(a3VBy9!|^7;9Y2%v!l+VZ zH0|!y2uS$Oq)w_B8tAYKv1a68xLn|T*9Joy+He>mZu$HrNZFsvag872`~(@mub18M-SvMwR73x+ zeEH?>xtPd_;YcvK;YwxWOw6d5LBEy|JQJ^Kq%ArcQ<~{R>5~#i7xQnjh{!M zW4QBwt|{Yd^N%K7+Yi4d#T`GR8RKi)3r)K6@%vQV@kepKw*KAm2Z2rGyDT&C>qyK$ zyQyO154uF1a;}yy!+`9T-;N>5ao|9}3$%Y=IYu-AKlR~`UmnFk<&cg6`Oy9~%Q2z} zi>u&E?KudILqI;Xzoi@_n$R8+-1(<3Z~&bAeJ#j`_75r7a8W!_1;0F=0n71_6aw3n7KY-bZzw%Yf`)m0$nNq>{dcL1o4c9n`a=W92* z6x*vj)bFBnTKZTDejs>ua+}X*&c3)`N;1DE^?qP)aI?<>7ti>ik0qu-E1#Jw(^{rg z#BINvRKLT#&?CO%?_OiE+irOv-WFyJF$eqK*SmCM=e@_rXV(92-hkcK5bqaX3~A=~ zUh{r6+nkE8+kDqcA(3rXhCb51ZL8A<4&~JBGUmdoOV+OabHn!wH;vyv+_7=VnzcW) z81wZ0hLb;@+VyV7!|kW7jchUI&#?902d8g}w?DssLjwGU zJzjIzuOs>|xB0y_q>gX1!0chmQjY9ge_&0T?OIw)=7vTG9tbse4-EbAd}^C_AE_;+p9sq1Ge!{1pq@WVUpuQo~xu>JDw{z>O9 zyfW1CzzY8a?=j)4BTQlYLt5Y3x4ASa?2#7XB?*VNKGR`L;FY#Hr-pd1-8yPQ-6Js- z!yRwDKJ3MdqY8qXT|NKYq=2qN!w=SJcGkM<_xM${y2LhoZ}`38qhH>)@s%yxU#dBB z?T*LSEGUd>o)pz6`1hL0ztntd)!dJE-2Y<2F#Dm^0cXP-#@6*;l-9;JG}O^OIe4CP z#7CVT@-m&9S9{50p{CGRFSmLk;%L3Q=k7{q-(mi%sezZm(mp!1I5A_z4{O5G9G{2G zxc%dPhbG_e`>|Keogb#|-Ek^>${Q2TozHkSbL!F+&o4gl#M|~Rr!#hV9jeuC;}>Vo z#-CVqb@9}nI(==rc$@7VXukj*RSCb!wVR(hxNrC%2X6n$&z|%(<6ZU;6%yt@{`D+PM1j zm_XkijaNr(+CHk`wx7y2PpbLsq7m)7t!>rnx!-?C%{%YA_>YBoP4^Cu_3Qme-5;j) zKRt0_&P!vauB-cA?Q28p)w{dDmEV4M?;+{(*x0V; z&KHlj{MqF7oj;vFfBxFIt6xs)9lo&nu>QL?)W7q$->*FoG-%THgP*^BwZ5%)>wtRQ zhlJjL@!a=kW^QRz?_j~PU+;F@JLsFs-~9B-hN7LPK4>!L>ZBiD8~V+6-|l{9%eo@R zy3yO7f2HRqJ(k&Oo1fl!wZ)~UQkKlxADZ9kv-_qDbp&m=HmYH}d*7JT_>Hc!wp|$X z`>RiM9XQSZ>d$2hr+nG%)2~eVML%?Yck;m1{qq)F+W5}SCN(Gjc>cpt4X=H=_sM=c zqnp(1c>A|M*V?_wdJ7d^mCYoGzuWA1La+ahSPq;<}Xw*IeoJ>Xfzn zzdJo>@zx(2^(=k#y*|IaHM7&+IxpVRKBs;D?gfwS%};rCbipp`u*OFnZ3j=g*kp`t z+*fyAnc6w#j>+pow*ELL?VVN~e#%aI`=iG%4{7+-njdFB_ITI4p{w`i-<#cV(g11f zgru;q2DX@7d-fA?8(ui-`||c@9_TvEKfmjSm(Lv;C#of_#>de4B{*U+D=XFP`iqWCYk4i_!9jy85=LgD< z?@g)OF7HHH(no2Ff4|i5`*$2&&fdN6Ouds1pUA!b(>vVz?~b*9{o&W0x38O!wrzJ% z;-WiFG;H`qLASNhgWrB~($F@$cQ38m{EY?6Qd$f?dGudvYj$|4;mm>eefV^XgM){B z`oz)QHLRf-4d4H??~&ajDn^~z+ZKt{nDoEgW6~G+SerKsU~0j za3tuR;8`sr>Msk+ZdAGSqVvU$P5SQoV#Tme+SHT^&1Lr=o%&r_#)r1=Z|j-4cl2Xl ze1F+H=fcKketsvVvgh2*lRj_#RHLURezU_r?O^_zD~FSM*;;PNJQ_8*^Gg@+uD2-U z#3$>1PqsC$zv%b8+MPNc4$2!pU{dAE`=@=`yH!#DDJOSr`Dw{T^Va%jKfV8~wb|M6 z|E^<_UJ6_K-IL|Li^?w*E!;TEuQYh;drwJiQ-1uqou$Ux$FF{oy1#|x%!oxpPkeqy ztv^5icXO|PEm}x3#^z2*{q*?*m!xHLFPL`kEI-ya@sTNqSKWKM$@_l}|Mt6P2YNS& zThyr6(wDxo?`;xYD(Bt=DmL1pS=Q)yfUkK=S>~r`2b(r(KTvxxuu>u~D@i zd2-jqD@zLZjE()h!^zGq(@)l_|IDxfE7H$5S+Fzp^pdQrFMjt>{B!em)vIBy>~LX9 z%;Rk*=WT7?ZPwk7#m{_l<0JKQKc3z9tL-yZHXZonl}4*(K6oH|--&+poBExYn%?~1 zGj=apckVIs_Fc^z@9wa+=2r*uzf4V=X8Lp20}pQN`gF(56+7lmd4JdIo!%JRC+u{e zn${+J|JXI;k2a5<-{jbG&z{ZZD7+qOV+$uudFclnCa^y4-X%Z_4pH&&wSLQc;wFK?e895JE&ws!s^tF zhGQEf_||$Zum9TjuRLHCX=57n##XFC5$Ak&*Ft zc>VlU)Z+njO}!smeQSdb5lLVB9BTdJJxyCb{&CBSeKlf^_4e7jF*5e2LlujPI+bnv zvu^2W-)}BId;2HR10H&yv)8+~6|{IPAnK=E9)9rSl+bR+hMex&f9@lne`kKDO@ZIV zsG}vB8{bawd+2JL15M8~t~2k?L8w8 z8T00+KfU`^tF$!{v6rF_gny!*L+ZWK?*VJ;9)5Vuw{h>*34HDk`^5{T z-wr>Mv|!_)@`mlxEK5@6HGgPl-%VfmU5ow9+xl>7SIbkU&%D|Eli#L%6*2Gi=W>p2 ze&EpHODi^a-!=H?r(3>hb^HF9SKdEXduPjo>o&IAdhx=xvAaI%v?ICqZy!gdw0R-n z_^GEl_X-S7ymeSu16#WuQI|&SS^50xlzSr1-ksl~uWj_)sdbNxirnb^@i%8X{`pLF zdDi<=ew^6;(eACUwlS@1Hq9qy`|{asd^R*~aL@RFDYlb`FRX6;Nzd4Z7xq2=*rVSy z2&_G$m(Rg*?=E?)?z%_6w7+#=(Wb`po`1d7v@IoV7C0tHKjUlpxY_#quO51H<|kVk z_-C5(E}qHBD=aW&OHI1$9D5~p(ObWFK7UKgu@A%*nztQo()peTb2?nya(3U`Q*Y17 zczW9TKYr~pceU4mwRL{KYkAEro4)>a=xlq(rqh4A{}#u^x&5E)dFbVctN!g5Zrh)q z+O^A%xtYuB`JP|0dD6fm$J1WAJn-`gf$_6;N&Z{UCAWC6UQ$5#muqTH_f5~^w|&y9 zJ+gY@y^3qV`wBd_F-ghuQf!vw`HApn9^PR{I}Zxsm!R4ywqW>+h?(cYehCUrv2}vK ziMEK-h)$X5i`KnWJLQ}TneY~_%Ik+}Fb zvE}0Nh(~%0ZD6iCv0Ocvf;qjCJpTf(F)bKM0QIbt0I(SXHYxhur09xdsJzrgvJQi)SA<|MCF1}{V{0&i~3sgL>Q`Al)u>LjMY~SxxQ#P{$Hvh zw?pyoo8Q<<@_%T4bGyFYT(AFQ>veVeL0oIJQ+M)wbsBfs3etq@wmoUW#hE{OKJ7V^ zqjx3Gn`QfyO|21dk#OmNozVhX)05{d*oEsQt`U_LxLPR57MPbkBnNv=#_vJ(z{WMp zP4a^*(`>&?%f$Q&Rt>qy^XoQG!|cxw+&|xJ=}gF}1GF_Zvpbfrvb0O`gY=YHn>M?m z)Nc=L8BJ$>@FE4h4u9t|cbfH5*t0+$CEQ@7(NgL*DVdT|0MrRL&afZBO|HU$O^MGB zKY|uQ^+`*@A|~R6sFinQ31H#uswquo-u8yMm_}bj$A^z%vmwo|ES^Ha}vg()3X`^P~QLw2I~ZX8(k`#0r$$F48yGZ}-~<&+B*f{nPa*a{9TO>$iNY z`1kG|Z?50^_50@i4;-T1y#M*Hz5ik70r>a^Ph9ykBD}FM$lEM9|6xAXE7B(*F(2?nM^Yp)@Qr@v z;{}iy<0D1-10=?14sVQLqnV{vATdTZNcg1KEL{ePacve^>I)L%7J<}(a1M|d_W($} zNSEm^Gagi=*Fa+2pFrwFxJw{0Ze#d>Z#I>@BS<+Ubpxp@;d&^#WRUJ8-E@%F6Rrb% zn3c6mcaTPsE&-$wAO%Q$K-xgM43J7l%2%W!ke;X7b#k4LG!-OfZ#GDyiSYnPEy?e5 zAQh5sElAAXt028VRt|!+nxs>Tgx~Pdg}4(yV)mwj)S7g^D7wpv&H`Pw52bbmiIs8# zNURUN2@-s76tvMckXR}AfW%7qB}kJg^*cp60}?Cc1+Mdv{s4*DGu4u$am4TiiIuV` zNcdgmX8cw&Zm%s!tdz+hu~KF$(qxcWDSrit+4}<|R?6ZaS?2_a>1Kh%N(tZm$4faZ zSniYef;5eCkOdMeCQ;kVhkXR{~fyC@R1rjS!oe)`9A0(y=1BsO~10>d#zk$TY z>YpIZryO{}doiq(H9%sew1R}MMwq3RiWCkKD`gDV`ABzy#Ox)3gtLuV>H`uhWiCjI zNLK(7vo{tbR>}n+v9i6ONEINlQg*H@+q)AaR-%oHZnL7>2NEmgRghRI?QpEk=7j9FSNkmxHu|QlC?#H6XE4uID-*=`E0$z3m`9L5w{hu~Hrf=}FR^28r4G z5hPYh3%p3hO4&@2x`D(>xd|j@?=6s6DcixTLrfO~64NDu#7a3GBv#4}_2t>DJ4nw` z4iZ3OrR)O|D`f^qYbiBfk%~ZKrF3!~%swEo*>N_e65|1oSSgj zK}=<{k0Rl(2eVR60EyX~3KA>PFN*H6qO&xVOBo5$I`Y;Pq!-EZc#xo4p@&Tc391XE z*&wkzKMYbF&0GgSf=Yta!yrL+K>7?M)`H(ExF+yE47i5WJ3s<8NNqu4{kbDZ%$@@z z_#aXy!iBvAH60{2y61z$aC<>wuQ&?NKGj<3#3qz@SbgwJA?TWMq zr0$e@2&8zD4ukX@*@HhyfQeEvN%W)O4o6mciM=?dG~1c3{bBK{^i};Zc~$zne7j)R zHSYQY=8!YDKWr}R^*=+;@Kx&dKYCu(kD)U?=qv?Yr?h@f)rak8^?%vJ-KJq9p4 z31hTZd~3ksJ{e=M-ygY7{K@OYA0j+`2HbTB;|DdX7{M)o^`7j>L!aJl0JDQI#`^s` zqCGY?;%GgG&&AY`&$G^2Py#G{F2V{Qb?USem-@5t&&)n9jY~cISh;cf4>v|XLv(d^ zah+H&hhG@Ld~E>podL|31~A_kz#KM!c~^%SE^jX`1`V&E52IoE<&Bmw4gV5^V494n zsTBrJ#T8^(>^_Wulc$KssdEeAO&0hi`vedn;O&td`ogxJecLyaERe8*bS8h3TZKRt4FeEq6Z6Cdmx;8$@6}8dE;7 zP`L4I&o{WT_ksKZT{Tb(SRSK2dM+4&I0KXN}a1n3qa~bsS81Rlu{p6q@^Hrq15Fd zJw~Z36loPm-6-`%kd{#DT1CR~aVMp|2GZk{x>=Fl1}TnGD?q{n6Zkr1McM~a0;L`V zX(^?Cph!nR!iO~Q13n-vqtq`I=@>}8DfJ{sPf+S0gw3Ns%stG=NgC zfwY`bar`m~2EH_iQelCRo}|=3MS`hQ8cL}+&he-S`kx{-0x6ABtsvo{fVX5*q%e?% zQ>qEqg)B&h(4JlBZz>?z2w;*0w7W=G6=R^nU=lL+dt?k!7)(OO2Pz|hGC~J@Op>I* z3WG_=_}C+3h{9kJGQRZ47^*Osgp3m&8N(C?laO)RBjYZG!6amy_sB?77)(OOC6A1| z6$X=#@rOsoaD~AnWJoYWdP^qgY`9Ki(5@r*C=4bc!y!6anhH%x+$ zDj4@F3??BX#AB}RQy5G_MtzSy(iH}ikb&P2S9Pv36b6%!Ve`nyR2WP`Mk|kuEQP@& zWJGvmWGf6NA)|vwMvlT@5;D4ZWaKIgCLtriBO^~?FbNsSj8R3u8mTatgpB?keT-5V zOhU#G#<;~K-I4%N2vO`pfAbXvlaMjoW3EOk3??BX!y{vi!eA0I@D=N-M^u5rU=lJ4 z7(=tWwt-d-g$jd7$iV$h)jo<829uCc#u!zM!D5BMBxFowj9ThlLa3Z2xRxjkCLv>% zM;~Jq29uESfJcU1VK5093q3L%3WG_=z&&Ht5(x`VK509 zFM4E*R~Sq}#w#8fWeS5y$au{oV}in95;ETM$e5@wn1qZ9#;Bt2PEr_5LdG7CJ|-&+ zCL!Y>V^lGhOi>t2LdIc_jHwEPNyzxrBV(GvU=lLEW{i3!sa^s^Aw;kX{Vi7*OhU#< z#;9TpPFEOALdF@7j2Q}pNyxas7*&kgnF@nR$oS2pk68+XNyxb7kuh6gFbNr6>RvPf zq7bxuhB*p@NyxBxWZbVXn1qa49vO2L29uCc*CS(|!eA0I;26WhT=IazU=lK{#K6xs z_JY(f(DZ;S&M>I!334dt@LM0!xGo8Fc;wj(u9LYg(Txu6xN;Lfr}Z)Mc#pSlyR*FM zEAwk^t6SoG-CY=?9)JJAf939~)#BDmcciU+s#R>llyyT(etl+oz{E8>eYbB2 zdAxkz9Sx6v@yVhc@Bg?w=9^HT{KI`;I#4<dE4hM*#G4)@6TKJe`3^TSzp!l z)`U*sDX)5;sw!R98^y`^s;;hfnpsw#`C>)nJF?#O`pWU8P2W!H`JAkm?KZEqr*Gxz zIkn{Y-*uJ!$@XNwuGb!AM-!L4o4rWJ$yYgFcTfH3$$J{5{}^z4bMGIXymhI3m2ua5 zeL3k{X|bbl*8|QchBWY7HOKzKosFZG%z7dGtI*doXTARUtr7PQ-_v=u?X=~#T641M z6h1Tjv5)s%^ZBl@yqERP&s&B*TK|!aiDSasJUzZM z-G0^d{PtV3)2GDz@=)}M(SF}72wpV*Smxr>V>kQ%zW30U$^GqrZkRE4UHfs>#btkk zzd0B?spgv({?qX~EovM~&A)xUT)yi3UoXBodpDY1U49Pzxp?iM@zG18+f3UUGxqFf zuK4QWa(jO}KcGVui9VEX_VOBRb>>em53WyAxX`e7gpN%F1=+1{r5@E7xOu}h5R=&v;)@!2FY z!=1ThPFrCSDt%C6`l68}9Rn%s`+GXUg@>+iT?i>Yk`-A!2EohyJ%(VSFZ_cX!e8_y zh7mkU7%AMIlj+Q4DnGb*o5DHe)!BPyGretauc!`@G31#r64!Lb(r7j9BXGYhXd;tc zHBlL^n&=W&O-#9~ru|Y^O^5ZanvVNiHJy&RYC2zb(R7G_n;0R_9U`rYM%xxxpWdc! z@E60-{Dc{-zi2W3km7B^CyH2&c~|hmirA;Lm>Pt^eu0?e;vGP8#RLeL{%LQ-=`n$X z!8SlUPbCb1gLGFr_#rrNlV@M>`R{5mbedg_onQlY@ENbS3C}Yz&--V@_15PZK5boX ze(D&&)HQ&qX8?n(SzVs-L$uY^GqzN97<@uq9R^#oIt)G;uMPuUvsw&(6s$Ts_-wm6 zOcTO*Zg;BzJ533LXNuT=7F9T*kzMrQ{NY%Az8v~fO1~B0UFzpOrA`D<64PdZ$ zRaf?C!gy}?7z1{26Hr~>z$gCIVLBSXbRrDa684{CX`e!6xb~mUgz?-DyBNUW-o)GF zxv%0%>TSXr!2ImZiF#I_pY8%?O6Zj*^cei)gKEb;CYCUs%N1t;6K?>MU;xv@04C7@ zCdmM%Ct+}w#(M6uDE@AJ9rhv&&M=6{dd>^BNv?e(nK1Iax?+=83BzQESD0BQYI4g;qI82hKTp8|+Og*rjx8;gEukjYo+n2E#x<&Q(iDu$ zEG*2FVTf_+H0T4L%CmqN40(bE`j~74(h!4vLsgLm`i6@J`i5Skb~|JVX2`N&hBSg1 z(g+<>W~m)hX1Qq8UWg?`@6hSg49c3w2t|jO!r~lFMWd#qc)WsOb{u0}TQEB}zd!{c zE3crq*siEZgQhVn6i}0%p~jOChEie?7av{p3uA~A+`isl)v zX`^WH6G+jZH3K(_Qv>Y_YxQ2Doji8Az zF$~kZ51I@Rk;OFMgJy-G@$zLaOskk%L306GQPs2n4SaN3qlpL23a()d{uHtV%tp{y z;h$>f5X9o(s8H2>2byw0;{~yWaO|dH8iMASpy>!27UXgb0!=U+vLTD*r&Nb|P^WoO zr`e&?9MNgcf+j-nRSR;L!8NR&b3tQ;gE+Mmn?SPyj?`3*1Q4}>jb(&sKKm@8a&SwHF$a{YVfR5)CB7^P}MG&Iyw!WRf=|? zIb1aLbQ(M@6j^vSC~EK|P}JbLpQyomRZ)ZIb)p7O`9uw#*NGau?-Mn67AI<~I!#lZ zCRC?srqkGTn&vvq9Xd@5ohD4DX{pn+(rNI_N-P(i8i^V_vl2CULM3YOJWA9==rnka zBx3N?NYvm!6E%1uBx-Pgh#EZY5jA+WBWmy@N7UfCji~9O)8Ltmh{014Q4^=r#OpK( zI!zCqCQ+wJ5;Vsk2Fnlgjz#9Hl{P;av^HrmY_-y2aPOknp_z@XRvbg~4_mFc1}l@V zO&*ut|4D+ZPH@c+Qc>3D10r@YS>z!)v&cdt6^(_R>Rf;t%j`y zs0lAqpW^ifpX}KmdT)Gg;|zZ)8ODb2wSX{Ec!|pD1F7C7od0nhS=;HzTPiD`vj`*j z>PswG%qSczFIyg{_?p65gb{qD5KH#;RIB^esjNpii!g$(e#DY}z0Kx0iWX0ER&LWK93-@Gd9ey~bL9EI;!dZk7d<_(Q zwK}u&^InRt6P!gD!Pg+c*S8ZMX{EA$<}AVpz6KMkjS2e<+?(G2<~q3WP{~<@5qu2+ zgtrM<*p@p6?!m>JhgtzlAdKK^C?u%;Fx@fg{%%US8gUk31Yg65CHIX6_qIKxvhXX! zFc3!Ybr)zwU)>d712~H?g0D1UX?>|I{PHmjgb{qf;i-$SI~89uIEyfXui?bf`chfT zIg2oYuX_Yv*yjTjUz<3KFoLfUg0FAePWn-0?dL4Q2)^#s`KqD#`kJ!{Blx;c=SyY% z!dZk7e5LDr)l_`>VI@HzjNmH+wCec6a;dDwoJAPHS0=Hv^F}SjR}^OvM(~xT^QE$S za~5F)U)efeL5i<*&LWK9D@W%`WtDOkVFX{fI$yzxuLn4bFoLfZWyB@U_ zUqN`Q0f8`ruL97j{SfDQmDPf?2qX9^B$l=x)>VAn$ytOEd=&}4u+OWkA)G}R!B?@) zS3Sj7K4%d|@KvJorLv}R7GVTmV+CJWhqoxc9_1{;2)^usFRVkAwT`n0BlvO%zHmOg zRq^#MXAws5fBz&LWK9YmVRx`@G7^dwr>Wv= zJZBL`@HJQPB@H>fS7klKS%eXM%@cfK9fm5tR&f?#1YZvbzOW8e))vkpjNog&&Q~+V z*N2=%7{S*9oiCMjnzINa_@b!?+m&&Ra42cj3Blvn4v}!-Zd!puw zuhyJJ7{S*GWt>GA!Pg^# zFYNOz6kiW>7GVTmj|#r9&#SEGIg2oYug3&mcn=$<_K3m z5qv!^_`-6vRD7M~EW!xBmI}VGTq?`EHY7qIjNoe-Xw|-fzFH~18gdq41Yb`OOYR%! zOJzlK7GVTm|I+ztt@!H8S%eXME!X){S-G4=7{S+*I$!M+UsE}YFoLhAbiP#9Bb-GT z!PnC|Uu_g$FK`xN1Yaw3zEsvW&LWK9>lvM|wu-MKoJAPH*Gio)m35Z02qXAsjMQ-B8=c`wa!42aTZ|&UoQ*3aNdYfe0|SZgb{qbBKX3-p|aqMvM3Nn@U>3xg=FoLhw1z%GJADXSQKIAOI2)^DBd|}_{ ztoZtovj`*j+AR3OzM-p~&~!U(?J1g*G!by0lP=PbeqzTP61cKuRW?Kq1tg0C%t zFWdukReU9J7GVTmZwtO~51_KrIg2oYudRYFtixEvR~cs!M)0*w@P&1#vL56t!U(?J z(fNu~d_BWigb{pI=zOWHH#mzhg0Jm5U-62sgPcVe!PgF*FO_wIvj`*j+NtxEp!oWo zvj`*j+NJZQvI6TtA_T$+zIKCF?1w!RUslc{jNoezv9$eAWp&^z!U(?h>Us(OJyzMEW!W|KBos73tR`oKW|h>!MCt4 zmLH;M%0PonreYokO?}X)82m|tMgrzD(AWgbHPD0!nA>iJr=GuNoT?gdRZ0aMO3 zD&|ShBny}~L6a!>`VcgjUp3FCL6ah2YCxef1WaqtAVc-F5Htk><`QT|2$55-fLRBc$pYpGXqF2Y{Aq=`0;YaL_Hznq**k&;Evw}k z0h-qYJLRB3%PQt2&{PPRqo7$OU}A2A?^IDR;qV@-GYjjBe=nj5>jlG*q3E1(ICFM*W+n!2E2 zEJT-q#wK8%0gY9_ybBsRcuC<|4u>4W#_%=;%TeL_C1(+aQfgRLfN19sB&f47LwRv< zeJc|PLt-juEFdw<+Wfo++N$i>frWuELMh${ZB?w#tjLih!U(JnKp)GU#`)dMHqqgwTB$; z;Vi-ktZzK9TC`v}B#gj1MlAQ`TFP035m?8G<-S~(Ig2m?>jbgfkFUFM0*62tf%PqD zs~TT#a~5F))=6TyFIQws^u*R4_)Y`OO#2;ZtC}|+<1E4mtnZ2CzFe0%i!kU^>eh-b z^atJt$t!S{I&yPlofirH1x$GMs44lj^nIm&^((O*lPHIv-{GS#Fxr&W|H?N}O z;sz$g$H(>V-5YiAHXXwH^wC7p`=nzO3)d^xnUd*nCfV)9c2e}qEzB;NFr;|!gu<-i z0;woB)1EbfUIK&Hu`=_EAjf&|22;rdd}}8U(#xEgd64BiDHb(Z`IM{l{{7IOpy{vE z4AA7MKPB_r_9HgFZ-seRW1}=tcxOtCX%pe~DNQ22uO~*MSd`r0t7vK}zQm-7@q%jM z+Dzd~St=T8L5O5)Vr3;lgl7ON0AA55j8qfx{YOm4hpDQ#2qrr|?)&E2i*gHiJS7wOqqquYAH-;NFj*Cp zlnaSJq%QjEft}rl3er!~bdNeObE02~%DnA)sVc{8$;O z3n(Y)@S5vb`1oReo|E!Y)$gGRl*06!d`GrDzfePQ{V6u(_t3Pz3s+iFPz)0?{)`*` zOd6xd#@eu$;)EDyvnAXekpS~+KR0y|pA^p$ZMGf>{Zf+>p!`v+wwRf|FoDL!$u%Gs z72KjHRz|K=@~x&+^=ovTp-omvRsG~0Cux(3#Jf&$iN5DRx1eQ-Eqx`KcJOG=rID6~ zZ4Jd@Ytq8y$yw8+kv*{VpxGGVEtdQ!|om!&Sv`>SdL)^_kZ)XAoD zdBvs$9=c#@R(ksIs3`ZHFtu+7m}E1ZQK^H|^K)pzgu?Ys7|OKR-+RXojf_iX3l?ky z+?rLpBI{?W*}Q!0IC`7?XinAHi=b>Oy}{KQWU>l-m`ArJBip--0X1)b!CF)_S<`kt z*F0WliJ3ud$J86Fs$u#m%>vse?9WL>&V1(txyN9i&dSUl!&^Y!IHjle8k|fUEPIbp zx!Gf&pE+`~OYOi+&qhUhc5(5T{9MUaS~MO;uua%i+2{lPE;<(=-*LG>Wi1D@Z$97{ z9^px}^yW!CO^(StT*;M0{`~-QR{0yxoGHiT>J!{jaMR|NLJ4juxao6C zp#+)2H$gPWWVE1j0n%FQ0%SS`sNGE}NI|KEs*AD~tI;Z8?Q*VQ1*I0Mu^{&wVGAI) zTP)nEsC!t&uAo%x3VQ=BR^v?z)@*;ZXsud?3%g?(EGiYKcI&J_1*HPj?W+~MK1C&| z8)Fr&(P@C1g7az?w(3ePYq#vOO+hEy1f{ya(jYZT1uE~WH9cL~YoksBRJXW_P534g zoS($rE8Zk%u$o&3#jbD*A!9|QoCW-@4(3fe`dzdN7{AUz%^R;+@t#D@gm7=7rugiRsrkoi4=l9MJBo2HOUNDwG0<;s${f_RtCG= za>-~{wF0MaI?=U|%Z-{+k*+svig}kCIV}OdYX%*#TRaWJB|*uO=8jK6Yjql+=59~} z)oQh1^lVvxeQug5H9$F^2OVZGEx7iw=K20I= z69{2HAh(G)r7BG#V&xVQ!MeM6IXC(QIV-{zfcH8-tTUDx- zEFTZJoP_X#kcFxsS~L0|k=I&qld2`l4%GWuIwjCrs0yMr<8uEh)PgQeOT;4?cKauA zv}!Hf?Lm$N#0yHwTUE�w{c(WMeD0F9 zbe=setg1}tFn98DYK&WZk(<195J z?oYMUnaK4>#>in=wV78Z8CUZjkC3X%^m_nZ7aGYR0eUSX9l@b+J@uo*$yC<~`$tXXd#j z#o43y<8gRs4^Q^1crqT%A3!C=^G8Sg*$#5?p+#(DEJ)Qn&xng5D?QS{3ZQZBhsP5= zO2I8VFOG}T_yGjijY7LF6UGfq&Qa`H1LmbiVp0#x2rKhH>q~g5$I9<4EcqvWruT=! z*@g@r9Pv+jNv5Xs+)#gQ#kkHE`s+^qLeux# #include @@ -31,16 +31,40 @@ static int gpud_device_sync() { } extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start); +extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n); +extern int gpud_upload_fir_taps_cuda(const float* taps, int n); +extern int gpud_launch_fir_cuda(const gpud_float2* in, gpud_float2* out, int n, int num_taps); +extern int gpud_launch_decimate_cuda(const gpud_float2* in, gpud_float2* out, int n_out, int factor); +extern int gpud_launch_am_envelope_cuda(const gpud_float2* in, float* out, int n); +extern int gpud_launch_ssb_product_cuda(const gpud_float2* in, float* out, int n, double phase_inc, double phase_start); static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start); } -extern int gpud_launch_fm_discrim_cuda(const gpud_float2* in, float* out, int n); - static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { return gpud_launch_fm_discrim_cuda(in, out, n); } + +static int gpud_upload_fir_taps(const float* taps, int n) { + return gpud_upload_fir_taps_cuda(taps, n); +} + +static int gpud_launch_fir(gpud_float2 *in, gpud_float2 *out, int n, int num_taps) { + return gpud_launch_fir_cuda(in, out, n, num_taps); +} + +static int gpud_launch_decimate(gpud_float2 *in, gpud_float2 *out, int n_out, int factor) { + return gpud_launch_decimate_cuda(in, out, n_out, factor); +} + +static int gpud_launch_am_envelope(gpud_float2 *in, float *out, int n) { + return gpud_launch_am_envelope_cuda(in, out, n); +} + +static int gpud_launch_ssb_product(gpud_float2 *in, float *out, int n, double phase_inc, double phase_start) { + return gpud_launch_ssb_product_cuda(in, out, n, phase_inc, phase_start); +} */ import "C" @@ -66,18 +90,23 @@ const ( ) type Engine struct { - maxSamples int - sampleRate int - phase float64 - bfoPhase float64 - firTaps []float32 - cudaReady bool - lastShiftUsedGPU bool - dIQIn *C.gpud_float2 - dShifted *C.gpud_float2 - dAudio *C.float - iqBytes C.size_t - audioBytes C.size_t + maxSamples int + sampleRate int + phase float64 + bfoPhase float64 + firTaps []float32 + cudaReady bool + lastShiftUsedGPU bool + lastFIRUsedGPU bool + lastDecimUsedGPU bool + lastDemodUsedGPU bool + dIQIn *C.gpud_float2 + dShifted *C.gpud_float2 + dFiltered *C.gpud_float2 + dDecimated *C.gpud_float2 + dAudio *C.float + iqBytes C.size_t + audioBytes C.size_t } func Available() bool { @@ -118,6 +147,18 @@ func New(maxSamples int, sampleRate int) (*Engine, error) { } e.dShifted = (*C.gpud_float2)(ptr) ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dFiltered failed") + } + e.dFiltered = (*C.gpud_float2)(ptr) + ptr = nil + if C.gpud_cuda_malloc(&ptr, e.iqBytes) != C.cudaSuccess { + e.Close() + return nil, errors.New("cudaMalloc dDecimated failed") + } + e.dDecimated = (*C.gpud_float2)(ptr) + ptr = nil if C.gpud_cuda_malloc(&ptr, e.audioBytes) != C.cudaSuccess { e.Close() return nil, errors.New("cudaMalloc dAudio failed") @@ -131,18 +172,21 @@ func (e *Engine) SetFIR(taps []float32) { e.firTaps = nil return } + if len(taps) > 256 { + taps = taps[:256] + } e.firTaps = append(e.firTaps[:0], taps...) + if e.cudaReady { + _ = C.gpud_upload_fir_taps((*C.float)(unsafe.Pointer(&e.firTaps[0])), C.int(len(e.firTaps))) + } } -func phaseStatus() string { - return "phase1c-validated-shift" -} - +func phaseStatus() string { return "phase1c-validated-shift" } func (e *Engine) LastShiftUsedGPU() bool { - if e == nil { - return false - } - return e.lastShiftUsedGPU + return e != nil && e.lastShiftUsedGPU +} +func (e *Engine) LastDemodUsedGPU() bool { + return e != nil && e.lastDemodUsedGPU } func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) { @@ -168,6 +212,53 @@ func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64 return out, true } +func (e *Engine) tryCUDAFIR(iq []complex64, numTaps int) ([]complex64, bool) { + if e == nil || !e.cudaReady || len(iq) == 0 || numTaps <= 0 || e.dShifted == nil || e.dFiltered == nil { + return nil, false + } + iqBytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dShifted), unsafe.Pointer(&iq[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_fir(e.dShifted, e.dFiltered, C.int(len(iq)), C.int(numTaps)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]complex64, len(iq)) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dFiltered), iqBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + +func (e *Engine) tryCUDADecimate(filtered []complex64, factor int) ([]complex64, bool) { + if e == nil || !e.cudaReady || len(filtered) == 0 || factor <= 0 || e.dFiltered == nil || e.dDecimated == nil { + return nil, false + } + nOut := len(filtered) / factor + if nOut <= 0 { + return nil, false + } + iqBytes := C.size_t(len(filtered)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dFiltered), unsafe.Pointer(&filtered[0]), iqBytes) != C.cudaSuccess { + return nil, false + } + if C.gpud_launch_decimate(e.dFiltered, e.dDecimated, C.int(nOut), C.int(factor)) != 0 { + return nil, false + } + if C.gpud_device_sync() != C.cudaSuccess { + return nil, false + } + out := make([]complex64, nOut) + outBytes := C.size_t(nOut) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dDecimated), outBytes) != C.cudaSuccess { + return nil, false + } + return out, true +} + func (e *Engine) tryCUDAFMDiscrim(shifted []complex64) ([]float32, bool) { if e == nil || !e.cudaReady || len(shifted) < 2 || e.dShifted == nil || e.dAudio == nil { return nil, false @@ -273,20 +364,32 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT } taps := e.firTaps if len(taps) == 0 { - base := dsp.LowpassFIR(cutoff, e.sampleRate, 101) - taps = append(make([]float32, 0, len(base)), base...) + base64 := dsp.LowpassFIR(cutoff, e.sampleRate, 101) + taps = make([]float32, len(base64)) + for i, v := range base64 { + taps[i] = float32(v) + } e.SetFIR(taps) } filtered, ok := e.tryCUDAFIR(shifted, len(taps)) - if !ok { - filtered = dsp.ApplyFIR(shifted, taps) + e.lastFIRUsedGPU = ok && ValidateFIR(shifted, taps, filtered, 1e-3) + if !e.lastFIRUsedGPU { + ftaps := make([]float64, len(taps)) + for i, v := range taps { + ftaps[i] = float64(v) + } + filtered = dsp.ApplyFIR(shifted, ftaps) } + decim := int(math.Round(float64(e.sampleRate) / float64(outRate))) if decim < 1 { decim = 1 } - dec := dsp.Decimate(filtered, decim) - e.lastDecimUsedGPU = false + dec, ok := e.tryCUDADecimate(filtered, decim) + e.lastDecimUsedGPU = ok && ValidateDecimate(filtered, decim, dec, 1e-3) + if !e.lastDecimUsedGPU { + dec = dsp.Decimate(filtered, decim) + } inputRate := e.sampleRate / decim e.lastDemodUsedGPU = false @@ -344,37 +447,9 @@ func (e *Engine) Close() { _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) e.dShifted = nil } - if e.dDecimated != nil { - _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated)) - e.dDecimated = nil - } - if e.dAudio != nil { - _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio)) - e.dAudio = nil - } - e.firTaps = nil - e.cudaReady = false -} -odLSB: - return demod.LSB{}.Demod(dec, inputRate), inputRate, nil - case DemodCW: - return demod.CW{}.Demod(dec, inputRate), inputRate, nil - default: - return nil, 0, errors.New("unsupported demod type") - } -} - -func (e *Engine) Close() { - if e == nil { - return - } - if e.dIQIn != nil { - _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn)) - e.dIQIn = nil - } - if e.dShifted != nil { - _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) - e.dShifted = nil + if e.dFiltered != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dFiltered)) + e.dFiltered = nil } if e.dDecimated != nil { _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated)) diff --git a/tools/build-gpudemod-kernel.ps1 b/tools/build-gpudemod-kernel.ps1 index 5c8a96b..d021574 100644 --- a/tools/build-gpudemod-kernel.ps1 +++ b/tools/build-gpudemod-kernel.ps1 @@ -15,13 +15,47 @@ if (!(Test-Path $nvcc)) { New-Item -ItemType Directory -Force -Path $OutDir | Out-Null $outObj = Join-Path $OutDir 'kernels.obj' +$outLib = Join-Path $OutDir 'gpudemod_kernels.lib' Write-Host "Using nvcc: $nvcc" Write-Host "Building $Source -> $outObj" -& $nvcc -c $Source -o $outObj -I (Join-Path $CudaRoot 'include') -Xcompiler "/EHsc" +$nvccArgs = @('-c', $Source, '-o', $outObj, '-I', (Join-Path $CudaRoot 'include')) +if ($HostCompiler) { + Write-Host "Using host compiler: $HostCompiler" + $hostDir = Split-Path -Parent $HostCompiler + $nvccArgs += @('-ccbin', $hostDir) +} else { + $nvccArgs += @('-Xcompiler', '/EHsc') +} + +& $nvcc @nvccArgs if ($LASTEXITCODE -ne 0) { throw "nvcc failed with exit code $LASTEXITCODE" } +if ($HostCompiler) { + $ar = Get-Command ar.exe -ErrorAction SilentlyContinue + if (-not $ar) { + throw "ar.exe not found in PATH; required for MinGW-compatible archive" + } + Write-Host "Archiving $outObj -> $outLib with ar.exe" + if (Test-Path $outLib) { Remove-Item $outLib -Force } + & $ar 'rcs' $outLib $outObj + if ($LASTEXITCODE -ne 0) { + throw "ar.exe failed with exit code $LASTEXITCODE" + } +} else { + $libexe = Get-Command lib.exe -ErrorAction SilentlyContinue + if (-not $libexe) { + throw "lib.exe not found in PATH; run from vcvars64.bat environment" + } + Write-Host "Archiving $outObj -> $outLib with lib.exe" + & $libexe /nologo /OUT:$outLib $outObj + if ($LASTEXITCODE -ne 0) { + throw "lib.exe failed with exit code $LASTEXITCODE" + } +} + Write-Host "Built: $outObj" +Write-Host "Archived: $outLib"