From 2c900988d29c0560feb4f4850f5ec1d5dae30882 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 08:06:44 +0100 Subject: [PATCH] feat: wire CUDA freq-shift launcher --- internal/demod/gpudemod/build/kernels.obj | Bin 47249 -> 47909 bytes internal/demod/gpudemod/gpudemod.go | 26 +++++++++++++--------- internal/demod/gpudemod/kernels.cu | 14 ++++++++++++ internal/demod/gpudemod/validation.go | 25 +++++++++++++++++++++ 4 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 internal/demod/gpudemod/validation.go diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj index 2037977b6936d9cac4fac5453f27b105fe281b3c..ecf18a01ca62f57fa315244bdb1dfa1350e7f837 100644 GIT binary patch delta 7013 zcmaKw2~-qU_J`lAh6Y(=YY>%1p+yFRvUIt}6(a&7yMW3jD!8J8WBiYUE5?8eUbhez z%tR+9W-y`?b&S!8dmLjluKBB(>}nDdow!63=l9+#%j3y8Q|I)p?|%2yt*XcC?rQ3P z(l+1KJWFfNvXsVul;@6TEUex$&k`3GTVLM65*Hs=UmqXW%gR{o0LG3aGPyYr+p)Bw z&I2>D?70K8lx55na4Lzh^GWr0!?e$ms11+Mypm}EkFX{uGgh2TlX;kR0&t&z^_@T1rfVUJr0Q#lK!-7*7>!z?ja6}5FTEeVjfVC;q(k0v()THWrfM-*vK*@Vb z@}Z2i9V!p%08Ab#yJiEYD0~q}3fBYQRd^8islu;-j|GN1r!p3vDjP%t2P;emPDqt! zr~}KHm0z=&5FhR$WK2(F8hcVV^m>jPIM<^TvtQ{uD+cAC>(mkRQE{0zhg)r+K zpl2F|Si`KX(irQJMpvD}ok640WPKcPuEJX2W`#|_;|k9LzgKt<=#wrFYn#rPuCOmK zPvL0bY=w(}n*@fqiEDcc=|`$^33w-6-pjAR@C=F38H{Br90i=8K^t7dtV@9JWymug z1^!#%O`vn8yeF?r#$q$6Pm3^XFJOhjS->WRdw@S=iVplf^>???Z0=_b!b0qtCENA` z<|`}({#oHmz}FOR10GR$8h9g%PPv6y{{!^RmbVv)+Y8EOEFoLoNGh;0n_h4a)Bc=I zCqg3O7GhXb$$QnY7VEh3lP+IDo#Wvpi|b!{8Yb17ft6LYCX=5MNe zLoS_{`7bIzol6lxVti7NJYLJA8OZaf0eK!{jyyUd^BXE3n@0h`qTVxD*4O6ISed`9 z@)LRVcbQ*S`J+6tVkYt_3Nw?>Sm%7ok@+l@7v<9ynTwf2eMP>UxqLZu$PeYqnah_m zhx~57nz<2b=0;%VMyQ!nCKV=MnVHJWfvHpGO=Wh#>{sUhmH86pM`c=$l$`@c;sbA_ z?A%Y8M3^jPrYSQErbe09mDvWfTbc99T!p!zjO!@b*?Sb$&nVfsr!oUzQk9vgOeM@5 zWi}|Y1!jjbr z^GF$|LdLucW#`U?@?2eE1}IacOc_k2GAop6fZ3wVA!Sa&TvX<+GQYvN70J$#MY3~D z5o5iT$ycTXX0kGM%B+NGRA#?2M_|q<^P@8NVVsI(=dfbgIjR`%8)dST83|LSOpP*2 zVH%X#t;_+Klgivs<~Gc4$^@05b4TYA#$rlvs&Fpy2x|^Ri4bAhoDy>LkRw)>P*)Ez zB46c4N~lKWZ>s#hIv`-QY!Nb=v8d6qMWQlkFe8sv4ZLZoZehWiV9!H- zj+YZViWrQ`bYg80BPG3qxL#JTnB=742|-r-fQtL*_qZ%C_l}oOmOYS)Q~hYp z&|rQ{9=?S(3@zlJQs>lYzEhh0viUo-J+%{W5=>>8cD`5A{B%1_O|#RkBs(p`X(r&+ zlxn5b86gxtEQ;b&Bl&qbvM#fQxO9G0_PikD0g6cP!4JxMXj&BQN{!$<=#%t5{3`in z^r7V$Rtn7M!LP^>?J^VTz2p{hB6Qr=VNAkdQQ`Y&NoGe()vnA~-b~+QcHw`cR$0Mx zBFo07(h$%i&>!jlKwENbd^Wuex(J$2*FbK=ZG18LWQVzjIALx0u~roUMHH9)oO5qX zR1}TQj@H6B)nxaq-Vb5HpBmrHl3Wp&;m>A~OF*nE3-g9kWCQ(-w~&Pef+ospkV)-8 z_hi)$nk1`{COrpwAgeJTu{12qQEp;)&_h`k*Em_0`Wzt6Mc?NH`^U)^;=UD$`+ikas1ai}QTp&;Is^Kv zB&S>(R;F-WLCuoNKzk%LfZmsM5_FjUog0hQEJnxV+4#rQlp9PXpbOHi1YMDI1oVZh z-UnTmE-K%~Z%G;n`dQLa&_hWFK%A!L2h(kkizNF98~2oy1M-vf5~wxtk-@YV6e`_! zpbj))M6iF7dGCtEy?2tM(?;5OtfYCMZjyF_o~K&{!Spp~pmc$wYV|lI2E0EXA zYj_Q`M!HWx>*dv0i)?tah`a3$+A3*0sEMu@2GcswE_$aZmj6Q>x1!jFcc{4FIM7kq zsuc7uNvlC;B^?7@lJpDcnjrsjbK(?t)xuzuDx^BerEX6jmI2g58HR9IX53G-EMJlS#--U`PxqMj_OYb z${q!Ye*rke`Hz3y13P2uh5e7tbg{h5)=d}A1uqM;XAX|jyLRi^oHhP*3;LwWx7l8~ zlk<4GRF%j3Q2dMuenW7bYHP=EdDt?W-Os$u?MngYW_nP!S%I+%^1|#GYHq0Q?TsI8v|H&tWGaek(_Qd%A z&P2k9FK%ZXrDk+7%x8vCtFRF!hO05k?E77x3ou0}I#-*Ge)-)XUwm4s?a2j+=hJ7+ z;NO`7m^q@g*?BI^cEhL(x(HK&^EtDiC->nZK6K^wD9%$W^to4 ziyc}UH*$U1p~bjSAM6IXQRT}HJXvggS=~mg{&{9D#U<>Jcx~ zQN*9<*X6qA;z^ErT#qM>$F8?0HP$)fzx=jxJqp#yioll`ri1UdM8|XOVc6y9>l=M|Po;oKW z-l0vabj{U^F0OKDAzsvSwL^>dqJ-5Bo#)H=+GFFCDuXnlRD=XwXvp(*S2>Pfz8#h<`Vea_5>5szW`m$hNOgAqS@ z#d}DN$?{{&_RN^+FycM*bWA;r`1SL&&n_7IGh;6Hgo=D-kOh6kPoSrbq7C!3Q5{Tg z>aii*Y3z=7k5JKupdmM)5}s+)9>3-jpJ_B4M*J*#+Gq}pIFWd%J9~o;Z_x29{BVQb z>N=`o`xjoS+8+NGaHYo^{AuLIAZ=g(&D!YTZ&K4nU2ETpzTW827PX?FMu+XYR`N)8 z#ym78kV+fFv;l$iQe%u(9@xCMu^smwT~%5fOr~X<*R>s2QB_(vvAAmT=(57G)5c6KoL*Kwwz5#PX^z@* a)w9q47{KfmrNx!S_Wv=Y`Em0Kr~d-IejDrn delta 6595 zcmZXY33yG{+Q--0Cpj|6#7UwdgXD-=f^!HiQq+)0NMs}p&BO0QYbtF*MPTi?~{@YPkWwrJh|de^o*p6=&4@BXdd`|fwIz0W>p?{i+g zWz}4_LPpO!Bc5Hqd;EvvglJY1QtU|W+O=kU3rA{NT1`#oPQm^{Eb1df?Et|`3Sxp7 zTG63zZl1fSZ=SY71OO%u65=m|YHlT3vj$O&Ot#({M15qk^IQ0PgQ%BZl7EH}Ng4EK zndoc}9FReKfjPkO+D-+&q;UmshsNE&vl=e}f6bt5N20$YQ;60yNhbMs24yi7f~GJ% z16r=gxgPX>CY|(8@&kR#?gl7iFi#|Uun-+J_5kKLc|Z@+Sb6n8nb|78RZn(=${l6gFP&V z+^+NQ1J4Yh->oFI{m)v;p+Ynr%B@-edn?p)8-(E`otXiw8OqaK1Kh9iIPkj0JHX~S zR2PuwY?~v*h#YSCKfo%D3xJz*_>kTLp30$Kfr-xZz|dUACb@WLH5LLF=W>TyV7ks8o0gs>7<6rWq zHaIS5Aa)g%FGN{B{oXJxLVb?LVYmk`wM0_;kfzp>0-6w-_P`v`=4)+!gt@IvlVa|iP%K2dV%}GVHn}im+WfCJ zb788r*{01-m;>7UQ=6+WH??UroIA%17b1B$ckZXnV3;CprfV|`W}!B3Xj2cfN1F@U zT!#5kn}#E}vugyt5l3+6UfT4B$<=1EHZx)7YO_h3EigN^`Ba-PVXkW9EaA?fB|^lM zaOX5_(qRT`GeMgtVPa{tk%{iFM+K5u_98fBRtCTx;(xy91e{DuznKol!p46sVo0TxHXmdcDV=(8m zxv9-vn1FKboLo*BAuavOh3H;R(?gPEIjz>?iRE;N$KUC3T{%UCs(klQu0LH)V|ct! zkF5&YiE#y8z_>yPPX#%{)VNI;kH=I{0gs>7g1&>HUY185zSItWEoej))F#~ZH#>4tTnX6zdK-FHnu+O-0;;%p8m z3blS>1ZXX{UIB_^s#95}S3!Ex+d)aZ?O{+u+`O^z319-tGa!9}FMyKSeQn%zL$^T9 zIr|&P&1Cu0p^T5@W?Gn;BsbBv%*W&rI-QwB*N3|4MrL~&H@GGF2D_=p;P&)+W>dMB zvt@aKDkBf7kvzff`8+r6&vn!8EVq1(&1*1@Y&W`B3{H`I*u`fB(uwTO@-U?iDVMKs z<^oj@X)k>=IeVC_r=VeNkgVFoClke2v?f2Kt%P ze*&p%%^}ha^#k4I>_AW$UJijrRGaWjCVuCv43Z*fB!*bLbS*!=aSeBR*f%eT@4VuV zUT?-Rm9y%t(4~rj)1N+-3I;4 z)NX`J{zi|E=qMfZNl7eKV;snI02Ib_6BJ47kxedf$)@azN?eVb>r)#gMN}V521_TZ zDv6gZQQJ|rS6f65GPBmCX5E>4C6u~kH#%M#OSz!l?B;?V|)AX zvXE&os8ms8j_I#S^&iDG^{5%obQ$ynldIe%r!n;hJ;O8;^c>R`(0ryZK`*JfMHZX> znpA({n)C{nT*b3~611M_70|0p=RjNOxyo1ysC3C4yhV4=drT8SAE={?tTg>Kss0~v z&1uveWwJ)PT)1AW7Eccd%$|M}gyhouGW7?l!m{Pl#==6?&i z>AN~=oE5$=?q2Wz`zdGT6WM?9pLuppZ(qfjdPl&)+$DuoLpy#<5pB|`Xu{oQ>Njx; zrc^g$!FsIhr3cm@`qAXa$GN(6RIUMD4&fd)pp&;tmrlMP9zPxEJ2~wgDHCbVjAC30 z7iT=)I1PuYmjr>iVd>)=F>||Yk!2_TB&W2OD2)n;>J>*(Su8 zt+oj@W|wWkj5%VPaAQ8RO(SEjRN1PrQ8#T9VT_+XG~qI)k!>Q4Nw7_nF`aD_ZA`jt zni!L9Rq50Bh*2Z#q8MYw+oqW@({0n#n7OuzHHK^xXUs<1#2Zs@3BCUWqxRWF&5b!} zn?z$S+9t^u^?aiiw=s8Y)54fgJg%rE*_b#O-hWG@+S^5~jOl5c*2ZMnrj0RWwrOk3 zWZR?|^Q>*!8M9a!wg2`;t+9(b81trWQjK}fHXV&QWSdULoU=`5W3Jl9W6T{k*uU4P zFm)Hg(#4oq+jKRiqixcR>2I5E#^l+iyD_6|)5Dl)+Ej_&Mm=vAr5jUin@5dNkCtlb zW6W0D^fl&v+w?Q$sBIoI=5yQN`0sy1T(#8$a|cG<@cmJv&tD0?y#8q$eT7Yh(aZfl zOJLNk{J~Z`{!nxDzNs?1|JQwG%#mdL&GpD|+CA5c+wXP6p49RMFK(fwh($z*W9ca3 zG`fd4m%Q`hqX(i9rX#u?`+kgSs+XnoICAQ58ubIAcF?c*@8bTlkv7ir;u+?%d0y*i z0}7q*u`V~DepuaYKu^#2;5lLYe6LKQD~JQB>5E?6r*jae(0s&s^a0{(x`9|vtrvK$ zg8@{rz+-(GK($!?6hJ2ycw_*{gkfW;b1yY?XfNf(=S-v45kh>9vMWVYP_;JtwQWdrx3Hrf2miFrXGmT&=kZP zdL8i%y13M9?QKXd!h9N1239v3(rog`V0xFlxVe6h*o~Sm^U5Jqh&YyBLYze(A=c8b zh;LEKa<6qLgvKoQSpNv2wOIWULVsWGkrC9O)+>{#Ct`P+ikL%hB95g?h_fhig;y@4 zOvEkp+zPL?FO>ee!egBcr5~~SE|lCWJyKHfO0SHjml0FwAYy;IgIGZwS9#@AG!}6g ztwY>KXI6QwV__7u+GAY_qjaq9gwgcX>bl*!+AG`9SBL{BdW||8S%|Y~4q`36kGPet zt?^n1!YO&J$ND^+O0fDVoR+RtXXEf%uS}rd5Ixj+ow{7cBbL*8#A$REaUliQdGVC? zD58&M)Zy}tplx+HClT~7tZqe6%zBRup&{$NRqjUmdOwDh{wnDXqaMxhH>qtl!>FG^ z>T{xNf*a$XriW_Uz^ETY4{8R&s2@fT`b>p!KUA|W%2p4G?!u%}&kadcz5o5&DO6)A zpZ>(eNuxC%$4XCPIA)-JGCkO~21fmKdN7H@FzFA~{2NAn89bZ|%+ne*XnQ28{Lq diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index 0664ab5..3687fb5 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -30,11 +30,10 @@ static int gpud_device_sync() { return (int)cudaDeviceSynchronize(); } +extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start); + static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) { - // TODO(phase2): replace with real CUDA kernel launch. - // Phase 1b keeps the launch boundary in place without pretending acceleration. - (void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start; - return -1; + return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start); } static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) { @@ -138,22 +137,27 @@ func phaseStatus() string { return "phase1b-launch-boundary" } -func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool { +func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) { if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { - return false + return nil, false } - if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess { - return false + bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0))) + if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess { + return nil, false } phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate) if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 { - return false + return nil, false } if C.gpud_device_sync() != C.cudaSuccess { - return false + return nil, false + } + out := make([]complex64, len(iq)) + if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess { + return nil, false } e.phase += phaseInc * float64(len(iq)) - return true + return out, true } func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) { diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu index eeef3ce..2625a77 100644 --- a/internal/demod/gpudemod/kernels.cu +++ b/internal/demod/gpudemod/kernels.cu @@ -19,3 +19,17 @@ extern "C" __global__ void gpud_freq_shift_kernel( out[idx].x = v.x * co - v.y * si; out[idx].y = v.x * si + v.y * co; } + +extern "C" int gpud_launch_freq_shift_cuda( + const float2* in, + float2* out, + int n, + double phase_inc, + double phase_start +) { + if (n <= 0) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_freq_shift_kernel<<>>(in, out, n, phase_inc, phase_start); + return (int)cudaGetLastError(); +} diff --git a/internal/demod/gpudemod/validation.go b/internal/demod/gpudemod/validation.go new file mode 100644 index 0000000..841d029 --- /dev/null +++ b/internal/demod/gpudemod/validation.go @@ -0,0 +1,25 @@ +//go:build cufft + +package gpudemod + +import ( + "math/cmplx" + + "sdr-visual-suite/internal/dsp" +) + +// ValidateFreqShift compares a candidate shifted IQ stream against the CPU DSP +// reference. This is intended for bring-up while the first real CUDA launch path +// is being wired in. +func ValidateFreqShift(iq []complex64, sampleRate int, offsetHz float64, shifted []complex64, tol float64) bool { + if len(iq) != len(shifted) { + return false + } + ref := dsp.FreqShift(iq, sampleRate, offsetHz) + for i := range ref { + if cmplx.Abs(complex128(ref[i]-shifted[i])) > tol { + return false + } + } + return true +}