From 8033e1423866aa54cdd7e9c5647fb9938ae0d5e5 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 08:36:09 +0100 Subject: [PATCH] feat: add CUDA FIR preparation path --- internal/demod/gpudemod/build/kernels.obj | Bin 61020 -> 68626 bytes internal/demod/gpudemod/gpudemod.go | 38 ++++++++++++++++++- internal/demod/gpudemod/kernels.cu | 43 ++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj index 062d18fd62a9f0e44744c145415b6480d4593237..808846a4be1faaafad92091d5fa740c9cc5dcafd 100644 GIT binary patch delta 16666 zcmb`u30O_rAOFAhIj3|QPop%eQ$mv*lp&RnsY1xq9A#_}AxNKJWed3~SiKIcM*xsS)SoNwgv}?jgTz zhhrxGj*v22%anVb-L?(4D|@hkmwSf}T~K37>Wk1|dE+)wo+!Z6*}a3STN7*y`rJf> z>L+GzwQw~xKD`Cqg1DkMm zBE1FJo3m5(TYymH0z8GYR-FQJA(J^EcP_w3f&KxqlF?fr-!S%^TdPKc%wjYT6v*}C*gPf6#`)F8;QnT>{4Lj9LAS<(RE|6kI4g)#E$U{b60Qtm7 z%N&|znS)UK9GcaSk-)11EhAqT5$7SKlSeB%=Fz@70r6ragpu(;5*W#5WHpd2jGScT0+4D({$=DF z5b0`Kxy@=?*?BcW-543kNEndGj4WmZ16j|=5k^h{xyr~(Mm_)$uc4J&uc4LOu0g01 zBSRS(4P-nc^BGwRWHloP7&!*y0wYftc@5+nBc^LHm`^L4=OffMpH}Y6h(D0gjLc*t706OXb}&*5%^rHJh05lQeI8U%i-SGvR5oX;8gU=%c zvyuTw)f2hF70MHtVGntz*|?bakcm<8Q=%pzrzz8&Vy29n>NGw!F){=kcy`-XEt~rz zrcOzi7B*#CNBm6gDkd>kY^|h149;#7j5F1iU&=d8=P|TM;=3TA>tv;HU+B=#Z!d;1DDDZunE%Nx8{S6b@-Ii8uU(kEGn_ z7e1HrhF%-pu)Y$#=9-V!L&PBz z1#OIXJKJzlT;)8vMF6af#6p6LoLh4mKxT8=*v}<_)5RxTio;;XBLjZzh#|w@$*&pN zCROD>Z7=+3dq2dGM&ikke8>hgvTay=wZn5!ab1U6^0rk}d_+;f;kpjSnClhVQ*##j zfyN>ftM^KbZs{PEg>bbZ?FV#mUkCMt@8!#BZz*E*R0qkYiGPsA`hl`qfFFCh87e~? z{17jSR{6qjx*sjjSskPqkCY-(AU2#$fDE{t6hQod<3+T9ROmpA)QOu6@QMyfO%Rb) zLTI7|LL0SF+H48Tubdqc8{x1`Qc)rEqaBg~BuS+oe?Y5y+XBJ1<}4))h^!zP01Z%6 zBgZaFu!ID4brS#M0SpsX#3o)X@GjG4GMy)6cr9^(mA<15J(hmWSixr`H_0%j8E5}ThK%fBXw#UXM^!7Dk;o?e z%c#=%j1^5XOh|^wB(emsi+P7&jAb|>z)rpp+;BDf#qGv_a1n#8tPGS5OWOq!qyRmI zN=*1RNg#x*3;A$r$m89p-~Y%D;sZh=zoS8;O~}3*uhHXc$N?G<|{Oqa&py%mc;f9)okyE0U_|`3^yYh)GUMTU3D1d ze`OT$7u#t~`e1wa!2igQ^B+A8Ye{;fd)Kk%e`GlD84XukAp@@-VytZVHjx6^q>;6T zj|r_5veBfe@pI;{dx9s$oR|CzNIoosT8a?C#E$>YSE3ZSV3F4FK}rh1s_3N<*rbv3 zoAmMX08l#?l)|6YBp*I}^ivoX-nWAy`NZGY;IK+j;Dn*hTsM5RgAqP7)Jk+vMx^yw z8y41J8&ne5x6EE%$fv4$TqQHpc`@4IH9J{Wuwu9dmkeQmcn+=^&77T3dZ(uUWWT4KTSg`1j9okXp4rz&r6uk^q?bGs-gOQLNF4k}+P+r( z4Amqfug(kaoj>2pZPxnEp(9p!6>n5p{T|E0;`7t{wDhQ~yqDf)wil6&6pdH?J8Pw_<6n z>d?zxJE|_IRcEJg@S($cY7&IuE;adD~7@+AS}rAZ6N8kF~?APw!k5d8jz<&EVr^ zM^yHInf-=Cc`HBIBwXHl*zm#m;D_6Cr#XhWzJ0&%%mUrHdrm6nJUbks%q|w$q%U7} zE95wfYH@x8j23$DtXOz!&i*j&4Mi^lJj3oc~DbFKW77c47CGPqPXwYo1itd)Oz zkK@Z1t9-b2s{f(rL;m3fw^Yt7@NfNA`Btlb@uF>a3UX?O3^K~T^ZEW_6OE*9!8dAW zKSHSnCq4wz2Io6{dMDi@ zfjfo|KCcsh=H4U^+&8RCZREZmS|u*K-9D#xy)ZaiIaYuEedEpTPngy3F%;eU+(BGr zccjmsK3#_AetUA}&0jG_&rJV(dgYVDq`Xuo$Kt~domw5fHd7RN^XrK3u`M=sns&dc z!pY(0_0@J~Dl^*;KW1E()?Xemxzyt5=-z3DBk`7=^WT&%v#)R4`{j=O()_Y8$-wO$ zhnJM5*oW2?os0rf4M`%oStPSu0^+c*y>917VC`;Hp$b>v47cTmF@1W^VDL8 z7sV88`;g%sdS_XOqYvBp**titRCj;c$Cw&6o{O??00adHFzd1 z>&HDwX*2P}5UbA`XH(r<`%NC>Tvfj{yy)!Wim)VUhqL2-zh-)tq~y!y?0k~tH0Gd_ z+IgpqXR5dMHh%vxLR3&D-rg}{v`dzIi-WEEA9~l@?4;V3f8`^ss$0yP!rfLoIpg}i z)v}X5dMk4`6plSG;z8Br7F$+T3@qHPX}t5TwsMvIALer_{;AyR7E|#khqJo1!)aun z&h@vB&}tUFz^S9#Qin>p?8yyC#Diw-ltM8)=vJpM5=DRAYX4!W{bqnZg9?LH-^ z?(tV2maO+m+2guR#{AQ(xn0i-^AmN4pLMP}Yqc)1%2#*GvUKt1g_GNNxY4`PVf7zf zMNup3>do`hPse@y9s4cX)um?gXKf9`8S@pbJ9~?zpRbO&scUO(d0D-WmU~EV@%Fy= zbAIcVc`GCE?dew+Po3@+{PcOxv3oTxMpT`egs5~UVq76Gj;#L)B%-;-d;L6TX*gE39DbuYwusP zFR9D?F+=00_bv@iZslq=MN7P9XE=9MDqVrop{xx_@$-aOK;Tq`^JLjE~;N z#o>WsLw-p8M(`~E&(Za*$;^?b+55mIEoz6H-NwQzs^txap z>hD`}+$l{nw!PI9-jM}LNX`dF|$<>{xA zn`_poecbJOEO6H85%POMnYt>j!L~jnJ^UYZow@I&@9l}<1u ze(?;`tB1{$(zaZ$kIxFn>%PAhQK!x<*OC*|!q zsg;%Mv^i({^{N!y+dn6|K6XXm*Qnoi+7}M5S$9n@>+9ydpGWD%ZX3SKalhjLatjIV z?G@gK{thl~9jq?v$CssC!VA49Xk01f_yqpGTxs&}pjg4(pvX;e39?ad zVD_LGa|7i`y%RyPdEN)bcIOjN!{`wH0p$To1HA{url|?HRDy|Di3}SgG6BWrX~i=Y z)D9FY*a1`!E!Y_po9AFqqbUmlMGK;_pf=HYCV^t}#DdxhicQlNZifenVg3@-4kVk? zdI|~KEU^8c*gRi>V)M*^Q<@zg z#+1Gw84Lo&=D8dco2MPzIAW)cBdE!A2(F;mJiCHo^9%qroq9(KDi{=-XC%*5&=gRt zULvSi@?1iKW`Sh$TnQ?HmdOXj3KoE3^DG0!=2#rlsuE|&x%<>}nQoUtMm?|#WQ)59?+!R9!4$^>f0iJJ-bt0y)cY5{mDr8D zO35DjzMH;3z@*3JZdfZ(~2SbnhuxuLb zi#tRa2kaQFbdUB{L2F;3?{)P39L}HVjw7I2aiW|%N_#ns`_J;={>GW`{*b;uqSd-V z#BHinVJ3r;EZz+ab81s?}! zIX;$bj%8^ETrPeM$~(^QBATK?M+H7$($biqs!YV*(Z$aAR;! z7ZiCuRmF8jtu854yFixn?6si9QjfsVrGCJywpiWSA9n@K> zRF@cZtb_}b7;%OiGW3QtXfDjq_vu1qrby-Mq_W`*!NU~s+@_u+!NU~s)R9#(oGEyi zBA!PyXO`e$ig=z=&uqcN6!E;Fo@Bwp6!Cndo;iYtDdPD~JVtW`4^zY=5#!~lW=8B1 zNlp@`h)bQiMyCi_Oc9rio|fe2HTebD1_*h6oD8c;82NqpGb2AZtsxGc#nT7hB(5tS zw8YXrZj|fn4eR?3T)H-@Lc2}Nchn00=l&W*0mW>gigSP~?X zrM@5faO~*9wQe(ppVmE6)4lj>R*_h3skPgh`tcdI@O*0nR^a8D{php*sJ zGxAs8>j3OD@ZLy0^o{)e*A;y{a7BQ!lBfRVd7qh-5AEyP$-T3JY_))w81bZ&>}S&1 z>Sxl$?PuZ^@H26bZX_PGvo00*ARHa zr?|R6j`J~E0=di+ZGqh7iH<;?^F$_)FFer`h*Zgj4S#6VFi>Nj=nBN1Ck6uP%o9U_ zkXs<683|+rPg)8jk|)Lji5J1=H)*W|)G#0ufh^_I%mlKAC#C`^D9L0vW~=H-SvxiMv3Oc+yoM3wY8^ zAgdWsqMicU$_IH0WIs>51ag8W-U7MIlU@R;<%y3#Uht&1K)&(><}aLhDr|`wiTy96 z8xV4>Q-LhD#`!?%c*2giAB&%6WF%T0*F zU&w@CNGgy}ctKy}??Xx344cat;uq};Yz4rLyT(xufRE=ZnoSA0Lm3O-o*QGH1ChZ^ zkj814!p%h18m8gUgcN@-yeCn1Bmr!x$=|fd2l{m)ge^5sy3iQ9uuTbDSf03(7&eWj zgiWIB?k{`6@a?4(M;BQbKY&d# zg*wZf{2xyd|92@~SELYWs^jZm?9{RPUWKTqI`#xJQXNkR6R(bUf?1-D?}OQ*j*a&z zM5XH34@{*xP6zXsIxYqCK^?yWqoslEiWMSj4Lk;ny9Uky56} zQMLx20A`y8-T>x^2EGZVN&{<_C`3;*us4_=8aN3|OHI5RjFTpQ0LDiXoBRO-)WiW` zrfT90FzK4O49t2>{07V+O>BPv2B?XH!Q9uxE5Llz#OJ|iY2_S*@oC}CU^;8zNni$Q z;f-LzwD2u3)3vbnA%$p>7WM(NK?~0UQ=)~7z+BYAkHFm5!e*sVQVS0Oqos`(f{|NYvXo@6{7LlI26oWZJY~cg*LtjW`{Nwl_^BWwXqwRTiSREn18hKW-ux` z_%;|*9V|Nn1JuENzy#>vWH8Y>cpn(04t@+KR|i`hRfu-#;6N}HI(QM7J8+W>%v&A& z9*nvSI~-GpY-BhbOg9|ZQv0!$~@K!JtGF$`ZjtuJ^ zSBPHAurHVvx_B-aGhMtNjH@nw3Z}0vwmP8@1?%GBV5aHfC194oA1i~|q>Dd-Iiic5 zPAWt~!zp0i>ft}YsOjV9U@Y~q^=XBulRgdt=M>6R z_+C!hd3dp_+$tb&fkO0LBrbZ-7cOZi5MC@M8twv<#KxEfK#ZHkYyo1{Ean^#d9#@3 zNIp&V&4NOKOluai7|8Nw zF(p8fo5dKwZS{P7?!2jzd=nr|F~Q;(Qz6r*PMRFaR`3EKwoBt7gH&yt)OjF@^FIe_Z1$D8R?;uauK){9m z0dl=rOiMV_ZUJeG>1l>fTu@jJ0S9TMB%5gpPY4xZ@x`_VSv@R11vb6}r=bP*1M}Jfr-KQz#HC=$E%7Tb7FO7Kk;oVfF7)KhY^RSa#W3k;6L(5+SFE=!psJ$2YnMM9h)@=W5;#*%Xhv z+=0`JNQ?{{6g4>_c4pwzp))6kPn}epd!<^U{P|f(V(g@;Vf25u=)X>dkUZ40r~KD^ zPr2NCq>Hzgm**f)&yij|y+$ZVQ1AHosqvnkUcJ4TQ!el2Jt&~JcTmUxR~EsBF(772 zq^D;~&r$4&%A~NwDG|}+f5vE+c>In1;^ATO#hN$WIOE8u=`j&eAqmqG!$U$wb?DUa z3LCS$RWT!~dtr6Q|68ZW;g2nKn)Q?*P8UuS=qYHT{3G zCa@&@Fn;ai|D%t-|5po7I%&EZjq8u|ADFb@cSG^F2d6|h_>s2yFN@ygS7e0`{{3A` sSakA$thnT{qYCc#-00V6W4z|M<9}mytofq-f4SmcwD~W5{l&un1Ae-ky8r+H delta 13100 zcmZ{q2|N|sAHZj>?Q$*ma;@3Pmc6T@l2DI!Qcs(-P^n%*lGK%mMB>O2rF}0Xt=ecW zJ?(p`(DvHiYhU#LopZ*}|NZ~->CW#v-`_dEGiUD1y>mx-MT+F_ie zVwliejZW;KTzgwqBR5Fex4WmSE7QJ&dNE9;Rr@w3A7+@3OLtEST&4cEW&a( zo4Oy?WSB0n)iQ%NiHBg$3=4Pm3l6ak^$Th1Cb3d9Y=fGC}Y-GgR2&`g| zFKeSbK89iD#GpS|OJzA=F~tplM`F-Xux9`tQu`cGA(ph&h-DaCif(`dDUJY~6^n)_ zSSrs0ETeWCU>(JJz^4@70gB>CG2J+Zafu_{JORg3I}LC##caSW6e|HQP`nQKhT<1M zojIhK$sC61PO&fG7>W^q@;PLPrGVws9t6CK(NxVHj?8`7)QTsK4B>e8p%@4_i{d=M zvUt+o8rpAz&3c--3HSqNER|K}!h19qnJQZ2(+24C9za#u`9PFqklElBh`qvx1u4)a(OOOHCs+ z&%nH=MtcdVY_tT<-6f>5KQ)8Egi;ev%|bAl)NG}u0!$4xcc^&`<~23y>7=qW9X|Hc zNo8+p`hy9kCYqWAFsan6r)E2tebii|<{p@5)F@|=%9)eDlLam0(7HQjG|vFqvei$(PhVOaw#pjd=Bc`F`NF#T||)>^NRV z6VJ^Uwv)M%vlZa2(-!OHxr{^gF5&2iix~BGO%OCPXuoSD8;(rfzRB_a;+LXCNFpGk z=ptnB{O}8VRV-n_+%|9A1|Ksa=i82-qBixUJuu%Mxb4^|Mk>O@p%b)eUh!R@J-Mc{1l+$F1n~fp_-sZ)kgGSn?BlUE<^`M8wiX==;~;1)n45Dlz@!Jm^sa) zEu(#C7T&$ABH=_LQXG;Tz}*`PSuEh!qAg}G~!lgHBhUW9&$7nJDmfCkCAW3 zJa7Y7K7k%UlBI7#44D7bBY^wFitEsWz6oY9h5yx~hMQyCl<^?sO>px6KRr%zJu*6s zL3*GsVFvPKd_mxa%i==1PyZ-=& zF{2HVxr+c7fSNkVRP*Z{KzIjmzT=Y2eGkER550KQ@Gjzyg(4TY;PlX_aB~5?d4QG& zI?gHc6{)T_WwA^Mjg(UD=x+vS~+90eeGn8(M6)VY_nZ`|#}{{+$aq1a38)9Cc^1o5N18knjUX z?v`fN2j05p)xG=SgLWk)IVJOdwiF!nTyyu{*gu9`YKi>W?NfNHq}$V@^}RPXXzopm zRnQ&rd+H9m9oj2p>$e8>zgpARcx7h~d3~ON=PYZpcr$W>X@|kJ#|EN6c~N0A(IaHys!8_K{ijY{*|(_0^zf#TnAgqs4^Evr zV`DMk?km3Ra&i>@#lnT&4KTiFSkJtD5`o>3`lP`$%J|=x5@WdhhZz zS*`I)i+;RREz|s9oM`{>DzdupS@T=TIM(yX#pR2A%zIQi7Mi90b8?NUo%b_y;}<__ z^KV8tDLu8hb^l?I#=f4cflmpwAYUdYkM83*U4N8}wc%2`luN@TSQ8NPX+Q zvSVxRi`;5Gt>jq`U57k-b-yP4^Uv@o1*Mkq@8>_adPN3C4&9wqqOQ6x+PUI&gN693 zAhJGx$ljEOhOskzRxt(xB3JqwPMY!h)2x=&HTQZ&T-gvcD5UmOR%}GlZ)t%CEVB!j zw?-x$RMCC4;-BLS-WbbU-3A!yH^%tY-_=|m-Rvh0k;SR}ZJ;C1{In%}@9W}f<5we3 z56$T+vG}bvM=P=P%(^A5FMG|_kNLhxIMqh`)hwM}Q)+E{W}K{BTHty#`@ZITgR&&! zPl5eRh7W2f`s-U0>LRLfX?)%D^X-k2nOSdFmpmNc`6cMO^2p22f-Fk5?{TczWxi!h zQa3~G=a&^0t8D#ybeSaAP$JKjS_f`Yh*1+O`bQPz-!Ro?Vx}3Lbd59~u54&%D3JuJ z1+VZeVUrJ}FTbzX^Txqk63<|rrF}_b>B^!SzIP64P#Af@3YQ5$;*?rcnTEFg8 z%Aty~Y{QDa7mB{Us9l!)s&|~;L|s>-(OYY>EO(7-E;cl%o?11!WnDo)sYq^TW^|~0 zb9{=jqbM>fwc?}3MBTw+iRrmf3Yi6GUU)yt?W6MKs=)F0Fndg2EnRjT%nEnRu9&g(+OHU>In?lXI-Q(R&hZP$nDcJorH8SGtUq2aPAE!?5 zx_Zvs>sYnr#N*?h=zGMiiN;Lq;Jshjw;#TCVHSw|Q@^|)QuZ+Yip3F2dA_CXvkA}AGA>K}9EaqO7Trk| zpU%4-Cvmu@WODb^y3bKD3Mwjc>x=aZHb1{-*_^f5%(T0;i^~Vg{o($H-)~IWkd~4Y zulr-nl3`J*ZxdW|zxEJItIIyE8h0`2Sz`a%K>w9-C!RmJka^{5-ITe0%7=5K#$8m% zJmKy6+-yQkb|)^Ug16351*)dK>8HF> zOO4@LhX=0xlyU1th=%R)3u>1Xny!r~|D)2X_)|YyzgZ!5o0_gPZwl7a^PM0qx_n?& zMZMpsZcpoycdN(jVtxZLjj>xX!*%YCcROXvW;}HJ z=XLmBD-4R5S#~d_3O8@we4rJeoP9%iQNaN9#$_oRt{S9$v+ohvY2ly;R%^ps(_qrn;-n0eM9) zzn1kn*$Z9NhmUjEzu9t{@rYfIjbHih{4!-Iht!#%uI4SronkHM^hAyY=x;|Dx3B3!N{Y?RP|E6k^=(?#yw8ufIJ0XUmgbRi`>- z4rFI!?0JA|a=M55G zDVKWWMVGpJ$QOl85?h3O)uzvWuP3V8P|;sw-5r`^_qoJ;6Hkh%pD{jgwL^Vz{WxiSQ^1MQUl+btbFh9A?RH<% zFrxP2)%o2IpR5_Ub=YX%+`60@HkFmD7tNQy&@c9$DfwOWTWZ{_+?bvvXZ8xhBW+V- z`@i1}q zcPI6I@pDU>H(1W}*{Yp8XvOADHY+0=*}-O3?4gs+FDI=1IC5tBQ6$6nmX&@J`;%Wz zRgb1?Dk9gJN~r7)QI$)KqiWT+1;#AeAs3_l^5Jq@xHD8_dPA2McquYxU~K_+c8W|V zSmU2TicBS}*O4s#kwZxl?wieEOOfdVWJL(yOK&Fa76Z8vX8`qS4}YGf1JSapaPC#0)5~z%nxd^n5&{Limc&uViI?n^51s4KY688`9KJ!ipH|riG z+Y^XRa}N+b<_CrF5XFn^!cmZPo~ME6JnsSdk!&+hFM#Mg-*Qfg`3gh}vT(oOoAgiy zqVu!>@*&O+h}LrgBGZ&J0f2O#lXyx3qVxO!L<@ceqVt>uzj>&O1EMY!h|cpk5S`~p z`1MTBk8wc#$u3L;qSK558leF5S0oR0hLZGBkd&4K(Rt=`PKj9qLL3M4(Y1JQY& z0vbfzMIc(`G7x<`{{o`ZRDlNql+4u`IXBN@khI_$AUY=<_+d<)0T6X|Ky;o#K*40V zF+gj{nNkRZpH(R`Yk>3!Z33b@T@K_-&V`pi_$i?xBmWx^KWG`rF(ywv=PgM@+UXSm zT;~Mw4Oyz_&&;muMdG%T^)|A8h_+=ru?H}t)NE_E8foNoLGNL_)(mU*DDKT3CN)+q zwPtsd?OkL$4aPEpHb-E~9_`MyWABjcF;e#eS>Gk=lW5s;Pxcha){ws8*~&;e$A;a9 zJafF+$0z~Tjd+bFK-G`w)+i6=<^Xf^h8PbcpCkQtW@#gZTs@U7S%hhN1 zqd~bY>>iYqYogl?jy`_0$uN}Q%4Qa!-H_?dXDH!JI%>={k@w&;lyD}SI98KoG<{(c z78@GK+LP}=3HLy>hZf6daXsuh^zh<)P{KWmX%Cc?=Z~85hN9n)*l;p<8S(I!RF1MR zp+qzespR)!&C#%YeR(Z7I!{WtUK#0!Ph)(o(dXZk@N;1$IF%L1r4moF#^5NK1F4X# zHPBp=#rsal5lEF}U4igJE(Q45=dlNnh@|l!73Pt&4^RF;8YCM4lt{9Jc*0Luv`97t zXgXIxzjtfXOiKk>B`h-0d zXd%gF@RS8)Li(-%!WX^*gP(pe0)*b2-}tl=tqIEZcoxv5dC7h{5;e{r;-h74<&g>_dK757}&Kw~bA3j3~XXQd#X`UZ6wd)JruYqTDM7u7 zd~r&+cn(cX;!~7xY9%d@TPhWlP^>GJvg^nUK7iaprR{1dyNgU_6i^kRWk3fp=|=H| zDdED$(C0D}z3F_063(2WB`wxS1s5m|Un7NYBRDiD4dhKK_XFOi_#E&F#ZF~X_BnDb za}&HFLNAsdff63!18rMbCKY_6_@qoKXUWC(6Bc||Z42$JASMYI20MJ4ZVB!uyh1sL zZ~N_F8=uFe;1VDC)pYVsw%EN);KO7s1eog;X6*2d`3-KvW9!v*59BcVO*iz5~t8M;X&FwXEtmCmLL1z}y) z%hQQ_-bmm9ax$8}ZG}4iKUR2o^Xh0QQrRAEjQ{&oJczOJOP`SpS@nc{nK><;mFa&ocw_d?n$e}2g=#bVO*jY)0T+lOmE(#pr(UX z@_f#Vc)yM_YP{La84cdla7L3i7dWHEo7CtHE?~&` zw!HG@8rkt?2xshh6UG?_-pt}m7v3!5j0uQZz4G3#hWGIpfQl8qWCf20y#Ni$8DfaHbz`o^uA~-}VWi1WzdF z(q?r3W$eHVgGNfwiyqOXU>Z3?&%(7}@JBmtLro2saKLtR70g;}7?=BbFBvr@x&&a zjM~xAQ9GKoYH0i^SAmBR<(`rW1_;p^2w_6>1Hv33>UvrxSSCc%AQTHx8HCM3bR9yq z5Q)yn1a(5x7s3@GntR67U?U8Q-{7~4*}ixO=+Y=eyCAt0wjAIkD_3#nG7#Pf(Gw`} zLx>E|$^`nVXeb0%Rg?x{fGRo+VUj9(2VsFKvOOmg6sV$c5O$~{1mPI`bcS$E75#+p zOci;YmkC-`(R2tpYG@q<2Q_pPf{z-~xF8dZR6~9crmLZO5K`1oIfNoL)C^&_8tQZr z=A?##Aly+y=@8zkp(7B4BJ>XgGZC_{mkB&XXgq`gBD4}hxCmWS!s1Ty=B|!e({!0m2b=)a5eNR7VpaJXc4B5L(qyJp^41q;N$h=%Rre&=v@e zTIfE6{#r=qrc5wi3k5=m(?W6x8Cs|sLYWqN387jGS=@qRTIhEOkF`)XgwI;24uXal zeSu&lMlQEyf*xWN0b!^Zl|YCPqstH$h>^-2nIIqj^Am*KViXJEj2LZ$@K}rb08FHqa6@-X`@FF>a>ymLz&=~ zHX01!l{Q)ofz?3=A?WL%HxL|kQ0GROz+VTAfiOk~DsX z5j^n2rhSLJ!Swp2%>>T0w^;}#;FmV5!1(`Cr$%=m9jJY<`_Kq~%d{5~!c)fZUkbT{ z3H_x_E|?i;&ErlIyjyrlMmpXE@}B7HzrktPYA^a5Je*8G&QHwbYk-n}$$bHn3Wn~~ z`1n~fW=@7pST4Ye9+Tm4tn*>3y<#Sq6=2$@avThP0MXv&IcM72NLWKu|3s!_jh}&{ z@Frd4)MP9T##^w|!;6lWX^6%($pkwM5rUv&gw8?8G(tZiyfs1|%`(A=PG~xW>P~1K zgwDq3CWKUDq|pLbiZSwo(9Z3mZ%7ITLm?3gK1dS)rR%q4JI zz&KBuIz6_VjI_Oro3i--r2e{mHmw@-%#E!odwxT4YE>7r>jZ_(3hev+PZe25rNSZ+oA@6vbwynO diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index 0b68ba6..cb0b084 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -229,8 +229,12 @@ func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodT if len(taps) == 0 { base := dsp.LowpassFIR(cutoff, e.sampleRate, 101) taps = append(make([]float32, 0, len(base)), base...) + e.SetFIR(taps) + } + filtered, ok := e.tryCUDAFIR(shifted, len(taps)) + if !ok { + filtered = dsp.ApplyFIR(shifted, taps) } - filtered := dsp.ApplyFIR(shifted, taps) decim := int(math.Round(float64(e.sampleRate) / float64(outRate))) if decim < 1 { decim = 1 @@ -285,3 +289,35 @@ func (e *Engine) Close() { e.firTaps = nil e.cudaReady = false } +odLSB: + return demod.LSB{}.Demod(dec, inputRate), inputRate, nil + case DemodCW: + return demod.CW{}.Demod(dec, inputRate), inputRate, nil + default: + return nil, 0, errors.New("unsupported demod type") + } +} + +func (e *Engine) Close() { + if e == nil { + return + } + if e.dIQIn != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dIQIn)) + e.dIQIn = nil + } + if e.dShifted != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dShifted)) + e.dShifted = nil + } + if e.dDecimated != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dDecimated)) + e.dDecimated = nil + } + if e.dAudio != nil { + _ = C.gpud_cuda_free(unsafe.Pointer(e.dAudio)) + e.dAudio = nil + } + e.firTaps = nil + e.cudaReady = false +} diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu index 5250a91..d29ed54 100644 --- a/internal/demod/gpudemod/kernels.cu +++ b/internal/demod/gpudemod/kernels.cu @@ -72,6 +72,49 @@ extern "C" __global__ void gpud_decimate_kernel( out[idx] = in[idx * factor]; } +extern "C" __constant__ float gpud_fir_taps[256]; + +extern "C" __global__ void gpud_fir_kernel( + const float2* __restrict__ in, + float2* __restrict__ out, + int n, + int num_taps +) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) return; + + float acc_r = 0.0f; + float acc_i = 0.0f; + for (int k = 0; k < num_taps; ++k) { + int src = idx - k; + if (src < 0) break; + float2 v = in[src]; + float t = gpud_fir_taps[k]; + acc_r += v.x * t; + acc_i += v.y * t; + } + out[idx] = make_float2(acc_r, acc_i); +} + +extern "C" int gpud_upload_fir_taps_cuda(const float* taps, int n) { + if (!taps || n <= 0 || n > 256) return -1; + cudaError_t err = cudaMemcpyToSymbol(gpud_fir_taps, taps, (size_t)n * sizeof(float)); + return (int)err; +} + +extern "C" int gpud_launch_fir_cuda( + const float2* in, + float2* out, + int n, + int num_taps +) { + if (n <= 0 || num_taps <= 0 || num_taps > 256) return 0; + const int block = 256; + const int grid = (n + block - 1) / block; + gpud_fir_kernel<<>>(in, out, n, num_taps); + return (int)cudaGetLastError(); +} + extern "C" int gpud_launch_decimate_cuda( const float2* in, float2* out,