From 8d0bea214ce2c4d55b4c510fe668e81dff9b96c7 Mon Sep 17 00:00:00 2001 From: Jan Svabenik Date: Thu, 19 Mar 2026 11:07:39 +0100 Subject: [PATCH] Attempt MinGW-host CUDA build path for Windows --- README.md | 25 ++++--- build-cuda-windows.ps1 | 15 ++--- build-sdrplay.ps1 | 40 ++++------- docs/build-cuda.md | 64 ++++++++++-------- internal/demod/gpudemod/gpudemod.go | 10 +-- tools/build-gpudemod-kernel.ps1 | 100 +++++++++++++--------------- 6 files changed, 120 insertions(+), 134 deletions(-) diff --git a/README.md b/README.md index 50f544b..73bd73c 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,24 @@ go build -tags sdrplay ./cmd/sdrd .\sdrd.exe -config config.yaml ``` -#### Windows (GPU / CUDA status) -Windows CUDA support in this repository is currently split into separate steps: +#### Windows (GPU / CUDA + SDRplay) +Recommended build path: -- `build-windows-default.ps1` → reliable default Windows app build -- `build-cuda-windows.ps1` → builds CUDA kernel artifacts (`kernels.obj`, `gpudemod_kernels.lib`) -- `build-windows-cuda-app.ps1` → experimental full Windows CUDA app build path +```powershell +powershell -ExecutionPolicy Bypass -File .\build-cuda-windows.ps1 +powershell -ExecutionPolicy Bypass -File .\build-sdrplay.ps1 +``` -Important: -- the original invalid `#cgo LDFLAGS` CUDA integration issue has been fixed -- CUDA kernel artifact preparation works on Jan's machine -- a full end-to-end Windows CUDA app build is still blocked by Go/CGO + Windows toolchain behavior (see `docs/build-cuda.md` and `docs/windows-cgo-msvc-note.md`) +This path uses: +- MinGW GCC/G++ for the Go/CGO toolchain +- `nvcc` with MinGW `g++` as the host compiler for `gpudemod` kernels +- MinGW-compatible CUDA import libs from `cuda-mingw/` -Use the scripts above instead of the older manual one-liner. 
+Important: +- the kernel archive is generated as `internal/demod/gpudemod/build/libgpudemod_kernels.a` +- `-lstdc++` is linked explicitly for CUDA host-side C++ runtime references +- CUDA 13.x no longer supports older targets like `sm_50`/`sm_60`, so the script builds for `sm_75+` +- if `nvcc` is missing, CUDA kernel preparation will fail ### Linux ```bash diff --git a/build-cuda-windows.ps1 b/build-cuda-windows.ps1 index 2294d21..fb2f2e3 100644 --- a/build-cuda-windows.ps1 +++ b/build-cuda-windows.ps1 @@ -1,8 +1,8 @@ $ErrorActionPreference = 'Stop' -$msvcCl = 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.29.30133\bin\Hostx64\x64' -if (-not (Test-Path (Join-Path $msvcCl 'cl.exe'))) { - throw "cl.exe not found at $msvcCl" +$mingw = 'C:\msys64\mingw64\bin' +if (-not (Test-Path (Join-Path $mingw 'g++.exe'))) { + throw "MinGW g++ not found at $mingw" } $cudaBin = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin' @@ -10,11 +10,10 @@ if (-not (Test-Path (Join-Path $cudaBin 'nvcc.exe'))) { throw "nvcc.exe not found at $cudaBin" } -$env:PATH = "$msvcCl;$cudaBin;" + $env:PATH +$env:PATH = "$mingw;$cudaBin;" + $env:PATH -Write-Host "Building CUDA kernel artifacts for Windows..." -ForegroundColor Cyan +Write-Host 'Preparing Windows CUDA environment for gpudemod (MinGW host compiler)...' -ForegroundColor Cyan powershell -ExecutionPolicy Bypass -File tools\build-gpudemod-kernel.ps1 -if ($LASTEXITCODE -ne 0) { throw "kernel build failed" } +if ($LASTEXITCODE -ne 0) { throw 'kernel build failed' } -Write-Host "Done. Kernel artifacts prepared." -ForegroundColor Green -Write-Host "Note: final full-app linking may still require an MSVC-compatible CGO/link strategy, not the current MinGW flow." -ForegroundColor Yellow +Write-Host 'Done. GNU-compatible gpudemod kernel library prepared.' 
-ForegroundColor Green diff --git a/build-sdrplay.ps1 b/build-sdrplay.ps1 index d2e4e88..6f8e56b 100644 --- a/build-sdrplay.ps1 +++ b/build-sdrplay.ps1 @@ -3,26 +3,23 @@ $gcc = 'C:\msys64\mingw64\bin' if (-not (Test-Path (Join-Path $gcc 'gcc.exe'))) { throw "gcc not found at $gcc" } -$msvcCl = 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.29.30133\bin\Hostx64\x64' -if (-not (Test-Path (Join-Path $msvcCl 'cl.exe'))) { - throw "cl.exe not found at $msvcCl" +if (-not (Test-Path (Join-Path $gcc 'g++.exe'))) { + throw "g++ not found at $gcc" } -$env:PATH = "$gcc;$msvcCl;" + $env:PATH +$env:PATH = "$gcc;" + $env:PATH $env:CGO_ENABLED = '1' +$env:CC = 'gcc' +$env:CXX = 'g++' # SDRplay $env:CGO_CFLAGS = '-IC:\PROGRA~1\SDRplay\API\inc' $env:CGO_LDFLAGS = '-LC:\PROGRA~1\SDRplay\API\x64 -lsdrplay_api' # CUDA (cuFFT) -# Prefer C:\CUDA if present (no spaces) $cudaInc = 'C:\CUDA\include' -$cudaLib = 'C:\CUDA\lib\x64' $cudaBin = 'C:\CUDA\bin' - if (-not (Test-Path $cudaInc)) { $cudaInc = 'C:\PROGRA~1\NVIDIA GPU Computing Toolkit\CUDA\v13.2\include' - $cudaLib = 'C:\PROGRA~1\NVIDIA GPU Computing Toolkit\CUDA\v13.2\lib\x64' $cudaBin = 'C:\PROGRA~1\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin' } if (Test-Path $cudaInc) { @@ -33,30 +30,21 @@ if (Test-Path $cudaBin) { } $cudaMingw = Join-Path $PSScriptRoot 'cuda-mingw' +$gpuDemodBuild = Join-Path $PSScriptRoot 'internal\demod\gpudemod\build' if (Test-Path $cudaMingw) { - # Use MinGW import libs to avoid MSVC .lib linking issues $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaMingw" -} elseif (Test-Path $cudaLib) { - # Fallback to CUDA lib path (requires compatible toolchain) - $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$cudaLib -lcufft -lcudart" } - -Write-Host "Building with SDRplay + cuFFT support..." -ForegroundColor Cyan -Write-Host "WARNING: this path still performs final Go linking through MinGW GCC." 
-ForegroundColor Yellow -Write-Host "If CUDA kernel artifacts are MSVC-built, final link may fail due to mixed toolchains." -ForegroundColor Yellow -Write-Host "Use build-cuda-windows.ps1 for CUDA artifact prep; use this script for the current MinGW-oriented app build path." -ForegroundColor Yellow - -$gccHost = Join-Path $gcc 'g++.exe' -if (!(Test-Path $gccHost)) { - throw "g++.exe not found at $gccHost" +if (Test-Path $gpuDemodBuild) { + $env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -L$gpuDemodBuild" } +$env:CGO_LDFLAGS = "$env:CGO_LDFLAGS -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++" -# Kernel build currently relies on nvcc + MSVC host compiler availability. +Write-Host 'Building with SDRplay + cuFFT support (MinGW-host CUDA path)...' -ForegroundColor Cyan +Write-Host 'Preparing GNU-compatible CUDA kernel artifacts...' -ForegroundColor Cyan powershell -ExecutionPolicy Bypass -File tools\build-gpudemod-kernel.ps1 -if ($LASTEXITCODE -ne 0) { throw "kernel build failed" } +if ($LASTEXITCODE -ne 0) { throw 'kernel build failed' } go build -tags "sdrplay,cufft" ./cmd/sdrd +if ($LASTEXITCODE -ne 0) { throw 'build failed' } -if ($LASTEXITCODE -ne 0) { throw "build failed" } - -Write-Host "Done." -ForegroundColor Green +Write-Host 'Done.' -ForegroundColor Green diff --git a/docs/build-cuda.md b/docs/build-cuda.md index 66e994a..21c53d4 100644 --- a/docs/build-cuda.md +++ b/docs/build-cuda.md @@ -1,47 +1,55 @@ # CUDA Build Strategy -## Problem statement +## Windows: MinGW-host NVCC path -The repository currently mixes two Windows toolchain worlds: +The recommended Windows CUDA build path for this repository is: -- Go/CGO final link often goes through MinGW GCC/LD -- CUDA kernel compilation via `nvcc` on Windows prefers MSVC (`cl.exe`) +1. Compile `internal/demod/gpudemod/kernels.cu` with `nvcc` using MinGW `g++` as the host compiler +2. Archive the result as `internal/demod/gpudemod/build/libgpudemod_kernels.a` +3. 
Build the Go app with MinGW GCC/G++ via CGO -This works for isolated package tests, but full application builds can fail when an MSVC-built CUDA library is linked by MinGW, producing unresolved symbols such as: +This keeps the CUDA demod kernel library in a GNU-compatible format so Go's MinGW CGO linker can consume it. +### Why + +The previous failing path mixed: +- `nvcc` + default MSVC host compiler (`cl.exe`) for CUDA kernels +- MinGW GCC/LD for the final Go/CGO link + +That produced unresolved MSVC runtime symbols such as: - `__GSHandlerCheck` - `__security_cookie` - `_Init_thread_epoch` -## Recommended split +### Current Windows build flow -### Windows +```powershell +powershell -ExecutionPolicy Bypass -File .\build-cuda-windows.ps1 +powershell -ExecutionPolicy Bypass -File .\build-sdrplay.ps1 +``` -Use an explicitly Windows-oriented build path: +### Critical details -1. Prepare CUDA kernel artifacts with `nvcc` -2. Keep the resulting CUDA linkage path clearly separated from MinGW-based fallback builds -3. Do not assume that a MinGW-linked Go binary can always consume MSVC-built CUDA archives +- CUDA kernel archive must be named `libgpudemod_kernels.a` +- `nvcc` must be invoked with `-ccbin C:\msys64\mingw64\bin\g++.exe` +- Windows CGO link uses: + - SDRplay API import lib + - MinGW CUDA import libs from `cuda-mingw/` + - `-lgpudemod_kernels` + - `-lcufft64_12` + - `-lcudart64_13` + - `-lstdc++` -### Linux +### Caveat -Prefer a GCC/NVCC-oriented build path: +`nvcc` + MinGW on Windows is not officially supported by NVIDIA. For the kernel launcher style used here (`extern "C"` functions, limited host C++ surface), it is the most practical path. -1. Build CUDA kernels with `nvcc` + GCC -2. Link through the normal Linux CGO flow -3. Avoid Windows-specific import-lib and MSVC runtime assumptions entirely +CUDA 13.x also drops older GPU targets such as `sm_50` and `sm_60`, so the kernel build script targets `sm_75+`. 
-## Repository design guidance +## Linux -- Keep `internal/demod/gpudemod/` platform-neutral at the Go API level -- Keep CUDA kernels in `kernels.cu` -- Use OS-specific build scripts for orchestration -- Avoid embedding Windows-only build assumptions into shared Go code when possible +Linux remains the cleanest end-to-end CUDA path: -## Current practical status - -- `go test ./...` passes -- `go test -tags cufft ./internal/demod/gpudemod` passes with NVCC/MSVC setup -- `build-sdrplay.ps1` has progressed past the original invalid `#cgo LDFLAGS` issue -- Remaining Windows blocker in the default path is a toolchain mismatch between MSVC-built CUDA artifacts and MinGW final linking -- Experimental full-MSVC CGO path (`build-windows-cuda-app.ps1`) also currently blocks because even `go build runtime/cgo` emits GCC-style flags (`-Wall`, `-Werror`, `-fno-stack-protector`) that `cl.exe` rejects in this environment; this is a toolchain/Go integration issue, not a project-specific one +1. Build CUDA kernels with `nvcc` + GCC +2. Link via standard CGO/GCC flow +3. 
Avoid Windows toolchain mismatch entirely diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go index f42cd2b..3559971 100644 --- a/internal/demod/gpudemod/gpudemod.go +++ b/internal/demod/gpudemod/gpudemod.go @@ -3,7 +3,7 @@ package gpudemod /* -#cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -L${SRCDIR}/build -lgpudemod_kernels -lcufft64_12 -lcudart64_13 +#cgo windows LDFLAGS: -L${SRCDIR}/../../../cuda-mingw -L${SRCDIR}/build -lgpudemod_kernels -lcufft64_12 -lcudart64_13 -lstdc++ #cgo windows CFLAGS: -I"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.2/include" #include #include @@ -182,12 +182,8 @@ func (e *Engine) SetFIR(taps []float32) { } func phaseStatus() string { return "phase1c-validated-shift" } -func (e *Engine) LastShiftUsedGPU() bool { - return e != nil && e.lastShiftUsedGPU -} -func (e *Engine) LastDemodUsedGPU() bool { - return e != nil && e.lastDemodUsedGPU -} +func (e *Engine) LastShiftUsedGPU() bool { return e != nil && e.lastShiftUsedGPU } +func (e *Engine) LastDemodUsedGPU() bool { return e != nil && e.lastDemodUsedGPU } func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) { if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil { diff --git a/tools/build-gpudemod-kernel.ps1 b/tools/build-gpudemod-kernel.ps1 index f09276f..dd0a329 100644 --- a/tools/build-gpudemod-kernel.ps1 +++ b/tools/build-gpudemod-kernel.ps1 @@ -1,63 +1,53 @@ -param( - [string]$CudaRoot = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2', - [string]$Source = 'internal/demod/gpudemod/kernels.cu', - [string]$OutDir = 'internal/demod/gpudemod/build' -) - $ErrorActionPreference = 'Stop' -$repo = Split-Path -Parent $PSScriptRoot -Set-Location $repo -$nvcc = Join-Path $CudaRoot 'bin\nvcc.exe' -if (!(Test-Path $nvcc)) { - throw "nvcc not found at $nvcc" +$nvcc = (Get-Command nvcc -ErrorAction SilentlyContinue).Path +if (-not $nvcc) { + $nvcc = 'C:\Program 
Files\NVIDIA GPU Computing Toolkit\CUDA\v13.2\bin\nvcc.exe' } - -New-Item -ItemType Directory -Force -Path $OutDir | Out-Null -$outObj = Join-Path $OutDir 'kernels.obj' -$outLib = Join-Path $OutDir 'gpudemod_kernels.lib' -if (Test-Path $outObj) { Remove-Item $outObj -Force } -if (Test-Path $outLib) { Remove-Item $outLib -Force } - -Write-Host "Using nvcc: $nvcc" -Write-Host "Building $Source -> $outObj" - -$nvccArgs = @('-c', $Source, '-o', $outObj, '-I', (Join-Path $CudaRoot 'include')) -if ($HostCompiler) { - Write-Host "Using host compiler: $HostCompiler" - $hostDir = Split-Path -Parent $HostCompiler - $nvccArgs += @('-ccbin', $hostDir) -} else { - $nvccArgs += @('-Xcompiler', '/EHsc') +if (-not (Test-Path $nvcc)) { + Write-Host 'nvcc not found — skipping kernel build' -ForegroundColor Yellow + exit 0 } -& $nvcc @nvccArgs -if ($LASTEXITCODE -ne 0) { - throw "nvcc failed with exit code $LASTEXITCODE" +$mingwRoot = 'C:\msys64\mingw64\bin' +$mingwGpp = Join-Path $mingwRoot 'g++.exe' +$ar = Join-Path $mingwRoot 'ar.exe' +if (-not (Test-Path $mingwGpp)) { + throw 'MinGW g++ not found' } - -if ($HostCompiler) { - $ar = Get-Command ar.exe -ErrorAction SilentlyContinue - if (-not $ar) { - throw "ar.exe not found in PATH; required for MinGW-compatible archive" - } - Write-Host "Archiving $outObj -> $outLib with ar.exe" - if (Test-Path $outLib) { Remove-Item $outLib -Force } - & $ar 'rcs' $outLib $outObj - if ($LASTEXITCODE -ne 0) { - throw "ar.exe failed with exit code $LASTEXITCODE" - } -} else { - $libexe = Get-Command lib.exe -ErrorAction SilentlyContinue - if (-not $libexe) { - throw "lib.exe not found in PATH; run from vcvars64.bat environment" - } - Write-Host "Archiving $outObj -> $outLib with lib.exe" - & $libexe /nologo /OUT:$outLib $outObj - if ($LASTEXITCODE -ne 0) { - throw "lib.exe failed with exit code $LASTEXITCODE" - } +if (-not (Test-Path $ar)) { + throw 'MinGW ar not found' } -Write-Host "Built: $outObj" -Write-Host "Archived: $outLib" +$kernelSrc = 
Join-Path $PSScriptRoot '..\internal\demod\gpudemod\kernels.cu' +$buildDir = Join-Path $PSScriptRoot '..\internal\demod\gpudemod\build' +if (-not (Test-Path $buildDir)) { New-Item -ItemType Directory -Path $buildDir | Out-Null } + +$objFile = Join-Path $buildDir 'kernels.o' +$libFile = Join-Path $buildDir 'libgpudemod_kernels.a' +$legacyLib = Join-Path $buildDir 'gpudemod_kernels.lib' + +if (Test-Path $objFile) { Remove-Item $objFile -Force } +if (Test-Path $libFile) { Remove-Item $libFile -Force } +if (Test-Path $legacyLib) { Remove-Item $legacyLib -Force } + +Write-Host 'Compiling CUDA kernels with MinGW host...' -ForegroundColor Cyan +& $nvcc -ccbin $mingwGpp -c $kernelSrc -o $objFile ` + --compiler-options=-fno-exceptions ` + -gencode arch=compute_75,code=sm_75 ` + -gencode arch=compute_80,code=sm_80 ` + -gencode arch=compute_86,code=sm_86 ` + -gencode arch=compute_87,code=sm_87 ` + -gencode arch=compute_89,code=sm_89 ` + -gencode arch=compute_90,code=sm_90 + +if ($LASTEXITCODE -ne 0) { throw 'nvcc compilation failed' } + +Write-Host 'Archiving GNU-compatible CUDA kernel library...' -ForegroundColor Cyan +& $ar rcs $libFile $objFile +if ($LASTEXITCODE -ne 0) { throw 'ar archive failed' } + +Write-Host "Kernel object: $objFile" -ForegroundColor Green +Write-Host "Kernel library: $libFile" -ForegroundColor Green