diff --git a/internal/demod/gpudemod/build/kernels.obj b/internal/demod/gpudemod/build/kernels.obj
index 2037977..ecf18a0 100644
Binary files a/internal/demod/gpudemod/build/kernels.obj and b/internal/demod/gpudemod/build/kernels.obj differ
diff --git a/internal/demod/gpudemod/gpudemod.go b/internal/demod/gpudemod/gpudemod.go
index 0664ab5..3687fb5 100644
--- a/internal/demod/gpudemod/gpudemod.go
+++ b/internal/demod/gpudemod/gpudemod.go
@@ -30,11 +30,10 @@ static int gpud_device_sync() {
 	return (int)cudaDeviceSynchronize();
 }
 
+extern int gpud_launch_freq_shift_cuda(const gpud_float2* in, gpud_float2* out, int n, double phase_inc, double phase_start);
+
 static int gpud_launch_freq_shift(gpud_float2 *in, gpud_float2 *out, int n, double phase_inc, double phase_start) {
-	// TODO(phase2): replace with real CUDA kernel launch.
-	// Phase 1b keeps the launch boundary in place without pretending acceleration.
-	(void)in; (void)out; (void)n; (void)phase_inc; (void)phase_start;
-	return -1;
+	return gpud_launch_freq_shift_cuda(in, out, n, phase_inc, phase_start);
 }
 
 static int gpud_launch_fm_discrim(gpud_float2 *in, float *out, int n) {
@@ -138,22 +137,27 @@ func phaseStatus() string {
 	return "phase1b-launch-boundary"
 }
 
-func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) bool {
+func (e *Engine) tryCUDAFreqShift(iq []complex64, offsetHz float64) ([]complex64, bool) {
 	if e == nil || !e.cudaReady || len(iq) == 0 || e.dIQIn == nil || e.dShifted == nil {
-		return false
+		return nil, false
 	}
-	if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), C.size_t(len(iq))*C.size_t(unsafe.Sizeof(complex64(0)))) != C.cudaSuccess {
-		return false
+	bytes := C.size_t(len(iq)) * C.size_t(unsafe.Sizeof(complex64(0)))
+	if C.gpud_memcpy_h2d(unsafe.Pointer(e.dIQIn), unsafe.Pointer(&iq[0]), bytes) != C.cudaSuccess {
+		return nil, false
 	}
 	phaseInc := -2.0 * math.Pi * offsetHz / float64(e.sampleRate)
 	if C.gpud_launch_freq_shift(e.dIQIn, e.dShifted, C.int(len(iq)), C.double(phaseInc), C.double(e.phase)) != 0 {
-		return false
+		return nil, false
 	}
 	if C.gpud_device_sync() != C.cudaSuccess {
-		return false
+		return nil, false
+	}
+	out := make([]complex64, len(iq))
+	if C.gpud_memcpy_d2h(unsafe.Pointer(&out[0]), unsafe.Pointer(e.dShifted), bytes) != C.cudaSuccess {
+		return nil, false
 	}
 	e.phase += phaseInc * float64(len(iq))
-	return true
+	return out, true
 }
 
 func (e *Engine) Demod(iq []complex64, offsetHz float64, bw float64, mode DemodType) ([]float32, int, error) {
diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu
index eeef3ce..2625a77 100644
--- a/internal/demod/gpudemod/kernels.cu
+++ b/internal/demod/gpudemod/kernels.cu
@@ -19,3 +19,17 @@ extern "C" __global__ void gpud_freq_shift_kernel(
     out[idx].x = v.x * co - v.y * si;
     out[idx].y = v.x * si + v.y * co;
 }
+
+extern "C" int gpud_launch_freq_shift_cuda(
+    const float2* in,
+    float2* out,
+    int n,
+    double phase_inc,
+    double phase_start
+) {
+    if (n <= 0) return 0;
+    const int block = 256;
+    const int grid = (n + block - 1) / block;
+    gpud_freq_shift_kernel<<<grid, block>>>(in, out, n, phase_inc, phase_start);
+    return (int)cudaGetLastError();
+}
diff --git a/internal/demod/gpudemod/validation.go b/internal/demod/gpudemod/validation.go
new file mode 100644
index 0000000..841d029
--- /dev/null
+++ b/internal/demod/gpudemod/validation.go
@@ -0,0 +1,25 @@
+//go:build cufft
+
+package gpudemod
+
+import (
+	"math/cmplx"
+
+	"sdr-visual-suite/internal/dsp"
+)
+
+// ValidateFreqShift compares a candidate shifted IQ stream against the CPU DSP
+// reference. This is intended for bring-up while the first real CUDA launch path
+// is being wired in.
+func ValidateFreqShift(iq []complex64, sampleRate int, offsetHz float64, shifted []complex64, tol float64) bool {
+	if len(iq) != len(shifted) {
+		return false
+	}
+	ref := dsp.FreqShift(iq, sampleRate, offsetHz)
+	for i := range ref {
+		if cmplx.Abs(complex128(ref[i]-shifted[i])) > tol {
+			return false
+		}
+	}
+	return true
+}