From e87633f8a06fd6498fffc3f2dc939ac2f20c443d Mon Sep 17 00:00:00 2001
From: Jan Svabenik <jan@svabi.ch>
Date: Mon, 23 Mar 2026 16:02:29 +0100
Subject: [PATCH] Improve GPU phase precision

---
 internal/demod/gpudemod/kernels.cu        | 4 ++++
 internal/demod/gpudemod/native/exports.cu | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/internal/demod/gpudemod/kernels.cu b/internal/demod/gpudemod/kernels.cu
index f308b8d..8250792 100644
--- a/internal/demod/gpudemod/kernels.cu
+++ b/internal/demod/gpudemod/kernels.cu
@@ -12,6 +12,8 @@ extern "C" __global__ void gpud_freq_shift_kernel(
     if (idx >= n) return;
 
     double phase = phase_start + phase_inc * (double)idx;
+    const double TWO_PI = 6.283185307179586;
+    phase = phase - rint(phase / TWO_PI) * TWO_PI;
     float si, co;
     sincosf((float)phase, &si, &co);
 
@@ -161,6 +163,8 @@ extern "C" __global__ void gpud_ssb_product_kernel(
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
     if (idx >= n) return;
     double phase = phase_start + phase_inc * (double)idx;
+    const double TWO_PI = 6.283185307179586;
+    phase = phase - rint(phase / TWO_PI) * TWO_PI;
     float si, co;
     sincosf((float)phase, &si, &co);
     float2 v = in[idx];
diff --git a/internal/demod/gpudemod/native/exports.cu b/internal/demod/gpudemod/native/exports.cu
index 97118c8..6081b57 100644
--- a/internal/demod/gpudemod/native/exports.cu
+++ b/internal/demod/gpudemod/native/exports.cu
@@ -41,6 +41,12 @@ __global__ void gpud_freq_shift_kernel(
     if (idx >= n) return;
 
     double phase = phase_start + phase_inc * (double)idx;
+    // Reduce phase to [-pi, pi] in double BEFORE the float cast to preserve
+    // precision. Without this, phase accumulates to millions of radians and
+    // the (float) cast loses ~0.03-0.1 rad, causing audible clicks at
+    // frame boundaries in streaming audio.
+    const double TWO_PI = 6.283185307179586;
+    phase = phase - rint(phase / TWO_PI) * TWO_PI;
     float si, co;
     sincosf((float)phase, &si, &co);
 
@@ -293,6 +299,8 @@ __global__ void gpud_ssb_product_kernel(
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
     if (idx >= n) return;
     double phase = phase_start + phase_inc * (double)idx;
+    const double TWO_PI = 6.283185307179586;
+    phase = phase - rint(phase / TWO_PI) * TWO_PI;
     float si, co;
     sincosf((float)phase, &si, &co);
     float2 v = in[idx];