Переглянути джерело

feat(watermark): implement STFT-based embedding and recovery

Add an STFT watermark path inspired by Kirovski & Malvar, including the frequency-domain embedder/decoder, FFT support, and round-trip coverage. Wire the generator and CLI tools to use the new analysis/synthesis flow for watermark experiments on the watermark-rework branch.
main
Jan 1 місяць тому
джерело
коміт
0b76c7cdc2
7 змінених файлів з 904 додано та 292 видалено
  1. +2
    -1
      cmd/fmrtx/main.go
  2. +181
    -235
      cmd/wmdecode/main.go
  3. +65
    -0
      internal/dsp/fft.go
  4. +69
    -55
      internal/offline/generator.go
  5. +163
    -0
      internal/watermark/stft_roundtrip_test.go
  6. +413
    -0
      internal/watermark/stft_watermark.go
  7. +11
    -1
      internal/watermark/watermark.go

+ 2
- 1
cmd/fmrtx/main.go Переглянути файл

@@ -186,7 +186,8 @@ func runTXMode(cfg cfgpkg.Config, configPath string, driver platform.SoapyDriver
log.Printf("license: no valid key — evaluation jingle every %d minutes", license.JingleIntervalMinutes)
}
engine.SetLicenseState(licState, licenseKey)
log.Printf("watermark: embedding key fingerprint — Level=%.3f ChipRate=%d", watermark.Level, watermark.ChipRate)
log.Printf("watermark: STFT-domain embedding — WMRate=%d FFTSize=%d Level=%.1fdB",
watermark.WMRate, watermark.FFTSize, watermark.WMLevelDB)
cfg = applyLegacyAudioFlags(cfg, audioStdin, audioRate, audioHTTP)

var streamSrc *audio.StreamSource


+ 181
- 235
cmd/wmdecode/main.go Переглянути файл

@@ -1,16 +1,23 @@
// cmd/wmdecode — fm-rds-tx spread-spectrum watermark recovery tool.
// cmd/wmdecode — STFT-domain spread-spectrum watermark decoder.
//
// Approach: downsample to chip rate (12 kHz), correlate at 1 sample/chip.
// No fractional stepping, no clock drift issues. FFT-free phase search.
// Decodes watermark from FM broadcast recordings following
// Kirovski & Malvar (IEEE TSP 2003) architecture.
//
// Usage:
//
// wmdecode <file.wav> [key ...]
package main

import (
"encoding/binary"
"fmt"
"math"
"math/cmplx"
"os"
"sort"
"time"

"github.com/jan/fm-rds-tx/internal/dsp"
"github.com/jan/fm-rds-tx/internal/watermark"
)

@@ -20,6 +27,8 @@ func main() {
os.Exit(1)
}

t0 := time.Now()

samples, recRate, err := readMonoWAV(os.Args[1])
if err != nil {
fmt.Fprintf(os.Stderr, "read WAV: %v\n", err)
@@ -29,295 +38,235 @@ func main() {
fmt.Printf("WAV: %d samples @ %.0f Hz = %.2fs, RMS %.1f dBFS\n",
len(samples), recRate, float64(len(samples))/recRate, 20*math.Log10(rms+1e-9))

chipRate := float64(watermark.ChipRate) // 12000
pnChips := watermark.PnChips // 2048

// Step 1: LPF at ChipRate/2 then downsample to ChipRate.
// At chip rate: 1 sample = 1 chip. No fractional stepping.
decimFactor := int(recRate / chipRate) // 192000/12000 = 16
// Step 1: Decimate to WMRate (12 kHz)
wmRate := float64(watermark.WMRate)
decimFactor := int(recRate / wmRate)
if decimFactor < 1 {
decimFactor = 1
}
actualChipRate := recRate / float64(decimFactor) // should be exactly chipRate

fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualChipRate)
actualRate := recRate / float64(decimFactor)
fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualRate)

// Anti-alias LPF (8th-order IIR at 5.5 kHz)
lpfCoeffs := designLPF8(5500, recRate)
filtered := applyIIR(samples, lpfCoeffs)

// Decimate
nDown := len(filtered) / decimFactor
down := make([]float64, nDown)
for i := 0; i < nDown; i++ {
down[i] = filtered[i*decimFactor]
}
rmsDown := rmsLevel(down)
fmt.Printf("Downsampled: %d samples @ %.0f Hz, RMS %.1f dBFS\n",
nDown, actualChipRate, 20*math.Log10(rmsDown+1e-9))

// Step 2: Phase search — slide 1-bit PN template across [0, pnChips).
// At chip rate this is a simple 2048-element dot product per offset.
// Test all 2048 phases, accumulate energy over many bits.
fmt.Printf("Phase search: %d candidates\n", pnChips)

nSearchBits := nDown / pnChips
if nSearchBits > 500 {
nSearchBits = 500
fmt.Printf("Downsampled: %d samples, %.1fs\n", nDown, float64(nDown)/wmRate)

// Step 2: Compute ALL STFT frames with cepstrum filtering
fftSize := watermark.FFTSize
hop := watermark.FFTHop
nFrames := (nDown - fftSize) / hop
if nFrames <= 0 {
fmt.Fprintln(os.Stderr, "Recording too short")
os.Exit(1)
}
bestPhase := 0
bestEnergy := 0.0
for phase := 0; phase < pnChips; phase++ {
var energy float64
for b := 0; b < nSearchBits; b++ {
start := phase + b*pnChips
if start+pnChips > nDown {
break
}
c := corrChipRate(down, start, pnChips)
energy += c * c

var window [watermark.FFTSize]float64
dsp.HannWindow(window[:])
fmt.Printf("STFT: %d frames (%d-point, hop=%d)\n", nFrames, fftSize, hop)

type stftMag [watermark.FFTSize / 2]float64
frameMags := make([]stftMag, nFrames)
for f := 0; f < nFrames; f++ {
offset := f * hop
var buf [watermark.FFTSize]complex128
for i := 0; i < fftSize; i++ {
buf[i] = complex(down[offset+i]*window[i], 0)
}
if energy > bestEnergy {
bestEnergy = energy
bestPhase = phase
dsp.FFT(buf[:])
for bin := 0; bin < fftSize/2; bin++ {
mag := cmplx.Abs(buf[bin])
if mag < 1e-12 {
mag = 1e-12
}
frameMags[f][bin] = 20 * math.Log10(mag)
}
cepstrumFilter(frameMags[f][:], 8)
}
fmt.Printf("Phase: offset=%d (%.2fms), energy=%.0f\n",
bestPhase, float64(bestPhase)/actualChipRate*1000, bestEnergy)

// Step 3: Per-bit correlation with ±4 sample sliding (handles residual drift).
// At chip rate, ±4 samples = ±0.33ms — covers ~±40 ppm over 22s frame.
nCompleteBits := (nDown - bestPhase) / pnChips
nFrames := nCompleteBits / watermark.PayloadBits
if nFrames < 1 {
nFrames = 1

// Step 3: For each key, search cycle offset + rep offset
keys := os.Args[2:]
if len(keys) == 0 {
fmt.Println("No keys supplied.")
os.Exit(1)
}
fmt.Printf("Sync: %d complete bits, %d frames\n", nCompleteBits, nFrames)

const slideWindow = 200 // ±200 chips — handles phase errors + drift

corrs := make([]float64, watermark.PayloadBits)
for i := 0; i < watermark.PayloadBits; i++ {
// For each offset, sum correlation across ALL frames first.
// Signal adds coherently (×nFrames), noise adds as √nFrames.
// Then pick the offset with maximum |sum|.
bestAbs := 0.0
bestVal := 0.0
for off := -slideWindow; off <= slideWindow; off++ {
var sum float64
for f := 0; f < nFrames; f++ {
nominal := bestPhase + (f*watermark.PayloadBits+i)*pnChips + off
if nominal < 0 || nominal+pnChips > nDown {
continue

for _, key := range keys {
fmt.Printf("\nKey: %q\n", key)
det := watermark.NewSTFTDetector(key)

totalGroups := watermark.TotalGroups
timeRep := watermark.TimeRep
framesPerWM := watermark.FramesPerWM
numBins := watermark.NumBins
binLow := watermark.BinLow
centerRep := timeRep / 2

bestMetric := -1.0
var bestCorrs [watermark.PayloadBits]float64
bestCycleOff := 0
bestRepOff := 0

nCandidates := 0
for cycleOff := 0; cycleOff < framesPerWM; cycleOff += timeRep {
for repOff := 0; repOff < timeRep; repOff++ {
var testCorrs [watermark.PayloadBits]float64

for f := 0; f < nFrames; f++ {
wmFrame := ((f - cycleOff - repOff) % framesPerWM + framesPerWM) % framesPerWM
if wmFrame%timeRep != centerRep {
continue
}
g := wmFrame / timeRep
if g >= totalGroups {
continue
}

var corr float64
for b := 0; b < numBins; b++ {
corr += frameMags[f][binLow+b] * float64(det.PNChipAt(g, b))
}
testCorrs[det.GroupBit(g)] += corr
}
sum += corrChipRate(down, nominal, pnChips)
}
if math.Abs(sum) > bestAbs {
bestAbs = math.Abs(sum)
bestVal = sum

var metric float64
for _, c := range testCorrs {
metric += c * c
}

if metric > bestMetric {
bestMetric = metric
bestCorrs = testCorrs
bestCycleOff = cycleOff
bestRepOff = repOff
}
nCandidates++
}
}
corrs[i] = bestVal
}

// Diagnostics
var corrMin, corrMax, sumAbs float64
var nStrong, nDead int
for i, c := range corrs {
ac := math.Abs(c)
sumAbs += ac
if i == 0 || ac < corrMin {
corrMin = ac
}
if ac > corrMax {
corrMax = ac
}
if ac > sumAbs/float64(i+1)*2 {
nStrong++
}
if ac < 3 {
nDead++
}
}
avgCorr := sumAbs / 128
nStrong = 0
for _, c := range corrs {
if math.Abs(c) > avgCorr*0.5 {
nStrong++
}
}
fmt.Printf("Corrs: min|c|=%.1f, max|c|=%.1f, avg|c|=%.1f (strong=%d, dead=%d)\n",
corrMin, corrMax, avgCorr, nStrong, nDead)
fmt.Printf("Searched %d candidates in %v\n", nCandidates, time.Since(t0).Round(time.Millisecond))
fmt.Printf("Best: cycleOff=%d, repOff=%d, metric=%.0f\n", bestCycleOff, bestRepOff, bestMetric)

// Step 4: Frame sync — 128 rotations × byte-level erasure + bit-flipping.
var sumAbs float64
for _, c := range bestCorrs {
sumAbs += math.Abs(c)
}
fmt.Printf("Corrs: avg|c|=%.1f\n", sumAbs/128)

// Verbose: compute BER at each rotation against the known key (if supplied)
knownPayload := [watermark.RsDataBytes]byte{}
hasKnown := false
if len(os.Args) >= 3 {
hasKnown = true
knownPayload = watermark.KeyToPayload(os.Args[2])
// BER diagnostic against known key
knownPayload := watermark.KeyToPayload(key)
knownCW := watermark.RSEncode(knownPayload)
var knownBits [watermark.PayloadBits]int
for i := 0; i < watermark.PayloadBits; i++ {
knownBits[i] = int((knownCW[i/8] >> uint(7-(i%8))) & 1)
}
fmt.Println("\nRotation sweep (top 10 by BER):")
type rotBER struct{ rot, ber int }
var results []rotBER
for rot := 0; rot < watermark.PayloadBits; rot++ {
nerr := 0
for i := 0; i < watermark.PayloadBits; i++ {
srcBit := (i + rot) % watermark.PayloadBits
hard := 0
if corrs[srcBit] < 0 {
hard = 1
}
if hard != knownBits[i] {
nerr++
}
nerr := 0
for i := 0; i < watermark.PayloadBits; i++ {
hard := 0
if bestCorrs[i] < 0 {
hard = 1
}
results = append(results, rotBER{rot, nerr})
}
sort.Slice(results, func(a, b int) bool { return results[a].ber < results[b].ber })
for j := 0; j < 10 && j < len(results); j++ {
r := results[j]
fmt.Printf(" rot=%3d: BER=%d/128 (%4.1f%%)\n", r.rot, r.ber, 100*float64(r.ber)/128)
}

// Show byte error pattern at best rotation
bestRot := results[0].rot
fmt.Printf("\nByte errors at rot=%d:\n ", bestRot)
for b := 0; b < watermark.RsTotalBytes; b++ {
nerr := 0
for bit := 0; bit < 8; bit++ {
srcBit := (b*8 + bit + bestRot) % watermark.PayloadBits
hard := 0
if corrs[srcBit] < 0 {
hard = 1
}
if hard != knownBits[b*8+bit] {
nerr++
}
if hard != knownBits[i] {
nerr++
}
fmt.Printf("B%d:%d ", b, nerr)
}
fmt.Println()
fmt.Printf("BER: %d/128 (%.1f%%)\n", nerr, 100*float64(nerr)/128)

// Show received vs expected codeword at best rotation
// Show recv vs expected
var recv [watermark.RsTotalBytes]byte
confs := make([]float64, watermark.PayloadBits)
for i := 0; i < watermark.PayloadBits; i++ {
srcBit := (i + bestRot) % watermark.PayloadBits
if corrs[srcBit] < 0 {
confs[i] = math.Abs(bestCorrs[i])
if bestCorrs[i] < 0 {
recv[i/8] |= 1 << uint(7-(i%8))
}
}
fmt.Printf(" recv: %x\n", recv)
fmt.Printf(" want: %x\n", knownCW)
}
_ = hasKnown
_ = knownPayload
type decodeResult struct {
rotation int
payload [watermark.RsDataBytes]byte
flips int
}
var best *decodeResult
fmt.Printf("recv: %x\nwant: %x\n", recv, knownCW)

for rot := 0; rot < watermark.PayloadBits; rot++ {
var recv [watermark.RsTotalBytes]byte
confs := make([]float64, watermark.PayloadBits)
for i := 0; i < watermark.PayloadBits; i++ {
srcBit := (i + rot) % watermark.PayloadBits
c := corrs[srcBit]
confs[i] = math.Abs(c)
if c < 0 {
recv[i/8] |= 1 << uint(7-(i%8))
// Confidence-based erasure (MIN bit confidence per byte)
type bc struct{ idx int; conf float64 }
byteConfs := make([]bc, watermark.RsTotalBytes)
for b := 0; b < watermark.RsTotalBytes; b++ {
minC := confs[b*8]
for bit := 1; bit < 8; bit++ {
if confs[b*8+bit] < minC {
minC = confs[b*8+bit]
}
}
byteConfs[b] = bc{b, minC}
}
sort.Slice(byteConfs, func(a, b int) bool { return byteConfs[a].conf < byteConfs[b].conf })

// Brute-force RS decode: try ALL possible erasure subsets of size 1..8.
// With sliding correlation, confidence values are unreliable for erasure
// selection (all bits look "strong"). Instead, let RS tell us which
// subsets produce a valid codeword. This is fast: sum(C(16,k), k=1..8)
// = ~39k RS decodes per rotation, ~5M total. Each takes <1µs.
decoded := false
for nErase := 1; nErase <= watermark.RsCheckBytes; nErase++ {
if decoded { break }
indices := make([]int, nErase)
for i := range indices { indices[i] = i }
for {
erasePos := make([]int, nErase)
copy(erasePos, indices)
payload, ok := watermark.RSDecode(recv, erasePos)
if ok {
if best == nil {
best = &decodeResult{rot, payload, nErase}
}
for nErase := 0; nErase <= watermark.RsCheckBytes; nErase++ {
if nErase == 0 {
p, ok := watermark.RSDecode(recv, nil)
if ok && watermark.KeyMatchesPayload(key, p) {
fmt.Printf(" ✓ MATCH (0 erasures), payload=%x\n", p)
decoded = true
break
}
// Next combination
i := nErase - 1
for i >= 0 && indices[i] == watermark.RsTotalBytes-nErase+i {
i--
}
if i < 0 { break }
indices[i]++
for j := i + 1; j < nErase; j++ {
indices[j] = indices[j-1] + 1
}
continue
}
erasePos := make([]int, nErase)
for i := 0; i < nErase; i++ {
erasePos[i] = byteConfs[i].idx
}
sort.Ints(erasePos)
p, ok := watermark.RSDecode(recv, erasePos)
if ok && watermark.KeyMatchesPayload(key, p) {
fmt.Printf(" ✓ MATCH (%d erasures), payload=%x\n", nErase, p)
decoded = true
break
}
}
if decoded && best != nil && best.flips <= 4 {
break // clean decode with few erasures — stop early
}
}

if best == nil {
fmt.Println("RS decode: FAILED — no valid frame alignment found.")
fmt.Println("Watermark may not be present, or recording is too noisy/short.")
os.Exit(1)
if !decoded {
fmt.Println(" ✗ NOT FOUND")
}
}

fmt.Printf("\nFrame sync: rotation=%d, %d byte erasures\n", best.rotation, best.flips)
fmt.Printf("Payload: %x\n\n", best.payload)
fmt.Printf("\nDone in %v\n", time.Since(t0).Round(time.Millisecond))
}

keys := os.Args[2:]
if len(keys) == 0 {
fmt.Println("No keys supplied — payload shown above.")
func cepstrumFilter(magDB []float64, nCeps int) {
n := len(magDB)
if n < nCeps*2 {
return
}
fmt.Println("Key check:")
matched := false
for _, key := range keys {
if watermark.KeyMatchesPayload(key, best.payload) {
fmt.Printf(" ✓ MATCH: %q\n", key)
matched = true
} else {
fmt.Printf(" ✗ : %q\n", key)
ceps := make([]float64, n)
for k := 0; k < n; k++ {
var sum float64
for i := 0; i < n; i++ {
sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
}
ceps[k] = sum
}
if !matched {
fmt.Println("\nNo key matched.")
for k := 0; k < nCeps; k++ {
ceps[k] = 0
}
}

// corrChipRate correlates at chip rate (1 sample = 1 chip).
func corrChipRate(down []float64, start, pnChips int) float64 {
var acc float64
for i := 0; i < pnChips; i++ {
acc += down[start+i] * float64(watermark.PNSequence[i])
for i := 0; i < n; i++ {
var sum float64
for k := 0; k < n; k++ {
w := 1.0
if k == 0 {
w = 0.5
}
sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
}
magDB[i] = sum * 2.0 / float64(n)
}
return acc
}

// --- 8th-order Butterworth LPF (4 cascaded biquads) ---
type biquad struct{ b0, b1, b2, a1, a2 float64 }
type iirCoeffs []biquad

func designLPF8(cutoffHz, sampleRate float64) iirCoeffs {
// 8th-order Butterworth = 4 biquad sections
angles := []float64{math.Pi / 16, 3 * math.Pi / 16, 5 * math.Pi / 16, 7 * math.Pi / 16}
coeffs := make(iirCoeffs, 4)
for i, angle := range angles {
@@ -328,11 +277,8 @@ func designLPF8(cutoffHz, sampleRate float64) iirCoeffs {
alpha := sinW / (2 * q)
a0 := 1 + alpha
coeffs[i] = biquad{
b0: (1 - cosW) / 2 / a0,
b1: (1 - cosW) / a0,
b2: (1 - cosW) / 2 / a0,
a1: (-2 * cosW) / a0,
a2: (1 - alpha) / a0,
b0: (1 - cosW) / 2 / a0, b1: (1 - cosW) / a0, b2: (1 - cosW) / 2 / a0,
a1: (-2 * cosW) / a0, a2: (1 - alpha) / a0,
}
}
return coeffs
@@ -396,7 +342,7 @@ func readMonoWAV(path string) ([]float64, float64, error) {
}
}
if dataStart == 0 || bitsPerSample != 16 || channels == 0 {
return nil, 0, fmt.Errorf("unsupported WAV (need 16-bit PCM, got bits=%d ch=%d)", bitsPerSample, channels)
return nil, 0, fmt.Errorf("unsupported WAV")
}
if dataStart+dataLen > len(data) {
dataLen = len(data) - dataStart


+ 65
- 0
internal/dsp/fft.go Переглянути файл

@@ -0,0 +1,65 @@
package dsp

import (
"math"
"math/cmplx"
)

// FFT computes the discrete Fourier transform of x (in-place, radix-2).
// len(x) must be a power of 2.
func FFT(x []complex128) {
n := len(x)
if n <= 1 {
return
}
// Bit-reversal permutation
j := 0
for i := 1; i < n; i++ {
bit := n >> 1
for j&bit != 0 {
j ^= bit
bit >>= 1
}
j ^= bit
if i < j {
x[i], x[j] = x[j], x[i]
}
}
// Cooley-Tukey butterfly
for size := 2; size <= n; size <<= 1 {
half := size >> 1
wn := cmplx.Exp(complex(0, -2*math.Pi/float64(size)))
for start := 0; start < n; start += size {
w := complex(1, 0)
for k := 0; k < half; k++ {
u := x[start+k]
v := x[start+k+half] * w
x[start+k] = u + v
x[start+k+half] = u - v
w *= wn
}
}
}
}

// IFFT computes the inverse DFT of x (in-place).
func IFFT(x []complex128) {
n := len(x)
// Conjugate, FFT, conjugate, scale
for i := range x {
x[i] = cmplx.Conj(x[i])
}
FFT(x)
scale := 1.0 / float64(n)
for i := range x {
x[i] = cmplx.Conj(x[i]) * complex(scale, 0)
}
}

// HannWindow fills w with a Hann window of length n.
func HannWindow(w []float64) {
n := len(w)
for i := range w {
w[i] = 0.5 * (1.0 - math.Cos(2*math.Pi*float64(i)/float64(n)))
}
}

+ 69
- 55
internal/offline/generator.go Переглянути файл

@@ -5,7 +5,6 @@ import (
"encoding/binary"
"fmt"
"log"
"math"
"path/filepath"
"sync/atomic"
"time"
@@ -133,24 +132,20 @@ type Generator struct {
licenseState *license.State
jingleFrames []license.JingleFrame

// Watermark: spread-spectrum key fingerprint, always active.
watermark *watermark.Embedder
wmShapeLPF *dsp.FilterChain // pulse-shaping: confines PN energy to 0-6kHz
// Watermark: STFT-domain spread-spectrum (Kirovski & Malvar 2003).
stftEmbedder *watermark.STFTEmbedder
wmDecimLPF *dsp.FilterChain // anti-alias LPF for 228k→12k decimation
}

func NewGenerator(cfg cfgpkg.Config) *Generator {
return &Generator{cfg: cfg}
}

// SetLicense configures license state (jingle) and creates the watermark
// SetLicense configures license state (jingle) and creates the STFT watermark
// embedder. Must be called before the first GenerateFrame.
func (g *Generator) SetLicense(state *license.State, key string) {
g.licenseState = state
g.watermark = watermark.NewEmbedder(key)
// Gate threshold: -40 dBFS ≈ 0.01 linear amplitude.
// Watermark is muted during silence to prevent audibility.
// Composite rate will be set in init(); use 228000 as default.
g.watermark.EnableGate(0.001, 228000)
g.stftEmbedder = watermark.NewSTFTEmbedder(key)
}

// SetExternalSource sets a live audio source (e.g. StreamResampler) that
@@ -285,17 +280,10 @@ func (g *Generator) init() {
}
}

// Update watermark gate ramp rate with actual composite rate (may differ
// from the 228000 default used in SetLicense).
if g.watermark != nil {
g.watermark.EnableGate(0.001, g.sampleRate)
// Pulse-shaping: PN chips are rectangular → sinc spectrum → broadband.
// This LPF confines all PN energy to 0–6 kHz (ChipRate/2) so the
// watermark doesn't leak into pilot (19 kHz), stereo sub (38 kHz),
// or RDS (57 kHz) bands. Also prevents audible wideband noise.
// 4th-order Butterworth: -24 dB/octave rolloff. At 12 kHz: -24 dB,
// at 19 kHz: -38 dB, at 38 kHz: -62 dB. Clean enough.
g.wmShapeLPF = dsp.NewLPF4(float64(watermark.ChipRate)/2, g.sampleRate)
// STFT watermark: anti-alias LPF for decimation to WMRate (12 kHz).
// Nyquist at 12 kHz = 6 kHz. Cut at 5.5 kHz with margin.
if g.stftEmbedder != nil {
g.wmDecimLPF = dsp.NewLPF4(5500, g.sampleRate)
}

g.initialized = true
@@ -338,6 +326,10 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame
g.frameSeq++
frame.Sequence = g.frameSeq

// L/R buffers for two-pass processing (STFT watermark between stages 3 and 4)
lBuf := make([]float64, samples)
rBuf := make([]float64, samples)

// Load live params once per chunk — single atomic read, zero per-sample cost
lp := g.liveParams.Load()
if lp == nil {
@@ -404,15 +396,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame
r := g.audioLPF_R.Process(float64(in.R))
r = g.pilotNotchR.Process(r)

// Watermark gate level measurement — done BEFORE drive/clip/cleanup.
// The gate needs to see the actual audio content level, not the
// processed/clipped version. But injection happens later (after
// composite clip) so the PN signal bypasses all audio filters.
if g.watermark != nil {
audioLevel := (math.Abs(l) + math.Abs(r)) / 2.0
g.watermark.SetAudioLevel(audioLevel)
}

// --- Stage 2: Drive + Compress + Clip₁ ---
l *= lp.OutputDrive
r *= lp.OutputDrive
@@ -428,6 +411,59 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame
l = dsp.HardClip(l, ceiling)
r = dsp.HardClip(r, ceiling)

lBuf[i] = l
rBuf[i] = r
}

// --- STFT Watermark: decimate → embed → upsample → add to L/R ---
if g.stftEmbedder != nil {
decimFactor := int(g.sampleRate) / watermark.WMRate // 228000/12000 = 19
if decimFactor < 1 {
decimFactor = 1
}
nDown := samples / decimFactor

// Anti-alias: LPF ALL composite-rate samples, THEN decimate.
// The LPF must see every sample for correct IIR state update.
mono12k := make([]float64, nDown)
lpfState := 0.0
decimCount := 0
outIdx := 0
for i := 0; i < samples && outIdx < nDown; i++ {
mono := (lBuf[i] + rBuf[i]) / 2
if g.wmDecimLPF != nil {
lpfState = g.wmDecimLPF.Process(mono)
} else {
lpfState = mono
}
decimCount++
if decimCount >= decimFactor {
decimCount = 0
mono12k[outIdx] = lpfState
outIdx++
}
}

// STFT embed at 12 kHz
embedded := g.stftEmbedder.ProcessBlock(mono12k)

// Extract watermark signal (difference) and upsample via ZOH
for i := 0; i < samples; i++ {
wmIdx := i / decimFactor
if wmIdx >= nDown {
wmIdx = nDown - 1
}
wmSig := embedded[wmIdx] - mono12k[wmIdx]
lBuf[i] += wmSig
rBuf[i] += wmSig
}
}

// --- Pass 2: Stereo encode + composite processing ---
for i := 0; i < samples; i++ {
l := lBuf[i]
r := rBuf[i]

// --- Stage 4: Stereo encode ---
limited := audio.NewFrame(audio.Sample(l), audio.Sample(r))
comps := g.stereoEncoder.Encode(limited)
@@ -453,22 +489,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame
}
bs412PowerAccum += audioMPX * audioMPX

// --- Watermark injection: into audio composite AFTER all processing ---
// Injected after the entire clip-filter-clip chain, notch filters, and
// BS.412 power measurement. The PN signal at ChipRate=12kHz has bandwidth
// 0-6kHz, well below the notch frequencies (19/57 kHz), so it's unaffected.
// At -48 dBFS the watermark causes <0.05 dB of over-modulation, negligible.
// Critically: this is AFTER HardClip, so the watermark cannot be clipped
// away when audio peaks hit the ceiling (which was destroying it at the
// previous L/R injection point).
if g.watermark != nil {
wm := g.watermark.NextSample()
if g.wmShapeLPF != nil {
wm = g.wmShapeLPF.Process(wm)
}
audioMPX += wm
}

// --- Stage 6: Add protected components ---
composite := audioMPX
if lp.StereoEnabled {
@@ -503,15 +523,9 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame
g.bs412.ProcessChunk(bs412PowerAccum / float64(samples))
}

// Watermark diagnostic: log state every 100 chunks (~5s) so we can verify
// the embedder is actually running and producing non-zero output.
if g.watermark != nil && g.frameSeq%100 == 1 {
wm := g.watermark.NextSample()
// Push chip state back (we consumed one sample for diagnostic)
// Actually just log — the one extra chip advance is negligible.
stats := g.watermark.DiagnosticState()
log.Printf("watermark diag: frame=%d gateGain=%.4f chipIdx=%d bitIdx=%d symbol=%d lastSample=%.6f enabled=%t",
g.frameSeq, stats.GateGain, stats.ChipIdx, stats.BitIdx, stats.Symbol, wm, stats.GateEnabled)
// STFT watermark diagnostic
if g.stftEmbedder != nil && g.frameSeq%100 == 1 {
log.Printf("watermark stft: frame=%d, active", g.frameSeq)
}

return frame


+ 163
- 0
internal/watermark/stft_roundtrip_test.go Переглянути файл

@@ -0,0 +1,163 @@
package watermark

import (
"math"
"testing"
)

func TestSTFTRoundTrip(t *testing.T) {
const key = "test-stft-key"
const duration = 150.0 // seconds — need > 136.5s for one full WM cycle

nSamples := int(duration * WMRate)
t.Logf("Generating %d samples @ %d Hz (%.1fs)", nSamples, WMRate, duration)
t.Logf("WM cycle: %d STFT frames, %.1fs", FramesPerWM, float64(SamplesPerWM)/WMRate)

// Generate test signal: broadband noise (the multiplicative watermark
// needs energy in all frequency bins to work — a pure tone only has
// energy in one bin and the watermark has no effect on silent bins)
audio := make([]float64, nSamples)
// Simple LCG pseudo-random for reproducibility
var lcg uint64 = 12345
for i := range audio {
lcg = lcg*6364136223846793005 + 1442695040888963407
audio[i] = 0.3 * (float64(int32(lcg>>33))/float64(1<<31))
}

rmsIn := rmsF64(audio)
t.Logf("Input RMS: %.1f dBFS", 20*math.Log10(rmsIn+1e-12))

// Embed watermark
embedder := NewSTFTEmbedder(key)
watermarked := embedder.ProcessBlock(audio)

rmsOut := rmsF64(watermarked)
t.Logf("Output RMS: %.1f dBFS", 20*math.Log10(rmsOut+1e-12))
t.Logf("RMS change: %.2f dB", 20*math.Log10(rmsOut/rmsIn))

// Detect watermark
detector := NewSTFTDetector(key)
corrs, offset := detector.Detect(watermarked)

t.Logf("Detection offset: %d", offset)

// Check correlations
var nPositive, nNegative int
var sumAbs float64
for _, c := range corrs {
sumAbs += math.Abs(c)
if c > 0 {
nPositive++
} else {
nNegative++
}
}
avgAbs := sumAbs / float64(payloadBits)
t.Logf("Correlations: avg|c|=%.1f, positive=%d, negative=%d", avgAbs, nPositive, nNegative)

if avgAbs < 1.0 {
t.Errorf("avg|c| too low: %.1f (expected >> 1.0)", avgAbs)
}

// Check against known payload
payload := KeyToPayload(key)
codeword := RSEncode(payload)
var expectedBits [payloadBits]int
for i := 0; i < payloadBits; i++ {
expectedBits[i] = int((codeword[i/8] >> uint(7-(i%8))) & 1)
}

nerr := 0
for i := 0; i < payloadBits; i++ {
hard := 0
if corrs[i] < 0 {
hard = 1
}
if hard != expectedBits[i] {
nerr++
}
}
t.Logf("BER: %d/%d (%.1f%%)", nerr, payloadBits, 100*float64(nerr)/float64(payloadBits))

if nerr > 20 {
t.Errorf("BER too high: %d/%d", nerr, payloadBits)
}

// Try RS decode
var recv [rsTotalBytes]byte
for i := 0; i < payloadBits; i++ {
if corrs[i] < 0 {
recv[i/8] |= 1 << uint(7-(i%8))
}
}

// Try with erasures if needed
decoded := false
for nErase := 0; nErase <= rsCheckBytes; nErase++ {
if nErase == 0 {
// Try zero erasures (valid if BER=0)
p, ok := RSDecode(recv, nil)
if ok {
if KeyMatchesPayload(key, p) {
t.Logf("Decoded with 0 erasures: MATCH ✓")
decoded = true
break
}
}
continue
}
// Erase weakest bytes by |correlation|
type bc struct{ idx int; conf float64 }
byteConfs := make([]bc, rsTotalBytes)
for b := 0; b < rsTotalBytes; b++ {
minC := math.Abs(corrs[b*8])
for bit := 1; bit < 8; bit++ {
c := math.Abs(corrs[b*8+bit])
if c < minC {
minC = c
}
}
byteConfs[b] = bc{b, minC}
}
// Sort by confidence (weakest first)
for i := 0; i < len(byteConfs); i++ {
for j := i + 1; j < len(byteConfs); j++ {
if byteConfs[j].conf < byteConfs[i].conf {
byteConfs[i], byteConfs[j] = byteConfs[j], byteConfs[i]
}
}
}
erasePos := make([]int, nErase)
for i := 0; i < nErase; i++ {
erasePos[i] = byteConfs[i].idx
}
// Sort positions
for i := 0; i < len(erasePos); i++ {
for j := i + 1; j < len(erasePos); j++ {
if erasePos[j] < erasePos[i] {
erasePos[i], erasePos[j] = erasePos[j], erasePos[i]
}
}
}
p, ok := RSDecode(recv, erasePos)
if ok {
if KeyMatchesPayload(key, p) {
t.Logf("Decoded with %d erasures: MATCH ✓", nErase)
decoded = true
break
}
}
}

if !decoded {
t.Errorf("RS decode FAILED")
}
}

func rmsF64(s []float64) float64 {
var acc float64
for _, v := range s {
acc += v * v
}
return math.Sqrt(acc / float64(len(s)))
}

+ 413
- 0
internal/watermark/stft_watermark.go Переглянути файл

@@ -0,0 +1,413 @@
// Package watermark implements STFT-domain spread-spectrum audio watermarking
// based on Kirovski & Malvar (IEEE TSP 2003).
//
// Architecture:
// - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking
// - Block repetition coding (R=5 time frames) — automatic drift tolerance
// - Cepstrum filtering at detection — 6 dB carrier noise reduction
// - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload
// - Multi-test sync — scan R frame offsets to find alignment
//
// Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates
// from composite rate (÷19), processes STFT, and upsamples back. The decoder
// decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.).
// Same STFT parameters → bins align perfectly → no rate mismatch.
package watermark

import (
"crypto/sha256"
"math"
"math/cmplx"

"github.com/jan/fm-rds-tx/internal/dsp"
)

// STFT watermark constants.
const (
WMRate = 12000 // watermark processing sample rate (Hz)
FFTSize = 512 // STFT frame size (samples at WMRate)
FFTHop = 256 // 50% overlap
BinLow = 9 // ~211 Hz at WMRate/FFTSize
BinHigh = 213 // ~4992 Hz at WMRate/FFTSize
NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame
TimeRep = 5 // block repetition factor (±2 frame drift tolerance)
GroupsPerBit = 10 // time groups per data bit
WMLevelDB = 1.5 // embedding level (dB)

TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280
FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400
SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400
// Duration at WMRate: 1638400 / 12000 = 136.5 seconds
)

// STFTEmbedder processes audio blocks and adds the STFT-domain watermark.
// It works at WMRate (12 kHz). The caller must decimate input to WMRate
// and upsample output back to the desired rate.
type STFTEmbedder struct {
// PN chip matrix: pnChips[group][bin] ∈ {-1, +1}
// group ∈ [0, TotalGroups), bin ∈ [0, NumBins)
pnChips [TotalGroups][NumBins]int8

// Bit assignment: which data bit owns each group (PCC permutation)
groupToBit [TotalGroups]int

// RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1
symbols [payloadBits]int8

// STFT state
window [FFTSize]float64
inBuf [FFTSize]float64 // analysis window buffer
outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer
inPos int // samples written to inBuf
outPos int // samples read from outBuf
frameIdx int // STFT frame counter (wraps at FramesPerWM)
primed bool // true after first full frame

// Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.189 for 1.5 dB
levelLinear float64
}

// NewSTFTEmbedder creates an embedder for the given license key.
func NewSTFTEmbedder(key string) *STFTEmbedder {
e := &STFTEmbedder{}

// Compute RS-encoded payload
var data [rsDataBytes]byte
if key != "" {
h := sha256.Sum256([]byte(key))
copy(data[:], h[:rsDataBytes])
}
codeword := rsEncode(data)

// BPSK symbols: bit 0 → +1, bit 1 → -1
for i := 0; i < payloadBits; i++ {
if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 {
e.symbols[i] = -1
} else {
e.symbols[i] = 1
}
}

// Generate PN chips from key-seeded PRNG
seed := sha256.Sum256(append([]byte("stft-pn-"), key...))
prng := newPRNG(seed[:])
for g := 0; g < TotalGroups; g++ {
for b := 0; b < NumBins; b++ {
if prng.next()&1 == 0 {
e.pnChips[g][b] = 1
} else {
e.pnChips[g][b] = -1
}
}
}

// PCC permutation: assign groups to bits (interleaved + permuted)
// Simple interleaving first, then Fisher-Yates shuffle
for g := 0; g < TotalGroups; g++ {
e.groupToBit[g] = g % payloadBits
}
// Permute within each bit's groups using key-seeded PRNG
permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...))
permRNG := newPRNG(permSeed[:])
for i := TotalGroups - 1; i > 0; i-- {
j := permRNG.next() % uint32(i+1)
e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i]
}

// Hann window
dsp.HannWindow(e.window[:])

// Embedding level
e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change

return e
}

// ProcessBlock takes mono audio at WMRate and returns watermarked audio.
// The input and output lengths are the same. Internally buffers for STFT
// overlap-add processing. Call with chunks of any size.
func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 {
out := make([]float64, len(in))
for i, s := range in {
// Feed sample into STFT input buffer
e.inBuf[e.inPos] = s
e.inPos++

if e.inPos == FFTSize {
// Full frame: process STFT
e.processFrame()
e.inPos = FFTHop // shift: keep last hop samples for next frame overlap
copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize])
}

// Read from overlap-add output buffer
if e.primed {
out[i] = e.outBuf[e.outPos]
e.outPos++
if e.outPos >= FFTHop {
e.outPos = 0
// Shift output buffer: move overlap region to start
copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop])
// Zero the new region
for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ {
if j < len(e.outBuf) {
e.outBuf[j] = 0
}
}
}
} else {
out[i] = s // pass-through until first frame is processed
}
}
return out
}

// processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add.
func (e *STFTEmbedder) processFrame() {
// Determine which group this frame belongs to
wmFrame := e.frameIdx % FramesPerWM
groupIdx := wmFrame / TimeRep
repIdx := wmFrame % TimeRep
centerRep := TimeRep / 2 // only center repetition carries the watermark for detection

// Apply window and convert to complex
var buf [FFTSize]complex128
for i := 0; i < FFTSize; i++ {
buf[i] = complex(e.inBuf[i]*e.window[i], 0)
}

// Forward FFT
dsp.FFT(buf[:])

// Modify magnitudes in the watermark sub-band
// Only modify if this is within a valid group AND at the center repetition
// (we embed in ALL repetitions so the watermark energy is present everywhere,
// but the PN pattern is the same for all R frames in a group)
if groupIdx < TotalGroups {
bitIdx := e.groupToBit[groupIdx]
dataSign := float64(e.symbols[bitIdx])
_ = repIdx
_ = centerRep

for b := 0; b < NumBins; b++ {
bin := BinLow + b
chip := float64(e.pnChips[groupIdx][b])

// Modify magnitude: |Y| = |X| × (1 + level × chip × data)
// Phase preserved
mag := cmplx.Abs(buf[bin])
if mag < 1e-10 {
continue // skip near-silence bins to avoid division by zero
}
phase := cmplx.Phase(buf[bin])
newMag := mag * (1.0 + e.levelLinear*chip*dataSign)
buf[bin] = cmplx.Rect(newMag, phase)

// Mirror for negative frequencies (conjugate symmetry)
if bin > 0 && bin < FFTSize/2 {
buf[FFTSize-bin] = cmplx.Conj(buf[bin])
}
}
}

// Inverse FFT
dsp.IFFT(buf[:])

// Overlap-add to output buffer
for i := 0; i < FFTSize; i++ {
e.outBuf[i] += real(buf[i])
}

if !e.primed {
e.primed = true
e.outPos = 0
}

e.frameIdx++
}

// STFTDetector extracts watermark bits from an audio recording.
type STFTDetector struct {
pnChips [TotalGroups][NumBins]int8
groupToBit [TotalGroups]int
}

// NewSTFTDetector creates a detector matching the given key's PN sequence.
func NewSTFTDetector(key string) *STFTDetector {
d := &STFTDetector{}

// Same PN generation as embedder
seed := sha256.Sum256(append([]byte("stft-pn-"), key...))
prng := newPRNG(seed[:])
for g := 0; g < TotalGroups; g++ {
for b := 0; b < NumBins; b++ {
if prng.next()&1 == 0 {
d.pnChips[g][b] = 1
} else {
d.pnChips[g][b] = -1
}
}
}

// Same permutation
for g := 0; g < TotalGroups; g++ {
d.groupToBit[g] = g % payloadBits
}
permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...))
permRNG := newPRNG(permSeed[:])
for i := TotalGroups - 1; i > 0; i-- {
j := permRNG.next() % uint32(i+1)
d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i]
}

return d
}

// Detect processes audio at WMRate and returns soft bit decisions.
// The audio should already be decimated/resampled to WMRate and LPF'd.
//
// Multi-test: tries TimeRep frame offsets (the block repetition candidates).
// Cepstrum filtering is applied to reduce carrier noise.
//
// Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence),
// and the frame offset that gave the best detection metric.
func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) {
// Compute all STFT frames
var window [FFTSize]float64
dsp.HannWindow(window[:])

nFrames := (len(audio) - FFTSize) / FFTHop
if nFrames < FramesPerWM {
// Not enough data for a full watermark cycle — use what we have
}

// Compute STFT magnitudes (dB) for all frames
type stftFrame struct {
magDB [FFTSize / 2]float64
}
frames := make([]stftFrame, nFrames)

for f := 0; f < nFrames; f++ {
offset := f * FFTHop
var buf [FFTSize]complex128
for i := 0; i < FFTSize; i++ {
if offset+i < len(audio) {
buf[i] = complex(audio[offset+i]*window[i], 0)
}
}
dsp.FFT(buf[:])

for bin := 0; bin < FFTSize/2; bin++ {
mag := cmplx.Abs(buf[bin])
if mag < 1e-12 {
mag = 1e-12
}
frames[f].magDB[bin] = 20 * math.Log10(mag)
}

// Cepstrum filtering: remove spectral envelope
// DCT of dB magnitudes, zero first N_ceps coefficients, IDCT
cepstrumFilter(frames[f].magDB[:], 8)
}

// Multi-test: try each of TimeRep frame offsets within the repetition block
bestMetric := -1.0
bestOffset = 0

for startOffset := 0; startOffset < TimeRep; startOffset++ {
var testCorrs [payloadBits]float64

// For each group, use the CENTER frame of the repetition block
for g := 0; g < TotalGroups; g++ {
bitIdx := d.groupToBit[g]
frameInWM := g*TimeRep + startOffset + TimeRep/2
if frameInWM >= nFrames {
continue
}

// Correlate this frame's magnitudes with the PN chips
var corr float64
for b := 0; b < NumBins; b++ {
bin := BinLow + b
corr += frames[frameInWM].magDB[bin] * float64(d.pnChips[g][b])
}
testCorrs[bitIdx] += corr
}

// Detection metric: sum of squared partial correlations (chi-squared)
// From paper equation (10): Q = Σ (corr_m)²
var metric float64
for _, c := range testCorrs {
metric += c * c
}

if metric > bestMetric {
bestMetric = metric
bestOffset = startOffset
corrs = testCorrs
}
}

return corrs, bestOffset
}

// cepstrumFilter removes the spectral envelope from dB magnitudes.
// It zeros the first nCeps DCT coefficients (the smooth spectral shape).
// This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB.
func cepstrumFilter(magDB []float64, nCeps int) {
n := len(magDB)
if n < nCeps*2 {
return
}

// DCT-II (simplified, not optimized)
ceps := make([]float64, n)
for k := 0; k < n; k++ {
var sum float64
for i := 0; i < n; i++ {
sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
}
ceps[k] = sum
}

// Zero low-order cepstral coefficients (spectral envelope)
for k := 0; k < nCeps; k++ {
ceps[k] = 0
}

// IDCT (inverse DCT-II)
for i := 0; i < n; i++ {
var sum float64
for k := 0; k < n; k++ {
w := 1.0
if k == 0 {
w = 0.5
}
sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
}
magDB[i] = sum * 2.0 / float64(n)
}
}

// Simple xorshift32 PRNG for deterministic chip generation.
type simplePRNG struct {
state uint32
}

func newPRNG(seed []byte) *simplePRNG {
var s uint32
for i, b := range seed {
s ^= uint32(b) << (uint(i%4) * 8)
}
if s == 0 {
s = 1
}
return &simplePRNG{state: s}
}

func (p *simplePRNG) next() uint32 {
p.state ^= p.state << 13
p.state ^= p.state >> 17
p.state ^= p.state << 5
return p.state
}

+ 11
- 1
internal/watermark/watermark.go Переглянути файл

@@ -395,7 +395,17 @@ func RSEncode(data [rsDataBytes]byte) [rsTotalBytes]byte {
return rsEncode(data)
}

// Constants exported for the recovery tool.
// PNChipAt returns the PN chip value at group g, bin b.
func (d *STFTDetector) PNChipAt(g, b int) int8 {
return d.pnChips[g][b]
}

// GroupBit returns the data bit index for group g.
func (d *STFTDetector) GroupBit(g int) int {
return d.groupToBit[g]
}

// Constants exported for the recovery tool and legacy tools.
const (
PnChips = pnChips
PayloadBits = payloadBits


Завантаження…
Відмінити
Зберегти