diff --git a/cmd/fmrtx/main.go b/cmd/fmrtx/main.go index 35ef2aa..048c10b 100644 --- a/cmd/fmrtx/main.go +++ b/cmd/fmrtx/main.go @@ -186,7 +186,8 @@ func runTXMode(cfg cfgpkg.Config, configPath string, driver platform.SoapyDriver log.Printf("license: no valid key — evaluation jingle every %d minutes", license.JingleIntervalMinutes) } engine.SetLicenseState(licState, licenseKey) - log.Printf("watermark: embedding key fingerprint — Level=%.3f ChipRate=%d", watermark.Level, watermark.ChipRate) + log.Printf("watermark: STFT-domain embedding — WMRate=%d FFTSize=%d Level=%.1fdB", + watermark.WMRate, watermark.FFTSize, watermark.WMLevelDB) cfg = applyLegacyAudioFlags(cfg, audioStdin, audioRate, audioHTTP) var streamSrc *audio.StreamSource diff --git a/cmd/wmdecode/main.go b/cmd/wmdecode/main.go index 263f68a..0d7cce2 100644 --- a/cmd/wmdecode/main.go +++ b/cmd/wmdecode/main.go @@ -1,16 +1,23 @@ -// cmd/wmdecode — fm-rds-tx spread-spectrum watermark recovery tool. +// cmd/wmdecode — STFT-domain spread-spectrum watermark decoder. // -// Approach: downsample to chip rate (12 kHz), correlate at 1 sample/chip. -// No fractional stepping, no clock drift issues. FFT-free phase search. +// Decodes watermark from FM broadcast recordings following +// Kirovski & Malvar (IEEE TSP 2003) architecture. +// +// Usage: +// +// wmdecode [key ...] package main import ( "encoding/binary" "fmt" "math" + "math/cmplx" "os" "sort" + "time" + "github.com/jan/fm-rds-tx/internal/dsp" "github.com/jan/fm-rds-tx/internal/watermark" ) @@ -20,6 +27,8 @@ func main() { os.Exit(1) } + t0 := time.Now() + samples, recRate, err := readMonoWAV(os.Args[1]) if err != nil { fmt.Fprintf(os.Stderr, "read WAV: %v\n", err) @@ -29,295 +38,235 @@ func main() { fmt.Printf("WAV: %d samples @ %.0f Hz = %.2fs, RMS %.1f dBFS\n", len(samples), recRate, float64(len(samples))/recRate, 20*math.Log10(rms+1e-9)) - chipRate := float64(watermark.ChipRate) // 12000 - pnChips := watermark.PnChips // 2048 - - // Step 1: LPF at ChipRate/2 then downsample to ChipRate. - // At chip rate: 1 sample = 1 chip. No fractional stepping. - decimFactor := int(recRate / chipRate) // 192000/12000 = 16 + // Step 1: Decimate to WMRate (12 kHz) + wmRate := float64(watermark.WMRate) + decimFactor := int(recRate / wmRate) if decimFactor < 1 { decimFactor = 1 } - actualChipRate := recRate / float64(decimFactor) // should be exactly chipRate - - fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualChipRate) + actualRate := recRate / float64(decimFactor) + fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualRate) - // Anti-alias LPF (8th-order IIR at 5.5 kHz) lpfCoeffs := designLPF8(5500, recRate) filtered := applyIIR(samples, lpfCoeffs) - // Decimate nDown := len(filtered) / decimFactor down := make([]float64, nDown) for i := 0; i < nDown; i++ { down[i] = filtered[i*decimFactor] } - rmsDown := rmsLevel(down) - fmt.Printf("Downsampled: %d samples @ %.0f Hz, RMS %.1f dBFS\n", - nDown, actualChipRate, 20*math.Log10(rmsDown+1e-9)) - - // Step 2: Phase search — slide 1-bit PN template across [0, pnChips). - // At chip rate this is a simple 2048-element dot product per offset. - // Test all 2048 phases, accumulate energy over many bits. - fmt.Printf("Phase search: %d candidates\n", pnChips) - - nSearchBits := nDown / pnChips - if nSearchBits > 500 { - nSearchBits = 500 + fmt.Printf("Downsampled: %d samples, %.1fs\n", nDown, float64(nDown)/wmRate) + + // Step 2: Compute ALL STFT frames with cepstrum filtering + fftSize := watermark.FFTSize + hop := watermark.FFTHop + nFrames := (nDown - fftSize) / hop + if nFrames <= 0 { + fmt.Fprintln(os.Stderr, "Recording too short") + os.Exit(1) } - bestPhase := 0 - bestEnergy := 0.0 - for phase := 0; phase < pnChips; phase++ { - var energy float64 - for b := 0; b < nSearchBits; b++ { - start := phase + b*pnChips - if start+pnChips > nDown { - break - } - c := corrChipRate(down, start, pnChips) - energy += c * c + + var window [watermark.FFTSize]float64 + dsp.HannWindow(window[:]) + fmt.Printf("STFT: %d frames (%d-point, hop=%d)\n", nFrames, fftSize, hop) + + type stftMag [watermark.FFTSize / 2]float64 + frameMags := make([]stftMag, nFrames) + for f := 0; f < nFrames; f++ { + offset := f * hop + var buf [watermark.FFTSize]complex128 + for i := 0; i < fftSize; i++ { + buf[i] = complex(down[offset+i]*window[i], 0) } - if energy > bestEnergy { - bestEnergy = energy - bestPhase = phase + dsp.FFT(buf[:]) + for bin := 0; bin < fftSize/2; bin++ { + mag := cmplx.Abs(buf[bin]) + if mag < 1e-12 { + mag = 1e-12 + } + frameMags[f][bin] = 20 * math.Log10(mag) } + cepstrumFilter(frameMags[f][:], 8) } - fmt.Printf("Phase: offset=%d (%.2fms), energy=%.0f\n", - bestPhase, float64(bestPhase)/actualChipRate*1000, bestEnergy) - - // Step 3: Per-bit correlation with ±4 sample sliding (handles residual drift). - // At chip rate, ±4 samples = ±0.33ms — covers ~±40 ppm over 22s frame. - nCompleteBits := (nDown - bestPhase) / pnChips - nFrames := nCompleteBits / watermark.PayloadBits - if nFrames < 1 { - nFrames = 1 + + // Step 3: For each key, search cycle offset + rep offset + keys := os.Args[2:] + if len(keys) == 0 { + fmt.Println("No keys supplied.") + os.Exit(1) } - fmt.Printf("Sync: %d complete bits, %d frames\n", nCompleteBits, nFrames) - - const slideWindow = 200 // ±200 chips — handles phase errors + drift - - corrs := make([]float64, watermark.PayloadBits) - for i := 0; i < watermark.PayloadBits; i++ { - // For each offset, sum correlation across ALL frames first. - // Signal adds coherently (×nFrames), noise adds as √nFrames. - // Then pick the offset with maximum |sum|. - bestAbs := 0.0 - bestVal := 0.0 - for off := -slideWindow; off <= slideWindow; off++ { - var sum float64 - for f := 0; f < nFrames; f++ { - nominal := bestPhase + (f*watermark.PayloadBits+i)*pnChips + off - if nominal < 0 || nominal+pnChips > nDown { - continue + + for _, key := range keys { + fmt.Printf("\nKey: %q\n", key) + det := watermark.NewSTFTDetector(key) + + totalGroups := watermark.TotalGroups + timeRep := watermark.TimeRep + framesPerWM := watermark.FramesPerWM + numBins := watermark.NumBins + binLow := watermark.BinLow + centerRep := timeRep / 2 + + bestMetric := -1.0 + var bestCorrs [watermark.PayloadBits]float64 + bestCycleOff := 0 + bestRepOff := 0 + + nCandidates := 0 + for cycleOff := 0; cycleOff < framesPerWM; cycleOff += timeRep { + for repOff := 0; repOff < timeRep; repOff++ { + var testCorrs [watermark.PayloadBits]float64 + + for f := 0; f < nFrames; f++ { + wmFrame := ((f - cycleOff - repOff) % framesPerWM + framesPerWM) % framesPerWM + if wmFrame%timeRep != centerRep { + continue + } + g := wmFrame / timeRep + if g >= totalGroups { + continue + } + + var corr float64 + for b := 0; b < numBins; b++ { + corr += frameMags[f][binLow+b] * float64(det.PNChipAt(g, b)) + } + testCorrs[det.GroupBit(g)] += corr } - sum += corrChipRate(down, nominal, pnChips) - } - if math.Abs(sum) > bestAbs { - bestAbs = math.Abs(sum) - bestVal = sum + + var metric float64 + for _, c := range testCorrs { + metric += c * c + } + + if metric > bestMetric { + bestMetric = metric + bestCorrs = testCorrs + bestCycleOff = cycleOff + bestRepOff = repOff + } + nCandidates++ } } - corrs[i] = bestVal - } - // Diagnostics - var corrMin, corrMax, sumAbs float64 - var nStrong, nDead int - for i, c := range corrs { - ac := math.Abs(c) - sumAbs += ac - if i == 0 || ac < corrMin { - corrMin = ac - } - if ac > corrMax { - corrMax = ac - } - if ac > sumAbs/float64(i+1)*2 { - nStrong++ - } - if ac < 3 { - nDead++ - } - } - avgCorr := sumAbs / 128 - nStrong = 0 - for _, c := range corrs { - if math.Abs(c) > avgCorr*0.5 { - nStrong++ - } - } - fmt.Printf("Corrs: min|c|=%.1f, max|c|=%.1f, avg|c|=%.1f (strong=%d, dead=%d)\n", - corrMin, corrMax, avgCorr, nStrong, nDead) + fmt.Printf("Searched %d candidates in %v\n", nCandidates, time.Since(t0).Round(time.Millisecond)) + fmt.Printf("Best: cycleOff=%d, repOff=%d, metric=%.0f\n", bestCycleOff, bestRepOff, bestMetric) - // Step 4: Frame sync — 128 rotations × byte-level erasure + bit-flipping. + var sumAbs float64 + for _, c := range bestCorrs { + sumAbs += math.Abs(c) + } + fmt.Printf("Corrs: avg|c|=%.1f\n", sumAbs/128) - // Verbose: compute BER at each rotation against the known key (if supplied) - knownPayload := [watermark.RsDataBytes]byte{} - hasKnown := false - if len(os.Args) >= 3 { - hasKnown = true - knownPayload = watermark.KeyToPayload(os.Args[2]) + // BER diagnostic against known key + knownPayload := watermark.KeyToPayload(key) knownCW := watermark.RSEncode(knownPayload) var knownBits [watermark.PayloadBits]int for i := 0; i < watermark.PayloadBits; i++ { knownBits[i] = int((knownCW[i/8] >> uint(7-(i%8))) & 1) } - fmt.Println("\nRotation sweep (top 10 by BER):") - type rotBER struct{ rot, ber int } - var results []rotBER - for rot := 0; rot < watermark.PayloadBits; rot++ { - nerr := 0 - for i := 0; i < watermark.PayloadBits; i++ { - srcBit := (i + rot) % watermark.PayloadBits - hard := 0 - if corrs[srcBit] < 0 { - hard = 1 - } - if hard != knownBits[i] { - nerr++ - } + nerr := 0 + for i := 0; i < watermark.PayloadBits; i++ { + hard := 0 + if bestCorrs[i] < 0 { + hard = 1 } - results = append(results, rotBER{rot, nerr}) - } - sort.Slice(results, func(a, b int) bool { return results[a].ber < results[b].ber }) - for j := 0; j < 10 && j < len(results); j++ { - r := results[j] - fmt.Printf(" rot=%3d: BER=%d/128 (%4.1f%%)\n", r.rot, r.ber, 100*float64(r.ber)/128) - } - - // Show byte error pattern at best rotation - bestRot := results[0].rot - fmt.Printf("\nByte errors at rot=%d:\n ", bestRot) - for b := 0; b < watermark.RsTotalBytes; b++ { - nerr := 0 - for bit := 0; bit < 8; bit++ { - srcBit := (b*8 + bit + bestRot) % watermark.PayloadBits - hard := 0 - if corrs[srcBit] < 0 { - hard = 1 - } - if hard != knownBits[b*8+bit] { - nerr++ - } + if hard != knownBits[i] { + nerr++ } - fmt.Printf("B%d:%d ", b, nerr) } - fmt.Println() + fmt.Printf("BER: %d/128 (%.1f%%)\n", nerr, 100*float64(nerr)/128) - // Show received vs expected codeword at best rotation + // Show recv vs expected var recv [watermark.RsTotalBytes]byte + confs := make([]float64, watermark.PayloadBits) for i := 0; i < watermark.PayloadBits; i++ { - srcBit := (i + bestRot) % watermark.PayloadBits - if corrs[srcBit] < 0 { + confs[i] = math.Abs(bestCorrs[i]) + if bestCorrs[i] < 0 { recv[i/8] |= 1 << uint(7-(i%8)) } } - fmt.Printf(" recv: %x\n", recv) - fmt.Printf(" want: %x\n", knownCW) - } - _ = hasKnown - _ = knownPayload - type decodeResult struct { - rotation int - payload [watermark.RsDataBytes]byte - flips int - } - var best *decodeResult + fmt.Printf("recv: %x\nwant: %x\n", recv, knownCW) - for rot := 0; rot < watermark.PayloadBits; rot++ { - var recv [watermark.RsTotalBytes]byte - confs := make([]float64, watermark.PayloadBits) - for i := 0; i < watermark.PayloadBits; i++ { - srcBit := (i + rot) % watermark.PayloadBits - c := corrs[srcBit] - confs[i] = math.Abs(c) - if c < 0 { - recv[i/8] |= 1 << uint(7-(i%8)) + // Confidence-based erasure (MIN bit confidence per byte) + type bc struct{ idx int; conf float64 } + byteConfs := make([]bc, watermark.RsTotalBytes) + for b := 0; b < watermark.RsTotalBytes; b++ { + minC := confs[b*8] + for bit := 1; bit < 8; bit++ { + if confs[b*8+bit] < minC { + minC = confs[b*8+bit] + } } + byteConfs[b] = bc{b, minC} } + sort.Slice(byteConfs, func(a, b int) bool { return byteConfs[a].conf < byteConfs[b].conf }) - // Brute-force RS decode: try ALL possible erasure subsets of size 1..8. - // With sliding correlation, confidence values are unreliable for erasure - // selection (all bits look "strong"). Instead, let RS tell us which - // subsets produce a valid codeword. This is fast: sum(C(16,k), k=1..8) - // = ~39k RS decodes per rotation, ~5M total. Each takes <1µs. decoded := false - for nErase := 1; nErase <= watermark.RsCheckBytes; nErase++ { - if decoded { break } - indices := make([]int, nErase) - for i := range indices { indices[i] = i } - for { - erasePos := make([]int, nErase) - copy(erasePos, indices) - payload, ok := watermark.RSDecode(recv, erasePos) - if ok { - if best == nil { - best = &decodeResult{rot, payload, nErase} - } + for nErase := 0; nErase <= watermark.RsCheckBytes; nErase++ { + if nErase == 0 { + p, ok := watermark.RSDecode(recv, nil) + if ok && watermark.KeyMatchesPayload(key, p) { + fmt.Printf(" ✓ MATCH (0 erasures), payload=%x\n", p) decoded = true break } - // Next combination - i := nErase - 1 - for i >= 0 && indices[i] == watermark.RsTotalBytes-nErase+i { - i-- - } - if i < 0 { break } - indices[i]++ - for j := i + 1; j < nErase; j++ { - indices[j] = indices[j-1] + 1 - } + continue + } + erasePos := make([]int, nErase) + for i := 0; i < nErase; i++ { + erasePos[i] = byteConfs[i].idx + } + sort.Ints(erasePos) + p, ok := watermark.RSDecode(recv, erasePos) + if ok && watermark.KeyMatchesPayload(key, p) { + fmt.Printf(" ✓ MATCH (%d erasures), payload=%x\n", nErase, p) + decoded = true + break } } - if decoded && best != nil && best.flips <= 4 { - break // clean decode with few erasures — stop early - } - } - if best == nil { - fmt.Println("RS decode: FAILED — no valid frame alignment found.") - fmt.Println("Watermark may not be present, or recording is too noisy/short.") - os.Exit(1) + if !decoded { + fmt.Println(" ✗ NOT FOUND") + } } - fmt.Printf("\nFrame sync: rotation=%d, %d byte erasures\n", best.rotation, best.flips) - fmt.Printf("Payload: %x\n\n", best.payload) + fmt.Printf("\nDone in %v\n", time.Since(t0).Round(time.Millisecond)) +} - keys := os.Args[2:] - if len(keys) == 0 { - fmt.Println("No keys supplied — payload shown above.") +func cepstrumFilter(magDB []float64, nCeps int) { + n := len(magDB) + if n < nCeps*2 { return } - fmt.Println("Key check:") - matched := false - for _, key := range keys { - if watermark.KeyMatchesPayload(key, best.payload) { - fmt.Printf(" ✓ MATCH: %q\n", key) - matched = true - } else { - fmt.Printf(" ✗ : %q\n", key) + ceps := make([]float64, n) + for k := 0; k < n; k++ { + var sum float64 + for i := 0; i < n; i++ { + sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) } + ceps[k] = sum } - if !matched { - fmt.Println("\nNo key matched.") + for k := 0; k < nCeps; k++ { + ceps[k] = 0 } -} - -// corrChipRate correlates at chip rate (1 sample = 1 chip). -func corrChipRate(down []float64, start, pnChips int) float64 { - var acc float64 - for i := 0; i < pnChips; i++ { - acc += down[start+i] * float64(watermark.PNSequence[i]) + for i := 0; i < n; i++ { + var sum float64 + for k := 0; k < n; k++ { + w := 1.0 + if k == 0 { + w = 0.5 + } + sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) + } + magDB[i] = sum * 2.0 / float64(n) } - return acc } -// --- 8th-order Butterworth LPF (4 cascaded biquads) --- type biquad struct{ b0, b1, b2, a1, a2 float64 } type iirCoeffs []biquad func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { - // 8th-order Butterworth = 4 biquad sections angles := []float64{math.Pi / 16, 3 * math.Pi / 16, 5 * math.Pi / 16, 7 * math.Pi / 16} coeffs := make(iirCoeffs, 4) for i, angle := range angles { @@ -328,11 +277,8 @@ func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { alpha := sinW / (2 * q) a0 := 1 + alpha coeffs[i] = biquad{ - b0: (1 - cosW) / 2 / a0, - b1: (1 - cosW) / a0, - b2: (1 - cosW) / 2 / a0, - a1: (-2 * cosW) / a0, - a2: (1 - alpha) / a0, + b0: (1 - cosW) / 2 / a0, b1: (1 - cosW) / a0, b2: (1 - cosW) / 2 / a0, + a1: (-2 * cosW) / a0, a2: (1 - alpha) / a0, } } return coeffs @@ -396,7 +342,7 @@ func readMonoWAV(path string) ([]float64, float64, error) { } } if dataStart == 0 || bitsPerSample != 16 || channels == 0 { - return nil, 0, fmt.Errorf("unsupported WAV (need 16-bit PCM, got bits=%d ch=%d)", bitsPerSample, channels) + return nil, 0, fmt.Errorf("unsupported WAV") } if dataStart+dataLen > len(data) { dataLen = len(data) - dataStart diff --git a/internal/dsp/fft.go b/internal/dsp/fft.go new file mode 100644 index 0000000..97e0ef8 --- /dev/null +++ b/internal/dsp/fft.go @@ -0,0 +1,65 @@ +package dsp + +import ( + "math" + "math/cmplx" +) + +// FFT computes the discrete Fourier transform of x (in-place, radix-2). +// len(x) must be a power of 2. +func FFT(x []complex128) { + n := len(x) + if n <= 1 { + return + } + // Bit-reversal permutation + j := 0 + for i := 1; i < n; i++ { + bit := n >> 1 + for j&bit != 0 { + j ^= bit + bit >>= 1 + } + j ^= bit + if i < j { + x[i], x[j] = x[j], x[i] + } + } + // Cooley-Tukey butterfly + for size := 2; size <= n; size <<= 1 { + half := size >> 1 + wn := cmplx.Exp(complex(0, -2*math.Pi/float64(size))) + for start := 0; start < n; start += size { + w := complex(1, 0) + for k := 0; k < half; k++ { + u := x[start+k] + v := x[start+k+half] * w + x[start+k] = u + v + x[start+k+half] = u - v + w *= wn + } + } + } +} + +// IFFT computes the inverse DFT of x (in-place). +func IFFT(x []complex128) { + n := len(x) + // Conjugate, FFT, conjugate, scale + for i := range x { + x[i] = cmplx.Conj(x[i]) + } + FFT(x) + scale := 1.0 / float64(n) + for i := range x { + x[i] = cmplx.Conj(x[i]) * complex(scale, 0) + } +} + +// HannWindow fills w with a Hann window of length n. +func HannWindow(w []float64) { + n := len(w) + for i := range w { + w[i] = 0.5 * (1.0 - math.Cos(2*math.Pi*float64(i)/float64(n))) + } +} diff --git a/internal/offline/generator.go b/internal/offline/generator.go index f2a8a13..ccce1d0 100644 --- a/internal/offline/generator.go +++ b/internal/offline/generator.go @@ -5,7 +5,6 @@ import ( "encoding/binary" "fmt" "log" - "math" "path/filepath" "sync/atomic" "time" @@ -133,24 +132,20 @@ type Generator struct { licenseState *license.State jingleFrames []license.JingleFrame - // Watermark: spread-spectrum key fingerprint, always active. - watermark *watermark.Embedder - wmShapeLPF *dsp.FilterChain // pulse-shaping: confines PN energy to 0-6kHz + // Watermark: STFT-domain spread-spectrum (Kirovski & Malvar 2003). + stftEmbedder *watermark.STFTEmbedder + wmDecimLPF *dsp.FilterChain // anti-alias LPF for 228k→12k decimation } func NewGenerator(cfg cfgpkg.Config) *Generator { return &Generator{cfg: cfg} } -// SetLicense configures license state (jingle) and creates the watermark +// SetLicense configures license state (jingle) and creates the STFT watermark // embedder. Must be called before the first GenerateFrame. func (g *Generator) SetLicense(state *license.State, key string) { g.licenseState = state - g.watermark = watermark.NewEmbedder(key) - // Gate threshold: -40 dBFS ≈ 0.01 linear amplitude. - // Watermark is muted during silence to prevent audibility. - // Composite rate will be set in init(); use 228000 as default. - g.watermark.EnableGate(0.001, 228000) + g.stftEmbedder = watermark.NewSTFTEmbedder(key) } // SetExternalSource sets a live audio source (e.g. StreamResampler) that @@ -285,17 +280,10 @@ func (g *Generator) init() { } } - // Update watermark gate ramp rate with actual composite rate (may differ - // from the 228000 default used in SetLicense). - if g.watermark != nil { - g.watermark.EnableGate(0.001, g.sampleRate) - // Pulse-shaping: PN chips are rectangular → sinc spectrum → broadband. - // This LPF confines all PN energy to 0–6 kHz (ChipRate/2) so the - // watermark doesn't leak into pilot (19 kHz), stereo sub (38 kHz), - // or RDS (57 kHz) bands. Also prevents audible wideband noise. - // 4th-order Butterworth: -24 dB/octave rolloff. At 12 kHz: -24 dB, - // at 19 kHz: -38 dB, at 38 kHz: -62 dB. Clean enough. - g.wmShapeLPF = dsp.NewLPF4(float64(watermark.ChipRate)/2, g.sampleRate) + // STFT watermark: anti-alias LPF for decimation to WMRate (12 kHz). + // Nyquist at 12 kHz = 6 kHz. Cut at 5.5 kHz with margin. + if g.stftEmbedder != nil { + g.wmDecimLPF = dsp.NewLPF4(5500, g.sampleRate) } g.initialized = true @@ -338,6 +326,10 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame g.frameSeq++ frame.Sequence = g.frameSeq + // L/R buffers for two-pass processing (STFT watermark between stages 3 and 4) + lBuf := make([]float64, samples) + rBuf := make([]float64, samples) + // Load live params once per chunk — single atomic read, zero per-sample cost lp := g.liveParams.Load() if lp == nil { @@ -404,15 +396,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame r := g.audioLPF_R.Process(float64(in.R)) r = g.pilotNotchR.Process(r) - // Watermark gate level measurement — done BEFORE drive/clip/cleanup. - // The gate needs to see the actual audio content level, not the - // processed/clipped version. But injection happens later (after - // composite clip) so the PN signal bypasses all audio filters. - if g.watermark != nil { - audioLevel := (math.Abs(l) + math.Abs(r)) / 2.0 - g.watermark.SetAudioLevel(audioLevel) - } - // --- Stage 2: Drive + Compress + Clip₁ --- l *= lp.OutputDrive r *= lp.OutputDrive @@ -428,6 +411,59 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame l = dsp.HardClip(l, ceiling) r = dsp.HardClip(r, ceiling) + lBuf[i] = l + rBuf[i] = r + } + + // --- STFT Watermark: decimate → embed → upsample → add to L/R --- + if g.stftEmbedder != nil { + decimFactor := int(g.sampleRate) / watermark.WMRate // 228000/12000 = 19 + if decimFactor < 1 { + decimFactor = 1 + } + nDown := samples / decimFactor + + // Anti-alias: LPF ALL composite-rate samples, THEN decimate. + // The LPF must see every sample for correct IIR state update. + mono12k := make([]float64, nDown) + lpfState := 0.0 + decimCount := 0 + outIdx := 0 + for i := 0; i < samples && outIdx < nDown; i++ { + mono := (lBuf[i] + rBuf[i]) / 2 + if g.wmDecimLPF != nil { + lpfState = g.wmDecimLPF.Process(mono) + } else { + lpfState = mono + } + decimCount++ + if decimCount >= decimFactor { + decimCount = 0 + mono12k[outIdx] = lpfState + outIdx++ + } + } + + // STFT embed at 12 kHz + embedded := g.stftEmbedder.ProcessBlock(mono12k) + + // Extract watermark signal (difference) and upsample via ZOH + for i := 0; i < samples; i++ { + wmIdx := i / decimFactor + if wmIdx >= nDown { + wmIdx = nDown - 1 + } + wmSig := embedded[wmIdx] - mono12k[wmIdx] + lBuf[i] += wmSig + rBuf[i] += wmSig + } + } + + // --- Pass 2: Stereo encode + composite processing --- + for i := 0; i < samples; i++ { + l := lBuf[i] + r := rBuf[i] + // --- Stage 4: Stereo encode --- limited := audio.NewFrame(audio.Sample(l), audio.Sample(r)) comps := g.stereoEncoder.Encode(limited) @@ -453,22 +489,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame } bs412PowerAccum += audioMPX * audioMPX - // --- Watermark injection: into audio composite AFTER all processing --- - // Injected after the entire clip-filter-clip chain, notch filters, and - // BS.412 power measurement. The PN signal at ChipRate=12kHz has bandwidth - // 0-6kHz, well below the notch frequencies (19/57 kHz), so it's unaffected. - // At -48 dBFS the watermark causes <0.05 dB of over-modulation, negligible. - // Critically: this is AFTER HardClip, so the watermark cannot be clipped - // away when audio peaks hit the ceiling (which was destroying it at the - // previous L/R injection point). - if g.watermark != nil { - wm := g.watermark.NextSample() - if g.wmShapeLPF != nil { - wm = g.wmShapeLPF.Process(wm) - } - audioMPX += wm - } - // --- Stage 6: Add protected components --- composite := audioMPX if lp.StereoEnabled { @@ -503,15 +523,9 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame g.bs412.ProcessChunk(bs412PowerAccum / float64(samples)) } - // Watermark diagnostic: log state every 100 chunks (~5s) so we can verify - // the embedder is actually running and producing non-zero output. - if g.watermark != nil && g.frameSeq%100 == 1 { - wm := g.watermark.NextSample() - // Push chip state back (we consumed one sample for diagnostic) - // Actually just log — the one extra chip advance is negligible. - stats := g.watermark.DiagnosticState() - log.Printf("watermark diag: frame=%d gateGain=%.4f chipIdx=%d bitIdx=%d symbol=%d lastSample=%.6f enabled=%t", - g.frameSeq, stats.GateGain, stats.ChipIdx, stats.BitIdx, stats.Symbol, wm, stats.GateEnabled) + // STFT watermark diagnostic + if g.stftEmbedder != nil && g.frameSeq%100 == 1 { + log.Printf("watermark stft: frame=%d, active", g.frameSeq) } return frame diff --git a/internal/watermark/stft_roundtrip_test.go b/internal/watermark/stft_roundtrip_test.go new file mode 100644 index 0000000..c91b081 --- /dev/null +++ b/internal/watermark/stft_roundtrip_test.go @@ -0,0 +1,163 @@ +package watermark + +import ( + "math" + "testing" +) + +func TestSTFTRoundTrip(t *testing.T) { + const key = "test-stft-key" + const duration = 150.0 // seconds — need > 136.5s for one full WM cycle + + nSamples := int(duration * WMRate) + t.Logf("Generating %d samples @ %d Hz (%.1fs)", nSamples, WMRate, duration) + t.Logf("WM cycle: %d STFT frames, %.1fs", FramesPerWM, float64(SamplesPerWM)/WMRate) + + // Generate test signal: broadband noise (the multiplicative watermark + // needs energy in all frequency bins to work — a pure tone only has + // energy in one bin and the watermark has no effect on silent bins) + audio := make([]float64, nSamples) + // Simple LCG pseudo-random for reproducibility + var lcg uint64 = 12345 + for i := range audio { + lcg = lcg*6364136223846793005 + 1442695040888963407 + audio[i] = 0.3 * (float64(int32(lcg>>33))/float64(1<<31)) + } + + rmsIn := rmsF64(audio) + t.Logf("Input RMS: %.1f dBFS", 20*math.Log10(rmsIn+1e-12)) + + // Embed watermark + embedder := NewSTFTEmbedder(key) + watermarked := embedder.ProcessBlock(audio) + + rmsOut := rmsF64(watermarked) + t.Logf("Output RMS: %.1f dBFS", 20*math.Log10(rmsOut+1e-12)) + t.Logf("RMS change: %.2f dB", 20*math.Log10(rmsOut/rmsIn)) + + // Detect watermark + detector := NewSTFTDetector(key) + corrs, offset := detector.Detect(watermarked) + + t.Logf("Detection offset: %d", offset) + + // Check correlations + var nPositive, nNegative int + var sumAbs float64 + for _, c := range corrs { + sumAbs += math.Abs(c) + if c > 0 { + nPositive++ + } else { + nNegative++ + } + } + avgAbs := sumAbs / float64(payloadBits) + t.Logf("Correlations: avg|c|=%.1f, positive=%d, negative=%d", avgAbs, nPositive, nNegative) + + if avgAbs < 1.0 { + t.Errorf("avg|c| too low: %.1f (expected >> 1.0)", avgAbs) + } + + // Check against known payload + payload := KeyToPayload(key) + codeword := RSEncode(payload) + var expectedBits [payloadBits]int + for i := 0; i < payloadBits; i++ { + expectedBits[i] = int((codeword[i/8] >> uint(7-(i%8))) & 1) + } + + nerr := 0 + for i := 0; i < payloadBits; i++ { + hard := 0 + if corrs[i] < 0 { + hard = 1 + } + if hard != expectedBits[i] { + nerr++ + } + } + t.Logf("BER: %d/%d (%.1f%%)", nerr, payloadBits, 100*float64(nerr)/float64(payloadBits)) + + if nerr > 20 { + t.Errorf("BER too high: %d/%d", nerr, payloadBits) + } + + // Try RS decode + var recv [rsTotalBytes]byte + for i := 0; i < payloadBits; i++ { + if corrs[i] < 0 { + recv[i/8] |= 1 << uint(7-(i%8)) + } + } + + // Try with erasures if needed + decoded := false + for nErase := 0; nErase <= rsCheckBytes; nErase++ { + if nErase == 0 { + // Try zero erasures (valid if BER=0) + p, ok := RSDecode(recv, nil) + if ok { + if KeyMatchesPayload(key, p) { + t.Logf("Decoded with 0 erasures: MATCH ✓") + decoded = true + break + } + } + continue + } + // Erase weakest bytes by |correlation| + type bc struct{ idx int; conf float64 } + byteConfs := make([]bc, rsTotalBytes) + for b := 0; b < rsTotalBytes; b++ { + minC := math.Abs(corrs[b*8]) + for bit := 1; bit < 8; bit++ { + c := math.Abs(corrs[b*8+bit]) + if c < minC { + minC = c + } + } + byteConfs[b] = bc{b, minC} + } + // Sort by confidence (weakest first) + for i := 0; i < len(byteConfs); i++ { + for j := i + 1; j < len(byteConfs); j++ { + if byteConfs[j].conf < byteConfs[i].conf { + byteConfs[i], byteConfs[j] = byteConfs[j], byteConfs[i] + } + } + } + erasePos := make([]int, nErase) + for i := 0; i < nErase; i++ { + erasePos[i] = byteConfs[i].idx + } + // Sort positions + for i := 0; i < len(erasePos); i++ { + for j := i + 1; j < len(erasePos); j++ { + if erasePos[j] < erasePos[i] { + erasePos[i], erasePos[j] = erasePos[j], erasePos[i] + } + } + } + p, ok := RSDecode(recv, erasePos) + if ok { + if KeyMatchesPayload(key, p) { + t.Logf("Decoded with %d erasures: MATCH ✓", nErase) + decoded = true + break + } + } + } + + if !decoded { + t.Errorf("RS decode FAILED") + } +} + +func rmsF64(s []float64) float64 { + var acc float64 + for _, v := range s { + acc += v * v + } + return math.Sqrt(acc / float64(len(s))) +} diff --git a/internal/watermark/stft_watermark.go b/internal/watermark/stft_watermark.go new file mode 100644 index 0000000..9548d15 --- /dev/null +++ b/internal/watermark/stft_watermark.go @@ -0,0 +1,413 @@ +// Package watermark implements STFT-domain spread-spectrum audio watermarking +// based on Kirovski & Malvar (IEEE TSP 2003). +// +// Architecture: +// - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking +// - Block repetition coding (R=5 time frames) — automatic drift tolerance +// - Cepstrum filtering at detection — 6 dB carrier noise reduction +// - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload +// - Multi-test sync — scan R frame offsets to find alignment +// +// Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates +// from composite rate (÷19), processes STFT, and upsamples back. The decoder +// decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.). +// Same STFT parameters → bins align perfectly → no rate mismatch. +package watermark + +import ( + "crypto/sha256" + "math" + "math/cmplx" + + "github.com/jan/fm-rds-tx/internal/dsp" +) + +// STFT watermark constants. +const ( + WMRate = 12000 // watermark processing sample rate (Hz) + FFTSize = 512 // STFT frame size (samples at WMRate) + FFTHop = 256 // 50% overlap + BinLow = 9 // ~211 Hz at WMRate/FFTSize + BinHigh = 213 // ~4992 Hz at WMRate/FFTSize + NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame + TimeRep = 5 // block repetition factor (±2 frame drift tolerance) + GroupsPerBit = 10 // time groups per data bit + WMLevelDB = 1.5 // embedding level (dB) + + TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280 + FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400 + SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400 + // Duration at WMRate: 1638400 / 12000 = 136.5 seconds +) + +// STFTEmbedder processes audio blocks and adds the STFT-domain watermark. +// It works at WMRate (12 kHz). The caller must decimate input to WMRate +// and upsample output back to the desired rate. +type STFTEmbedder struct { + // PN chip matrix: pnChips[group][bin] ∈ {-1, +1} + // group ∈ [0, TotalGroups), bin ∈ [0, NumBins) + pnChips [TotalGroups][NumBins]int8 + + // Bit assignment: which data bit owns each group (PCC permutation) + groupToBit [TotalGroups]int + + // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1 + symbols [payloadBits]int8 + + // STFT state + window [FFTSize]float64 + inBuf [FFTSize]float64 // analysis window buffer + outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer + inPos int // samples written to inBuf + outPos int // samples read from outBuf + frameIdx int // STFT frame counter (wraps at FramesPerWM) + primed bool // true after first full frame + + // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.189 for 1.5 dB + levelLinear float64 +} + +// NewSTFTEmbedder creates an embedder for the given license key. +func NewSTFTEmbedder(key string) *STFTEmbedder { + e := &STFTEmbedder{} + + // Compute RS-encoded payload + var data [rsDataBytes]byte + if key != "" { + h := sha256.Sum256([]byte(key)) + copy(data[:], h[:rsDataBytes]) + } + codeword := rsEncode(data) + + // BPSK symbols: bit 0 → +1, bit 1 → -1 + for i := 0; i < payloadBits; i++ { + if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 { + e.symbols[i] = -1 + } else { + e.symbols[i] = 1 + } + } + + // Generate PN chips from key-seeded PRNG + seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) + prng := newPRNG(seed[:]) + for g := 0; g < TotalGroups; g++ { + for b := 0; b < NumBins; b++ { + if prng.next()&1 == 0 { + e.pnChips[g][b] = 1 + } else { + e.pnChips[g][b] = -1 + } + } + } + + // PCC permutation: assign groups to bits (interleaved + permuted) + // Simple interleaving first, then Fisher-Yates shuffle + for g := 0; g < TotalGroups; g++ { + e.groupToBit[g] = g % payloadBits + } + // Permute within each bit's groups using key-seeded PRNG + permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) + permRNG := newPRNG(permSeed[:]) + for i := TotalGroups - 1; i > 0; i-- { + j := permRNG.next() % uint32(i+1) + e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i] + } + + // Hann window + dsp.HannWindow(e.window[:]) + + // Embedding level + e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change + + return e +} + +// ProcessBlock takes mono audio at WMRate and returns watermarked audio. +// The input and output lengths are the same. Internally buffers for STFT +// overlap-add processing. Call with chunks of any size. +func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 { + out := make([]float64, len(in)) + for i, s := range in { + // Feed sample into STFT input buffer + e.inBuf[e.inPos] = s + e.inPos++ + + if e.inPos == FFTSize { + // Full frame: process STFT + e.processFrame() + e.inPos = FFTHop // shift: keep last hop samples for next frame overlap + copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize]) + } + + // Read from overlap-add output buffer + if e.primed { + out[i] = e.outBuf[e.outPos] + e.outPos++ + if e.outPos >= FFTHop { + e.outPos = 0 + // Shift output buffer: move overlap region to start + copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop]) + // Zero the new region + for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ { + if j < len(e.outBuf) { + e.outBuf[j] = 0 + } + } + } + } else { + out[i] = s // pass-through until first frame is processed + } + } + return out +} + +// processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add. +func (e *STFTEmbedder) processFrame() { + // Determine which group this frame belongs to + wmFrame := e.frameIdx % FramesPerWM + groupIdx := wmFrame / TimeRep + repIdx := wmFrame % TimeRep + centerRep := TimeRep / 2 // only center repetition carries the watermark for detection + + // Apply window and convert to complex + var buf [FFTSize]complex128 + for i := 0; i < FFTSize; i++ { + buf[i] = complex(e.inBuf[i]*e.window[i], 0) + } + + // Forward FFT + dsp.FFT(buf[:]) + + // Modify magnitudes in the watermark sub-band + // Only modify if this is within a valid group AND at the center repetition + // (we embed in ALL repetitions so the watermark energy is present everywhere, + // but the PN pattern is the same for all R frames in a group) + if groupIdx < TotalGroups { + bitIdx := e.groupToBit[groupIdx] + dataSign := float64(e.symbols[bitIdx]) + _ = repIdx + _ = centerRep + + for b := 0; b < NumBins; b++ { + bin := BinLow + b + chip := float64(e.pnChips[groupIdx][b]) + + // Modify magnitude: |Y| = |X| × (1 + level × chip × data) + // Phase preserved + mag := cmplx.Abs(buf[bin]) + if mag < 1e-10 { + continue // skip near-silence bins to avoid division by zero + } + phase := cmplx.Phase(buf[bin]) + newMag := mag * (1.0 + e.levelLinear*chip*dataSign) + buf[bin] = cmplx.Rect(newMag, phase) + + // Mirror for negative frequencies (conjugate symmetry) + if bin > 0 && bin < FFTSize/2 { + buf[FFTSize-bin] = cmplx.Conj(buf[bin]) + } + } + } + + // Inverse FFT + dsp.IFFT(buf[:]) + + // Overlap-add to output buffer + for i := 0; i < FFTSize; i++ { + e.outBuf[i] += real(buf[i]) + } + + if !e.primed { + e.primed = true + e.outPos = 0 + } + + e.frameIdx++ +} + +// STFTDetector extracts watermark bits from an audio recording. +type STFTDetector struct { + pnChips [TotalGroups][NumBins]int8 + groupToBit [TotalGroups]int +} + +// NewSTFTDetector creates a detector matching the given key's PN sequence. +func NewSTFTDetector(key string) *STFTDetector { + d := &STFTDetector{} + + // Same PN generation as embedder + seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) + prng := newPRNG(seed[:]) + for g := 0; g < TotalGroups; g++ { + for b := 0; b < NumBins; b++ { + if prng.next()&1 == 0 { + d.pnChips[g][b] = 1 + } else { + d.pnChips[g][b] = -1 + } + } + } + + // Same permutation + for g := 0; g < TotalGroups; g++ { + d.groupToBit[g] = g % payloadBits + } + permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) + permRNG := newPRNG(permSeed[:]) + for i := TotalGroups - 1; i > 0; i-- { + j := permRNG.next() % uint32(i+1) + d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i] + } + + return d +} + +// Detect processes audio at WMRate and returns soft bit decisions. +// The audio should already be decimated/resampled to WMRate and LPF'd. +// +// Multi-test: tries TimeRep frame offsets (the block repetition candidates). +// Cepstrum filtering is applied to reduce carrier noise. +// +// Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence), +// and the frame offset that gave the best detection metric. +func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) { + // Compute all STFT frames + var window [FFTSize]float64 + dsp.HannWindow(window[:]) + + nFrames := (len(audio) - FFTSize) / FFTHop + if nFrames < FramesPerWM { + // Not enough data for a full watermark cycle — use what we have + } + + // Compute STFT magnitudes (dB) for all frames + type stftFrame struct { + magDB [FFTSize / 2]float64 + } + frames := make([]stftFrame, nFrames) + + for f := 0; f < nFrames; f++ { + offset := f * FFTHop + var buf [FFTSize]complex128 + for i := 0; i < FFTSize; i++ { + if offset+i < len(audio) { + buf[i] = complex(audio[offset+i]*window[i], 0) + } + } + dsp.FFT(buf[:]) + + for bin := 0; bin < FFTSize/2; bin++ { + mag := cmplx.Abs(buf[bin]) + if mag < 1e-12 { + mag = 1e-12 + } + frames[f].magDB[bin] = 20 * math.Log10(mag) + } + + // Cepstrum filtering: remove spectral envelope + // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT + cepstrumFilter(frames[f].magDB[:], 8) + } + + // Multi-test: try each of TimeRep frame offsets within the repetition block + bestMetric := -1.0 + bestOffset = 0 + + for startOffset := 0; startOffset < TimeRep; startOffset++ { + var testCorrs [payloadBits]float64 + + // For each group, use the CENTER frame of the repetition block + for g := 0; g < TotalGroups; g++ { + bitIdx := d.groupToBit[g] + frameInWM := g*TimeRep + startOffset + TimeRep/2 + if frameInWM >= nFrames { + continue + } + + // Correlate this frame's magnitudes with the PN chips + var corr float64 + for b := 0; b < NumBins; b++ { + bin := BinLow + b + corr += frames[frameInWM].magDB[bin] * float64(d.pnChips[g][b]) + } + testCorrs[bitIdx] += corr + } + + // Detection metric: sum of squared partial correlations (chi-squared) + // From paper equation (10): Q = Σ (corr_m)² + var metric float64 + for _, c := range testCorrs { + metric += c * c + } + + if metric > bestMetric { + bestMetric = metric + bestOffset = startOffset + corrs = testCorrs + } + } + + return corrs, bestOffset +} + +// cepstrumFilter removes the spectral envelope from dB magnitudes. +// It zeros the first nCeps DCT coefficients (the smooth spectral shape). +// This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB. +func cepstrumFilter(magDB []float64, nCeps int) { + n := len(magDB) + if n < nCeps*2 { + return + } + + // DCT-II (simplified, not optimized) + ceps := make([]float64, n) + for k := 0; k < n; k++ { + var sum float64 + for i := 0; i < n; i++ { + sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) + } + ceps[k] = sum + } + + // Zero low-order cepstral coefficients (spectral envelope) + for k := 0; k < nCeps; k++ { + ceps[k] = 0 + } + + // IDCT (inverse DCT-II) + for i := 0; i < n; i++ { + var sum float64 + for k := 0; k < n; k++ { + w := 1.0 + if k == 0 { + w = 0.5 + } + sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) + } + magDB[i] = sum * 2.0 / float64(n) + } +} + +// Simple xorshift32 PRNG for deterministic chip generation. +type simplePRNG struct { + state uint32 +} + +func newPRNG(seed []byte) *simplePRNG { + var s uint32 + for i, b := range seed { + s ^= uint32(b) << (uint(i%4) * 8) + } + if s == 0 { + s = 1 + } + return &simplePRNG{state: s} +} + +func (p *simplePRNG) next() uint32 { + p.state ^= p.state << 13 + p.state ^= p.state >> 17 + p.state ^= p.state << 5 + return p.state +} diff --git a/internal/watermark/watermark.go b/internal/watermark/watermark.go index cbf0df7..571997c 100644 --- a/internal/watermark/watermark.go +++ b/internal/watermark/watermark.go @@ -395,7 +395,17 @@ func RSEncode(data [rsDataBytes]byte) [rsTotalBytes]byte { return rsEncode(data) } -// Constants exported for the recovery tool. +// PNChipAt returns the PN chip value at group g, bin b. +func (d *STFTDetector) PNChipAt(g, b int) int8 { + return d.pnChips[g][b] +} + +// GroupBit returns the data bit index for group g. +func (d *STFTDetector) GroupBit(g int) int { + return d.groupToBit[g] +} + +// Constants exported for the recovery tool and legacy tools. const ( PnChips = pnChips PayloadBits = payloadBits