Add an STFT watermark path inspired by Kirovski & Malvar, including the frequency-domain embedder/decoder, FFT support, and round-trip coverage. Wire the generator and CLI tools to use the new analysis/synthesis flow for watermark experiments on the watermark-rework branch.main
| @@ -186,7 +186,8 @@ func runTXMode(cfg cfgpkg.Config, configPath string, driver platform.SoapyDriver | |||
| log.Printf("license: no valid key — evaluation jingle every %d minutes", license.JingleIntervalMinutes) | |||
| } | |||
| engine.SetLicenseState(licState, licenseKey) | |||
| log.Printf("watermark: embedding key fingerprint — Level=%.3f ChipRate=%d", watermark.Level, watermark.ChipRate) | |||
| log.Printf("watermark: STFT-domain embedding — WMRate=%d FFTSize=%d Level=%.1fdB", | |||
| watermark.WMRate, watermark.FFTSize, watermark.WMLevelDB) | |||
| cfg = applyLegacyAudioFlags(cfg, audioStdin, audioRate, audioHTTP) | |||
| var streamSrc *audio.StreamSource | |||
| @@ -1,16 +1,23 @@ | |||
| // cmd/wmdecode — fm-rds-tx spread-spectrum watermark recovery tool. | |||
| // cmd/wmdecode — STFT-domain spread-spectrum watermark decoder. | |||
| // | |||
| // Approach: downsample to chip rate (12 kHz), correlate at 1 sample/chip. | |||
| // No fractional stepping, no clock drift issues. FFT-free phase search. | |||
| // Decodes watermark from FM broadcast recordings following | |||
| // Kirovski & Malvar (IEEE TSP 2003) architecture. | |||
| // | |||
| // Usage: | |||
| // | |||
| // wmdecode <file.wav> [key ...] | |||
| package main | |||
| import ( | |||
| "encoding/binary" | |||
| "fmt" | |||
| "math" | |||
| "math/cmplx" | |||
| "os" | |||
| "sort" | |||
| "time" | |||
| "github.com/jan/fm-rds-tx/internal/dsp" | |||
| "github.com/jan/fm-rds-tx/internal/watermark" | |||
| ) | |||
| @@ -20,6 +27,8 @@ func main() { | |||
| os.Exit(1) | |||
| } | |||
| t0 := time.Now() | |||
| samples, recRate, err := readMonoWAV(os.Args[1]) | |||
| if err != nil { | |||
| fmt.Fprintf(os.Stderr, "read WAV: %v\n", err) | |||
| @@ -29,295 +38,235 @@ func main() { | |||
| fmt.Printf("WAV: %d samples @ %.0f Hz = %.2fs, RMS %.1f dBFS\n", | |||
| len(samples), recRate, float64(len(samples))/recRate, 20*math.Log10(rms+1e-9)) | |||
| chipRate := float64(watermark.ChipRate) // 12000 | |||
| pnChips := watermark.PnChips // 2048 | |||
| // Step 1: LPF at ChipRate/2 then downsample to ChipRate. | |||
| // At chip rate: 1 sample = 1 chip. No fractional stepping. | |||
| decimFactor := int(recRate / chipRate) // 192000/12000 = 16 | |||
| // Step 1: Decimate to WMRate (12 kHz) | |||
| wmRate := float64(watermark.WMRate) | |||
| decimFactor := int(recRate / wmRate) | |||
| if decimFactor < 1 { | |||
| decimFactor = 1 | |||
| } | |||
| actualChipRate := recRate / float64(decimFactor) // should be exactly chipRate | |||
| fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualChipRate) | |||
| actualRate := recRate / float64(decimFactor) | |||
| fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualRate) | |||
| // Anti-alias LPF (8th-order IIR at 5.5 kHz) | |||
| lpfCoeffs := designLPF8(5500, recRate) | |||
| filtered := applyIIR(samples, lpfCoeffs) | |||
| // Decimate | |||
| nDown := len(filtered) / decimFactor | |||
| down := make([]float64, nDown) | |||
| for i := 0; i < nDown; i++ { | |||
| down[i] = filtered[i*decimFactor] | |||
| } | |||
| rmsDown := rmsLevel(down) | |||
| fmt.Printf("Downsampled: %d samples @ %.0f Hz, RMS %.1f dBFS\n", | |||
| nDown, actualChipRate, 20*math.Log10(rmsDown+1e-9)) | |||
| // Step 2: Phase search — slide 1-bit PN template across [0, pnChips). | |||
| // At chip rate this is a simple 2048-element dot product per offset. | |||
| // Test all 2048 phases, accumulate energy over many bits. | |||
| fmt.Printf("Phase search: %d candidates\n", pnChips) | |||
| nSearchBits := nDown / pnChips | |||
| if nSearchBits > 500 { | |||
| nSearchBits = 500 | |||
| fmt.Printf("Downsampled: %d samples, %.1fs\n", nDown, float64(nDown)/wmRate) | |||
| // Step 2: Compute ALL STFT frames with cepstrum filtering | |||
| fftSize := watermark.FFTSize | |||
| hop := watermark.FFTHop | |||
| nFrames := (nDown - fftSize) / hop | |||
| if nFrames <= 0 { | |||
| fmt.Fprintln(os.Stderr, "Recording too short") | |||
| os.Exit(1) | |||
| } | |||
| bestPhase := 0 | |||
| bestEnergy := 0.0 | |||
| for phase := 0; phase < pnChips; phase++ { | |||
| var energy float64 | |||
| for b := 0; b < nSearchBits; b++ { | |||
| start := phase + b*pnChips | |||
| if start+pnChips > nDown { | |||
| break | |||
| } | |||
| c := corrChipRate(down, start, pnChips) | |||
| energy += c * c | |||
| var window [watermark.FFTSize]float64 | |||
| dsp.HannWindow(window[:]) | |||
| fmt.Printf("STFT: %d frames (%d-point, hop=%d)\n", nFrames, fftSize, hop) | |||
| type stftMag [watermark.FFTSize / 2]float64 | |||
| frameMags := make([]stftMag, nFrames) | |||
| for f := 0; f < nFrames; f++ { | |||
| offset := f * hop | |||
| var buf [watermark.FFTSize]complex128 | |||
| for i := 0; i < fftSize; i++ { | |||
| buf[i] = complex(down[offset+i]*window[i], 0) | |||
| } | |||
| if energy > bestEnergy { | |||
| bestEnergy = energy | |||
| bestPhase = phase | |||
| dsp.FFT(buf[:]) | |||
| for bin := 0; bin < fftSize/2; bin++ { | |||
| mag := cmplx.Abs(buf[bin]) | |||
| if mag < 1e-12 { | |||
| mag = 1e-12 | |||
| } | |||
| frameMags[f][bin] = 20 * math.Log10(mag) | |||
| } | |||
| cepstrumFilter(frameMags[f][:], 8) | |||
| } | |||
| fmt.Printf("Phase: offset=%d (%.2fms), energy=%.0f\n", | |||
| bestPhase, float64(bestPhase)/actualChipRate*1000, bestEnergy) | |||
| // Step 3: Per-bit correlation with ±4 sample sliding (handles residual drift). | |||
| // At chip rate, ±4 samples = ±0.33ms — covers ~±40 ppm over 22s frame. | |||
| nCompleteBits := (nDown - bestPhase) / pnChips | |||
| nFrames := nCompleteBits / watermark.PayloadBits | |||
| if nFrames < 1 { | |||
| nFrames = 1 | |||
| // Step 3: For each key, search cycle offset + rep offset | |||
| keys := os.Args[2:] | |||
| if len(keys) == 0 { | |||
| fmt.Println("No keys supplied.") | |||
| os.Exit(1) | |||
| } | |||
| fmt.Printf("Sync: %d complete bits, %d frames\n", nCompleteBits, nFrames) | |||
| const slideWindow = 200 // ±200 chips — handles phase errors + drift | |||
| corrs := make([]float64, watermark.PayloadBits) | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| // For each offset, sum correlation across ALL frames first. | |||
| // Signal adds coherently (×nFrames), noise adds as √nFrames. | |||
| // Then pick the offset with maximum |sum|. | |||
| bestAbs := 0.0 | |||
| bestVal := 0.0 | |||
| for off := -slideWindow; off <= slideWindow; off++ { | |||
| var sum float64 | |||
| for f := 0; f < nFrames; f++ { | |||
| nominal := bestPhase + (f*watermark.PayloadBits+i)*pnChips + off | |||
| if nominal < 0 || nominal+pnChips > nDown { | |||
| continue | |||
| for _, key := range keys { | |||
| fmt.Printf("\nKey: %q\n", key) | |||
| det := watermark.NewSTFTDetector(key) | |||
| totalGroups := watermark.TotalGroups | |||
| timeRep := watermark.TimeRep | |||
| framesPerWM := watermark.FramesPerWM | |||
| numBins := watermark.NumBins | |||
| binLow := watermark.BinLow | |||
| centerRep := timeRep / 2 | |||
| bestMetric := -1.0 | |||
| var bestCorrs [watermark.PayloadBits]float64 | |||
| bestCycleOff := 0 | |||
| bestRepOff := 0 | |||
| nCandidates := 0 | |||
| for cycleOff := 0; cycleOff < framesPerWM; cycleOff += timeRep { | |||
| for repOff := 0; repOff < timeRep; repOff++ { | |||
| var testCorrs [watermark.PayloadBits]float64 | |||
| for f := 0; f < nFrames; f++ { | |||
| wmFrame := ((f - cycleOff - repOff) % framesPerWM + framesPerWM) % framesPerWM | |||
| if wmFrame%timeRep != centerRep { | |||
| continue | |||
| } | |||
| g := wmFrame / timeRep | |||
| if g >= totalGroups { | |||
| continue | |||
| } | |||
| var corr float64 | |||
| for b := 0; b < numBins; b++ { | |||
| corr += frameMags[f][binLow+b] * float64(det.PNChipAt(g, b)) | |||
| } | |||
| testCorrs[det.GroupBit(g)] += corr | |||
| } | |||
| sum += corrChipRate(down, nominal, pnChips) | |||
| } | |||
| if math.Abs(sum) > bestAbs { | |||
| bestAbs = math.Abs(sum) | |||
| bestVal = sum | |||
| var metric float64 | |||
| for _, c := range testCorrs { | |||
| metric += c * c | |||
| } | |||
| if metric > bestMetric { | |||
| bestMetric = metric | |||
| bestCorrs = testCorrs | |||
| bestCycleOff = cycleOff | |||
| bestRepOff = repOff | |||
| } | |||
| nCandidates++ | |||
| } | |||
| } | |||
| corrs[i] = bestVal | |||
| } | |||
| // Diagnostics | |||
| var corrMin, corrMax, sumAbs float64 | |||
| var nStrong, nDead int | |||
| for i, c := range corrs { | |||
| ac := math.Abs(c) | |||
| sumAbs += ac | |||
| if i == 0 || ac < corrMin { | |||
| corrMin = ac | |||
| } | |||
| if ac > corrMax { | |||
| corrMax = ac | |||
| } | |||
| if ac > sumAbs/float64(i+1)*2 { | |||
| nStrong++ | |||
| } | |||
| if ac < 3 { | |||
| nDead++ | |||
| } | |||
| } | |||
| avgCorr := sumAbs / 128 | |||
| nStrong = 0 | |||
| for _, c := range corrs { | |||
| if math.Abs(c) > avgCorr*0.5 { | |||
| nStrong++ | |||
| } | |||
| } | |||
| fmt.Printf("Corrs: min|c|=%.1f, max|c|=%.1f, avg|c|=%.1f (strong=%d, dead=%d)\n", | |||
| corrMin, corrMax, avgCorr, nStrong, nDead) | |||
| fmt.Printf("Searched %d candidates in %v\n", nCandidates, time.Since(t0).Round(time.Millisecond)) | |||
| fmt.Printf("Best: cycleOff=%d, repOff=%d, metric=%.0f\n", bestCycleOff, bestRepOff, bestMetric) | |||
| // Step 4: Frame sync — 128 rotations × byte-level erasure + bit-flipping. | |||
| var sumAbs float64 | |||
| for _, c := range bestCorrs { | |||
| sumAbs += math.Abs(c) | |||
| } | |||
| fmt.Printf("Corrs: avg|c|=%.1f\n", sumAbs/128) | |||
| // Verbose: compute BER at each rotation against the known key (if supplied) | |||
| knownPayload := [watermark.RsDataBytes]byte{} | |||
| hasKnown := false | |||
| if len(os.Args) >= 3 { | |||
| hasKnown = true | |||
| knownPayload = watermark.KeyToPayload(os.Args[2]) | |||
| // BER diagnostic against known key | |||
| knownPayload := watermark.KeyToPayload(key) | |||
| knownCW := watermark.RSEncode(knownPayload) | |||
| var knownBits [watermark.PayloadBits]int | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| knownBits[i] = int((knownCW[i/8] >> uint(7-(i%8))) & 1) | |||
| } | |||
| fmt.Println("\nRotation sweep (top 10 by BER):") | |||
| type rotBER struct{ rot, ber int } | |||
| var results []rotBER | |||
| for rot := 0; rot < watermark.PayloadBits; rot++ { | |||
| nerr := 0 | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| srcBit := (i + rot) % watermark.PayloadBits | |||
| hard := 0 | |||
| if corrs[srcBit] < 0 { | |||
| hard = 1 | |||
| } | |||
| if hard != knownBits[i] { | |||
| nerr++ | |||
| } | |||
| nerr := 0 | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| hard := 0 | |||
| if bestCorrs[i] < 0 { | |||
| hard = 1 | |||
| } | |||
| results = append(results, rotBER{rot, nerr}) | |||
| } | |||
| sort.Slice(results, func(a, b int) bool { return results[a].ber < results[b].ber }) | |||
| for j := 0; j < 10 && j < len(results); j++ { | |||
| r := results[j] | |||
| fmt.Printf(" rot=%3d: BER=%d/128 (%4.1f%%)\n", r.rot, r.ber, 100*float64(r.ber)/128) | |||
| } | |||
| // Show byte error pattern at best rotation | |||
| bestRot := results[0].rot | |||
| fmt.Printf("\nByte errors at rot=%d:\n ", bestRot) | |||
| for b := 0; b < watermark.RsTotalBytes; b++ { | |||
| nerr := 0 | |||
| for bit := 0; bit < 8; bit++ { | |||
| srcBit := (b*8 + bit + bestRot) % watermark.PayloadBits | |||
| hard := 0 | |||
| if corrs[srcBit] < 0 { | |||
| hard = 1 | |||
| } | |||
| if hard != knownBits[b*8+bit] { | |||
| nerr++ | |||
| } | |||
| if hard != knownBits[i] { | |||
| nerr++ | |||
| } | |||
| fmt.Printf("B%d:%d ", b, nerr) | |||
| } | |||
| fmt.Println() | |||
| fmt.Printf("BER: %d/128 (%.1f%%)\n", nerr, 100*float64(nerr)/128) | |||
| // Show received vs expected codeword at best rotation | |||
| // Show recv vs expected | |||
| var recv [watermark.RsTotalBytes]byte | |||
| confs := make([]float64, watermark.PayloadBits) | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| srcBit := (i + bestRot) % watermark.PayloadBits | |||
| if corrs[srcBit] < 0 { | |||
| confs[i] = math.Abs(bestCorrs[i]) | |||
| if bestCorrs[i] < 0 { | |||
| recv[i/8] |= 1 << uint(7-(i%8)) | |||
| } | |||
| } | |||
| fmt.Printf(" recv: %x\n", recv) | |||
| fmt.Printf(" want: %x\n", knownCW) | |||
| } | |||
| _ = hasKnown | |||
| _ = knownPayload | |||
| type decodeResult struct { | |||
| rotation int | |||
| payload [watermark.RsDataBytes]byte | |||
| flips int | |||
| } | |||
| var best *decodeResult | |||
| fmt.Printf("recv: %x\nwant: %x\n", recv, knownCW) | |||
| for rot := 0; rot < watermark.PayloadBits; rot++ { | |||
| var recv [watermark.RsTotalBytes]byte | |||
| confs := make([]float64, watermark.PayloadBits) | |||
| for i := 0; i < watermark.PayloadBits; i++ { | |||
| srcBit := (i + rot) % watermark.PayloadBits | |||
| c := corrs[srcBit] | |||
| confs[i] = math.Abs(c) | |||
| if c < 0 { | |||
| recv[i/8] |= 1 << uint(7-(i%8)) | |||
| // Confidence-based erasure (MIN bit confidence per byte) | |||
| type bc struct{ idx int; conf float64 } | |||
| byteConfs := make([]bc, watermark.RsTotalBytes) | |||
| for b := 0; b < watermark.RsTotalBytes; b++ { | |||
| minC := confs[b*8] | |||
| for bit := 1; bit < 8; bit++ { | |||
| if confs[b*8+bit] < minC { | |||
| minC = confs[b*8+bit] | |||
| } | |||
| } | |||
| byteConfs[b] = bc{b, minC} | |||
| } | |||
| sort.Slice(byteConfs, func(a, b int) bool { return byteConfs[a].conf < byteConfs[b].conf }) | |||
| // Brute-force RS decode: try ALL possible erasure subsets of size 1..8. | |||
| // With sliding correlation, confidence values are unreliable for erasure | |||
| // selection (all bits look "strong"). Instead, let RS tell us which | |||
| // subsets produce a valid codeword. This is fast: sum(C(16,k), k=1..8) | |||
| // = ~39k RS decodes per rotation, ~5M total. Each takes <1µs. | |||
| decoded := false | |||
| for nErase := 1; nErase <= watermark.RsCheckBytes; nErase++ { | |||
| if decoded { break } | |||
| indices := make([]int, nErase) | |||
| for i := range indices { indices[i] = i } | |||
| for { | |||
| erasePos := make([]int, nErase) | |||
| copy(erasePos, indices) | |||
| payload, ok := watermark.RSDecode(recv, erasePos) | |||
| if ok { | |||
| if best == nil { | |||
| best = &decodeResult{rot, payload, nErase} | |||
| } | |||
| for nErase := 0; nErase <= watermark.RsCheckBytes; nErase++ { | |||
| if nErase == 0 { | |||
| p, ok := watermark.RSDecode(recv, nil) | |||
| if ok && watermark.KeyMatchesPayload(key, p) { | |||
| fmt.Printf(" ✓ MATCH (0 erasures), payload=%x\n", p) | |||
| decoded = true | |||
| break | |||
| } | |||
| // Next combination | |||
| i := nErase - 1 | |||
| for i >= 0 && indices[i] == watermark.RsTotalBytes-nErase+i { | |||
| i-- | |||
| } | |||
| if i < 0 { break } | |||
| indices[i]++ | |||
| for j := i + 1; j < nErase; j++ { | |||
| indices[j] = indices[j-1] + 1 | |||
| } | |||
| continue | |||
| } | |||
| erasePos := make([]int, nErase) | |||
| for i := 0; i < nErase; i++ { | |||
| erasePos[i] = byteConfs[i].idx | |||
| } | |||
| sort.Ints(erasePos) | |||
| p, ok := watermark.RSDecode(recv, erasePos) | |||
| if ok && watermark.KeyMatchesPayload(key, p) { | |||
| fmt.Printf(" ✓ MATCH (%d erasures), payload=%x\n", nErase, p) | |||
| decoded = true | |||
| break | |||
| } | |||
| } | |||
| if decoded && best != nil && best.flips <= 4 { | |||
| break // clean decode with few erasures — stop early | |||
| } | |||
| } | |||
| if best == nil { | |||
| fmt.Println("RS decode: FAILED — no valid frame alignment found.") | |||
| fmt.Println("Watermark may not be present, or recording is too noisy/short.") | |||
| os.Exit(1) | |||
| if !decoded { | |||
| fmt.Println(" ✗ NOT FOUND") | |||
| } | |||
| } | |||
| fmt.Printf("\nFrame sync: rotation=%d, %d byte erasures\n", best.rotation, best.flips) | |||
| fmt.Printf("Payload: %x\n\n", best.payload) | |||
| fmt.Printf("\nDone in %v\n", time.Since(t0).Round(time.Millisecond)) | |||
| } | |||
| keys := os.Args[2:] | |||
| if len(keys) == 0 { | |||
| fmt.Println("No keys supplied — payload shown above.") | |||
| func cepstrumFilter(magDB []float64, nCeps int) { | |||
| n := len(magDB) | |||
| if n < nCeps*2 { | |||
| return | |||
| } | |||
| fmt.Println("Key check:") | |||
| matched := false | |||
| for _, key := range keys { | |||
| if watermark.KeyMatchesPayload(key, best.payload) { | |||
| fmt.Printf(" ✓ MATCH: %q\n", key) | |||
| matched = true | |||
| } else { | |||
| fmt.Printf(" ✗ : %q\n", key) | |||
| ceps := make([]float64, n) | |||
| for k := 0; k < n; k++ { | |||
| var sum float64 | |||
| for i := 0; i < n; i++ { | |||
| sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||
| } | |||
| ceps[k] = sum | |||
| } | |||
| if !matched { | |||
| fmt.Println("\nNo key matched.") | |||
| for k := 0; k < nCeps; k++ { | |||
| ceps[k] = 0 | |||
| } | |||
| } | |||
| // corrChipRate correlates at chip rate (1 sample = 1 chip). | |||
| func corrChipRate(down []float64, start, pnChips int) float64 { | |||
| var acc float64 | |||
| for i := 0; i < pnChips; i++ { | |||
| acc += down[start+i] * float64(watermark.PNSequence[i]) | |||
| for i := 0; i < n; i++ { | |||
| var sum float64 | |||
| for k := 0; k < n; k++ { | |||
| w := 1.0 | |||
| if k == 0 { | |||
| w = 0.5 | |||
| } | |||
| sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||
| } | |||
| magDB[i] = sum * 2.0 / float64(n) | |||
| } | |||
| return acc | |||
| } | |||
| // --- 8th-order Butterworth LPF (4 cascaded biquads) --- | |||
| type biquad struct{ b0, b1, b2, a1, a2 float64 } | |||
| type iirCoeffs []biquad | |||
| func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { | |||
| // 8th-order Butterworth = 4 biquad sections | |||
| angles := []float64{math.Pi / 16, 3 * math.Pi / 16, 5 * math.Pi / 16, 7 * math.Pi / 16} | |||
| coeffs := make(iirCoeffs, 4) | |||
| for i, angle := range angles { | |||
| @@ -328,11 +277,8 @@ func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { | |||
| alpha := sinW / (2 * q) | |||
| a0 := 1 + alpha | |||
| coeffs[i] = biquad{ | |||
| b0: (1 - cosW) / 2 / a0, | |||
| b1: (1 - cosW) / a0, | |||
| b2: (1 - cosW) / 2 / a0, | |||
| a1: (-2 * cosW) / a0, | |||
| a2: (1 - alpha) / a0, | |||
| b0: (1 - cosW) / 2 / a0, b1: (1 - cosW) / a0, b2: (1 - cosW) / 2 / a0, | |||
| a1: (-2 * cosW) / a0, a2: (1 - alpha) / a0, | |||
| } | |||
| } | |||
| return coeffs | |||
| @@ -396,7 +342,7 @@ func readMonoWAV(path string) ([]float64, float64, error) { | |||
| } | |||
| } | |||
| if dataStart == 0 || bitsPerSample != 16 || channels == 0 { | |||
| return nil, 0, fmt.Errorf("unsupported WAV (need 16-bit PCM, got bits=%d ch=%d)", bitsPerSample, channels) | |||
| return nil, 0, fmt.Errorf("unsupported WAV") | |||
| } | |||
| if dataStart+dataLen > len(data) { | |||
| dataLen = len(data) - dataStart | |||
| @@ -0,0 +1,65 @@ | |||
| package dsp | |||
| import ( | |||
| "math" | |||
| "math/cmplx" | |||
| ) | |||
| // FFT computes the discrete Fourier transform of x (in-place, radix-2). | |||
| // len(x) must be a power of 2. | |||
| func FFT(x []complex128) { | |||
| n := len(x) | |||
| if n <= 1 { | |||
| return | |||
| } | |||
| // Bit-reversal permutation | |||
| j := 0 | |||
| for i := 1; i < n; i++ { | |||
| bit := n >> 1 | |||
| for j&bit != 0 { | |||
| j ^= bit | |||
| bit >>= 1 | |||
| } | |||
| j ^= bit | |||
| if i < j { | |||
| x[i], x[j] = x[j], x[i] | |||
| } | |||
| } | |||
| // Cooley-Tukey butterfly | |||
| for size := 2; size <= n; size <<= 1 { | |||
| half := size >> 1 | |||
| wn := cmplx.Exp(complex(0, -2*math.Pi/float64(size))) | |||
| for start := 0; start < n; start += size { | |||
| w := complex(1, 0) | |||
| for k := 0; k < half; k++ { | |||
| u := x[start+k] | |||
| v := x[start+k+half] * w | |||
| x[start+k] = u + v | |||
| x[start+k+half] = u - v | |||
| w *= wn | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // IFFT computes the inverse DFT of x (in-place). | |||
| func IFFT(x []complex128) { | |||
| n := len(x) | |||
| // Conjugate, FFT, conjugate, scale | |||
| for i := range x { | |||
| x[i] = cmplx.Conj(x[i]) | |||
| } | |||
| FFT(x) | |||
| scale := 1.0 / float64(n) | |||
| for i := range x { | |||
| x[i] = cmplx.Conj(x[i]) * complex(scale, 0) | |||
| } | |||
| } | |||
| // HannWindow fills w with a Hann window of length n. | |||
| func HannWindow(w []float64) { | |||
| n := len(w) | |||
| for i := range w { | |||
| w[i] = 0.5 * (1.0 - math.Cos(2*math.Pi*float64(i)/float64(n))) | |||
| } | |||
| } | |||
| @@ -5,7 +5,6 @@ import ( | |||
| "encoding/binary" | |||
| "fmt" | |||
| "log" | |||
| "math" | |||
| "path/filepath" | |||
| "sync/atomic" | |||
| "time" | |||
| @@ -133,24 +132,20 @@ type Generator struct { | |||
| licenseState *license.State | |||
| jingleFrames []license.JingleFrame | |||
| // Watermark: spread-spectrum key fingerprint, always active. | |||
| watermark *watermark.Embedder | |||
| wmShapeLPF *dsp.FilterChain // pulse-shaping: confines PN energy to 0-6kHz | |||
| // Watermark: STFT-domain spread-spectrum (Kirovski & Malvar 2003). | |||
| stftEmbedder *watermark.STFTEmbedder | |||
| wmDecimLPF *dsp.FilterChain // anti-alias LPF for 228k→12k decimation | |||
| } | |||
| func NewGenerator(cfg cfgpkg.Config) *Generator { | |||
| return &Generator{cfg: cfg} | |||
| } | |||
| // SetLicense configures license state (jingle) and creates the watermark | |||
| // SetLicense configures license state (jingle) and creates the STFT watermark | |||
| // embedder. Must be called before the first GenerateFrame. | |||
| func (g *Generator) SetLicense(state *license.State, key string) { | |||
| g.licenseState = state | |||
| g.watermark = watermark.NewEmbedder(key) | |||
| // Gate threshold: -40 dBFS ≈ 0.01 linear amplitude. | |||
| // Watermark is muted during silence to prevent audibility. | |||
| // Composite rate will be set in init(); use 228000 as default. | |||
| g.watermark.EnableGate(0.001, 228000) | |||
| g.stftEmbedder = watermark.NewSTFTEmbedder(key) | |||
| } | |||
| // SetExternalSource sets a live audio source (e.g. StreamResampler) that | |||
| @@ -285,17 +280,10 @@ func (g *Generator) init() { | |||
| } | |||
| } | |||
| // Update watermark gate ramp rate with actual composite rate (may differ | |||
| // from the 228000 default used in SetLicense). | |||
| if g.watermark != nil { | |||
| g.watermark.EnableGate(0.001, g.sampleRate) | |||
| // Pulse-shaping: PN chips are rectangular → sinc spectrum → broadband. | |||
| // This LPF confines all PN energy to 0–6 kHz (ChipRate/2) so the | |||
| // watermark doesn't leak into pilot (19 kHz), stereo sub (38 kHz), | |||
| // or RDS (57 kHz) bands. Also prevents audible wideband noise. | |||
| // 4th-order Butterworth: -24 dB/octave rolloff. At 12 kHz: -24 dB, | |||
| // at 19 kHz: -38 dB, at 38 kHz: -62 dB. Clean enough. | |||
| g.wmShapeLPF = dsp.NewLPF4(float64(watermark.ChipRate)/2, g.sampleRate) | |||
| // STFT watermark: anti-alias LPF for decimation to WMRate (12 kHz). | |||
| // Nyquist at 12 kHz = 6 kHz. Cut at 5.5 kHz with margin. | |||
| if g.stftEmbedder != nil { | |||
| g.wmDecimLPF = dsp.NewLPF4(5500, g.sampleRate) | |||
| } | |||
| g.initialized = true | |||
| @@ -338,6 +326,10 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||
| g.frameSeq++ | |||
| frame.Sequence = g.frameSeq | |||
| // L/R buffers for two-pass processing (STFT watermark between stages 3 and 4) | |||
| lBuf := make([]float64, samples) | |||
| rBuf := make([]float64, samples) | |||
| // Load live params once per chunk — single atomic read, zero per-sample cost | |||
| lp := g.liveParams.Load() | |||
| if lp == nil { | |||
| @@ -404,15 +396,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||
| r := g.audioLPF_R.Process(float64(in.R)) | |||
| r = g.pilotNotchR.Process(r) | |||
| // Watermark gate level measurement — done BEFORE drive/clip/cleanup. | |||
| // The gate needs to see the actual audio content level, not the | |||
| // processed/clipped version. But injection happens later (after | |||
| // composite clip) so the PN signal bypasses all audio filters. | |||
| if g.watermark != nil { | |||
| audioLevel := (math.Abs(l) + math.Abs(r)) / 2.0 | |||
| g.watermark.SetAudioLevel(audioLevel) | |||
| } | |||
| // --- Stage 2: Drive + Compress + Clip₁ --- | |||
| l *= lp.OutputDrive | |||
| r *= lp.OutputDrive | |||
| @@ -428,6 +411,59 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||
| l = dsp.HardClip(l, ceiling) | |||
| r = dsp.HardClip(r, ceiling) | |||
| lBuf[i] = l | |||
| rBuf[i] = r | |||
| } | |||
| // --- STFT Watermark: decimate → embed → upsample → add to L/R --- | |||
| if g.stftEmbedder != nil { | |||
| decimFactor := int(g.sampleRate) / watermark.WMRate // 228000/12000 = 19 | |||
| if decimFactor < 1 { | |||
| decimFactor = 1 | |||
| } | |||
| nDown := samples / decimFactor | |||
| // Anti-alias: LPF ALL composite-rate samples, THEN decimate. | |||
| // The LPF must see every sample for correct IIR state update. | |||
| mono12k := make([]float64, nDown) | |||
| lpfState := 0.0 | |||
| decimCount := 0 | |||
| outIdx := 0 | |||
| for i := 0; i < samples && outIdx < nDown; i++ { | |||
| mono := (lBuf[i] + rBuf[i]) / 2 | |||
| if g.wmDecimLPF != nil { | |||
| lpfState = g.wmDecimLPF.Process(mono) | |||
| } else { | |||
| lpfState = mono | |||
| } | |||
| decimCount++ | |||
| if decimCount >= decimFactor { | |||
| decimCount = 0 | |||
| mono12k[outIdx] = lpfState | |||
| outIdx++ | |||
| } | |||
| } | |||
| // STFT embed at 12 kHz | |||
| embedded := g.stftEmbedder.ProcessBlock(mono12k) | |||
| // Extract watermark signal (difference) and upsample via ZOH | |||
| for i := 0; i < samples; i++ { | |||
| wmIdx := i / decimFactor | |||
| if wmIdx >= nDown { | |||
| wmIdx = nDown - 1 | |||
| } | |||
| wmSig := embedded[wmIdx] - mono12k[wmIdx] | |||
| lBuf[i] += wmSig | |||
| rBuf[i] += wmSig | |||
| } | |||
| } | |||
| // --- Pass 2: Stereo encode + composite processing --- | |||
| for i := 0; i < samples; i++ { | |||
| l := lBuf[i] | |||
| r := rBuf[i] | |||
| // --- Stage 4: Stereo encode --- | |||
| limited := audio.NewFrame(audio.Sample(l), audio.Sample(r)) | |||
| comps := g.stereoEncoder.Encode(limited) | |||
| @@ -453,22 +489,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||
| } | |||
| bs412PowerAccum += audioMPX * audioMPX | |||
| // --- Watermark injection: into audio composite AFTER all processing --- | |||
| // Injected after the entire clip-filter-clip chain, notch filters, and | |||
| // BS.412 power measurement. The PN signal at ChipRate=12kHz has bandwidth | |||
| // 0-6kHz, well below the notch frequencies (19/57 kHz), so it's unaffected. | |||
| // At -48 dBFS the watermark causes <0.05 dB of over-modulation, negligible. | |||
| // Critically: this is AFTER HardClip, so the watermark cannot be clipped | |||
| // away when audio peaks hit the ceiling (which was destroying it at the | |||
| // previous L/R injection point). | |||
| if g.watermark != nil { | |||
| wm := g.watermark.NextSample() | |||
| if g.wmShapeLPF != nil { | |||
| wm = g.wmShapeLPF.Process(wm) | |||
| } | |||
| audioMPX += wm | |||
| } | |||
| // --- Stage 6: Add protected components --- | |||
| composite := audioMPX | |||
| if lp.StereoEnabled { | |||
| @@ -503,15 +523,9 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||
| g.bs412.ProcessChunk(bs412PowerAccum / float64(samples)) | |||
| } | |||
| // Watermark diagnostic: log state every 100 chunks (~5s) so we can verify | |||
| // the embedder is actually running and producing non-zero output. | |||
| if g.watermark != nil && g.frameSeq%100 == 1 { | |||
| wm := g.watermark.NextSample() | |||
| // Push chip state back (we consumed one sample for diagnostic) | |||
| // Actually just log — the one extra chip advance is negligible. | |||
| stats := g.watermark.DiagnosticState() | |||
| log.Printf("watermark diag: frame=%d gateGain=%.4f chipIdx=%d bitIdx=%d symbol=%d lastSample=%.6f enabled=%t", | |||
| g.frameSeq, stats.GateGain, stats.ChipIdx, stats.BitIdx, stats.Symbol, wm, stats.GateEnabled) | |||
| // STFT watermark diagnostic | |||
| if g.stftEmbedder != nil && g.frameSeq%100 == 1 { | |||
| log.Printf("watermark stft: frame=%d, active", g.frameSeq) | |||
| } | |||
| return frame | |||
| @@ -0,0 +1,163 @@ | |||
| package watermark | |||
| import ( | |||
| "math" | |||
| "testing" | |||
| ) | |||
| func TestSTFTRoundTrip(t *testing.T) { | |||
| const key = "test-stft-key" | |||
| const duration = 150.0 // seconds — need > 136.5s for one full WM cycle | |||
| nSamples := int(duration * WMRate) | |||
| t.Logf("Generating %d samples @ %d Hz (%.1fs)", nSamples, WMRate, duration) | |||
| t.Logf("WM cycle: %d STFT frames, %.1fs", FramesPerWM, float64(SamplesPerWM)/WMRate) | |||
| // Generate test signal: broadband noise (the multiplicative watermark | |||
| // needs energy in all frequency bins to work — a pure tone only has | |||
| // energy in one bin and the watermark has no effect on silent bins) | |||
| audio := make([]float64, nSamples) | |||
| // Simple LCG pseudo-random for reproducibility | |||
| var lcg uint64 = 12345 | |||
| for i := range audio { | |||
| lcg = lcg*6364136223846793005 + 1442695040888963407 | |||
| audio[i] = 0.3 * (float64(int32(lcg>>33))/float64(1<<31)) | |||
| } | |||
| rmsIn := rmsF64(audio) | |||
| t.Logf("Input RMS: %.1f dBFS", 20*math.Log10(rmsIn+1e-12)) | |||
| // Embed watermark | |||
| embedder := NewSTFTEmbedder(key) | |||
| watermarked := embedder.ProcessBlock(audio) | |||
| rmsOut := rmsF64(watermarked) | |||
| t.Logf("Output RMS: %.1f dBFS", 20*math.Log10(rmsOut+1e-12)) | |||
| t.Logf("RMS change: %.2f dB", 20*math.Log10(rmsOut/rmsIn)) | |||
| // Detect watermark | |||
| detector := NewSTFTDetector(key) | |||
| corrs, offset := detector.Detect(watermarked) | |||
| t.Logf("Detection offset: %d", offset) | |||
| // Check correlations | |||
| var nPositive, nNegative int | |||
| var sumAbs float64 | |||
| for _, c := range corrs { | |||
| sumAbs += math.Abs(c) | |||
| if c > 0 { | |||
| nPositive++ | |||
| } else { | |||
| nNegative++ | |||
| } | |||
| } | |||
| avgAbs := sumAbs / float64(payloadBits) | |||
| t.Logf("Correlations: avg|c|=%.1f, positive=%d, negative=%d", avgAbs, nPositive, nNegative) | |||
| if avgAbs < 1.0 { | |||
| t.Errorf("avg|c| too low: %.1f (expected >> 1.0)", avgAbs) | |||
| } | |||
| // Check against known payload | |||
| payload := KeyToPayload(key) | |||
| codeword := RSEncode(payload) | |||
| var expectedBits [payloadBits]int | |||
| for i := 0; i < payloadBits; i++ { | |||
| expectedBits[i] = int((codeword[i/8] >> uint(7-(i%8))) & 1) | |||
| } | |||
| nerr := 0 | |||
| for i := 0; i < payloadBits; i++ { | |||
| hard := 0 | |||
| if corrs[i] < 0 { | |||
| hard = 1 | |||
| } | |||
| if hard != expectedBits[i] { | |||
| nerr++ | |||
| } | |||
| } | |||
| t.Logf("BER: %d/%d (%.1f%%)", nerr, payloadBits, 100*float64(nerr)/float64(payloadBits)) | |||
| if nerr > 20 { | |||
| t.Errorf("BER too high: %d/%d", nerr, payloadBits) | |||
| } | |||
| // Try RS decode | |||
| var recv [rsTotalBytes]byte | |||
| for i := 0; i < payloadBits; i++ { | |||
| if corrs[i] < 0 { | |||
| recv[i/8] |= 1 << uint(7-(i%8)) | |||
| } | |||
| } | |||
| // Try with erasures if needed | |||
| decoded := false | |||
| for nErase := 0; nErase <= rsCheckBytes; nErase++ { | |||
| if nErase == 0 { | |||
| // Try zero erasures (valid if BER=0) | |||
| p, ok := RSDecode(recv, nil) | |||
| if ok { | |||
| if KeyMatchesPayload(key, p) { | |||
| t.Logf("Decoded with 0 erasures: MATCH ✓") | |||
| decoded = true | |||
| break | |||
| } | |||
| } | |||
| continue | |||
| } | |||
| // Erase weakest bytes by |correlation| | |||
| type bc struct{ idx int; conf float64 } | |||
| byteConfs := make([]bc, rsTotalBytes) | |||
| for b := 0; b < rsTotalBytes; b++ { | |||
| minC := math.Abs(corrs[b*8]) | |||
| for bit := 1; bit < 8; bit++ { | |||
| c := math.Abs(corrs[b*8+bit]) | |||
| if c < minC { | |||
| minC = c | |||
| } | |||
| } | |||
| byteConfs[b] = bc{b, minC} | |||
| } | |||
| // Sort by confidence (weakest first) | |||
| for i := 0; i < len(byteConfs); i++ { | |||
| for j := i + 1; j < len(byteConfs); j++ { | |||
| if byteConfs[j].conf < byteConfs[i].conf { | |||
| byteConfs[i], byteConfs[j] = byteConfs[j], byteConfs[i] | |||
| } | |||
| } | |||
| } | |||
| erasePos := make([]int, nErase) | |||
| for i := 0; i < nErase; i++ { | |||
| erasePos[i] = byteConfs[i].idx | |||
| } | |||
| // Sort positions | |||
| for i := 0; i < len(erasePos); i++ { | |||
| for j := i + 1; j < len(erasePos); j++ { | |||
| if erasePos[j] < erasePos[i] { | |||
| erasePos[i], erasePos[j] = erasePos[j], erasePos[i] | |||
| } | |||
| } | |||
| } | |||
| p, ok := RSDecode(recv, erasePos) | |||
| if ok { | |||
| if KeyMatchesPayload(key, p) { | |||
| t.Logf("Decoded with %d erasures: MATCH ✓", nErase) | |||
| decoded = true | |||
| break | |||
| } | |||
| } | |||
| } | |||
| if !decoded { | |||
| t.Errorf("RS decode FAILED") | |||
| } | |||
| } | |||
| func rmsF64(s []float64) float64 { | |||
| var acc float64 | |||
| for _, v := range s { | |||
| acc += v * v | |||
| } | |||
| return math.Sqrt(acc / float64(len(s))) | |||
| } | |||
| @@ -0,0 +1,413 @@ | |||
| // Package watermark implements STFT-domain spread-spectrum audio watermarking | |||
| // based on Kirovski & Malvar (IEEE TSP 2003). | |||
| // | |||
| // Architecture: | |||
| // - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking | |||
| // - Block repetition coding (R=5 time frames) — automatic drift tolerance | |||
| // - Cepstrum filtering at detection — 6 dB carrier noise reduction | |||
| // - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload | |||
| // - Multi-test sync — scan R frame offsets to find alignment | |||
| // | |||
| // Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates | |||
| // from composite rate (÷19), processes STFT, and upsamples back. The decoder | |||
| // decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.). | |||
| // Same STFT parameters → bins align perfectly → no rate mismatch. | |||
| package watermark | |||
| import ( | |||
| "crypto/sha256" | |||
| "math" | |||
| "math/cmplx" | |||
| "github.com/jan/fm-rds-tx/internal/dsp" | |||
| ) | |||
| // STFT watermark constants. | |||
| const ( | |||
| WMRate = 12000 // watermark processing sample rate (Hz) | |||
| FFTSize = 512 // STFT frame size (samples at WMRate) | |||
| FFTHop = 256 // 50% overlap | |||
| BinLow = 9 // ~211 Hz at WMRate/FFTSize | |||
| BinHigh = 213 // ~4992 Hz at WMRate/FFTSize | |||
| NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame | |||
| TimeRep = 5 // block repetition factor (±2 frame drift tolerance) | |||
| GroupsPerBit = 10 // time groups per data bit | |||
| WMLevelDB = 1.5 // embedding level (dB) | |||
| TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280 | |||
| FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400 | |||
| SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400 | |||
| // Duration at WMRate: 1638400 / 12000 = 136.5 seconds | |||
| ) | |||
| // STFTEmbedder processes audio blocks and adds the STFT-domain watermark. | |||
| // It works at WMRate (12 kHz). The caller must decimate input to WMRate | |||
| // and upsample output back to the desired rate. | |||
| type STFTEmbedder struct { | |||
| // PN chip matrix: pnChips[group][bin] ∈ {-1, +1} | |||
| // group ∈ [0, TotalGroups), bin ∈ [0, NumBins) | |||
| pnChips [TotalGroups][NumBins]int8 | |||
| // Bit assignment: which data bit owns each group (PCC permutation) | |||
| groupToBit [TotalGroups]int | |||
| // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1 | |||
| symbols [payloadBits]int8 | |||
| // STFT state | |||
| window [FFTSize]float64 | |||
| inBuf [FFTSize]float64 // analysis window buffer | |||
| outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer | |||
| inPos int // samples written to inBuf | |||
| outPos int // samples read from outBuf | |||
| frameIdx int // STFT frame counter (wraps at FramesPerWM) | |||
| primed bool // true after first full frame | |||
| // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.189 for 1.5 dB | |||
| levelLinear float64 | |||
| } | |||
| // NewSTFTEmbedder creates an embedder for the given license key. | |||
| func NewSTFTEmbedder(key string) *STFTEmbedder { | |||
| e := &STFTEmbedder{} | |||
| // Compute RS-encoded payload | |||
| var data [rsDataBytes]byte | |||
| if key != "" { | |||
| h := sha256.Sum256([]byte(key)) | |||
| copy(data[:], h[:rsDataBytes]) | |||
| } | |||
| codeword := rsEncode(data) | |||
| // BPSK symbols: bit 0 → +1, bit 1 → -1 | |||
| for i := 0; i < payloadBits; i++ { | |||
| if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 { | |||
| e.symbols[i] = -1 | |||
| } else { | |||
| e.symbols[i] = 1 | |||
| } | |||
| } | |||
| // Generate PN chips from key-seeded PRNG | |||
| seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) | |||
| prng := newPRNG(seed[:]) | |||
| for g := 0; g < TotalGroups; g++ { | |||
| for b := 0; b < NumBins; b++ { | |||
| if prng.next()&1 == 0 { | |||
| e.pnChips[g][b] = 1 | |||
| } else { | |||
| e.pnChips[g][b] = -1 | |||
| } | |||
| } | |||
| } | |||
| // PCC permutation: assign groups to bits (interleaved + permuted) | |||
| // Simple interleaving first, then Fisher-Yates shuffle | |||
| for g := 0; g < TotalGroups; g++ { | |||
| e.groupToBit[g] = g % payloadBits | |||
| } | |||
| // Permute within each bit's groups using key-seeded PRNG | |||
| permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) | |||
| permRNG := newPRNG(permSeed[:]) | |||
| for i := TotalGroups - 1; i > 0; i-- { | |||
| j := permRNG.next() % uint32(i+1) | |||
| e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i] | |||
| } | |||
| // Hann window | |||
| dsp.HannWindow(e.window[:]) | |||
| // Embedding level | |||
| e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change | |||
| return e | |||
| } | |||
| // ProcessBlock takes mono audio at WMRate and returns watermarked audio. | |||
| // The input and output lengths are the same. Internally buffers for STFT | |||
| // overlap-add processing. Call with chunks of any size. | |||
| func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 { | |||
| out := make([]float64, len(in)) | |||
| for i, s := range in { | |||
| // Feed sample into STFT input buffer | |||
| e.inBuf[e.inPos] = s | |||
| e.inPos++ | |||
| if e.inPos == FFTSize { | |||
| // Full frame: process STFT | |||
| e.processFrame() | |||
| e.inPos = FFTHop // shift: keep last hop samples for next frame overlap | |||
| copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize]) | |||
| } | |||
| // Read from overlap-add output buffer | |||
| if e.primed { | |||
| out[i] = e.outBuf[e.outPos] | |||
| e.outPos++ | |||
| if e.outPos >= FFTHop { | |||
| e.outPos = 0 | |||
| // Shift output buffer: move overlap region to start | |||
| copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop]) | |||
| // Zero the new region | |||
| for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ { | |||
| if j < len(e.outBuf) { | |||
| e.outBuf[j] = 0 | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| out[i] = s // pass-through until first frame is processed | |||
| } | |||
| } | |||
| return out | |||
| } | |||
| // processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add. | |||
| func (e *STFTEmbedder) processFrame() { | |||
| // Determine which group this frame belongs to | |||
| wmFrame := e.frameIdx % FramesPerWM | |||
| groupIdx := wmFrame / TimeRep | |||
| repIdx := wmFrame % TimeRep | |||
| centerRep := TimeRep / 2 // only center repetition carries the watermark for detection | |||
| // Apply window and convert to complex | |||
| var buf [FFTSize]complex128 | |||
| for i := 0; i < FFTSize; i++ { | |||
| buf[i] = complex(e.inBuf[i]*e.window[i], 0) | |||
| } | |||
| // Forward FFT | |||
| dsp.FFT(buf[:]) | |||
| // Modify magnitudes in the watermark sub-band | |||
| // Only modify if this is within a valid group AND at the center repetition | |||
| // (we embed in ALL repetitions so the watermark energy is present everywhere, | |||
| // but the PN pattern is the same for all R frames in a group) | |||
| if groupIdx < TotalGroups { | |||
| bitIdx := e.groupToBit[groupIdx] | |||
| dataSign := float64(e.symbols[bitIdx]) | |||
| _ = repIdx | |||
| _ = centerRep | |||
| for b := 0; b < NumBins; b++ { | |||
| bin := BinLow + b | |||
| chip := float64(e.pnChips[groupIdx][b]) | |||
| // Modify magnitude: |Y| = |X| × (1 + level × chip × data) | |||
| // Phase preserved | |||
| mag := cmplx.Abs(buf[bin]) | |||
| if mag < 1e-10 { | |||
| continue // skip near-silence bins to avoid division by zero | |||
| } | |||
| phase := cmplx.Phase(buf[bin]) | |||
| newMag := mag * (1.0 + e.levelLinear*chip*dataSign) | |||
| buf[bin] = cmplx.Rect(newMag, phase) | |||
| // Mirror for negative frequencies (conjugate symmetry) | |||
| if bin > 0 && bin < FFTSize/2 { | |||
| buf[FFTSize-bin] = cmplx.Conj(buf[bin]) | |||
| } | |||
| } | |||
| } | |||
| // Inverse FFT | |||
| dsp.IFFT(buf[:]) | |||
| // Overlap-add to output buffer | |||
| for i := 0; i < FFTSize; i++ { | |||
| e.outBuf[i] += real(buf[i]) | |||
| } | |||
| if !e.primed { | |||
| e.primed = true | |||
| e.outPos = 0 | |||
| } | |||
| e.frameIdx++ | |||
| } | |||
| // STFTDetector extracts watermark bits from an audio recording. | |||
| type STFTDetector struct { | |||
| pnChips [TotalGroups][NumBins]int8 | |||
| groupToBit [TotalGroups]int | |||
| } | |||
| // NewSTFTDetector creates a detector matching the given key's PN sequence. | |||
| func NewSTFTDetector(key string) *STFTDetector { | |||
| d := &STFTDetector{} | |||
| // Same PN generation as embedder | |||
| seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) | |||
| prng := newPRNG(seed[:]) | |||
| for g := 0; g < TotalGroups; g++ { | |||
| for b := 0; b < NumBins; b++ { | |||
| if prng.next()&1 == 0 { | |||
| d.pnChips[g][b] = 1 | |||
| } else { | |||
| d.pnChips[g][b] = -1 | |||
| } | |||
| } | |||
| } | |||
| // Same permutation | |||
| for g := 0; g < TotalGroups; g++ { | |||
| d.groupToBit[g] = g % payloadBits | |||
| } | |||
| permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) | |||
| permRNG := newPRNG(permSeed[:]) | |||
| for i := TotalGroups - 1; i > 0; i-- { | |||
| j := permRNG.next() % uint32(i+1) | |||
| d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i] | |||
| } | |||
| return d | |||
| } | |||
| // Detect processes audio at WMRate and returns soft bit decisions. | |||
| // The audio should already be decimated/resampled to WMRate and LPF'd. | |||
| // | |||
| // Multi-test: tries TimeRep frame offsets (the block repetition candidates). | |||
| // Cepstrum filtering is applied to reduce carrier noise. | |||
| // | |||
| // Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence), | |||
| // and the frame offset that gave the best detection metric. | |||
| func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) { | |||
| // Compute all STFT frames | |||
| var window [FFTSize]float64 | |||
| dsp.HannWindow(window[:]) | |||
| nFrames := (len(audio) - FFTSize) / FFTHop | |||
| if nFrames < FramesPerWM { | |||
| // Not enough data for a full watermark cycle — use what we have | |||
| } | |||
| // Compute STFT magnitudes (dB) for all frames | |||
| type stftFrame struct { | |||
| magDB [FFTSize / 2]float64 | |||
| } | |||
| frames := make([]stftFrame, nFrames) | |||
| for f := 0; f < nFrames; f++ { | |||
| offset := f * FFTHop | |||
| var buf [FFTSize]complex128 | |||
| for i := 0; i < FFTSize; i++ { | |||
| if offset+i < len(audio) { | |||
| buf[i] = complex(audio[offset+i]*window[i], 0) | |||
| } | |||
| } | |||
| dsp.FFT(buf[:]) | |||
| for bin := 0; bin < FFTSize/2; bin++ { | |||
| mag := cmplx.Abs(buf[bin]) | |||
| if mag < 1e-12 { | |||
| mag = 1e-12 | |||
| } | |||
| frames[f].magDB[bin] = 20 * math.Log10(mag) | |||
| } | |||
| // Cepstrum filtering: remove spectral envelope | |||
| // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT | |||
| cepstrumFilter(frames[f].magDB[:], 8) | |||
| } | |||
| // Multi-test: try each of TimeRep frame offsets within the repetition block | |||
| bestMetric := -1.0 | |||
| bestOffset = 0 | |||
| for startOffset := 0; startOffset < TimeRep; startOffset++ { | |||
| var testCorrs [payloadBits]float64 | |||
| // For each group, use the CENTER frame of the repetition block | |||
| for g := 0; g < TotalGroups; g++ { | |||
| bitIdx := d.groupToBit[g] | |||
| frameInWM := g*TimeRep + startOffset + TimeRep/2 | |||
| if frameInWM >= nFrames { | |||
| continue | |||
| } | |||
| // Correlate this frame's magnitudes with the PN chips | |||
| var corr float64 | |||
| for b := 0; b < NumBins; b++ { | |||
| bin := BinLow + b | |||
| corr += frames[frameInWM].magDB[bin] * float64(d.pnChips[g][b]) | |||
| } | |||
| testCorrs[bitIdx] += corr | |||
| } | |||
| // Detection metric: sum of squared partial correlations (chi-squared) | |||
| // From paper equation (10): Q = Σ (corr_m)² | |||
| var metric float64 | |||
| for _, c := range testCorrs { | |||
| metric += c * c | |||
| } | |||
| if metric > bestMetric { | |||
| bestMetric = metric | |||
| bestOffset = startOffset | |||
| corrs = testCorrs | |||
| } | |||
| } | |||
| return corrs, bestOffset | |||
| } | |||
| // cepstrumFilter removes the spectral envelope from dB magnitudes. | |||
| // It zeros the first nCeps DCT coefficients (the smooth spectral shape). | |||
| // This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB. | |||
| func cepstrumFilter(magDB []float64, nCeps int) { | |||
| n := len(magDB) | |||
| if n < nCeps*2 { | |||
| return | |||
| } | |||
| // DCT-II (simplified, not optimized) | |||
| ceps := make([]float64, n) | |||
| for k := 0; k < n; k++ { | |||
| var sum float64 | |||
| for i := 0; i < n; i++ { | |||
| sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||
| } | |||
| ceps[k] = sum | |||
| } | |||
| // Zero low-order cepstral coefficients (spectral envelope) | |||
| for k := 0; k < nCeps; k++ { | |||
| ceps[k] = 0 | |||
| } | |||
| // IDCT (inverse DCT-II) | |||
| for i := 0; i < n; i++ { | |||
| var sum float64 | |||
| for k := 0; k < n; k++ { | |||
| w := 1.0 | |||
| if k == 0 { | |||
| w = 0.5 | |||
| } | |||
| sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||
| } | |||
| magDB[i] = sum * 2.0 / float64(n) | |||
| } | |||
| } | |||
| // Simple xorshift32 PRNG for deterministic chip generation. | |||
| type simplePRNG struct { | |||
| state uint32 | |||
| } | |||
| func newPRNG(seed []byte) *simplePRNG { | |||
| var s uint32 | |||
| for i, b := range seed { | |||
| s ^= uint32(b) << (uint(i%4) * 8) | |||
| } | |||
| if s == 0 { | |||
| s = 1 | |||
| } | |||
| return &simplePRNG{state: s} | |||
| } | |||
| func (p *simplePRNG) next() uint32 { | |||
| p.state ^= p.state << 13 | |||
| p.state ^= p.state >> 17 | |||
| p.state ^= p.state << 5 | |||
| return p.state | |||
| } | |||
| @@ -395,7 +395,17 @@ func RSEncode(data [rsDataBytes]byte) [rsTotalBytes]byte { | |||
| return rsEncode(data) | |||
| } | |||
| // Constants exported for the recovery tool. | |||
| // PNChipAt returns the PN chip value at group g, bin b. | |||
| func (d *STFTDetector) PNChipAt(g, b int) int8 { | |||
| return d.pnChips[g][b] | |||
| } | |||
| // GroupBit returns the data bit index for group g. | |||
| func (d *STFTDetector) GroupBit(g int) int { | |||
| return d.groupToBit[g] | |||
| } | |||
| // Constants exported for the recovery tool and legacy tools. | |||
| const ( | |||
| PnChips = pnChips | |||
| PayloadBits = payloadBits | |||