Add an STFT watermark path inspired by Kirovski & Malvar, including the frequency-domain embedder/decoder, FFT support, and round-trip coverage. Wire the generator and CLI tools to use the new analysis/synthesis flow for watermark experiments on the watermark-rework branch.

main
| @@ -186,7 +186,8 @@ func runTXMode(cfg cfgpkg.Config, configPath string, driver platform.SoapyDriver | |||||
| log.Printf("license: no valid key — evaluation jingle every %d minutes", license.JingleIntervalMinutes) | log.Printf("license: no valid key — evaluation jingle every %d minutes", license.JingleIntervalMinutes) | ||||
| } | } | ||||
| engine.SetLicenseState(licState, licenseKey) | engine.SetLicenseState(licState, licenseKey) | ||||
| log.Printf("watermark: embedding key fingerprint — Level=%.3f ChipRate=%d", watermark.Level, watermark.ChipRate) | |||||
| log.Printf("watermark: STFT-domain embedding — WMRate=%d FFTSize=%d Level=%.1fdB", | |||||
| watermark.WMRate, watermark.FFTSize, watermark.WMLevelDB) | |||||
| cfg = applyLegacyAudioFlags(cfg, audioStdin, audioRate, audioHTTP) | cfg = applyLegacyAudioFlags(cfg, audioStdin, audioRate, audioHTTP) | ||||
| var streamSrc *audio.StreamSource | var streamSrc *audio.StreamSource | ||||
| @@ -1,16 +1,23 @@ | |||||
| // cmd/wmdecode — fm-rds-tx spread-spectrum watermark recovery tool. | |||||
| // cmd/wmdecode — STFT-domain spread-spectrum watermark decoder. | |||||
| // | // | ||||
| // Approach: downsample to chip rate (12 kHz), correlate at 1 sample/chip. | |||||
| // No fractional stepping, no clock drift issues. FFT-free phase search. | |||||
| // Decodes watermark from FM broadcast recordings following | |||||
| // Kirovski & Malvar (IEEE TSP 2003) architecture. | |||||
| // | |||||
| // Usage: | |||||
| // | |||||
| // wmdecode <file.wav> [key ...] | |||||
| package main | package main | ||||
| import ( | import ( | ||||
| "encoding/binary" | "encoding/binary" | ||||
| "fmt" | "fmt" | ||||
| "math" | "math" | ||||
| "math/cmplx" | |||||
| "os" | "os" | ||||
| "sort" | "sort" | ||||
| "time" | |||||
| "github.com/jan/fm-rds-tx/internal/dsp" | |||||
| "github.com/jan/fm-rds-tx/internal/watermark" | "github.com/jan/fm-rds-tx/internal/watermark" | ||||
| ) | ) | ||||
| @@ -20,6 +27,8 @@ func main() { | |||||
| os.Exit(1) | os.Exit(1) | ||||
| } | } | ||||
| t0 := time.Now() | |||||
| samples, recRate, err := readMonoWAV(os.Args[1]) | samples, recRate, err := readMonoWAV(os.Args[1]) | ||||
| if err != nil { | if err != nil { | ||||
| fmt.Fprintf(os.Stderr, "read WAV: %v\n", err) | fmt.Fprintf(os.Stderr, "read WAV: %v\n", err) | ||||
| @@ -29,295 +38,235 @@ func main() { | |||||
| fmt.Printf("WAV: %d samples @ %.0f Hz = %.2fs, RMS %.1f dBFS\n", | fmt.Printf("WAV: %d samples @ %.0f Hz = %.2fs, RMS %.1f dBFS\n", | ||||
| len(samples), recRate, float64(len(samples))/recRate, 20*math.Log10(rms+1e-9)) | len(samples), recRate, float64(len(samples))/recRate, 20*math.Log10(rms+1e-9)) | ||||
| chipRate := float64(watermark.ChipRate) // 12000 | |||||
| pnChips := watermark.PnChips // 2048 | |||||
| // Step 1: LPF at ChipRate/2 then downsample to ChipRate. | |||||
| // At chip rate: 1 sample = 1 chip. No fractional stepping. | |||||
| decimFactor := int(recRate / chipRate) // 192000/12000 = 16 | |||||
| // Step 1: Decimate to WMRate (12 kHz) | |||||
| wmRate := float64(watermark.WMRate) | |||||
| decimFactor := int(recRate / wmRate) | |||||
| if decimFactor < 1 { | if decimFactor < 1 { | ||||
| decimFactor = 1 | decimFactor = 1 | ||||
| } | } | ||||
| actualChipRate := recRate / float64(decimFactor) // should be exactly chipRate | |||||
| fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualChipRate) | |||||
| actualRate := recRate / float64(decimFactor) | |||||
| fmt.Printf("Downsample: %d:1 (%.0f Hz → %.0f Hz)\n", decimFactor, recRate, actualRate) | |||||
| // Anti-alias LPF (8th-order IIR at 5.5 kHz) | |||||
| lpfCoeffs := designLPF8(5500, recRate) | lpfCoeffs := designLPF8(5500, recRate) | ||||
| filtered := applyIIR(samples, lpfCoeffs) | filtered := applyIIR(samples, lpfCoeffs) | ||||
| // Decimate | |||||
| nDown := len(filtered) / decimFactor | nDown := len(filtered) / decimFactor | ||||
| down := make([]float64, nDown) | down := make([]float64, nDown) | ||||
| for i := 0; i < nDown; i++ { | for i := 0; i < nDown; i++ { | ||||
| down[i] = filtered[i*decimFactor] | down[i] = filtered[i*decimFactor] | ||||
| } | } | ||||
| rmsDown := rmsLevel(down) | |||||
| fmt.Printf("Downsampled: %d samples @ %.0f Hz, RMS %.1f dBFS\n", | |||||
| nDown, actualChipRate, 20*math.Log10(rmsDown+1e-9)) | |||||
| // Step 2: Phase search — slide 1-bit PN template across [0, pnChips). | |||||
| // At chip rate this is a simple 2048-element dot product per offset. | |||||
| // Test all 2048 phases, accumulate energy over many bits. | |||||
| fmt.Printf("Phase search: %d candidates\n", pnChips) | |||||
| nSearchBits := nDown / pnChips | |||||
| if nSearchBits > 500 { | |||||
| nSearchBits = 500 | |||||
| fmt.Printf("Downsampled: %d samples, %.1fs\n", nDown, float64(nDown)/wmRate) | |||||
| // Step 2: Compute ALL STFT frames with cepstrum filtering | |||||
| fftSize := watermark.FFTSize | |||||
| hop := watermark.FFTHop | |||||
| nFrames := (nDown - fftSize) / hop | |||||
| if nFrames <= 0 { | |||||
| fmt.Fprintln(os.Stderr, "Recording too short") | |||||
| os.Exit(1) | |||||
| } | } | ||||
| bestPhase := 0 | |||||
| bestEnergy := 0.0 | |||||
| for phase := 0; phase < pnChips; phase++ { | |||||
| var energy float64 | |||||
| for b := 0; b < nSearchBits; b++ { | |||||
| start := phase + b*pnChips | |||||
| if start+pnChips > nDown { | |||||
| break | |||||
| } | |||||
| c := corrChipRate(down, start, pnChips) | |||||
| energy += c * c | |||||
| var window [watermark.FFTSize]float64 | |||||
| dsp.HannWindow(window[:]) | |||||
| fmt.Printf("STFT: %d frames (%d-point, hop=%d)\n", nFrames, fftSize, hop) | |||||
| type stftMag [watermark.FFTSize / 2]float64 | |||||
| frameMags := make([]stftMag, nFrames) | |||||
| for f := 0; f < nFrames; f++ { | |||||
| offset := f * hop | |||||
| var buf [watermark.FFTSize]complex128 | |||||
| for i := 0; i < fftSize; i++ { | |||||
| buf[i] = complex(down[offset+i]*window[i], 0) | |||||
| } | } | ||||
| if energy > bestEnergy { | |||||
| bestEnergy = energy | |||||
| bestPhase = phase | |||||
| dsp.FFT(buf[:]) | |||||
| for bin := 0; bin < fftSize/2; bin++ { | |||||
| mag := cmplx.Abs(buf[bin]) | |||||
| if mag < 1e-12 { | |||||
| mag = 1e-12 | |||||
| } | |||||
| frameMags[f][bin] = 20 * math.Log10(mag) | |||||
| } | } | ||||
| cepstrumFilter(frameMags[f][:], 8) | |||||
| } | } | ||||
| fmt.Printf("Phase: offset=%d (%.2fms), energy=%.0f\n", | |||||
| bestPhase, float64(bestPhase)/actualChipRate*1000, bestEnergy) | |||||
| // Step 3: Per-bit correlation with ±4 sample sliding (handles residual drift). | |||||
| // At chip rate, ±4 samples = ±0.33ms — covers ~±40 ppm over 22s frame. | |||||
| nCompleteBits := (nDown - bestPhase) / pnChips | |||||
| nFrames := nCompleteBits / watermark.PayloadBits | |||||
| if nFrames < 1 { | |||||
| nFrames = 1 | |||||
| // Step 3: For each key, search cycle offset + rep offset | |||||
| keys := os.Args[2:] | |||||
| if len(keys) == 0 { | |||||
| fmt.Println("No keys supplied.") | |||||
| os.Exit(1) | |||||
| } | } | ||||
| fmt.Printf("Sync: %d complete bits, %d frames\n", nCompleteBits, nFrames) | |||||
| const slideWindow = 200 // ±200 chips — handles phase errors + drift | |||||
| corrs := make([]float64, watermark.PayloadBits) | |||||
| for i := 0; i < watermark.PayloadBits; i++ { | |||||
| // For each offset, sum correlation across ALL frames first. | |||||
| // Signal adds coherently (×nFrames), noise adds as √nFrames. | |||||
| // Then pick the offset with maximum |sum|. | |||||
| bestAbs := 0.0 | |||||
| bestVal := 0.0 | |||||
| for off := -slideWindow; off <= slideWindow; off++ { | |||||
| var sum float64 | |||||
| for f := 0; f < nFrames; f++ { | |||||
| nominal := bestPhase + (f*watermark.PayloadBits+i)*pnChips + off | |||||
| if nominal < 0 || nominal+pnChips > nDown { | |||||
| continue | |||||
| for _, key := range keys { | |||||
| fmt.Printf("\nKey: %q\n", key) | |||||
| det := watermark.NewSTFTDetector(key) | |||||
| totalGroups := watermark.TotalGroups | |||||
| timeRep := watermark.TimeRep | |||||
| framesPerWM := watermark.FramesPerWM | |||||
| numBins := watermark.NumBins | |||||
| binLow := watermark.BinLow | |||||
| centerRep := timeRep / 2 | |||||
| bestMetric := -1.0 | |||||
| var bestCorrs [watermark.PayloadBits]float64 | |||||
| bestCycleOff := 0 | |||||
| bestRepOff := 0 | |||||
| nCandidates := 0 | |||||
| for cycleOff := 0; cycleOff < framesPerWM; cycleOff += timeRep { | |||||
| for repOff := 0; repOff < timeRep; repOff++ { | |||||
| var testCorrs [watermark.PayloadBits]float64 | |||||
| for f := 0; f < nFrames; f++ { | |||||
| wmFrame := ((f - cycleOff - repOff) % framesPerWM + framesPerWM) % framesPerWM | |||||
| if wmFrame%timeRep != centerRep { | |||||
| continue | |||||
| } | |||||
| g := wmFrame / timeRep | |||||
| if g >= totalGroups { | |||||
| continue | |||||
| } | |||||
| var corr float64 | |||||
| for b := 0; b < numBins; b++ { | |||||
| corr += frameMags[f][binLow+b] * float64(det.PNChipAt(g, b)) | |||||
| } | |||||
| testCorrs[det.GroupBit(g)] += corr | |||||
| } | } | ||||
| sum += corrChipRate(down, nominal, pnChips) | |||||
| } | |||||
| if math.Abs(sum) > bestAbs { | |||||
| bestAbs = math.Abs(sum) | |||||
| bestVal = sum | |||||
| var metric float64 | |||||
| for _, c := range testCorrs { | |||||
| metric += c * c | |||||
| } | |||||
| if metric > bestMetric { | |||||
| bestMetric = metric | |||||
| bestCorrs = testCorrs | |||||
| bestCycleOff = cycleOff | |||||
| bestRepOff = repOff | |||||
| } | |||||
| nCandidates++ | |||||
| } | } | ||||
| } | } | ||||
| corrs[i] = bestVal | |||||
| } | |||||
| // Diagnostics | |||||
| var corrMin, corrMax, sumAbs float64 | |||||
| var nStrong, nDead int | |||||
| for i, c := range corrs { | |||||
| ac := math.Abs(c) | |||||
| sumAbs += ac | |||||
| if i == 0 || ac < corrMin { | |||||
| corrMin = ac | |||||
| } | |||||
| if ac > corrMax { | |||||
| corrMax = ac | |||||
| } | |||||
| if ac > sumAbs/float64(i+1)*2 { | |||||
| nStrong++ | |||||
| } | |||||
| if ac < 3 { | |||||
| nDead++ | |||||
| } | |||||
| } | |||||
| avgCorr := sumAbs / 128 | |||||
| nStrong = 0 | |||||
| for _, c := range corrs { | |||||
| if math.Abs(c) > avgCorr*0.5 { | |||||
| nStrong++ | |||||
| } | |||||
| } | |||||
| fmt.Printf("Corrs: min|c|=%.1f, max|c|=%.1f, avg|c|=%.1f (strong=%d, dead=%d)\n", | |||||
| corrMin, corrMax, avgCorr, nStrong, nDead) | |||||
| fmt.Printf("Searched %d candidates in %v\n", nCandidates, time.Since(t0).Round(time.Millisecond)) | |||||
| fmt.Printf("Best: cycleOff=%d, repOff=%d, metric=%.0f\n", bestCycleOff, bestRepOff, bestMetric) | |||||
| // Step 4: Frame sync — 128 rotations × byte-level erasure + bit-flipping. | |||||
| var sumAbs float64 | |||||
| for _, c := range bestCorrs { | |||||
| sumAbs += math.Abs(c) | |||||
| } | |||||
| fmt.Printf("Corrs: avg|c|=%.1f\n", sumAbs/128) | |||||
| // Verbose: compute BER at each rotation against the known key (if supplied) | |||||
| knownPayload := [watermark.RsDataBytes]byte{} | |||||
| hasKnown := false | |||||
| if len(os.Args) >= 3 { | |||||
| hasKnown = true | |||||
| knownPayload = watermark.KeyToPayload(os.Args[2]) | |||||
| // BER diagnostic against known key | |||||
| knownPayload := watermark.KeyToPayload(key) | |||||
| knownCW := watermark.RSEncode(knownPayload) | knownCW := watermark.RSEncode(knownPayload) | ||||
| var knownBits [watermark.PayloadBits]int | var knownBits [watermark.PayloadBits]int | ||||
| for i := 0; i < watermark.PayloadBits; i++ { | for i := 0; i < watermark.PayloadBits; i++ { | ||||
| knownBits[i] = int((knownCW[i/8] >> uint(7-(i%8))) & 1) | knownBits[i] = int((knownCW[i/8] >> uint(7-(i%8))) & 1) | ||||
| } | } | ||||
| fmt.Println("\nRotation sweep (top 10 by BER):") | |||||
| type rotBER struct{ rot, ber int } | |||||
| var results []rotBER | |||||
| for rot := 0; rot < watermark.PayloadBits; rot++ { | |||||
| nerr := 0 | |||||
| for i := 0; i < watermark.PayloadBits; i++ { | |||||
| srcBit := (i + rot) % watermark.PayloadBits | |||||
| hard := 0 | |||||
| if corrs[srcBit] < 0 { | |||||
| hard = 1 | |||||
| } | |||||
| if hard != knownBits[i] { | |||||
| nerr++ | |||||
| } | |||||
| nerr := 0 | |||||
| for i := 0; i < watermark.PayloadBits; i++ { | |||||
| hard := 0 | |||||
| if bestCorrs[i] < 0 { | |||||
| hard = 1 | |||||
| } | } | ||||
| results = append(results, rotBER{rot, nerr}) | |||||
| } | |||||
| sort.Slice(results, func(a, b int) bool { return results[a].ber < results[b].ber }) | |||||
| for j := 0; j < 10 && j < len(results); j++ { | |||||
| r := results[j] | |||||
| fmt.Printf(" rot=%3d: BER=%d/128 (%4.1f%%)\n", r.rot, r.ber, 100*float64(r.ber)/128) | |||||
| } | |||||
| // Show byte error pattern at best rotation | |||||
| bestRot := results[0].rot | |||||
| fmt.Printf("\nByte errors at rot=%d:\n ", bestRot) | |||||
| for b := 0; b < watermark.RsTotalBytes; b++ { | |||||
| nerr := 0 | |||||
| for bit := 0; bit < 8; bit++ { | |||||
| srcBit := (b*8 + bit + bestRot) % watermark.PayloadBits | |||||
| hard := 0 | |||||
| if corrs[srcBit] < 0 { | |||||
| hard = 1 | |||||
| } | |||||
| if hard != knownBits[b*8+bit] { | |||||
| nerr++ | |||||
| } | |||||
| if hard != knownBits[i] { | |||||
| nerr++ | |||||
| } | } | ||||
| fmt.Printf("B%d:%d ", b, nerr) | |||||
| } | } | ||||
| fmt.Println() | |||||
| fmt.Printf("BER: %d/128 (%.1f%%)\n", nerr, 100*float64(nerr)/128) | |||||
| // Show received vs expected codeword at best rotation | |||||
| // Show recv vs expected | |||||
| var recv [watermark.RsTotalBytes]byte | var recv [watermark.RsTotalBytes]byte | ||||
| confs := make([]float64, watermark.PayloadBits) | |||||
| for i := 0; i < watermark.PayloadBits; i++ { | for i := 0; i < watermark.PayloadBits; i++ { | ||||
| srcBit := (i + bestRot) % watermark.PayloadBits | |||||
| if corrs[srcBit] < 0 { | |||||
| confs[i] = math.Abs(bestCorrs[i]) | |||||
| if bestCorrs[i] < 0 { | |||||
| recv[i/8] |= 1 << uint(7-(i%8)) | recv[i/8] |= 1 << uint(7-(i%8)) | ||||
| } | } | ||||
| } | } | ||||
| fmt.Printf(" recv: %x\n", recv) | |||||
| fmt.Printf(" want: %x\n", knownCW) | |||||
| } | |||||
| _ = hasKnown | |||||
| _ = knownPayload | |||||
| type decodeResult struct { | |||||
| rotation int | |||||
| payload [watermark.RsDataBytes]byte | |||||
| flips int | |||||
| } | |||||
| var best *decodeResult | |||||
| fmt.Printf("recv: %x\nwant: %x\n", recv, knownCW) | |||||
| for rot := 0; rot < watermark.PayloadBits; rot++ { | |||||
| var recv [watermark.RsTotalBytes]byte | |||||
| confs := make([]float64, watermark.PayloadBits) | |||||
| for i := 0; i < watermark.PayloadBits; i++ { | |||||
| srcBit := (i + rot) % watermark.PayloadBits | |||||
| c := corrs[srcBit] | |||||
| confs[i] = math.Abs(c) | |||||
| if c < 0 { | |||||
| recv[i/8] |= 1 << uint(7-(i%8)) | |||||
| // Confidence-based erasure (MIN bit confidence per byte) | |||||
| type bc struct{ idx int; conf float64 } | |||||
| byteConfs := make([]bc, watermark.RsTotalBytes) | |||||
| for b := 0; b < watermark.RsTotalBytes; b++ { | |||||
| minC := confs[b*8] | |||||
| for bit := 1; bit < 8; bit++ { | |||||
| if confs[b*8+bit] < minC { | |||||
| minC = confs[b*8+bit] | |||||
| } | |||||
| } | } | ||||
| byteConfs[b] = bc{b, minC} | |||||
| } | } | ||||
| sort.Slice(byteConfs, func(a, b int) bool { return byteConfs[a].conf < byteConfs[b].conf }) | |||||
| // Brute-force RS decode: try ALL possible erasure subsets of size 1..8. | |||||
| // With sliding correlation, confidence values are unreliable for erasure | |||||
| // selection (all bits look "strong"). Instead, let RS tell us which | |||||
| // subsets produce a valid codeword. This is fast: sum(C(16,k), k=1..8) | |||||
| // = ~39k RS decodes per rotation, ~5M total. Each takes <1µs. | |||||
| decoded := false | decoded := false | ||||
| for nErase := 1; nErase <= watermark.RsCheckBytes; nErase++ { | |||||
| if decoded { break } | |||||
| indices := make([]int, nErase) | |||||
| for i := range indices { indices[i] = i } | |||||
| for { | |||||
| erasePos := make([]int, nErase) | |||||
| copy(erasePos, indices) | |||||
| payload, ok := watermark.RSDecode(recv, erasePos) | |||||
| if ok { | |||||
| if best == nil { | |||||
| best = &decodeResult{rot, payload, nErase} | |||||
| } | |||||
| for nErase := 0; nErase <= watermark.RsCheckBytes; nErase++ { | |||||
| if nErase == 0 { | |||||
| p, ok := watermark.RSDecode(recv, nil) | |||||
| if ok && watermark.KeyMatchesPayload(key, p) { | |||||
| fmt.Printf(" ✓ MATCH (0 erasures), payload=%x\n", p) | |||||
| decoded = true | decoded = true | ||||
| break | break | ||||
| } | } | ||||
| // Next combination | |||||
| i := nErase - 1 | |||||
| for i >= 0 && indices[i] == watermark.RsTotalBytes-nErase+i { | |||||
| i-- | |||||
| } | |||||
| if i < 0 { break } | |||||
| indices[i]++ | |||||
| for j := i + 1; j < nErase; j++ { | |||||
| indices[j] = indices[j-1] + 1 | |||||
| } | |||||
| continue | |||||
| } | |||||
| erasePos := make([]int, nErase) | |||||
| for i := 0; i < nErase; i++ { | |||||
| erasePos[i] = byteConfs[i].idx | |||||
| } | |||||
| sort.Ints(erasePos) | |||||
| p, ok := watermark.RSDecode(recv, erasePos) | |||||
| if ok && watermark.KeyMatchesPayload(key, p) { | |||||
| fmt.Printf(" ✓ MATCH (%d erasures), payload=%x\n", nErase, p) | |||||
| decoded = true | |||||
| break | |||||
| } | } | ||||
| } | } | ||||
| if decoded && best != nil && best.flips <= 4 { | |||||
| break // clean decode with few erasures — stop early | |||||
| } | |||||
| } | |||||
| if best == nil { | |||||
| fmt.Println("RS decode: FAILED — no valid frame alignment found.") | |||||
| fmt.Println("Watermark may not be present, or recording is too noisy/short.") | |||||
| os.Exit(1) | |||||
| if !decoded { | |||||
| fmt.Println(" ✗ NOT FOUND") | |||||
| } | |||||
| } | } | ||||
| fmt.Printf("\nFrame sync: rotation=%d, %d byte erasures\n", best.rotation, best.flips) | |||||
| fmt.Printf("Payload: %x\n\n", best.payload) | |||||
| fmt.Printf("\nDone in %v\n", time.Since(t0).Round(time.Millisecond)) | |||||
| } | |||||
| keys := os.Args[2:] | |||||
| if len(keys) == 0 { | |||||
| fmt.Println("No keys supplied — payload shown above.") | |||||
| func cepstrumFilter(magDB []float64, nCeps int) { | |||||
| n := len(magDB) | |||||
| if n < nCeps*2 { | |||||
| return | return | ||||
| } | } | ||||
| fmt.Println("Key check:") | |||||
| matched := false | |||||
| for _, key := range keys { | |||||
| if watermark.KeyMatchesPayload(key, best.payload) { | |||||
| fmt.Printf(" ✓ MATCH: %q\n", key) | |||||
| matched = true | |||||
| } else { | |||||
| fmt.Printf(" ✗ : %q\n", key) | |||||
| ceps := make([]float64, n) | |||||
| for k := 0; k < n; k++ { | |||||
| var sum float64 | |||||
| for i := 0; i < n; i++ { | |||||
| sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||||
| } | } | ||||
| ceps[k] = sum | |||||
| } | } | ||||
| if !matched { | |||||
| fmt.Println("\nNo key matched.") | |||||
| for k := 0; k < nCeps; k++ { | |||||
| ceps[k] = 0 | |||||
| } | } | ||||
| } | |||||
| // corrChipRate correlates at chip rate (1 sample = 1 chip). | |||||
| func corrChipRate(down []float64, start, pnChips int) float64 { | |||||
| var acc float64 | |||||
| for i := 0; i < pnChips; i++ { | |||||
| acc += down[start+i] * float64(watermark.PNSequence[i]) | |||||
| for i := 0; i < n; i++ { | |||||
| var sum float64 | |||||
| for k := 0; k < n; k++ { | |||||
| w := 1.0 | |||||
| if k == 0 { | |||||
| w = 0.5 | |||||
| } | |||||
| sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n)) | |||||
| } | |||||
| magDB[i] = sum * 2.0 / float64(n) | |||||
| } | } | ||||
| return acc | |||||
| } | } | ||||
| // --- 8th-order Butterworth LPF (4 cascaded biquads) --- | |||||
| type biquad struct{ b0, b1, b2, a1, a2 float64 } | type biquad struct{ b0, b1, b2, a1, a2 float64 } | ||||
| type iirCoeffs []biquad | type iirCoeffs []biquad | ||||
| func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { | func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { | ||||
| // 8th-order Butterworth = 4 biquad sections | |||||
| angles := []float64{math.Pi / 16, 3 * math.Pi / 16, 5 * math.Pi / 16, 7 * math.Pi / 16} | angles := []float64{math.Pi / 16, 3 * math.Pi / 16, 5 * math.Pi / 16, 7 * math.Pi / 16} | ||||
| coeffs := make(iirCoeffs, 4) | coeffs := make(iirCoeffs, 4) | ||||
| for i, angle := range angles { | for i, angle := range angles { | ||||
| @@ -328,11 +277,8 @@ func designLPF8(cutoffHz, sampleRate float64) iirCoeffs { | |||||
| alpha := sinW / (2 * q) | alpha := sinW / (2 * q) | ||||
| a0 := 1 + alpha | a0 := 1 + alpha | ||||
| coeffs[i] = biquad{ | coeffs[i] = biquad{ | ||||
| b0: (1 - cosW) / 2 / a0, | |||||
| b1: (1 - cosW) / a0, | |||||
| b2: (1 - cosW) / 2 / a0, | |||||
| a1: (-2 * cosW) / a0, | |||||
| a2: (1 - alpha) / a0, | |||||
| b0: (1 - cosW) / 2 / a0, b1: (1 - cosW) / a0, b2: (1 - cosW) / 2 / a0, | |||||
| a1: (-2 * cosW) / a0, a2: (1 - alpha) / a0, | |||||
| } | } | ||||
| } | } | ||||
| return coeffs | return coeffs | ||||
| @@ -396,7 +342,7 @@ func readMonoWAV(path string) ([]float64, float64, error) { | |||||
| } | } | ||||
| } | } | ||||
| if dataStart == 0 || bitsPerSample != 16 || channels == 0 { | if dataStart == 0 || bitsPerSample != 16 || channels == 0 { | ||||
| return nil, 0, fmt.Errorf("unsupported WAV (need 16-bit PCM, got bits=%d ch=%d)", bitsPerSample, channels) | |||||
| return nil, 0, fmt.Errorf("unsupported WAV") | |||||
| } | } | ||||
| if dataStart+dataLen > len(data) { | if dataStart+dataLen > len(data) { | ||||
| dataLen = len(data) - dataStart | dataLen = len(data) - dataStart | ||||
| @@ -0,0 +1,65 @@ | |||||
| package dsp | |||||
| import ( | |||||
| "math" | |||||
| "math/cmplx" | |||||
| ) | |||||
// FFT computes the forward discrete Fourier transform of x in place using an
// iterative radix-2 Cooley-Tukey algorithm. The result is unscaled.
//
// len(x) must be a power of 2. Slices of length 0 or 1 are left unchanged.
// Any other length is a caller bug: FFT panics with a descriptive message
// before x is modified, instead of failing part-way through the butterfly
// pass with an index-out-of-range error on a half-transformed slice.
func FFT(x []complex128) {
	n := len(x)
	if n <= 1 {
		return
	}
	// A power of two has exactly one bit set, so n&(n-1) clears it to zero.
	if n&(n-1) != 0 {
		panic("dsp: FFT length must be a power of 2")
	}

	// Bit-reversal permutation: j is maintained as the bit-reversed value of
	// i by performing a "reversed increment" (carry propagates from the top
	// bit downwards).
	j := 0
	for i := 1; i < n; i++ {
		bit := n >> 1
		for j&bit != 0 {
			j ^= bit
			bit >>= 1
		}
		j ^= bit
		// Swap each pair only once.
		if i < j {
			x[i], x[j] = x[j], x[i]
		}
	}

	// Cooley-Tukey butterflies: merge transforms of length size/2 into
	// transforms of length size, doubling size until it reaches n.
	for size := 2; size <= n; size <<= 1 {
		half := size >> 1
		// Principal size-th root of unity for the forward transform.
		wn := cmplx.Exp(complex(0, -2*math.Pi/float64(size)))
		for start := 0; start < n; start += size {
			// w walks through the twiddle factors wn^k for this group.
			w := complex(1, 0)
			for k := 0; k < half; k++ {
				u := x[start+k]
				v := x[start+k+half] * w
				x[start+k] = u + v
				x[start+k+half] = u - v
				w *= wn
			}
		}
	}
}
| // IFFT computes the inverse DFT of x (in-place). | |||||
| func IFFT(x []complex128) { | |||||
| n := len(x) | |||||
| // Conjugate, FFT, conjugate, scale | |||||
| for i := range x { | |||||
| x[i] = cmplx.Conj(x[i]) | |||||
| } | |||||
| FFT(x) | |||||
| scale := 1.0 / float64(n) | |||||
| for i := range x { | |||||
| x[i] = cmplx.Conj(x[i]) * complex(scale, 0) | |||||
| } | |||||
| } | |||||
// HannWindow overwrites every element of w with the Hann window
// w[i] = 0.5 * (1 - cos(2*pi*i/n)), where n = len(w).
//
// The denominator is n rather than n-1, so w[0] is 0 and the matching
// trailing zero falls one sample past the end of the slice. An empty slice
// is a no-op.
func HannWindow(w []float64) {
	size := float64(len(w))
	for i := 0; i < len(w); i++ {
		phase := 2 * math.Pi * float64(i) / size
		w[i] = 0.5 * (1.0 - math.Cos(phase))
	}
}
| @@ -5,7 +5,6 @@ import ( | |||||
| "encoding/binary" | "encoding/binary" | ||||
| "fmt" | "fmt" | ||||
| "log" | "log" | ||||
| "math" | |||||
| "path/filepath" | "path/filepath" | ||||
| "sync/atomic" | "sync/atomic" | ||||
| "time" | "time" | ||||
| @@ -133,24 +132,20 @@ type Generator struct { | |||||
| licenseState *license.State | licenseState *license.State | ||||
| jingleFrames []license.JingleFrame | jingleFrames []license.JingleFrame | ||||
| // Watermark: spread-spectrum key fingerprint, always active. | |||||
| watermark *watermark.Embedder | |||||
| wmShapeLPF *dsp.FilterChain // pulse-shaping: confines PN energy to 0-6kHz | |||||
| // Watermark: STFT-domain spread-spectrum (Kirovski & Malvar 2003). | |||||
| stftEmbedder *watermark.STFTEmbedder | |||||
| wmDecimLPF *dsp.FilterChain // anti-alias LPF for 228k→12k decimation | |||||
| } | } | ||||
| func NewGenerator(cfg cfgpkg.Config) *Generator { | func NewGenerator(cfg cfgpkg.Config) *Generator { | ||||
| return &Generator{cfg: cfg} | return &Generator{cfg: cfg} | ||||
| } | } | ||||
| // SetLicense configures license state (jingle) and creates the watermark | |||||
| // SetLicense configures license state (jingle) and creates the STFT watermark | |||||
| // embedder. Must be called before the first GenerateFrame. | // embedder. Must be called before the first GenerateFrame. | ||||
| func (g *Generator) SetLicense(state *license.State, key string) { | func (g *Generator) SetLicense(state *license.State, key string) { | ||||
| g.licenseState = state | g.licenseState = state | ||||
| g.watermark = watermark.NewEmbedder(key) | |||||
| // Gate threshold: -40 dBFS ≈ 0.01 linear amplitude. | |||||
| // Watermark is muted during silence to prevent audibility. | |||||
| // Composite rate will be set in init(); use 228000 as default. | |||||
| g.watermark.EnableGate(0.001, 228000) | |||||
| g.stftEmbedder = watermark.NewSTFTEmbedder(key) | |||||
| } | } | ||||
| // SetExternalSource sets a live audio source (e.g. StreamResampler) that | // SetExternalSource sets a live audio source (e.g. StreamResampler) that | ||||
| @@ -285,17 +280,10 @@ func (g *Generator) init() { | |||||
| } | } | ||||
| } | } | ||||
| // Update watermark gate ramp rate with actual composite rate (may differ | |||||
| // from the 228000 default used in SetLicense). | |||||
| if g.watermark != nil { | |||||
| g.watermark.EnableGate(0.001, g.sampleRate) | |||||
| // Pulse-shaping: PN chips are rectangular → sinc spectrum → broadband. | |||||
| // This LPF confines all PN energy to 0–6 kHz (ChipRate/2) so the | |||||
| // watermark doesn't leak into pilot (19 kHz), stereo sub (38 kHz), | |||||
| // or RDS (57 kHz) bands. Also prevents audible wideband noise. | |||||
| // 4th-order Butterworth: -24 dB/octave rolloff. At 12 kHz: -24 dB, | |||||
| // at 19 kHz: -38 dB, at 38 kHz: -62 dB. Clean enough. | |||||
| g.wmShapeLPF = dsp.NewLPF4(float64(watermark.ChipRate)/2, g.sampleRate) | |||||
| // STFT watermark: anti-alias LPF for decimation to WMRate (12 kHz). | |||||
| // Nyquist at 12 kHz = 6 kHz. Cut at 5.5 kHz with margin. | |||||
| if g.stftEmbedder != nil { | |||||
| g.wmDecimLPF = dsp.NewLPF4(5500, g.sampleRate) | |||||
| } | } | ||||
| g.initialized = true | g.initialized = true | ||||
| @@ -338,6 +326,10 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||||
| g.frameSeq++ | g.frameSeq++ | ||||
| frame.Sequence = g.frameSeq | frame.Sequence = g.frameSeq | ||||
| // L/R buffers for two-pass processing (STFT watermark between stages 3 and 4) | |||||
| lBuf := make([]float64, samples) | |||||
| rBuf := make([]float64, samples) | |||||
| // Load live params once per chunk — single atomic read, zero per-sample cost | // Load live params once per chunk — single atomic read, zero per-sample cost | ||||
| lp := g.liveParams.Load() | lp := g.liveParams.Load() | ||||
| if lp == nil { | if lp == nil { | ||||
| @@ -404,15 +396,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||||
| r := g.audioLPF_R.Process(float64(in.R)) | r := g.audioLPF_R.Process(float64(in.R)) | ||||
| r = g.pilotNotchR.Process(r) | r = g.pilotNotchR.Process(r) | ||||
| // Watermark gate level measurement — done BEFORE drive/clip/cleanup. | |||||
| // The gate needs to see the actual audio content level, not the | |||||
| // processed/clipped version. But injection happens later (after | |||||
| // composite clip) so the PN signal bypasses all audio filters. | |||||
| if g.watermark != nil { | |||||
| audioLevel := (math.Abs(l) + math.Abs(r)) / 2.0 | |||||
| g.watermark.SetAudioLevel(audioLevel) | |||||
| } | |||||
| // --- Stage 2: Drive + Compress + Clip₁ --- | // --- Stage 2: Drive + Compress + Clip₁ --- | ||||
| l *= lp.OutputDrive | l *= lp.OutputDrive | ||||
| r *= lp.OutputDrive | r *= lp.OutputDrive | ||||
| @@ -428,6 +411,59 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||||
| l = dsp.HardClip(l, ceiling) | l = dsp.HardClip(l, ceiling) | ||||
| r = dsp.HardClip(r, ceiling) | r = dsp.HardClip(r, ceiling) | ||||
| lBuf[i] = l | |||||
| rBuf[i] = r | |||||
| } | |||||
| // --- STFT Watermark: decimate → embed → upsample → add to L/R --- | |||||
| if g.stftEmbedder != nil { | |||||
| decimFactor := int(g.sampleRate) / watermark.WMRate // 228000/12000 = 19 | |||||
| if decimFactor < 1 { | |||||
| decimFactor = 1 | |||||
| } | |||||
| nDown := samples / decimFactor | |||||
| // Anti-alias: LPF ALL composite-rate samples, THEN decimate. | |||||
| // The LPF must see every sample for correct IIR state update. | |||||
| mono12k := make([]float64, nDown) | |||||
| lpfState := 0.0 | |||||
| decimCount := 0 | |||||
| outIdx := 0 | |||||
| for i := 0; i < samples && outIdx < nDown; i++ { | |||||
| mono := (lBuf[i] + rBuf[i]) / 2 | |||||
| if g.wmDecimLPF != nil { | |||||
| lpfState = g.wmDecimLPF.Process(mono) | |||||
| } else { | |||||
| lpfState = mono | |||||
| } | |||||
| decimCount++ | |||||
| if decimCount >= decimFactor { | |||||
| decimCount = 0 | |||||
| mono12k[outIdx] = lpfState | |||||
| outIdx++ | |||||
| } | |||||
| } | |||||
| // STFT embed at 12 kHz | |||||
| embedded := g.stftEmbedder.ProcessBlock(mono12k) | |||||
| // Extract watermark signal (difference) and upsample via ZOH | |||||
| for i := 0; i < samples; i++ { | |||||
| wmIdx := i / decimFactor | |||||
| if wmIdx >= nDown { | |||||
| wmIdx = nDown - 1 | |||||
| } | |||||
| wmSig := embedded[wmIdx] - mono12k[wmIdx] | |||||
| lBuf[i] += wmSig | |||||
| rBuf[i] += wmSig | |||||
| } | |||||
| } | |||||
| // --- Pass 2: Stereo encode + composite processing --- | |||||
| for i := 0; i < samples; i++ { | |||||
| l := lBuf[i] | |||||
| r := rBuf[i] | |||||
| // --- Stage 4: Stereo encode --- | // --- Stage 4: Stereo encode --- | ||||
| limited := audio.NewFrame(audio.Sample(l), audio.Sample(r)) | limited := audio.NewFrame(audio.Sample(l), audio.Sample(r)) | ||||
| comps := g.stereoEncoder.Encode(limited) | comps := g.stereoEncoder.Encode(limited) | ||||
| @@ -453,22 +489,6 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||||
| } | } | ||||
| bs412PowerAccum += audioMPX * audioMPX | bs412PowerAccum += audioMPX * audioMPX | ||||
| // --- Watermark injection: into audio composite AFTER all processing --- | |||||
| // Injected after the entire clip-filter-clip chain, notch filters, and | |||||
| // BS.412 power measurement. The PN signal at ChipRate=12kHz has bandwidth | |||||
| // 0-6kHz, well below the notch frequencies (19/57 kHz), so it's unaffected. | |||||
| // At -48 dBFS the watermark causes <0.05 dB of over-modulation, negligible. | |||||
| // Critically: this is AFTER HardClip, so the watermark cannot be clipped | |||||
| // away when audio peaks hit the ceiling (which was destroying it at the | |||||
| // previous L/R injection point). | |||||
| if g.watermark != nil { | |||||
| wm := g.watermark.NextSample() | |||||
| if g.wmShapeLPF != nil { | |||||
| wm = g.wmShapeLPF.Process(wm) | |||||
| } | |||||
| audioMPX += wm | |||||
| } | |||||
| // --- Stage 6: Add protected components --- | // --- Stage 6: Add protected components --- | ||||
| composite := audioMPX | composite := audioMPX | ||||
| if lp.StereoEnabled { | if lp.StereoEnabled { | ||||
| @@ -503,15 +523,9 @@ func (g *Generator) GenerateFrame(duration time.Duration) *output.CompositeFrame | |||||
| g.bs412.ProcessChunk(bs412PowerAccum / float64(samples)) | g.bs412.ProcessChunk(bs412PowerAccum / float64(samples)) | ||||
| } | } | ||||
| // Watermark diagnostic: log state every 100 chunks (~5s) so we can verify | |||||
| // the embedder is actually running and producing non-zero output. | |||||
| if g.watermark != nil && g.frameSeq%100 == 1 { | |||||
| wm := g.watermark.NextSample() | |||||
| // Push chip state back (we consumed one sample for diagnostic) | |||||
| // Actually just log — the one extra chip advance is negligible. | |||||
| stats := g.watermark.DiagnosticState() | |||||
| log.Printf("watermark diag: frame=%d gateGain=%.4f chipIdx=%d bitIdx=%d symbol=%d lastSample=%.6f enabled=%t", | |||||
| g.frameSeq, stats.GateGain, stats.ChipIdx, stats.BitIdx, stats.Symbol, wm, stats.GateEnabled) | |||||
| // STFT watermark diagnostic | |||||
| if g.stftEmbedder != nil && g.frameSeq%100 == 1 { | |||||
| log.Printf("watermark stft: frame=%d, active", g.frameSeq) | |||||
| } | } | ||||
| return frame | return frame | ||||
| @@ -0,0 +1,163 @@ | |||||
| package watermark | |||||
| import ( | |||||
| "math" | |||||
| "testing" | |||||
| ) | |||||
| func TestSTFTRoundTrip(t *testing.T) { | |||||
| const key = "test-stft-key" | |||||
| const duration = 150.0 // seconds — need > 136.5s for one full WM cycle | |||||
| nSamples := int(duration * WMRate) | |||||
| t.Logf("Generating %d samples @ %d Hz (%.1fs)", nSamples, WMRate, duration) | |||||
| t.Logf("WM cycle: %d STFT frames, %.1fs", FramesPerWM, float64(SamplesPerWM)/WMRate) | |||||
| // Generate test signal: broadband noise (the multiplicative watermark | |||||
| // needs energy in all frequency bins to work — a pure tone only has | |||||
| // energy in one bin and the watermark has no effect on silent bins) | |||||
| audio := make([]float64, nSamples) | |||||
| // Simple LCG pseudo-random for reproducibility | |||||
| var lcg uint64 = 12345 | |||||
| for i := range audio { | |||||
| lcg = lcg*6364136223846793005 + 1442695040888963407 | |||||
| audio[i] = 0.3 * (float64(int32(lcg>>33))/float64(1<<31)) | |||||
| } | |||||
| rmsIn := rmsF64(audio) | |||||
| t.Logf("Input RMS: %.1f dBFS", 20*math.Log10(rmsIn+1e-12)) | |||||
| // Embed watermark | |||||
| embedder := NewSTFTEmbedder(key) | |||||
| watermarked := embedder.ProcessBlock(audio) | |||||
| rmsOut := rmsF64(watermarked) | |||||
| t.Logf("Output RMS: %.1f dBFS", 20*math.Log10(rmsOut+1e-12)) | |||||
| t.Logf("RMS change: %.2f dB", 20*math.Log10(rmsOut/rmsIn)) | |||||
| // Detect watermark | |||||
| detector := NewSTFTDetector(key) | |||||
| corrs, offset := detector.Detect(watermarked) | |||||
| t.Logf("Detection offset: %d", offset) | |||||
| // Check correlations | |||||
| var nPositive, nNegative int | |||||
| var sumAbs float64 | |||||
| for _, c := range corrs { | |||||
| sumAbs += math.Abs(c) | |||||
| if c > 0 { | |||||
| nPositive++ | |||||
| } else { | |||||
| nNegative++ | |||||
| } | |||||
| } | |||||
| avgAbs := sumAbs / float64(payloadBits) | |||||
| t.Logf("Correlations: avg|c|=%.1f, positive=%d, negative=%d", avgAbs, nPositive, nNegative) | |||||
| if avgAbs < 1.0 { | |||||
| t.Errorf("avg|c| too low: %.1f (expected >> 1.0)", avgAbs) | |||||
| } | |||||
| // Check against known payload | |||||
| payload := KeyToPayload(key) | |||||
| codeword := RSEncode(payload) | |||||
| var expectedBits [payloadBits]int | |||||
| for i := 0; i < payloadBits; i++ { | |||||
| expectedBits[i] = int((codeword[i/8] >> uint(7-(i%8))) & 1) | |||||
| } | |||||
| nerr := 0 | |||||
| for i := 0; i < payloadBits; i++ { | |||||
| hard := 0 | |||||
| if corrs[i] < 0 { | |||||
| hard = 1 | |||||
| } | |||||
| if hard != expectedBits[i] { | |||||
| nerr++ | |||||
| } | |||||
| } | |||||
| t.Logf("BER: %d/%d (%.1f%%)", nerr, payloadBits, 100*float64(nerr)/float64(payloadBits)) | |||||
| if nerr > 20 { | |||||
| t.Errorf("BER too high: %d/%d", nerr, payloadBits) | |||||
| } | |||||
| // Try RS decode | |||||
| var recv [rsTotalBytes]byte | |||||
| for i := 0; i < payloadBits; i++ { | |||||
| if corrs[i] < 0 { | |||||
| recv[i/8] |= 1 << uint(7-(i%8)) | |||||
| } | |||||
| } | |||||
| // Try with erasures if needed | |||||
| decoded := false | |||||
| for nErase := 0; nErase <= rsCheckBytes; nErase++ { | |||||
| if nErase == 0 { | |||||
| // Try zero erasures (valid if BER=0) | |||||
| p, ok := RSDecode(recv, nil) | |||||
| if ok { | |||||
| if KeyMatchesPayload(key, p) { | |||||
| t.Logf("Decoded with 0 erasures: MATCH ✓") | |||||
| decoded = true | |||||
| break | |||||
| } | |||||
| } | |||||
| continue | |||||
| } | |||||
| // Erase weakest bytes by |correlation| | |||||
| type bc struct{ idx int; conf float64 } | |||||
| byteConfs := make([]bc, rsTotalBytes) | |||||
| for b := 0; b < rsTotalBytes; b++ { | |||||
| minC := math.Abs(corrs[b*8]) | |||||
| for bit := 1; bit < 8; bit++ { | |||||
| c := math.Abs(corrs[b*8+bit]) | |||||
| if c < minC { | |||||
| minC = c | |||||
| } | |||||
| } | |||||
| byteConfs[b] = bc{b, minC} | |||||
| } | |||||
| // Sort by confidence (weakest first) | |||||
| for i := 0; i < len(byteConfs); i++ { | |||||
| for j := i + 1; j < len(byteConfs); j++ { | |||||
| if byteConfs[j].conf < byteConfs[i].conf { | |||||
| byteConfs[i], byteConfs[j] = byteConfs[j], byteConfs[i] | |||||
| } | |||||
| } | |||||
| } | |||||
| erasePos := make([]int, nErase) | |||||
| for i := 0; i < nErase; i++ { | |||||
| erasePos[i] = byteConfs[i].idx | |||||
| } | |||||
| // Sort positions | |||||
| for i := 0; i < len(erasePos); i++ { | |||||
| for j := i + 1; j < len(erasePos); j++ { | |||||
| if erasePos[j] < erasePos[i] { | |||||
| erasePos[i], erasePos[j] = erasePos[j], erasePos[i] | |||||
| } | |||||
| } | |||||
| } | |||||
| p, ok := RSDecode(recv, erasePos) | |||||
| if ok { | |||||
| if KeyMatchesPayload(key, p) { | |||||
| t.Logf("Decoded with %d erasures: MATCH ✓", nErase) | |||||
| decoded = true | |||||
| break | |||||
| } | |||||
| } | |||||
| } | |||||
| if !decoded { | |||||
| t.Errorf("RS decode FAILED") | |||||
| } | |||||
| } | |||||
// rmsF64 returns the root-mean-square of s.
// An empty slice yields 0 rather than NaN (0/0).
func rmsF64(s []float64) float64 {
	if len(s) == 0 {
		return 0
	}
	var acc float64
	for _, v := range s {
		acc += v * v
	}
	return math.Sqrt(acc / float64(len(s)))
}
| @@ -0,0 +1,413 @@ | |||||
| // Package watermark implements STFT-domain spread-spectrum audio watermarking | |||||
| // based on Kirovski & Malvar (IEEE TSP 2003). | |||||
| // | |||||
| // Architecture: | |||||
| // - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking | |||||
| // - Block repetition coding (R=5 time frames) — automatic drift tolerance | |||||
| // - Cepstrum filtering at detection — 6 dB carrier noise reduction | |||||
| // - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload | |||||
| // - Multi-test sync — scan R frame offsets to find alignment | |||||
| // | |||||
| // Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates | |||||
| // from composite rate (÷19), processes STFT, and upsamples back. The decoder | |||||
| // decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.). | |||||
| // Same STFT parameters → bins align perfectly → no rate mismatch. | |||||
| package watermark | |||||
| import ( | |||||
| "crypto/sha256" | |||||
| "math" | |||||
| "math/cmplx" | |||||
| "github.com/jan/fm-rds-tx/internal/dsp" | |||||
| ) | |||||
| // STFT watermark constants. | |||||
| const ( | |||||
| WMRate = 12000 // watermark processing sample rate (Hz) | |||||
| FFTSize = 512 // STFT frame size (samples at WMRate) | |||||
| FFTHop = 256 // 50% overlap | |||||
| BinLow = 9 // ~211 Hz at WMRate/FFTSize | |||||
| BinHigh = 213 // ~4992 Hz at WMRate/FFTSize | |||||
| NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame | |||||
| TimeRep = 5 // block repetition factor (±2 frame drift tolerance) | |||||
| GroupsPerBit = 10 // time groups per data bit | |||||
| WMLevelDB = 1.5 // embedding level (dB) | |||||
| TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280 | |||||
| FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400 | |||||
| SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400 | |||||
| // Duration at WMRate: 1638400 / 12000 = 136.5 seconds | |||||
| ) | |||||
| // STFTEmbedder processes audio blocks and adds the STFT-domain watermark. | |||||
| // It works at WMRate (12 kHz). The caller must decimate input to WMRate | |||||
| // and upsample output back to the desired rate. | |||||
| type STFTEmbedder struct { | |||||
| // PN chip matrix: pnChips[group][bin] ∈ {-1, +1} | |||||
| // group ∈ [0, TotalGroups), bin ∈ [0, NumBins) | |||||
| pnChips [TotalGroups][NumBins]int8 | |||||
| // Bit assignment: which data bit owns each group (PCC permutation) | |||||
| groupToBit [TotalGroups]int | |||||
| // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1 | |||||
| symbols [payloadBits]int8 | |||||
| // STFT state | |||||
| window [FFTSize]float64 | |||||
| inBuf [FFTSize]float64 // analysis window buffer | |||||
| outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer | |||||
| inPos int // samples written to inBuf | |||||
| outPos int // samples read from outBuf | |||||
| frameIdx int // STFT frame counter (wraps at FramesPerWM) | |||||
| primed bool // true after first full frame | |||||
| // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.189 for 1.5 dB | |||||
| levelLinear float64 | |||||
| } | |||||
| // NewSTFTEmbedder creates an embedder for the given license key. | |||||
| func NewSTFTEmbedder(key string) *STFTEmbedder { | |||||
| e := &STFTEmbedder{} | |||||
| // Compute RS-encoded payload | |||||
| var data [rsDataBytes]byte | |||||
| if key != "" { | |||||
| h := sha256.Sum256([]byte(key)) | |||||
| copy(data[:], h[:rsDataBytes]) | |||||
| } | |||||
| codeword := rsEncode(data) | |||||
| // BPSK symbols: bit 0 → +1, bit 1 → -1 | |||||
| for i := 0; i < payloadBits; i++ { | |||||
| if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 { | |||||
| e.symbols[i] = -1 | |||||
| } else { | |||||
| e.symbols[i] = 1 | |||||
| } | |||||
| } | |||||
| // Generate PN chips from key-seeded PRNG | |||||
| seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) | |||||
| prng := newPRNG(seed[:]) | |||||
| for g := 0; g < TotalGroups; g++ { | |||||
| for b := 0; b < NumBins; b++ { | |||||
| if prng.next()&1 == 0 { | |||||
| e.pnChips[g][b] = 1 | |||||
| } else { | |||||
| e.pnChips[g][b] = -1 | |||||
| } | |||||
| } | |||||
| } | |||||
| // PCC permutation: assign groups to bits (interleaved + permuted) | |||||
| // Simple interleaving first, then Fisher-Yates shuffle | |||||
| for g := 0; g < TotalGroups; g++ { | |||||
| e.groupToBit[g] = g % payloadBits | |||||
| } | |||||
| // Permute within each bit's groups using key-seeded PRNG | |||||
| permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) | |||||
| permRNG := newPRNG(permSeed[:]) | |||||
| for i := TotalGroups - 1; i > 0; i-- { | |||||
| j := permRNG.next() % uint32(i+1) | |||||
| e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i] | |||||
| } | |||||
| // Hann window | |||||
| dsp.HannWindow(e.window[:]) | |||||
| // Embedding level | |||||
| e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change | |||||
| return e | |||||
| } | |||||
| // ProcessBlock takes mono audio at WMRate and returns watermarked audio. | |||||
| // The input and output lengths are the same. Internally buffers for STFT | |||||
| // overlap-add processing. Call with chunks of any size. | |||||
| func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 { | |||||
| out := make([]float64, len(in)) | |||||
| for i, s := range in { | |||||
| // Feed sample into STFT input buffer | |||||
| e.inBuf[e.inPos] = s | |||||
| e.inPos++ | |||||
| if e.inPos == FFTSize { | |||||
| // Full frame: process STFT | |||||
| e.processFrame() | |||||
| e.inPos = FFTHop // shift: keep last hop samples for next frame overlap | |||||
| copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize]) | |||||
| } | |||||
| // Read from overlap-add output buffer | |||||
| if e.primed { | |||||
| out[i] = e.outBuf[e.outPos] | |||||
| e.outPos++ | |||||
| if e.outPos >= FFTHop { | |||||
| e.outPos = 0 | |||||
| // Shift output buffer: move overlap region to start | |||||
| copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop]) | |||||
| // Zero the new region | |||||
| for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ { | |||||
| if j < len(e.outBuf) { | |||||
| e.outBuf[j] = 0 | |||||
| } | |||||
| } | |||||
| } | |||||
| } else { | |||||
| out[i] = s // pass-through until first frame is processed | |||||
| } | |||||
| } | |||||
| return out | |||||
| } | |||||
| // processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add. | |||||
| func (e *STFTEmbedder) processFrame() { | |||||
| // Determine which group this frame belongs to | |||||
| wmFrame := e.frameIdx % FramesPerWM | |||||
| groupIdx := wmFrame / TimeRep | |||||
| repIdx := wmFrame % TimeRep | |||||
| centerRep := TimeRep / 2 // only center repetition carries the watermark for detection | |||||
| // Apply window and convert to complex | |||||
| var buf [FFTSize]complex128 | |||||
| for i := 0; i < FFTSize; i++ { | |||||
| buf[i] = complex(e.inBuf[i]*e.window[i], 0) | |||||
| } | |||||
| // Forward FFT | |||||
| dsp.FFT(buf[:]) | |||||
| // Modify magnitudes in the watermark sub-band | |||||
| // Only modify if this is within a valid group AND at the center repetition | |||||
| // (we embed in ALL repetitions so the watermark energy is present everywhere, | |||||
| // but the PN pattern is the same for all R frames in a group) | |||||
| if groupIdx < TotalGroups { | |||||
| bitIdx := e.groupToBit[groupIdx] | |||||
| dataSign := float64(e.symbols[bitIdx]) | |||||
| _ = repIdx | |||||
| _ = centerRep | |||||
| for b := 0; b < NumBins; b++ { | |||||
| bin := BinLow + b | |||||
| chip := float64(e.pnChips[groupIdx][b]) | |||||
| // Modify magnitude: |Y| = |X| × (1 + level × chip × data) | |||||
| // Phase preserved | |||||
| mag := cmplx.Abs(buf[bin]) | |||||
| if mag < 1e-10 { | |||||
| continue // skip near-silence bins to avoid division by zero | |||||
| } | |||||
| phase := cmplx.Phase(buf[bin]) | |||||
| newMag := mag * (1.0 + e.levelLinear*chip*dataSign) | |||||
| buf[bin] = cmplx.Rect(newMag, phase) | |||||
| // Mirror for negative frequencies (conjugate symmetry) | |||||
| if bin > 0 && bin < FFTSize/2 { | |||||
| buf[FFTSize-bin] = cmplx.Conj(buf[bin]) | |||||
| } | |||||
| } | |||||
| } | |||||
| // Inverse FFT | |||||
| dsp.IFFT(buf[:]) | |||||
| // Overlap-add to output buffer | |||||
| for i := 0; i < FFTSize; i++ { | |||||
| e.outBuf[i] += real(buf[i]) | |||||
| } | |||||
| if !e.primed { | |||||
| e.primed = true | |||||
| e.outPos = 0 | |||||
| } | |||||
| e.frameIdx++ | |||||
| } | |||||
| // STFTDetector extracts watermark bits from an audio recording. | |||||
| type STFTDetector struct { | |||||
| pnChips [TotalGroups][NumBins]int8 | |||||
| groupToBit [TotalGroups]int | |||||
| } | |||||
| // NewSTFTDetector creates a detector matching the given key's PN sequence. | |||||
| func NewSTFTDetector(key string) *STFTDetector { | |||||
| d := &STFTDetector{} | |||||
| // Same PN generation as embedder | |||||
| seed := sha256.Sum256(append([]byte("stft-pn-"), key...)) | |||||
| prng := newPRNG(seed[:]) | |||||
| for g := 0; g < TotalGroups; g++ { | |||||
| for b := 0; b < NumBins; b++ { | |||||
| if prng.next()&1 == 0 { | |||||
| d.pnChips[g][b] = 1 | |||||
| } else { | |||||
| d.pnChips[g][b] = -1 | |||||
| } | |||||
| } | |||||
| } | |||||
| // Same permutation | |||||
| for g := 0; g < TotalGroups; g++ { | |||||
| d.groupToBit[g] = g % payloadBits | |||||
| } | |||||
| permSeed := sha256.Sum256(append([]byte("stft-perm-"), key...)) | |||||
| permRNG := newPRNG(permSeed[:]) | |||||
| for i := TotalGroups - 1; i > 0; i-- { | |||||
| j := permRNG.next() % uint32(i+1) | |||||
| d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i] | |||||
| } | |||||
| return d | |||||
| } | |||||
| // Detect processes audio at WMRate and returns soft bit decisions. | |||||
| // The audio should already be decimated/resampled to WMRate and LPF'd. | |||||
| // | |||||
| // Multi-test: tries TimeRep frame offsets (the block repetition candidates). | |||||
| // Cepstrum filtering is applied to reduce carrier noise. | |||||
| // | |||||
| // Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence), | |||||
| // and the frame offset that gave the best detection metric. | |||||
| func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) { | |||||
| // Compute all STFT frames | |||||
| var window [FFTSize]float64 | |||||
| dsp.HannWindow(window[:]) | |||||
| nFrames := (len(audio) - FFTSize) / FFTHop | |||||
| if nFrames < FramesPerWM { | |||||
| // Not enough data for a full watermark cycle — use what we have | |||||
| } | |||||
| // Compute STFT magnitudes (dB) for all frames | |||||
| type stftFrame struct { | |||||
| magDB [FFTSize / 2]float64 | |||||
| } | |||||
| frames := make([]stftFrame, nFrames) | |||||
| for f := 0; f < nFrames; f++ { | |||||
| offset := f * FFTHop | |||||
| var buf [FFTSize]complex128 | |||||
| for i := 0; i < FFTSize; i++ { | |||||
| if offset+i < len(audio) { | |||||
| buf[i] = complex(audio[offset+i]*window[i], 0) | |||||
| } | |||||
| } | |||||
| dsp.FFT(buf[:]) | |||||
| for bin := 0; bin < FFTSize/2; bin++ { | |||||
| mag := cmplx.Abs(buf[bin]) | |||||
| if mag < 1e-12 { | |||||
| mag = 1e-12 | |||||
| } | |||||
| frames[f].magDB[bin] = 20 * math.Log10(mag) | |||||
| } | |||||
| // Cepstrum filtering: remove spectral envelope | |||||
| // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT | |||||
| cepstrumFilter(frames[f].magDB[:], 8) | |||||
| } | |||||
| // Multi-test: try each of TimeRep frame offsets within the repetition block | |||||
| bestMetric := -1.0 | |||||
| bestOffset = 0 | |||||
| for startOffset := 0; startOffset < TimeRep; startOffset++ { | |||||
| var testCorrs [payloadBits]float64 | |||||
| // For each group, use the CENTER frame of the repetition block | |||||
| for g := 0; g < TotalGroups; g++ { | |||||
| bitIdx := d.groupToBit[g] | |||||
| frameInWM := g*TimeRep + startOffset + TimeRep/2 | |||||
| if frameInWM >= nFrames { | |||||
| continue | |||||
| } | |||||
| // Correlate this frame's magnitudes with the PN chips | |||||
| var corr float64 | |||||
| for b := 0; b < NumBins; b++ { | |||||
| bin := BinLow + b | |||||
| corr += frames[frameInWM].magDB[bin] * float64(d.pnChips[g][b]) | |||||
| } | |||||
| testCorrs[bitIdx] += corr | |||||
| } | |||||
| // Detection metric: sum of squared partial correlations (chi-squared) | |||||
| // From paper equation (10): Q = Σ (corr_m)² | |||||
| var metric float64 | |||||
| for _, c := range testCorrs { | |||||
| metric += c * c | |||||
| } | |||||
| if metric > bestMetric { | |||||
| bestMetric = metric | |||||
| bestOffset = startOffset | |||||
| corrs = testCorrs | |||||
| } | |||||
| } | |||||
| return corrs, bestOffset | |||||
| } | |||||
// cepstrumFilter removes the smooth spectral envelope from magDB in place by
// discarding the first nCeps DCT-II coefficients of the dB spectrum.
//
// Zeroing coefficients 0..nCeps-1 and inverting the transform is the same as
// subtracting the signal's projection onto those nCeps cosine basis vectors,
// so only those coefficients are computed: O(n·nCeps) work instead of a full
// O(n²) forward and inverse DCT.
//
// The slice is left untouched when nCeps <= 0 or when it holds fewer than
// 2*nCeps values.
func cepstrumFilter(magDB []float64, nCeps int) {
	n := len(magDB)
	if nCeps <= 0 || n < nCeps*2 {
		return
	}
	step := math.Pi / float64(n)

	// Low-order DCT-II coefficients: C[k] = Σ x[i]·cos(πk(i+½)/n).
	coef := make([]float64, nCeps)
	for k := range coef {
		var sum float64
		for i, v := range magDB {
			sum += v * math.Cos(step*float64(k)*(float64(i)+0.5))
		}
		coef[k] = sum
	}
	// The inverse transform weights the DC term by one half.
	coef[0] *= 0.5

	// Subtract the envelope: env[i] = (2/n)·Σ w[k]·C[k]·cos(πk(i+½)/n).
	norm := 2.0 / float64(n)
	for i := range magDB {
		var env float64
		for k, c := range coef {
			env += c * math.Cos(step*float64(k)*(float64(i)+0.5))
		}
		magDB[i] -= norm * env
	}
}
// simplePRNG is a 32-bit xorshift generator (shifts 13, 17, 5) used for
// deterministic chip and permutation generation.
type simplePRNG struct {
	state uint32
}

// newPRNG folds seed into a 32-bit state by XOR-ing byte i into byte lane
// i mod 4. A folded value of zero is replaced by 1, since an all-zero
// xorshift state would only ever produce zeros.
func newPRNG(seed []byte) *simplePRNG {
	var folded uint32
	for i := range seed {
		lane := uint(i&3) << 3 // bit offset of byte lane i mod 4
		folded ^= uint32(seed[i]) << lane
	}
	if folded == 0 {
		folded = 1
	}
	return &simplePRNG{state: folded}
}

// next advances the generator one step and returns the new state.
func (p *simplePRNG) next() uint32 {
	x := p.state
	x ^= x << 13
	x ^= x >> 17
	x ^= x << 5
	p.state = x
	return x
}
| @@ -395,7 +395,17 @@ func RSEncode(data [rsDataBytes]byte) [rsTotalBytes]byte { | |||||
| return rsEncode(data) | return rsEncode(data) | ||||
| } | } | ||||
| // Constants exported for the recovery tool. | |||||
| // PNChipAt returns the PN chip value at group g, bin b. | |||||
| func (d *STFTDetector) PNChipAt(g, b int) int8 { | |||||
| return d.pnChips[g][b] | |||||
| } | |||||
| // GroupBit returns the data bit index for group g. | |||||
| func (d *STFTDetector) GroupBit(g int) int { | |||||
| return d.groupToBit[g] | |||||
| } | |||||
| // Constants exported for the recovery tool and legacy tools. | |||||
| const ( | const ( | ||||
| PnChips = pnChips | PnChips = pnChips | ||||
| PayloadBits = payloadBits | PayloadBits = payloadBits | ||||