diff --git a/internal/watermark/stft_watermark.go b/internal/watermark/stft_watermark.go index 73fc967..460b687 100644 --- a/internal/watermark/stft_watermark.go +++ b/internal/watermark/stft_watermark.go @@ -34,12 +34,45 @@ const ( GroupsPerBit = 10 // time groups per data bit WMLevelDB = 0.5 // embedding level (dB) — inaudible, 20 dB margin for decode + // PAFM: Psycho-Acoustic Frequency Masking (Kirovski §III-A). + // Only embed/detect in bins where audio provides enough masking. + // Bins more than PAFMThresholdDB below local spectral peak are + // in spectral valleys — watermark would be audible there, and they + // contribute more carrier noise than signal to the correlation. + PAFMThresholdDB = 25.0 // dB below local peak → skip bin + PAFMNeighborhood = 4 // ± bins for local peak search + TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280 FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400 SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400 // Duration at WMRate: 1638400 / 12000 = 136.5 seconds ) +// PafmMask computes which bins are "audible" (suitable for embedding/detection). +// A bin is audible if its magnitude is within PAFMThresholdDB of the local +// spectral peak (±PAFMNeighborhood bins). Bins in spectral valleys are +// excluded — they have weak masking and would make the watermark audible. +// +// Returns a bitmask: true = embed/detect here, false = skip. +// On average ~60% of bins are audible (matching Kirovski's observation). +func PafmMask(magDB []float64) [NumBins]bool { + var mask [NumBins]bool + for b := 0; b < NumBins; b++ { + bin := BinLow + b + // Find local peak in neighborhood + localPeak := magDB[bin] + for j := -PAFMNeighborhood; j <= PAFMNeighborhood; j++ { + idx := bin + j + if idx >= 0 && idx < len(magDB) && magDB[idx] > localPeak { + localPeak = magDB[idx] + } + } + // Bin is audible if within threshold of local peak + mask[b] = magDB[bin] >= localPeak-PAFMThresholdDB + } + return mask +} + // STFTEmbedder processes audio blocks and adds the STFT-domain watermark. // It works at WMRate (12 kHz). The caller must decimate input to WMRate // and upsample output back to the desired rate. @@ -189,7 +222,22 @@ func (e *STFTEmbedder) processFrame() { _ = repIdx _ = centerRep + // PAFM: compute masking threshold for this frame. + // Only embed in bins where audio provides enough masking. + var frameMagDB [FFTSize / 2]float64 + for bin := 0; bin < FFTSize/2; bin++ { + mag := cmplx.Abs(buf[bin]) + if mag < 1e-12 { + mag = 1e-12 + } + frameMagDB[bin] = 20 * math.Log10(mag) + } + mask := PafmMask(frameMagDB[:]) + for b := 0; b < NumBins; b++ { + if !mask[b] { + continue // PAFM: bin is in spectral valley, skip + } bin := BinLow + b chip := float64(e.pnChips[groupIdx][b]) @@ -282,9 +330,10 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best // Not enough data for a full watermark cycle — use what we have } - // Compute STFT magnitudes (dB) for all frames + // Compute STFT magnitudes (dB) for all frames + PAFM masks type stftFrame struct { magDB [FFTSize / 2]float64 + mask [NumBins]bool // PAFM: which bins are audible } frames := make([]stftFrame, nFrames) @@ -306,8 +355,11 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best frames[f].magDB[bin] = 20 * math.Log10(mag) } - // Cepstrum filtering: remove spectral envelope - // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT + // PAFM mask: computed on ORIGINAL magnitudes (before cepstrum filtering) + // so the mask reflects the true spectral shape for masking decisions. + frames[f].mask = PafmMask(frames[f].magDB[:]) + + // Cepstrum filtering: remove spectral envelope (after mask computation) cepstrumFilter(frames[f].magDB[:], 8) } @@ -332,6 +384,9 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best var corr float64 for b := 0; b < NumBins; b++ { + if !frames[f].mask[b] { + continue // PAFM: skip bins in spectral valleys + } bin := BinLow + b corr += frames[f].magDB[bin] * float64(d.pnChips[g][b]) }