|
|
|
@@ -34,12 +34,45 @@ const ( |
|
|
|
GroupsPerBit = 10 // time groups per data bit |
|
|
|
WMLevelDB = 0.5 // embedding level (dB) — inaudible, 20 dB margin for decode |
|
|
|
|
|
|
|
// PAFM: Psycho-Acoustic Frequency Masking (Kirovski §III-A). |
|
|
|
// Only embed/detect in bins where audio provides enough masking. |
|
|
|
// Bins more than PAFMThresholdDB below local spectral peak are |
|
|
|
// in spectral valleys — watermark would be audible there, and they |
|
|
|
// contribute more carrier noise than signal to the correlation. |
|
|
|
PAFMThresholdDB = 25.0 // dB below local peak → skip bin |
|
|
|
PAFMNeighborhood = 4 // ± bins for local peak search |
|
|
|
|
|
|
|
TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280 |
|
|
|
FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400 |
|
|
|
SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400 |
|
|
|
// Duration at WMRate: 1638400 / 12000 = 136.5 seconds |
|
|
|
) |
|
|
|
|
|
|
|
// PafmMask computes which bins are "audible" (suitable for embedding/detection). |
|
|
|
// A bin is audible if its magnitude is within PAFMThresholdDB of the local |
|
|
|
// spectral peak (±PAFMNeighborhood bins). Bins in spectral valleys are |
|
|
|
// excluded — they have weak masking and would make the watermark audible. |
|
|
|
// |
|
|
|
// Returns a bitmask: true = embed/detect here, false = skip. |
|
|
|
// On average ~60% of bins are audible (matching Kirovski's observation). |
|
|
|
func PafmMask(magDB []float64) [NumBins]bool { |
|
|
|
var mask [NumBins]bool |
|
|
|
for b := 0; b < NumBins; b++ { |
|
|
|
bin := BinLow + b |
|
|
|
// Find local peak in neighborhood |
|
|
|
localPeak := magDB[bin] |
|
|
|
for j := -PAFMNeighborhood; j <= PAFMNeighborhood; j++ { |
|
|
|
idx := bin + j |
|
|
|
if idx >= 0 && idx < len(magDB) && magDB[idx] > localPeak { |
|
|
|
localPeak = magDB[idx] |
|
|
|
} |
|
|
|
} |
|
|
|
// Bin is audible if within threshold of local peak |
|
|
|
mask[b] = magDB[bin] >= localPeak-PAFMThresholdDB |
|
|
|
} |
|
|
|
return mask |
|
|
|
} |
|
|
|
|
|
|
|
// STFTEmbedder processes audio blocks and adds the STFT-domain watermark. |
|
|
|
// It works at WMRate (12 kHz). The caller must decimate input to WMRate |
|
|
|
// and upsample output back to the desired rate. |
|
|
|
@@ -189,7 +222,22 @@ func (e *STFTEmbedder) processFrame() { |
|
|
|
_ = repIdx |
|
|
|
_ = centerRep |
|
|
|
|
|
|
|
// PAFM: compute masking threshold for this frame. |
|
|
|
// Only embed in bins where audio provides enough masking. |
|
|
|
var frameMagDB [FFTSize / 2]float64 |
|
|
|
for bin := 0; bin < FFTSize/2; bin++ { |
|
|
|
mag := cmplx.Abs(buf[bin]) |
|
|
|
if mag < 1e-12 { |
|
|
|
mag = 1e-12 |
|
|
|
} |
|
|
|
frameMagDB[bin] = 20 * math.Log10(mag) |
|
|
|
} |
|
|
|
mask := PafmMask(frameMagDB[:]) |
|
|
|
|
|
|
|
for b := 0; b < NumBins; b++ { |
|
|
|
if !mask[b] { |
|
|
|
continue // PAFM: bin is in spectral valley, skip |
|
|
|
} |
|
|
|
bin := BinLow + b |
|
|
|
chip := float64(e.pnChips[groupIdx][b]) |
|
|
|
|
|
|
|
@@ -282,9 +330,10 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best |
|
|
|
// Not enough data for a full watermark cycle — use what we have |
|
|
|
} |
|
|
|
|
|
|
|
// Compute STFT magnitudes (dB) for all frames |
|
|
|
// Compute STFT magnitudes (dB) for all frames + PAFM masks |
|
|
|
type stftFrame struct { |
|
|
|
magDB [FFTSize / 2]float64 |
|
|
|
mask [NumBins]bool // PAFM: which bins are audible |
|
|
|
} |
|
|
|
frames := make([]stftFrame, nFrames) |
|
|
|
|
|
|
|
@@ -306,8 +355,11 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best |
|
|
|
frames[f].magDB[bin] = 20 * math.Log10(mag) |
|
|
|
} |
|
|
|
|
|
|
|
// Cepstrum filtering: remove spectral envelope |
|
|
|
// DCT of dB magnitudes, zero first N_ceps coefficients, IDCT |
|
|
|
// PAFM mask: computed on ORIGINAL magnitudes (before cepstrum filtering) |
|
|
|
// so the mask reflects the true spectral shape for masking decisions. |
|
|
|
frames[f].mask = PafmMask(frames[f].magDB[:]) |
|
|
|
|
|
|
|
// Cepstrum filtering: remove spectral envelope (after mask computation) |
|
|
|
cepstrumFilter(frames[f].magDB[:], 8) |
|
|
|
} |
|
|
|
|
|
|
|
@@ -332,6 +384,9 @@ func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, best |
|
|
|
|
|
|
|
var corr float64 |
|
|
|
for b := 0; b < NumBins; b++ { |
|
|
|
if !frames[f].mask[b] { |
|
|
|
continue // PAFM: skip bins in spectral valleys |
|
|
|
} |
|
|
|
bin := BinLow + b |
|
|
|
corr += frames[f].magDB[bin] * float64(d.pnChips[g][b]) |
|
|
|
} |
|
|
|
|