Go-based FM stereo transmitter with RDS, Windows-first and cross-platform
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

499 rindas
14KB

  1. // Package watermark implements STFT-domain spread-spectrum audio watermarking
  2. // based on Kirovski & Malvar (IEEE TSP 2003).
  3. //
  4. // Architecture:
  5. // - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking
  6. // - Block repetition coding (R=5 time frames) — automatic drift tolerance
  7. // - Cepstrum filtering at detection — 6 dB carrier noise reduction
  8. // - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload
  9. // - Multi-test sync — scan R frame offsets to find alignment
  10. //
  11. // Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates
  12. // from composite rate (÷19), processes STFT, and upsamples back. The decoder
  13. // decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.).
  14. // Same STFT parameters → bins align perfectly → no rate mismatch.
  15. package watermark
  16. import (
  17. "crypto/sha256"
  18. "math"
  19. "math/cmplx"
  20. "github.com/jan/fm-rds-tx/internal/dsp"
  21. )
  22. // STFT watermark constants.
  23. const (
  24. WMRate = 12000 // watermark processing sample rate (Hz)
  25. FFTSize = 512 // STFT frame size (samples at WMRate)
  26. FFTHop = 256 // 50% overlap
  27. BinLow = 9 // ~211 Hz at WMRate/FFTSize
  28. BinHigh = 213 // ~4992 Hz at WMRate/FFTSize
  29. NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame
  30. TimeRep = 5 // block repetition factor (±2 frame drift tolerance)
  31. GroupsPerBit = 10 // time groups per data bit
  32. WMLevelDB = 0.5 // embedding level (dB) — inaudible, 20 dB margin for decode
  33. // PAFM: Psycho-Acoustic Frequency Masking (Kirovski §III-A).
  34. // Only embed/detect in bins where audio provides enough masking.
  35. // Bins more than PAFMThresholdDB below local spectral peak are
  36. // in spectral valleys — watermark would be audible there, and they
  37. // contribute more carrier noise than signal to the correlation.
  38. PAFMThresholdDB = 25.0 // dB below local peak → skip bin
  39. PAFMNeighborhood = 4 // ± bins for local peak search
  40. TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280
  41. FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400
  42. SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400
  43. // Duration at WMRate: 1638400 / 12000 = 136.5 seconds
  44. )
  45. // PafmMask computes which bins are "audible" (suitable for embedding/detection).
  46. // A bin is audible if its magnitude is within PAFMThresholdDB of the local
  47. // spectral peak (±PAFMNeighborhood bins). Bins in spectral valleys are
  48. // excluded — they have weak masking and would make the watermark audible.
  49. //
  50. // Returns a bitmask: true = embed/detect here, false = skip.
  51. // On average ~60% of bins are audible (matching Kirovski's observation).
  52. func PafmMask(magDB []float64) [NumBins]bool {
  53. var mask [NumBins]bool
  54. for b := 0; b < NumBins; b++ {
  55. bin := BinLow + b
  56. // Find local peak in neighborhood
  57. localPeak := magDB[bin]
  58. for j := -PAFMNeighborhood; j <= PAFMNeighborhood; j++ {
  59. idx := bin + j
  60. if idx >= 0 && idx < len(magDB) && magDB[idx] > localPeak {
  61. localPeak = magDB[idx]
  62. }
  63. }
  64. // Bin is audible if within threshold of local peak
  65. mask[b] = magDB[bin] >= localPeak-PAFMThresholdDB
  66. }
  67. return mask
  68. }
  69. // STFTEmbedder processes audio blocks and adds the STFT-domain watermark.
  70. // It works at WMRate (12 kHz). The caller must decimate input to WMRate
  71. // and upsample output back to the desired rate.
  72. type STFTEmbedder struct {
  73. // PN chip matrix: pnChips[group][bin] ∈ {-1, +1}
  74. // group ∈ [0, TotalGroups), bin ∈ [0, NumBins)
  75. pnChips [TotalGroups][NumBins]int8
  76. // Bit assignment: which data bit owns each group (PCC permutation)
  77. groupToBit [TotalGroups]int
  78. // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1
  79. symbols [payloadBits]int8
  80. // STFT state
  81. window [FFTSize]float64
  82. inBuf [FFTSize]float64 // analysis window buffer
  83. outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer
  84. inPos int // samples written to inBuf
  85. outPos int // samples read from outBuf
  86. frameIdx int // STFT frame counter (wraps at FramesPerWM)
  87. primed bool // true after first full frame
  88. // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.059 for 0.5 dB
  89. levelLinear float64
  90. }
  91. // NewSTFTEmbedder creates an embedder for the given license key.
  92. func NewSTFTEmbedder(key string) *STFTEmbedder {
  93. e := &STFTEmbedder{}
  94. // Compute RS-encoded payload
  95. var data [rsDataBytes]byte
  96. if key != "" {
  97. h := sha256.Sum256([]byte(key))
  98. copy(data[:], h[:rsDataBytes])
  99. }
  100. codeword := rsEncode(data)
  101. // BPSK symbols: bit 0 → +1, bit 1 → -1
  102. for i := 0; i < payloadBits; i++ {
  103. if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 {
  104. e.symbols[i] = -1
  105. } else {
  106. e.symbols[i] = 1
  107. }
  108. }
  109. // Generate PN chips from key-seeded PRNG
  110. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  111. prng := newPRNG(seed[:])
  112. for g := 0; g < TotalGroups; g++ {
  113. for b := 0; b < NumBins; b++ {
  114. if prng.next()&1 == 0 {
  115. e.pnChips[g][b] = 1
  116. } else {
  117. e.pnChips[g][b] = -1
  118. }
  119. }
  120. }
  121. // PCC permutation: assign groups to bits (interleaved + permuted)
  122. // Simple interleaving first, then Fisher-Yates shuffle
  123. for g := 0; g < TotalGroups; g++ {
  124. e.groupToBit[g] = g % payloadBits
  125. }
  126. // Permute within each bit's groups using key-seeded PRNG
  127. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  128. permRNG := newPRNG(permSeed[:])
  129. for i := TotalGroups - 1; i > 0; i-- {
  130. j := permRNG.next() % uint32(i+1)
  131. e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i]
  132. }
  133. // Hann window
  134. dsp.HannWindow(e.window[:])
  135. // Embedding level
  136. e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change
  137. return e
  138. }
  139. // ProcessBlock takes mono audio at WMRate and returns watermarked audio.
  140. // The input and output lengths are the same. Internally buffers for STFT
  141. // overlap-add processing. Call with chunks of any size.
  142. func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 {
  143. out := make([]float64, len(in))
  144. for i, s := range in {
  145. // Feed sample into STFT input buffer
  146. e.inBuf[e.inPos] = s
  147. e.inPos++
  148. if e.inPos == FFTSize {
  149. // Full frame: process STFT
  150. e.processFrame()
  151. e.inPos = FFTHop // shift: keep last hop samples for next frame overlap
  152. copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize])
  153. }
  154. // Read from overlap-add output buffer
  155. if e.primed {
  156. out[i] = e.outBuf[e.outPos]
  157. e.outPos++
  158. if e.outPos >= FFTHop {
  159. e.outPos = 0
  160. // Shift output buffer: move overlap region to start
  161. copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop])
  162. // Zero the new region
  163. for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ {
  164. if j < len(e.outBuf) {
  165. e.outBuf[j] = 0
  166. }
  167. }
  168. }
  169. } else {
  170. out[i] = s // pass-through until first frame is processed
  171. }
  172. }
  173. return out
  174. }
  175. // processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add.
  176. func (e *STFTEmbedder) processFrame() {
  177. // Determine which group this frame belongs to
  178. wmFrame := e.frameIdx % FramesPerWM
  179. groupIdx := wmFrame / TimeRep
  180. repIdx := wmFrame % TimeRep
  181. centerRep := TimeRep / 2 // only center repetition carries the watermark for detection
  182. // Apply window and convert to complex
  183. var buf [FFTSize]complex128
  184. for i := 0; i < FFTSize; i++ {
  185. buf[i] = complex(e.inBuf[i]*e.window[i], 0)
  186. }
  187. // Forward FFT
  188. dsp.FFT(buf[:])
  189. // Modify magnitudes in the watermark sub-band
  190. // Only modify if this is within a valid group AND at the center repetition
  191. // (we embed in ALL repetitions so the watermark energy is present everywhere,
  192. // but the PN pattern is the same for all R frames in a group)
  193. if groupIdx < TotalGroups {
  194. bitIdx := e.groupToBit[groupIdx]
  195. dataSign := float64(e.symbols[bitIdx])
  196. _ = repIdx
  197. _ = centerRep
  198. // PAFM: compute masking threshold for this frame.
  199. // Only embed in bins where audio provides enough masking.
  200. var frameMagDB [FFTSize / 2]float64
  201. for bin := 0; bin < FFTSize/2; bin++ {
  202. mag := cmplx.Abs(buf[bin])
  203. if mag < 1e-12 {
  204. mag = 1e-12
  205. }
  206. frameMagDB[bin] = 20 * math.Log10(mag)
  207. }
  208. mask := PafmMask(frameMagDB[:])
  209. for b := 0; b < NumBins; b++ {
  210. if !mask[b] {
  211. continue // PAFM: bin is in spectral valley, skip
  212. }
  213. bin := BinLow + b
  214. chip := float64(e.pnChips[groupIdx][b])
  215. // Modify magnitude: |Y| = |X| × (1 + level × chip × data)
  216. // Phase preserved
  217. mag := cmplx.Abs(buf[bin])
  218. if mag < 1e-10 {
  219. continue // skip near-silence bins to avoid division by zero
  220. }
  221. phase := cmplx.Phase(buf[bin])
  222. newMag := mag * (1.0 + e.levelLinear*chip*dataSign)
  223. buf[bin] = cmplx.Rect(newMag, phase)
  224. // Mirror for negative frequencies (conjugate symmetry)
  225. if bin > 0 && bin < FFTSize/2 {
  226. buf[FFTSize-bin] = cmplx.Conj(buf[bin])
  227. }
  228. }
  229. }
  230. // Inverse FFT
  231. dsp.IFFT(buf[:])
  232. // Overlap-add to output buffer
  233. for i := 0; i < FFTSize; i++ {
  234. e.outBuf[i] += real(buf[i])
  235. }
  236. if !e.primed {
  237. e.primed = true
  238. e.outPos = 0
  239. }
  240. e.frameIdx++
  241. }
  242. // STFTDetector extracts watermark bits from an audio recording.
  243. type STFTDetector struct {
  244. pnChips [TotalGroups][NumBins]int8
  245. groupToBit [TotalGroups]int
  246. }
  247. // NewSTFTDetector creates a detector. No key needed — the PN sequence is
  248. // public (fixed). The detector extracts the payload blindly.
  249. func NewSTFTDetector() *STFTDetector {
  250. d := &STFTDetector{}
  251. // Same PN generation as embedder
  252. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  253. prng := newPRNG(seed[:])
  254. for g := 0; g < TotalGroups; g++ {
  255. for b := 0; b < NumBins; b++ {
  256. if prng.next()&1 == 0 {
  257. d.pnChips[g][b] = 1
  258. } else {
  259. d.pnChips[g][b] = -1
  260. }
  261. }
  262. }
  263. // Same permutation
  264. for g := 0; g < TotalGroups; g++ {
  265. d.groupToBit[g] = g % payloadBits
  266. }
  267. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  268. permRNG := newPRNG(permSeed[:])
  269. for i := TotalGroups - 1; i > 0; i-- {
  270. j := permRNG.next() % uint32(i+1)
  271. d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i]
  272. }
  273. return d
  274. }
  275. // Detect processes audio at WMRate and returns soft bit decisions.
  276. // The audio should already be decimated/resampled to WMRate and LPF'd.
  277. //
  278. // Multi-test: tries TimeRep frame offsets (the block repetition candidates).
  279. // Cepstrum filtering is applied to reduce carrier noise.
  280. //
  281. // Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence),
  282. // and the frame offset that gave the best detection metric.
  283. func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) {
  284. // Compute all STFT frames
  285. var window [FFTSize]float64
  286. dsp.HannWindow(window[:])
  287. nFrames := (len(audio) - FFTSize) / FFTHop
  288. if nFrames < FramesPerWM {
  289. // Not enough data for a full watermark cycle — use what we have
  290. }
  291. // Compute STFT magnitudes (dB) for all frames + PAFM masks
  292. type stftFrame struct {
  293. magDB [FFTSize / 2]float64
  294. mask [NumBins]bool // PAFM: which bins are audible
  295. }
  296. frames := make([]stftFrame, nFrames)
  297. for f := 0; f < nFrames; f++ {
  298. offset := f * FFTHop
  299. var buf [FFTSize]complex128
  300. for i := 0; i < FFTSize; i++ {
  301. if offset+i < len(audio) {
  302. buf[i] = complex(audio[offset+i]*window[i], 0)
  303. }
  304. }
  305. dsp.FFT(buf[:])
  306. for bin := 0; bin < FFTSize/2; bin++ {
  307. mag := cmplx.Abs(buf[bin])
  308. if mag < 1e-12 {
  309. mag = 1e-12
  310. }
  311. frames[f].magDB[bin] = 20 * math.Log10(mag)
  312. }
  313. // PAFM mask: computed on ORIGINAL magnitudes (before cepstrum filtering)
  314. // so the mask reflects the true spectral shape for masking decisions.
  315. frames[f].mask = PafmMask(frames[f].magDB[:])
  316. // Cepstrum filtering: remove spectral envelope (after mask computation)
  317. cepstrumFilter(frames[f].magDB[:], 8)
  318. }
  319. // Multi-test: try each of TimeRep frame offsets within the repetition block
  320. bestMetric := -1.0
  321. bestOffset = 0
  322. for startOffset := 0; startOffset < TimeRep; startOffset++ {
  323. var testCorrs [payloadBits]float64
  324. // Iterate over ALL recording frames — multiple WM cycles accumulate
  325. // automatically via modular wrapping. This gives √N_cycles SNR gain.
  326. for f := 0; f < nFrames; f++ {
  327. wmFrame := ((f - startOffset) % FramesPerWM + FramesPerWM) % FramesPerWM
  328. if wmFrame%TimeRep != TimeRep/2 {
  329. continue // not center of repetition block
  330. }
  331. g := wmFrame / TimeRep
  332. if g >= TotalGroups {
  333. continue
  334. }
  335. var corr float64
  336. for b := 0; b < NumBins; b++ {
  337. if !frames[f].mask[b] {
  338. continue // PAFM: skip bins in spectral valleys
  339. }
  340. bin := BinLow + b
  341. corr += frames[f].magDB[bin] * float64(d.pnChips[g][b])
  342. }
  343. testCorrs[d.groupToBit[g]] += corr
  344. }
  345. // Detection metric: sum of squared partial correlations (chi-squared)
  346. // From paper equation (10): Q = Σ (corr_m)²
  347. var metric float64
  348. for _, c := range testCorrs {
  349. metric += c * c
  350. }
  351. if metric > bestMetric {
  352. bestMetric = metric
  353. bestOffset = startOffset
  354. corrs = testCorrs
  355. }
  356. }
  357. return corrs, bestOffset
  358. }
  359. // cepstrumFilter removes the spectral envelope from dB magnitudes.
  360. // It zeros the first nCeps DCT coefficients (the smooth spectral shape).
  361. // This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB.
  362. //
  363. // Uses precomputed cosine table for O(N²) DCT without math.Cos calls.
  364. func cepstrumFilter(magDB []float64, nCeps int) {
  365. n := len(magDB)
  366. if n < nCeps*2 {
  367. return
  368. }
  369. cosTable := getCosTable(n)
  370. // DCT-II
  371. ceps := make([]float64, n)
  372. for k := 0; k < n; k++ {
  373. var sum float64
  374. row := cosTable[k]
  375. for i := 0; i < n; i++ {
  376. sum += magDB[i] * row[i]
  377. }
  378. ceps[k] = sum
  379. }
  380. // Zero low-order coefficients
  381. for k := 0; k < nCeps; k++ {
  382. ceps[k] = 0
  383. }
  384. // IDCT
  385. scale := 2.0 / float64(n)
  386. for i := 0; i < n; i++ {
  387. var sum float64
  388. for k := 0; k < n; k++ {
  389. w := 1.0
  390. if k == 0 {
  391. w = 0.5
  392. }
  393. sum += w * ceps[k] * cosTable[k][i]
  394. }
  395. magDB[i] = sum * scale
  396. }
  397. }
  398. // Cached cosine table for DCT. cosTable[k][i] = cos(π·k·(i+0.5)/N).
  399. var cachedCosTable [][]float64
  400. var cachedCosN int
  401. func getCosTable(n int) [][]float64 {
  402. if cachedCosN == n {
  403. return cachedCosTable
  404. }
  405. table := make([][]float64, n)
  406. for k := 0; k < n; k++ {
  407. table[k] = make([]float64, n)
  408. for i := 0; i < n; i++ {
  409. table[k][i] = math.Cos(math.Pi * float64(k) * (float64(i) + 0.5) / float64(n))
  410. }
  411. }
  412. cachedCosTable = table
  413. cachedCosN = n
  414. return table
  415. }
  416. // Simple xorshift32 PRNG for deterministic chip generation.
  417. type simplePRNG struct {
  418. state uint32
  419. }
  420. func newPRNG(seed []byte) *simplePRNG {
  421. var s uint32
  422. for i, b := range seed {
  423. s ^= uint32(b) << (uint(i%4) * 8)
  424. }
  425. if s == 0 {
  426. s = 1
  427. }
  428. return &simplePRNG{state: s}
  429. }
  430. func (p *simplePRNG) next() uint32 {
  431. p.state ^= p.state << 13
  432. p.state ^= p.state >> 17
  433. p.state ^= p.state << 5
  434. return p.state
  435. }