Go-based FM stereo transmitter with RDS, Windows-first and cross-platform
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

444 line
12KB

  1. // Package watermark implements STFT-domain spread-spectrum audio watermarking
  2. // based on Kirovski & Malvar (IEEE TSP 2003).
  3. //
  4. // Architecture:
  5. // - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking
  6. // - Block repetition coding (R=5 time frames) — automatic drift tolerance
  7. // - Cepstrum filtering at detection — 6 dB carrier noise reduction
  8. // - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload
  9. // - Multi-test sync — scan R frame offsets to find alignment
  10. //
  11. // Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates
  12. // from composite rate (÷19), processes STFT, and upsamples back. The decoder
  13. // decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.).
  14. // Same STFT parameters → bins align perfectly → no rate mismatch.
  15. package watermark
  16. import (
  17. "crypto/sha256"
  18. "math"
  19. "math/cmplx"
  20. "github.com/jan/fm-rds-tx/internal/dsp"
  21. )
  22. // STFT watermark constants.
  23. const (
  24. WMRate = 12000 // watermark processing sample rate (Hz)
  25. FFTSize = 512 // STFT frame size (samples at WMRate)
  26. FFTHop = 256 // 50% overlap
  27. BinLow = 9 // ~211 Hz at WMRate/FFTSize
  28. BinHigh = 213 // ~4992 Hz at WMRate/FFTSize
  29. NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame
  30. TimeRep = 5 // block repetition factor (±2 frame drift tolerance)
  31. GroupsPerBit = 10 // time groups per data bit
  32. WMLevelDB = 0.5 // embedding level (dB) — inaudible, 20 dB margin for decode
  33. TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280
  34. FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400
  35. SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400
  36. // Duration at WMRate: 1638400 / 12000 = 136.5 seconds
  37. )
  38. // STFTEmbedder processes audio blocks and adds the STFT-domain watermark.
  39. // It works at WMRate (12 kHz). The caller must decimate input to WMRate
  40. // and upsample output back to the desired rate.
  41. type STFTEmbedder struct {
  42. // PN chip matrix: pnChips[group][bin] ∈ {-1, +1}
  43. // group ∈ [0, TotalGroups), bin ∈ [0, NumBins)
  44. pnChips [TotalGroups][NumBins]int8
  45. // Bit assignment: which data bit owns each group (PCC permutation)
  46. groupToBit [TotalGroups]int
  47. // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1
  48. symbols [payloadBits]int8
  49. // STFT state
  50. window [FFTSize]float64
  51. inBuf [FFTSize]float64 // analysis window buffer
  52. outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer
  53. inPos int // samples written to inBuf
  54. outPos int // samples read from outBuf
  55. frameIdx int // STFT frame counter (wraps at FramesPerWM)
  56. primed bool // true after first full frame
  57. // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.059 for 0.5 dB
  58. levelLinear float64
  59. }
  60. // NewSTFTEmbedder creates an embedder for the given license key.
  61. func NewSTFTEmbedder(key string) *STFTEmbedder {
  62. e := &STFTEmbedder{}
  63. // Compute RS-encoded payload
  64. var data [rsDataBytes]byte
  65. if key != "" {
  66. h := sha256.Sum256([]byte(key))
  67. copy(data[:], h[:rsDataBytes])
  68. }
  69. codeword := rsEncode(data)
  70. // BPSK symbols: bit 0 → +1, bit 1 → -1
  71. for i := 0; i < payloadBits; i++ {
  72. if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 {
  73. e.symbols[i] = -1
  74. } else {
  75. e.symbols[i] = 1
  76. }
  77. }
  78. // Generate PN chips from key-seeded PRNG
  79. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  80. prng := newPRNG(seed[:])
  81. for g := 0; g < TotalGroups; g++ {
  82. for b := 0; b < NumBins; b++ {
  83. if prng.next()&1 == 0 {
  84. e.pnChips[g][b] = 1
  85. } else {
  86. e.pnChips[g][b] = -1
  87. }
  88. }
  89. }
  90. // PCC permutation: assign groups to bits (interleaved + permuted)
  91. // Simple interleaving first, then Fisher-Yates shuffle
  92. for g := 0; g < TotalGroups; g++ {
  93. e.groupToBit[g] = g % payloadBits
  94. }
  95. // Permute within each bit's groups using key-seeded PRNG
  96. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  97. permRNG := newPRNG(permSeed[:])
  98. for i := TotalGroups - 1; i > 0; i-- {
  99. j := permRNG.next() % uint32(i+1)
  100. e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i]
  101. }
  102. // Hann window
  103. dsp.HannWindow(e.window[:])
  104. // Embedding level
  105. e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change
  106. return e
  107. }
  108. // ProcessBlock takes mono audio at WMRate and returns watermarked audio.
  109. // The input and output lengths are the same. Internally buffers for STFT
  110. // overlap-add processing. Call with chunks of any size.
  111. func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 {
  112. out := make([]float64, len(in))
  113. for i, s := range in {
  114. // Feed sample into STFT input buffer
  115. e.inBuf[e.inPos] = s
  116. e.inPos++
  117. if e.inPos == FFTSize {
  118. // Full frame: process STFT
  119. e.processFrame()
  120. e.inPos = FFTHop // shift: keep last hop samples for next frame overlap
  121. copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize])
  122. }
  123. // Read from overlap-add output buffer
  124. if e.primed {
  125. out[i] = e.outBuf[e.outPos]
  126. e.outPos++
  127. if e.outPos >= FFTHop {
  128. e.outPos = 0
  129. // Shift output buffer: move overlap region to start
  130. copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop])
  131. // Zero the new region
  132. for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ {
  133. if j < len(e.outBuf) {
  134. e.outBuf[j] = 0
  135. }
  136. }
  137. }
  138. } else {
  139. out[i] = s // pass-through until first frame is processed
  140. }
  141. }
  142. return out
  143. }
  144. // processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add.
  145. func (e *STFTEmbedder) processFrame() {
  146. // Determine which group this frame belongs to
  147. wmFrame := e.frameIdx % FramesPerWM
  148. groupIdx := wmFrame / TimeRep
  149. repIdx := wmFrame % TimeRep
  150. centerRep := TimeRep / 2 // only center repetition carries the watermark for detection
  151. // Apply window and convert to complex
  152. var buf [FFTSize]complex128
  153. for i := 0; i < FFTSize; i++ {
  154. buf[i] = complex(e.inBuf[i]*e.window[i], 0)
  155. }
  156. // Forward FFT
  157. dsp.FFT(buf[:])
  158. // Modify magnitudes in the watermark sub-band
  159. // Only modify if this is within a valid group AND at the center repetition
  160. // (we embed in ALL repetitions so the watermark energy is present everywhere,
  161. // but the PN pattern is the same for all R frames in a group)
  162. if groupIdx < TotalGroups {
  163. bitIdx := e.groupToBit[groupIdx]
  164. dataSign := float64(e.symbols[bitIdx])
  165. _ = repIdx
  166. _ = centerRep
  167. for b := 0; b < NumBins; b++ {
  168. bin := BinLow + b
  169. chip := float64(e.pnChips[groupIdx][b])
  170. // Modify magnitude: |Y| = |X| × (1 + level × chip × data)
  171. // Phase preserved
  172. mag := cmplx.Abs(buf[bin])
  173. if mag < 1e-10 {
  174. continue // skip near-silence bins to avoid division by zero
  175. }
  176. phase := cmplx.Phase(buf[bin])
  177. newMag := mag * (1.0 + e.levelLinear*chip*dataSign)
  178. buf[bin] = cmplx.Rect(newMag, phase)
  179. // Mirror for negative frequencies (conjugate symmetry)
  180. if bin > 0 && bin < FFTSize/2 {
  181. buf[FFTSize-bin] = cmplx.Conj(buf[bin])
  182. }
  183. }
  184. }
  185. // Inverse FFT
  186. dsp.IFFT(buf[:])
  187. // Overlap-add to output buffer
  188. for i := 0; i < FFTSize; i++ {
  189. e.outBuf[i] += real(buf[i])
  190. }
  191. if !e.primed {
  192. e.primed = true
  193. e.outPos = 0
  194. }
  195. e.frameIdx++
  196. }
  197. // STFTDetector extracts watermark bits from an audio recording.
  198. type STFTDetector struct {
  199. pnChips [TotalGroups][NumBins]int8
  200. groupToBit [TotalGroups]int
  201. }
  202. // NewSTFTDetector creates a detector. No key needed — the PN sequence is
  203. // public (fixed). The detector extracts the payload blindly.
  204. func NewSTFTDetector() *STFTDetector {
  205. d := &STFTDetector{}
  206. // Same PN generation as embedder
  207. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  208. prng := newPRNG(seed[:])
  209. for g := 0; g < TotalGroups; g++ {
  210. for b := 0; b < NumBins; b++ {
  211. if prng.next()&1 == 0 {
  212. d.pnChips[g][b] = 1
  213. } else {
  214. d.pnChips[g][b] = -1
  215. }
  216. }
  217. }
  218. // Same permutation
  219. for g := 0; g < TotalGroups; g++ {
  220. d.groupToBit[g] = g % payloadBits
  221. }
  222. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  223. permRNG := newPRNG(permSeed[:])
  224. for i := TotalGroups - 1; i > 0; i-- {
  225. j := permRNG.next() % uint32(i+1)
  226. d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i]
  227. }
  228. return d
  229. }
  230. // Detect processes audio at WMRate and returns soft bit decisions.
  231. // The audio should already be decimated/resampled to WMRate and LPF'd.
  232. //
  233. // Multi-test: tries TimeRep frame offsets (the block repetition candidates).
  234. // Cepstrum filtering is applied to reduce carrier noise.
  235. //
  236. // Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence),
  237. // and the frame offset that gave the best detection metric.
  238. func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) {
  239. // Compute all STFT frames
  240. var window [FFTSize]float64
  241. dsp.HannWindow(window[:])
  242. nFrames := (len(audio) - FFTSize) / FFTHop
  243. if nFrames < FramesPerWM {
  244. // Not enough data for a full watermark cycle — use what we have
  245. }
  246. // Compute STFT magnitudes (dB) for all frames
  247. type stftFrame struct {
  248. magDB [FFTSize / 2]float64
  249. }
  250. frames := make([]stftFrame, nFrames)
  251. for f := 0; f < nFrames; f++ {
  252. offset := f * FFTHop
  253. var buf [FFTSize]complex128
  254. for i := 0; i < FFTSize; i++ {
  255. if offset+i < len(audio) {
  256. buf[i] = complex(audio[offset+i]*window[i], 0)
  257. }
  258. }
  259. dsp.FFT(buf[:])
  260. for bin := 0; bin < FFTSize/2; bin++ {
  261. mag := cmplx.Abs(buf[bin])
  262. if mag < 1e-12 {
  263. mag = 1e-12
  264. }
  265. frames[f].magDB[bin] = 20 * math.Log10(mag)
  266. }
  267. // Cepstrum filtering: remove spectral envelope
  268. // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT
  269. cepstrumFilter(frames[f].magDB[:], 8)
  270. }
  271. // Multi-test: try each of TimeRep frame offsets within the repetition block
  272. bestMetric := -1.0
  273. bestOffset = 0
  274. for startOffset := 0; startOffset < TimeRep; startOffset++ {
  275. var testCorrs [payloadBits]float64
  276. // Iterate over ALL recording frames — multiple WM cycles accumulate
  277. // automatically via modular wrapping. This gives √N_cycles SNR gain.
  278. for f := 0; f < nFrames; f++ {
  279. wmFrame := ((f - startOffset) % FramesPerWM + FramesPerWM) % FramesPerWM
  280. if wmFrame%TimeRep != TimeRep/2 {
  281. continue // not center of repetition block
  282. }
  283. g := wmFrame / TimeRep
  284. if g >= TotalGroups {
  285. continue
  286. }
  287. var corr float64
  288. for b := 0; b < NumBins; b++ {
  289. bin := BinLow + b
  290. corr += frames[f].magDB[bin] * float64(d.pnChips[g][b])
  291. }
  292. testCorrs[d.groupToBit[g]] += corr
  293. }
  294. // Detection metric: sum of squared partial correlations (chi-squared)
  295. // From paper equation (10): Q = Σ (corr_m)²
  296. var metric float64
  297. for _, c := range testCorrs {
  298. metric += c * c
  299. }
  300. if metric > bestMetric {
  301. bestMetric = metric
  302. bestOffset = startOffset
  303. corrs = testCorrs
  304. }
  305. }
  306. return corrs, bestOffset
  307. }
  308. // cepstrumFilter removes the spectral envelope from dB magnitudes.
  309. // It zeros the first nCeps DCT coefficients (the smooth spectral shape).
  310. // This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB.
  311. //
  312. // Uses precomputed cosine table for O(N²) DCT without math.Cos calls.
  313. func cepstrumFilter(magDB []float64, nCeps int) {
  314. n := len(magDB)
  315. if n < nCeps*2 {
  316. return
  317. }
  318. cosTable := getCosTable(n)
  319. // DCT-II
  320. ceps := make([]float64, n)
  321. for k := 0; k < n; k++ {
  322. var sum float64
  323. row := cosTable[k]
  324. for i := 0; i < n; i++ {
  325. sum += magDB[i] * row[i]
  326. }
  327. ceps[k] = sum
  328. }
  329. // Zero low-order coefficients
  330. for k := 0; k < nCeps; k++ {
  331. ceps[k] = 0
  332. }
  333. // IDCT
  334. scale := 2.0 / float64(n)
  335. for i := 0; i < n; i++ {
  336. var sum float64
  337. for k := 0; k < n; k++ {
  338. w := 1.0
  339. if k == 0 {
  340. w = 0.5
  341. }
  342. sum += w * ceps[k] * cosTable[k][i]
  343. }
  344. magDB[i] = sum * scale
  345. }
  346. }
  347. // Cached cosine table for DCT. cosTable[k][i] = cos(π·k·(i+0.5)/N).
  348. var cachedCosTable [][]float64
  349. var cachedCosN int
  350. func getCosTable(n int) [][]float64 {
  351. if cachedCosN == n {
  352. return cachedCosTable
  353. }
  354. table := make([][]float64, n)
  355. for k := 0; k < n; k++ {
  356. table[k] = make([]float64, n)
  357. for i := 0; i < n; i++ {
  358. table[k][i] = math.Cos(math.Pi * float64(k) * (float64(i) + 0.5) / float64(n))
  359. }
  360. }
  361. cachedCosTable = table
  362. cachedCosN = n
  363. return table
  364. }
  365. // Simple xorshift32 PRNG for deterministic chip generation.
  366. type simplePRNG struct {
  367. state uint32
  368. }
  369. func newPRNG(seed []byte) *simplePRNG {
  370. var s uint32
  371. for i, b := range seed {
  372. s ^= uint32(b) << (uint(i%4) * 8)
  373. }
  374. if s == 0 {
  375. s = 1
  376. }
  377. return &simplePRNG{state: s}
  378. }
  379. func (p *simplePRNG) next() uint32 {
  380. p.state ^= p.state << 13
  381. p.state ^= p.state >> 17
  382. p.state ^= p.state << 5
  383. return p.state
  384. }