Go-based FM stereo transmitter with RDS, Windows-first and cross-platform
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

418 lines
12KB

  1. // Package watermark implements STFT-domain spread-spectrum audio watermarking
  2. // based on Kirovski & Malvar (IEEE TSP 2003).
  3. //
  4. // Architecture:
  5. // - Embedding in STFT magnitude (dB scale) — multiplicative, natural masking
  6. // - Block repetition coding (R=5 time frames) — automatic drift tolerance
  7. // - Cepstrum filtering at detection — 6 dB carrier noise reduction
  8. // - PCC covert channel — PN partitioned into M=128 subsets for 128-bit payload
  9. // - Multi-test sync — scan R frame offsets to find alignment
  10. //
  11. // Both encoder and decoder operate at 12 kHz (WMRate). The encoder decimates
  12. // from composite rate (÷19), processes STFT, and upsamples back. The decoder
  13. // decimates from recording rate (÷16 from 192kHz, ÷4 from 48kHz, etc.).
  14. // Same STFT parameters → bins align perfectly → no rate mismatch.
  15. package watermark
  16. import (
  17. "crypto/sha256"
  18. "math"
  19. "math/cmplx"
  20. "github.com/jan/fm-rds-tx/internal/dsp"
  21. )
  22. // STFT watermark constants.
  23. const (
  24. WMRate = 12000 // watermark processing sample rate (Hz)
  25. FFTSize = 512 // STFT frame size (samples at WMRate)
  26. FFTHop = 256 // 50% overlap
  27. BinLow = 9 // ~211 Hz at WMRate/FFTSize
  28. BinHigh = 213 // ~4992 Hz at WMRate/FFTSize
  29. NumBins = BinHigh - BinLow // 204 frequency chips per STFT frame
  30. TimeRep = 5 // block repetition factor (±2 frame drift tolerance)
  31. GroupsPerBit = 10 // time groups per data bit
  32. WMLevelDB = 0.5 // embedding level (dB) — inaudible, 20 dB margin for decode
  33. TotalGroups = GroupsPerBit * payloadBits // 10 × 128 = 1280
  34. FramesPerWM = TotalGroups * TimeRep // 1280 × 5 = 6400
  35. SamplesPerWM = FramesPerWM * FFTHop // 6400 × 256 = 1638400
  36. // Duration at WMRate: 1638400 / 12000 = 136.5 seconds
  37. )
  38. // STFTEmbedder processes audio blocks and adds the STFT-domain watermark.
  39. // It works at WMRate (12 kHz). The caller must decimate input to WMRate
  40. // and upsample output back to the desired rate.
  41. type STFTEmbedder struct {
  42. // PN chip matrix: pnChips[group][bin] ∈ {-1, +1}
  43. // group ∈ [0, TotalGroups), bin ∈ [0, NumBins)
  44. pnChips [TotalGroups][NumBins]int8
  45. // Bit assignment: which data bit owns each group (PCC permutation)
  46. groupToBit [TotalGroups]int
  47. // RS-encoded codeword: 128 bits → symbol[bit] = +1 or -1
  48. symbols [payloadBits]int8
  49. // STFT state
  50. window [FFTSize]float64
  51. inBuf [FFTSize]float64 // analysis window buffer
  52. outBuf [FFTSize + FFTHop]float64 // overlap-add output buffer
  53. inPos int // samples written to inBuf
  54. outPos int // samples read from outBuf
  55. frameIdx int // STFT frame counter (wraps at FramesPerWM)
  56. primed bool // true after first full frame
  57. // Level in linear scale: 10^(WMLevelDB/20) - 1 ≈ 0.059 for 0.5 dB
  58. levelLinear float64
  59. }
  60. // NewSTFTEmbedder creates an embedder for the given license key.
  61. func NewSTFTEmbedder(key string) *STFTEmbedder {
  62. e := &STFTEmbedder{}
  63. // Compute RS-encoded payload
  64. var data [rsDataBytes]byte
  65. if key != "" {
  66. h := sha256.Sum256([]byte(key))
  67. copy(data[:], h[:rsDataBytes])
  68. }
  69. codeword := rsEncode(data)
  70. // BPSK symbols: bit 0 → +1, bit 1 → -1
  71. for i := 0; i < payloadBits; i++ {
  72. if (codeword[i/8]>>uint(7-(i%8)))&1 == 1 {
  73. e.symbols[i] = -1
  74. } else {
  75. e.symbols[i] = 1
  76. }
  77. }
  78. // Generate PN chips from key-seeded PRNG
  79. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  80. prng := newPRNG(seed[:])
  81. for g := 0; g < TotalGroups; g++ {
  82. for b := 0; b < NumBins; b++ {
  83. if prng.next()&1 == 0 {
  84. e.pnChips[g][b] = 1
  85. } else {
  86. e.pnChips[g][b] = -1
  87. }
  88. }
  89. }
  90. // PCC permutation: assign groups to bits (interleaved + permuted)
  91. // Simple interleaving first, then Fisher-Yates shuffle
  92. for g := 0; g < TotalGroups; g++ {
  93. e.groupToBit[g] = g % payloadBits
  94. }
  95. // Permute within each bit's groups using key-seeded PRNG
  96. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  97. permRNG := newPRNG(permSeed[:])
  98. for i := TotalGroups - 1; i > 0; i-- {
  99. j := permRNG.next() % uint32(i+1)
  100. e.groupToBit[i], e.groupToBit[j] = e.groupToBit[j], e.groupToBit[i]
  101. }
  102. // Hann window
  103. dsp.HannWindow(e.window[:])
  104. // Embedding level
  105. e.levelLinear = math.Pow(10, WMLevelDB/20) - 1 // fractional magnitude change
  106. return e
  107. }
  108. // ProcessBlock takes mono audio at WMRate and returns watermarked audio.
  109. // The input and output lengths are the same. Internally buffers for STFT
  110. // overlap-add processing. Call with chunks of any size.
  111. func (e *STFTEmbedder) ProcessBlock(in []float64) []float64 {
  112. out := make([]float64, len(in))
  113. for i, s := range in {
  114. // Feed sample into STFT input buffer
  115. e.inBuf[e.inPos] = s
  116. e.inPos++
  117. if e.inPos == FFTSize {
  118. // Full frame: process STFT
  119. e.processFrame()
  120. e.inPos = FFTHop // shift: keep last hop samples for next frame overlap
  121. copy(e.inBuf[:FFTHop], e.inBuf[FFTHop:FFTSize])
  122. }
  123. // Read from overlap-add output buffer
  124. if e.primed {
  125. out[i] = e.outBuf[e.outPos]
  126. e.outPos++
  127. if e.outPos >= FFTHop {
  128. e.outPos = 0
  129. // Shift output buffer: move overlap region to start
  130. copy(e.outBuf[:FFTSize], e.outBuf[FFTHop:FFTSize+FFTHop])
  131. // Zero the new region
  132. for j := FFTSize - FFTHop; j < FFTSize+FFTHop; j++ {
  133. if j < len(e.outBuf) {
  134. e.outBuf[j] = 0
  135. }
  136. }
  137. }
  138. } else {
  139. out[i] = s // pass-through until first frame is processed
  140. }
  141. }
  142. return out
  143. }
  144. // processFrame computes one STFT frame: window → FFT → modify magnitudes → IFFT → overlap-add.
  145. func (e *STFTEmbedder) processFrame() {
  146. // Determine which group this frame belongs to
  147. wmFrame := e.frameIdx % FramesPerWM
  148. groupIdx := wmFrame / TimeRep
  149. repIdx := wmFrame % TimeRep
  150. centerRep := TimeRep / 2 // only center repetition carries the watermark for detection
  151. // Apply window and convert to complex
  152. var buf [FFTSize]complex128
  153. for i := 0; i < FFTSize; i++ {
  154. buf[i] = complex(e.inBuf[i]*e.window[i], 0)
  155. }
  156. // Forward FFT
  157. dsp.FFT(buf[:])
  158. // Modify magnitudes in the watermark sub-band
  159. // Only modify if this is within a valid group AND at the center repetition
  160. // (we embed in ALL repetitions so the watermark energy is present everywhere,
  161. // but the PN pattern is the same for all R frames in a group)
  162. if groupIdx < TotalGroups {
  163. bitIdx := e.groupToBit[groupIdx]
  164. dataSign := float64(e.symbols[bitIdx])
  165. _ = repIdx
  166. _ = centerRep
  167. for b := 0; b < NumBins; b++ {
  168. bin := BinLow + b
  169. chip := float64(e.pnChips[groupIdx][b])
  170. // Modify magnitude: |Y| = |X| × (1 + level × chip × data)
  171. // Phase preserved
  172. mag := cmplx.Abs(buf[bin])
  173. if mag < 1e-10 {
  174. continue // skip near-silence bins to avoid division by zero
  175. }
  176. phase := cmplx.Phase(buf[bin])
  177. newMag := mag * (1.0 + e.levelLinear*chip*dataSign)
  178. buf[bin] = cmplx.Rect(newMag, phase)
  179. // Mirror for negative frequencies (conjugate symmetry)
  180. if bin > 0 && bin < FFTSize/2 {
  181. buf[FFTSize-bin] = cmplx.Conj(buf[bin])
  182. }
  183. }
  184. }
  185. // Inverse FFT
  186. dsp.IFFT(buf[:])
  187. // Overlap-add to output buffer
  188. for i := 0; i < FFTSize; i++ {
  189. e.outBuf[i] += real(buf[i])
  190. }
  191. if !e.primed {
  192. e.primed = true
  193. e.outPos = 0
  194. }
  195. e.frameIdx++
  196. }
  197. // STFTDetector extracts watermark bits from an audio recording.
  198. type STFTDetector struct {
  199. pnChips [TotalGroups][NumBins]int8
  200. groupToBit [TotalGroups]int
  201. }
  202. // NewSTFTDetector creates a detector. No key needed — the PN sequence is
  203. // public (fixed). The detector extracts the payload blindly.
  204. func NewSTFTDetector() *STFTDetector {
  205. d := &STFTDetector{}
  206. // Same PN generation as embedder
  207. seed := sha256.Sum256([]byte("fmrtx-stft-pn-v1"))
  208. prng := newPRNG(seed[:])
  209. for g := 0; g < TotalGroups; g++ {
  210. for b := 0; b < NumBins; b++ {
  211. if prng.next()&1 == 0 {
  212. d.pnChips[g][b] = 1
  213. } else {
  214. d.pnChips[g][b] = -1
  215. }
  216. }
  217. }
  218. // Same permutation
  219. for g := 0; g < TotalGroups; g++ {
  220. d.groupToBit[g] = g % payloadBits
  221. }
  222. permSeed := sha256.Sum256([]byte("fmrtx-stft-perm-v1"))
  223. permRNG := newPRNG(permSeed[:])
  224. for i := TotalGroups - 1; i > 0; i-- {
  225. j := permRNG.next() % uint32(i+1)
  226. d.groupToBit[i], d.groupToBit[j] = d.groupToBit[j], d.groupToBit[i]
  227. }
  228. return d
  229. }
  230. // Detect processes audio at WMRate and returns soft bit decisions.
  231. // The audio should already be decimated/resampled to WMRate and LPF'd.
  232. //
  233. // Multi-test: tries TimeRep frame offsets (the block repetition candidates).
  234. // Cepstrum filtering is applied to reduce carrier noise.
  235. //
  236. // Returns: 128 soft correlation values (sign = bit decision, magnitude = confidence),
  237. // and the frame offset that gave the best detection metric.
  238. func (d *STFTDetector) Detect(audio []float64) (corrs [payloadBits]float64, bestOffset int) {
  239. // Compute all STFT frames
  240. var window [FFTSize]float64
  241. dsp.HannWindow(window[:])
  242. nFrames := (len(audio) - FFTSize) / FFTHop
  243. if nFrames < FramesPerWM {
  244. // Not enough data for a full watermark cycle — use what we have
  245. }
  246. // Compute STFT magnitudes (dB) for all frames
  247. type stftFrame struct {
  248. magDB [FFTSize / 2]float64
  249. }
  250. frames := make([]stftFrame, nFrames)
  251. for f := 0; f < nFrames; f++ {
  252. offset := f * FFTHop
  253. var buf [FFTSize]complex128
  254. for i := 0; i < FFTSize; i++ {
  255. if offset+i < len(audio) {
  256. buf[i] = complex(audio[offset+i]*window[i], 0)
  257. }
  258. }
  259. dsp.FFT(buf[:])
  260. for bin := 0; bin < FFTSize/2; bin++ {
  261. mag := cmplx.Abs(buf[bin])
  262. if mag < 1e-12 {
  263. mag = 1e-12
  264. }
  265. frames[f].magDB[bin] = 20 * math.Log10(mag)
  266. }
  267. // Cepstrum filtering: remove spectral envelope
  268. // DCT of dB magnitudes, zero first N_ceps coefficients, IDCT
  269. cepstrumFilter(frames[f].magDB[:], 8)
  270. }
  271. // Multi-test: try each of TimeRep frame offsets within the repetition block
  272. bestMetric := -1.0
  273. bestOffset = 0
  274. for startOffset := 0; startOffset < TimeRep; startOffset++ {
  275. var testCorrs [payloadBits]float64
  276. // Iterate over ALL recording frames — multiple WM cycles accumulate
  277. // automatically via modular wrapping. This gives √N_cycles SNR gain.
  278. for f := 0; f < nFrames; f++ {
  279. wmFrame := ((f - startOffset) % FramesPerWM + FramesPerWM) % FramesPerWM
  280. if wmFrame%TimeRep != TimeRep/2 {
  281. continue // not center of repetition block
  282. }
  283. g := wmFrame / TimeRep
  284. if g >= TotalGroups {
  285. continue
  286. }
  287. var corr float64
  288. for b := 0; b < NumBins; b++ {
  289. bin := BinLow + b
  290. corr += frames[f].magDB[bin] * float64(d.pnChips[g][b])
  291. }
  292. testCorrs[d.groupToBit[g]] += corr
  293. }
  294. // Detection metric: sum of squared partial correlations (chi-squared)
  295. // From paper equation (10): Q = Σ (corr_m)²
  296. var metric float64
  297. for _, c := range testCorrs {
  298. metric += c * c
  299. }
  300. if metric > bestMetric {
  301. bestMetric = metric
  302. bestOffset = startOffset
  303. corrs = testCorrs
  304. }
  305. }
  306. return corrs, bestOffset
  307. }
  308. // cepstrumFilter removes the spectral envelope from dB magnitudes.
  309. // It zeros the first nCeps DCT coefficients (the smooth spectral shape).
  310. // This is Kirovski's "CF" technique: reduces carrier noise by ~6 dB.
  311. func cepstrumFilter(magDB []float64, nCeps int) {
  312. n := len(magDB)
  313. if n < nCeps*2 {
  314. return
  315. }
  316. // DCT-II (simplified, not optimized)
  317. ceps := make([]float64, n)
  318. for k := 0; k < n; k++ {
  319. var sum float64
  320. for i := 0; i < n; i++ {
  321. sum += magDB[i] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
  322. }
  323. ceps[k] = sum
  324. }
  325. // Zero low-order cepstral coefficients (spectral envelope)
  326. for k := 0; k < nCeps; k++ {
  327. ceps[k] = 0
  328. }
  329. // IDCT (inverse DCT-II)
  330. for i := 0; i < n; i++ {
  331. var sum float64
  332. for k := 0; k < n; k++ {
  333. w := 1.0
  334. if k == 0 {
  335. w = 0.5
  336. }
  337. sum += w * ceps[k] * math.Cos(math.Pi*float64(k)*(float64(i)+0.5)/float64(n))
  338. }
  339. magDB[i] = sum * 2.0 / float64(n)
  340. }
  341. }
  342. // Simple xorshift32 PRNG for deterministic chip generation.
  343. type simplePRNG struct {
  344. state uint32
  345. }
  346. func newPRNG(seed []byte) *simplePRNG {
  347. var s uint32
  348. for i, b := range seed {
  349. s ^= uint32(b) << (uint(i%4) * 8)
  350. }
  351. if s == 0 {
  352. s = 1
  353. }
  354. return &simplePRNG{state: s}
  355. }
  356. func (p *simplePRNG) next() uint32 {
  357. p.state ^= p.state << 13
  358. p.state ^= p.state >> 17
  359. p.state ^= p.state << 5
  360. return p.state
  361. }