Web-based Winamp controller for CarPC — Go backend, mobile-first UI
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

406 lines
12KB

  1. //go:build windows
  2. // Package viz captures the Windows audio loopback via WASAPI and emits
  3. // FFT spectrum data for visualisation in the web frontend.
  4. package viz
import (
	"context"
	"fmt"
	"log"
	"math"
	"math/cmplx"
	"runtime"
	"syscall"
	"time"
	"unsafe"

	"golang.org/x/sys/windows"
)
// NumBars is the number of frequency bars emitted per frame.
const NumBars = 64

// Internal tuning values and the WASAPI / wave-format constants used by the
// capture code in this file.
const (
	// fftN is the number of mono samples consumed per spectrum frame. It must
	// be a power of 2 because ditFFT only supports power-of-two lengths.
	fftN = 2048 // FFT window size (power of 2)

	// WASAPI
	audclntShareModeShared     = 0          // share-mode argument passed to IAudioClient::Initialize
	audclntStreamFlagsLoopback = 0x00020000 // stream flag passed to IAudioClient::Initialize
	audclntBufferFlagsSilent   = 0x2        // bit tested in the flags returned by GetBuffer
	bufDuration                = 1_000_000  // 100 ms in 100-ns units

	// Wave format tags (compared against waveFormatEx.FormatTag).
	waveFormatPCM           = 1 // not referenced elsewhere in the visible code
	waveFormatFloat         = 3
	waveFormatExtensibleTag = 0xFFFE
)
  30. // ── GUIDs ─────────────────────────────────────────────────────────────────────
// COM class/interface identifiers and the audio sub-format GUID used by run.
var (
	// clsidMMDeviceEnumerator is the class ID handed to CoCreateInstance.
	clsidMMDeviceEnumerator = windows.GUID{
		Data1: 0xBCDE0395, Data2: 0xE52F, Data3: 0x467C,
		Data4: [8]byte{0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E},
	}
	// iidIMMDeviceEnumerator is the interface ID requested from CoCreateInstance.
	iidIMMDeviceEnumerator = windows.GUID{
		Data1: 0xA95664D2, Data2: 0x9614, Data3: 0x4F35,
		Data4: [8]byte{0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6},
	}
	// iidIAudioClient is the interface ID passed to IMMDevice::Activate.
	iidIAudioClient = windows.GUID{
		Data1: 0x1CB9AD4C, Data2: 0xDBFA, Data3: 0x4c32,
		Data4: [8]byte{0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2},
	}
	// iidIAudioCaptureClient is the interface ID passed to IAudioClient::GetService.
	iidIAudioCaptureClient = windows.GUID{
		Data1: 0xC8ADBD64, Data2: 0xE71E, Data3: 0x48a0,
		Data4: [8]byte{0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17},
	}
	// subFormatFloat is compared with the SubFormat of an extensible mix
	// format to decide whether the samples are floating point.
	subFormatFloat = windows.GUID{
		Data1: 0x00000003, Data2: 0x0000, Data3: 0x0010,
		Data4: [8]byte{0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71},
	}
)
  53. // ── WAVEFORMAT structs ────────────────────────────────────────────────────────
// waveFormatEx describes the header of the mix format that run reads through
// the pointer returned by GetMixFormat.
//
// NOTE(review): the declared fields end at byte 18, but Go rounds the struct
// size up to 20 bytes (4-byte alignment). Embedding this type in a larger
// struct therefore shifts whatever follows it by 2 bytes relative to a packed
// layout — confirm against the Windows definition before relying on that.
type waveFormatEx struct {
	FormatTag      uint16 // compared against the waveFormat* tag constants
	Channels       uint16 // number of interleaved channels per frame
	SamplesPerSec  uint32 // sample rate in Hz
	AvgBytesPerSec uint32
	BlockAlign     uint16
	BitsPerSample  uint16 // run only accepts 32
	Size           uint16 // run requires >= 22 before reading the extensible part
}
  63. type waveFormatExtensibleEx struct {
  64. Format waveFormatEx
  65. Samples uint16
  66. ChannelMask uint32
  67. SubFormat windows.GUID
  68. }
  69. // ── DLL procs ─────────────────────────────────────────────────────────────────
// ole32.dll entry points used by run for COM set-up and tear-down.
var (
	ole32            = windows.NewLazySystemDLL("ole32.dll")
	coInitializeEx   = ole32.NewProc("CoInitializeEx")   // called first in run
	coUninitialize   = ole32.NewProc("CoUninitialize")   // deferred by run
	coCreateInstance = ole32.NewProc("CoCreateInstance") // creates the device enumerator
	coTaskMemFree    = ole32.NewProc("CoTaskMemFree")    // frees the buffer returned by GetMixFormat
)
  77. // ── COM vtable helpers ────────────────────────────────────────────────────────
// ptrSize is the size in bytes of one pointer-sized word; procAt uses it as
// the stride between consecutive vtable slots.
var ptrSize = unsafe.Sizeof(uintptr(0))
  79. func procAt(comObj uintptr, methodIdx int) uintptr {
  80. vtbl := *(*uintptr)(unsafe.Pointer(comObj))
  81. return *(*uintptr)(unsafe.Pointer(vtbl + uintptr(methodIdx)*ptrSize))
  82. }
  83. func comRelease(p uintptr) {
  84. if p != 0 {
  85. syscall.Syscall(procAt(p, 2), 1, p, 0, 0)
  86. }
  87. }
  88. // ── Capturer ──────────────────────────────────────────────────────────────────
// Capturer streams FFT spectrum bars from the system audio loopback.
type Capturer struct {
	// C receives slices of NumBars float32 values in [0.0, 1.0]. One slice is
	// produced for every fftN captured mono samples, so the frame rate is
	// sampleRate/fftN (roughly 23 frames per second at 48 kHz) rather than a
	// fixed rate. Sends are non-blocking: if the channel cannot accept a
	// frame immediately, that frame is dropped.
	C chan []float32
}
  95. // NewCapturer creates a Capturer ready to Start.
  96. func NewCapturer() *Capturer {
  97. return &Capturer{C: make(chan []float32, 4)}
  98. }
  99. // Start begins the capture loop; blocks until ctx is cancelled.
  100. // Errors are logged but never fatal — the channel simply stays empty.
  101. func (c *Capturer) Start(ctx context.Context) {
  102. if err := c.run(ctx); err != nil {
  103. log.Printf("viz: %v", err)
  104. }
  105. }
  106. func (c *Capturer) run(ctx context.Context) error {
  107. coInitializeEx.Call(0, 0) // COINIT_MULTITHREADED
  108. defer coUninitialize.Call()
  109. // ── IMMDeviceEnumerator ──────────────────────────────────────────────────
  110. var enumerator uintptr
  111. if hr, _, _ := coCreateInstance.Call(
  112. uintptr(unsafe.Pointer(&clsidMMDeviceEnumerator)), 0, 0x17,
  113. uintptr(unsafe.Pointer(&iidIMMDeviceEnumerator)),
  114. uintptr(unsafe.Pointer(&enumerator)),
  115. ); hr != 0 {
  116. return fmt.Errorf("CoCreateInstance(MMDeviceEnumerator): 0x%08X", hr)
  117. }
  118. defer comRelease(enumerator)
  119. // ── Default render device ────────────────────────────────────────────────
  120. // GetDefaultAudioEndpoint(eRender, eConsole, &device) — vtable index 4, 4 args
  121. var device uintptr
  122. if hr, _, _ := syscall.Syscall6(
  123. procAt(enumerator, 4), 4,
  124. enumerator, 0, 0, uintptr(unsafe.Pointer(&device)), 0, 0,
  125. ); hr != 0 {
  126. return fmt.Errorf("GetDefaultAudioEndpoint: 0x%08X", hr)
  127. }
  128. defer comRelease(device)
  129. // ── IAudioClient ────────────────────────────────────────────────────────
  130. // IMMDevice::Activate(riid, clsCtx, pParams, &ppv) — vtable index 3, 5 args
  131. var ac uintptr
  132. if hr, _, _ := syscall.Syscall6(
  133. procAt(device, 3), 5,
  134. device, uintptr(unsafe.Pointer(&iidIAudioClient)), 0x17, 0,
  135. uintptr(unsafe.Pointer(&ac)), 0,
  136. ); hr != 0 {
  137. return fmt.Errorf("Activate(IAudioClient): 0x%08X", hr)
  138. }
  139. defer comRelease(ac)
  140. // ── Mix format ──────────────────────────────────────────────────────────
  141. var fmtPtr uintptr
  142. if hr, _, _ := syscall.Syscall(
  143. procAt(ac, 8), 2, // GetMixFormat
  144. ac, uintptr(unsafe.Pointer(&fmtPtr)), 0,
  145. ); hr != 0 {
  146. return fmt.Errorf("GetMixFormat: 0x%08X", hr)
  147. }
  148. defer coTaskMemFree.Call(fmtPtr)
  149. wfx := (*waveFormatEx)(unsafe.Pointer(fmtPtr))
  150. sampleRate := int(wfx.SamplesPerSec)
  151. channels := int(wfx.Channels)
  152. isFloat := wfx.FormatTag == waveFormatFloat
  153. if wfx.FormatTag == waveFormatExtensibleTag && wfx.Size >= 22 {
  154. ext := (*waveFormatExtensibleEx)(unsafe.Pointer(fmtPtr))
  155. isFloat = ext.SubFormat == subFormatFloat
  156. }
  157. log.Printf("viz: loopback format %d Hz, %d ch, %d bit, float=%v",
  158. sampleRate, channels, wfx.BitsPerSample, isFloat)
  159. if !isFloat || wfx.BitsPerSample != 32 {
  160. return fmt.Errorf("viz: unsupported format (need float32); got tag=%04X bits=%d",
  161. wfx.FormatTag, wfx.BitsPerSample)
  162. }
  163. // ── Initialize loopback ──────────────────────────────────────────────────
  164. if hr, _, _ := syscall.Syscall9(
  165. procAt(ac, 3), 7, // IAudioClient::Initialize
  166. ac,
  167. audclntShareModeShared,
  168. audclntStreamFlagsLoopback,
  169. uintptr(bufDuration), 0, // hnsBufferDuration, hnsPeriodicity
  170. fmtPtr, 0, // pFormat, AudioSessionGuid
  171. 0, 0,
  172. ); hr != 0 {
  173. return fmt.Errorf("IAudioClient::Initialize: 0x%08X", hr)
  174. }
  175. // ── IAudioCaptureClient ──────────────────────────────────────────────────
  176. var acc uintptr
  177. if hr, _, _ := syscall.Syscall(
  178. procAt(ac, 14), 3, // GetService
  179. ac,
  180. uintptr(unsafe.Pointer(&iidIAudioCaptureClient)),
  181. uintptr(unsafe.Pointer(&acc)),
  182. ); hr != 0 {
  183. return fmt.Errorf("GetService(IAudioCaptureClient): 0x%08X", hr)
  184. }
  185. defer comRelease(acc)
  186. // ── Start ────────────────────────────────────────────────────────────────
  187. if hr, _, _ := syscall.Syscall(procAt(ac, 10), 1, ac, 0, 0); hr != 0 {
  188. return fmt.Errorf("IAudioClient::Start: 0x%08X", hr)
  189. }
  190. defer syscall.Syscall(procAt(ac, 11), 1, ac, 0, 0) // Stop
  191. // ── Capture loop ─────────────────────────────────────────────────────────
  192. buf := make([]float64, 0, fftN*2)
  193. smooth := make([]float32, NumBars)
  194. tick := time.NewTicker(10 * time.Millisecond)
  195. defer tick.Stop()
  196. for {
  197. select {
  198. case <-ctx.Done():
  199. return nil
  200. case <-tick.C:
  201. buf = drainLoopback(acc, channels, buf)
  202. for len(buf) >= fftN {
  203. bars := spectrum(buf[:fftN], sampleRate, smooth)
  204. copy(smooth, bars)
  205. select {
  206. case c.C <- bars:
  207. default:
  208. }
  209. buf = buf[fftN:]
  210. }
  211. }
  212. }
  213. }
  214. // drainLoopback reads all pending audio frames into buf and returns it.
  215. func drainLoopback(acc uintptr, channels int, buf []float64) []float64 {
  216. for {
  217. // GetNextPacketSize
  218. var packetFrames uint32
  219. if hr, _, _ := syscall.Syscall(
  220. procAt(acc, 5), 2,
  221. acc, uintptr(unsafe.Pointer(&packetFrames)), 0,
  222. ); hr != 0 || packetFrames == 0 {
  223. break
  224. }
  225. // GetBuffer(ppData, &numFrames, &flags, NULL, NULL) — 6 args
  226. var dataPtr uintptr
  227. var numFrames uint32
  228. var flags uint32
  229. if hr, _, _ := syscall.Syscall6(
  230. procAt(acc, 3), 6,
  231. acc,
  232. uintptr(unsafe.Pointer(&dataPtr)),
  233. uintptr(unsafe.Pointer(&numFrames)),
  234. uintptr(unsafe.Pointer(&flags)),
  235. 0, 0,
  236. ); hr != 0 {
  237. break
  238. }
  239. if flags&audclntBufferFlagsSilent == 0 && dataPtr != 0 && numFrames > 0 {
  240. samples := unsafe.Slice((*float32)(unsafe.Pointer(dataPtr)), int(numFrames)*channels)
  241. for i := 0; i < int(numFrames); i++ {
  242. var mono float64
  243. for ch := 0; ch < channels; ch++ {
  244. mono += float64(samples[i*channels+ch])
  245. }
  246. buf = append(buf, mono/float64(channels))
  247. }
  248. }
  249. // ReleaseBuffer
  250. syscall.Syscall(procAt(acc, 4), 2, acc, uintptr(numFrames), 0)
  251. }
  252. return buf
  253. }
  254. // ── Spectrum analysis ─────────────────────────────────────────────────────────
  255. // spectrum applies a Hanning window, runs the FFT, maps to NumBars
  256. // log-spaced frequency bins, and applies fast-attack/slow-decay smoothing.
  257. func spectrum(samples []float64, sampleRate int, prev []float32) []float32 {
  258. n := len(samples)
  259. // Hanning window
  260. cx := make([]complex128, n)
  261. for i, s := range samples {
  262. w := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(n-1)))
  263. cx[i] = complex(s*w, 0)
  264. }
  265. ditFFT(cx)
  266. // Magnitude of positive frequencies, normalised
  267. bins := make([]float64, n/2)
  268. scale := 2.0 / float64(n)
  269. for i := range bins {
  270. bins[i] = cmplx.Abs(cx[i]) * scale
  271. }
  272. // Log-spaced output bars: 40 Hz → 20 kHz
  273. const fMin, fMax = 40.0, 20_000.0
  274. freqRes := float64(sampleRate) / float64(n)
  275. bars := make([]float32, NumBars)
  276. for b := 0; b < NumBars; b++ {
  277. t := float64(b) / float64(NumBars-1)
  278. f := fMin * math.Pow(fMax/fMin, t)
  279. var fNext float64
  280. if b < NumBars-1 {
  281. t2 := float64(b+1) / float64(NumBars-1)
  282. fNext = fMin * math.Pow(fMax/fMin, t2)
  283. } else {
  284. fNext = fMax
  285. }
  286. lo := clamp(int(f/freqRes), 0, len(bins)-1)
  287. hi := clamp(int(fNext/freqRes), lo+1, len(bins))
  288. var sum float64
  289. for i := lo; i < hi; i++ {
  290. sum += bins[i]
  291. }
  292. avg := sum / float64(hi-lo)
  293. // dB → [0, 1]
  294. dB := 20 * math.Log10(avg+1e-9)
  295. norm := float32((dB + 80) / 80)
  296. if norm < 0 {
  297. norm = 0
  298. }
  299. if norm > 1 {
  300. norm = 1
  301. }
  302. // Fast attack, slow decay
  303. if norm > prev[b] {
  304. bars[b] = norm
  305. } else {
  306. bars[b] = prev[b] * 0.88
  307. }
  308. }
  309. return bars
  310. }
  311. func clamp(v, lo, hi int) int {
  312. if v < lo {
  313. return lo
  314. }
  315. if v > hi {
  316. return hi
  317. }
  318. return v
  319. }
  320. // ── Cooley-Tukey FFT ─────────────────────────────────────────────────────────
  321. // ditFFT is an in-place, decimation-in-time FFT. len(x) must be a power of 2.
  322. func ditFFT(x []complex128) {
  323. n := len(x)
  324. // Bit-reversal permutation
  325. j := 0
  326. for i := 1; i < n; i++ {
  327. bit := n >> 1
  328. for j&bit != 0 {
  329. j ^= bit
  330. bit >>= 1
  331. }
  332. j ^= bit
  333. if i < j {
  334. x[i], x[j] = x[j], x[i]
  335. }
  336. }
  337. // Butterfly stages
  338. for length := 2; length <= n; length <<= 1 {
  339. half := length >> 1
  340. wStep := cmplx.Exp(complex(0, -math.Pi/float64(half)))
  341. for i := 0; i < n; i += length {
  342. w := complex(1, 0)
  343. for k := 0; k < half; k++ {
  344. u := x[i+k]
  345. v := x[i+k+half] * w
  346. x[i+k] = u + v
  347. x[i+k+half] = u - v
  348. w *= wStep
  349. }
  350. }
  351. }
  352. }