Web-based Winamp controller for CarPC — Go backend, mobile-first UI
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

416 行
13KB

  1. //go:build windows
  2. // Package viz captures the Windows audio loopback via WASAPI and emits
  3. // FFT spectrum data for visualisation in the web frontend.
  4. package viz
import (
	"context"
	"fmt"
	"log"
	"math"
	"math/cmplx"
	"runtime"
	"syscall"
	"time"
	"unsafe"

	"golang.org/x/sys/windows"
)
// NumBars is the number of frequency bars emitted per frame.
const NumBars = 64

// Internal constants: the analysis window size, the WASAPI values handed to
// IAudioClient::Initialize / tested on captured packets, and the wave format
// tags compared against the mix format's FormatTag.
const (
	// fftN is the number of mono samples analysed per emitted frame. It has to
	// be a power of 2 because ditFFT only supports power-of-2 lengths.
	fftN = 2048 // FFT window size (power of 2)

	// WASAPI
	audclntShareModeShared     = 0          // share-mode argument passed to Initialize
	audclntStreamFlagsLoopback = 0x00020000 // stream flag passed to Initialize to request loopback capture
	audclntBufferFlagsSilent   = 0x2        // flag bit tested on every packet returned by GetBuffer
	bufDuration                = 1_000_000  // 100 ms in 100-ns units

	// Wave format tags
	waveFormatPCM           = 1      // not referenced in the visible code
	waveFormatFloat         = 3      // plain float format tag
	waveFormatExtensibleTag = 0xFFFE // real sample type is carried in SubFormat
)
  30. // ── GUIDs ─────────────────────────────────────────────────────────────────────
// COM identifiers used while setting up the loopback stream, plus the
// sub-format GUID that run compares against to detect float samples in an
// extensible mix format. The byte values are fixed by the Windows SDK and
// must not be altered.
var (
	// clsidMMDeviceEnumerator is the class ID passed to CoCreateInstance.
	clsidMMDeviceEnumerator = windows.GUID{
		Data1: 0xBCDE0395, Data2: 0xE52F, Data3: 0x467C,
		Data4: [8]byte{0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E},
	}
	// iidIMMDeviceEnumerator is the interface ID requested from CoCreateInstance.
	iidIMMDeviceEnumerator = windows.GUID{
		Data1: 0xA95664D2, Data2: 0x9614, Data3: 0x4F35,
		Data4: [8]byte{0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6},
	}
	// iidIAudioClient is the interface ID passed to IMMDevice::Activate.
	iidIAudioClient = windows.GUID{
		Data1: 0x1CB9AD4C, Data2: 0xDBFA, Data3: 0x4c32,
		Data4: [8]byte{0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2},
	}
	// iidIAudioCaptureClient is the interface ID passed to IAudioClient::GetService.
	iidIAudioCaptureClient = windows.GUID{
		Data1: 0xC8ADBD64, Data2: 0xE71E, Data3: 0x48a0,
		Data4: [8]byte{0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17},
	}
	// subFormatFloat is the SubFormat value that identifies float samples
	// when the mix format uses the extensible tag.
	subFormatFloat = windows.GUID{
		Data1: 0x00000003, Data2: 0x0000, Data3: 0x0010,
		Data4: [8]byte{0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71},
	}
)
  53. // ── WAVEFORMAT structs ────────────────────────────────────────────────────────
// waveFormatEx overlays the leading fields of the buffer returned by
// IAudioClient::GetMixFormat. run reads it through a pointer cast, so field
// order and widths must not change.
type waveFormatEx struct {
	FormatTag      uint16 // compared against the waveFormat* tags
	Channels       uint16 // interleaved channel count
	SamplesPerSec  uint32 // sample rate in Hz
	AvgBytesPerSec uint32 // not read in the visible code
	BlockAlign     uint16 // not read in the visible code
	BitsPerSample  uint16 // run requires 32
	Size           uint16 // run requires >= 22 before reading the extensible part
}
// waveFormatExtensibleEx is a flat representation of WAVEFORMATEXTENSIBLE.
// We cannot embed waveFormatEx because Go pads the struct to 20 bytes
// (alignment of largest field = uint32), but the C layout is 18 bytes —
// so SubFormat would land at the wrong offset if we used struct embedding.
//
// run casts the mix-format pointer to this type only to read SubFormat; the
// other fields exist to keep SubFormat at the right offset.
type waveFormatExtensibleEx struct {
	FormatTag      uint16
	Channels       uint16
	SamplesPerSec  uint32
	AvgBytesPerSec uint32
	BlockAlign     uint16
	BitsPerSample  uint16
	Size           uint16
	Samples        uint16 // wValidBitsPerSample / wSamplesPerBlock
	ChannelMask    uint32
	SubFormat      windows.GUID // 16 bytes → total 40 bytes, matches C layout
}
  79. // ── DLL procs ─────────────────────────────────────────────────────────────────
// Lazily resolved ole32.dll entry points used for COM setup and teardown.
var (
	ole32            = windows.NewLazySystemDLL("ole32.dll")
	coInitializeEx   = ole32.NewProc("CoInitializeEx")   // called first thing in run
	coUninitialize   = ole32.NewProc("CoUninitialize")   // deferred by run
	coCreateInstance = ole32.NewProc("CoCreateInstance") // creates the device enumerator
	coTaskMemFree    = ole32.NewProc("CoTaskMemFree")    // frees the buffer returned by GetMixFormat
)
  87. // ── COM vtable helpers ────────────────────────────────────────────────────────
// ptrSize is the size in bytes of one pointer-sized vtable slot; procAt uses
// it to step from one method entry to the next.
var ptrSize = unsafe.Sizeof(uintptr(0))
  89. func procAt(comObj uintptr, methodIdx int) uintptr {
  90. vtbl := *(*uintptr)(unsafe.Pointer(comObj))
  91. return *(*uintptr)(unsafe.Pointer(vtbl + uintptr(methodIdx)*ptrSize))
  92. }
  93. func comRelease(p uintptr) {
  94. if p != 0 {
  95. syscall.Syscall(procAt(p, 2), 1, p, 0, 0)
  96. }
  97. }
  98. // ── Capturer ──────────────────────────────────────────────────────────────────
  99. // Capturer streams FFT spectrum bars from the system audio loopback.
  100. type Capturer struct {
  101. // C receives slices of NumBars float32 values in [0.0, 1.0] at ~30 fps.
  102. // Slow consumers cause frames to be dropped (non-blocking send).
  103. C chan []float32
  104. }
  105. // NewCapturer creates a Capturer ready to Start.
  106. func NewCapturer() *Capturer {
  107. return &Capturer{C: make(chan []float32, 4)}
  108. }
  109. // Start begins the capture loop; blocks until ctx is cancelled.
  110. // Errors are logged but never fatal — the channel simply stays empty.
  111. func (c *Capturer) Start(ctx context.Context) {
  112. if err := c.run(ctx); err != nil {
  113. log.Printf("viz: %v", err)
  114. }
  115. }
  116. func (c *Capturer) run(ctx context.Context) error {
  117. coInitializeEx.Call(0, 0) // COINIT_MULTITHREADED
  118. defer coUninitialize.Call()
  119. // ── IMMDeviceEnumerator ──────────────────────────────────────────────────
  120. var enumerator uintptr
  121. if hr, _, _ := coCreateInstance.Call(
  122. uintptr(unsafe.Pointer(&clsidMMDeviceEnumerator)), 0, 0x17,
  123. uintptr(unsafe.Pointer(&iidIMMDeviceEnumerator)),
  124. uintptr(unsafe.Pointer(&enumerator)),
  125. ); hr != 0 {
  126. return fmt.Errorf("CoCreateInstance(MMDeviceEnumerator): 0x%08X", hr)
  127. }
  128. defer comRelease(enumerator)
  129. // ── Default render device ────────────────────────────────────────────────
  130. // GetDefaultAudioEndpoint(eRender, eConsole, &device) — vtable index 4, 4 args
  131. var device uintptr
  132. if hr, _, _ := syscall.Syscall6(
  133. procAt(enumerator, 4), 4,
  134. enumerator, 0, 0, uintptr(unsafe.Pointer(&device)), 0, 0,
  135. ); hr != 0 {
  136. return fmt.Errorf("GetDefaultAudioEndpoint: 0x%08X", hr)
  137. }
  138. defer comRelease(device)
  139. // ── IAudioClient ────────────────────────────────────────────────────────
  140. // IMMDevice::Activate(riid, clsCtx, pParams, &ppv) — vtable index 3, 5 args
  141. var ac uintptr
  142. if hr, _, _ := syscall.Syscall6(
  143. procAt(device, 3), 5,
  144. device, uintptr(unsafe.Pointer(&iidIAudioClient)), 0x17, 0,
  145. uintptr(unsafe.Pointer(&ac)), 0,
  146. ); hr != 0 {
  147. return fmt.Errorf("Activate(IAudioClient): 0x%08X", hr)
  148. }
  149. defer comRelease(ac)
  150. // ── Mix format ──────────────────────────────────────────────────────────
  151. var fmtPtr uintptr
  152. if hr, _, _ := syscall.Syscall(
  153. procAt(ac, 8), 2, // GetMixFormat
  154. ac, uintptr(unsafe.Pointer(&fmtPtr)), 0,
  155. ); hr != 0 {
  156. return fmt.Errorf("GetMixFormat: 0x%08X", hr)
  157. }
  158. defer coTaskMemFree.Call(fmtPtr)
  159. wfx := (*waveFormatEx)(unsafe.Pointer(fmtPtr))
  160. sampleRate := int(wfx.SamplesPerSec)
  161. channels := int(wfx.Channels)
  162. isFloat := wfx.FormatTag == waveFormatFloat
  163. if wfx.FormatTag == waveFormatExtensibleTag && wfx.Size >= 22 {
  164. ext := (*waveFormatExtensibleEx)(unsafe.Pointer(fmtPtr))
  165. isFloat = ext.SubFormat == subFormatFloat
  166. }
  167. log.Printf("viz: loopback format %d Hz, %d ch, %d bit, float=%v",
  168. sampleRate, channels, wfx.BitsPerSample, isFloat)
  169. if !isFloat || wfx.BitsPerSample != 32 {
  170. return fmt.Errorf("viz: unsupported format (need float32); got tag=%04X bits=%d",
  171. wfx.FormatTag, wfx.BitsPerSample)
  172. }
  173. // ── Initialize loopback ──────────────────────────────────────────────────
  174. if hr, _, _ := syscall.Syscall9(
  175. procAt(ac, 3), 7, // IAudioClient::Initialize
  176. ac,
  177. audclntShareModeShared,
  178. audclntStreamFlagsLoopback,
  179. uintptr(bufDuration), 0, // hnsBufferDuration, hnsPeriodicity
  180. fmtPtr, 0, // pFormat, AudioSessionGuid
  181. 0, 0,
  182. ); hr != 0 {
  183. return fmt.Errorf("IAudioClient::Initialize: 0x%08X", hr)
  184. }
  185. // ── IAudioCaptureClient ──────────────────────────────────────────────────
  186. var acc uintptr
  187. if hr, _, _ := syscall.Syscall(
  188. procAt(ac, 14), 3, // GetService
  189. ac,
  190. uintptr(unsafe.Pointer(&iidIAudioCaptureClient)),
  191. uintptr(unsafe.Pointer(&acc)),
  192. ); hr != 0 {
  193. return fmt.Errorf("GetService(IAudioCaptureClient): 0x%08X", hr)
  194. }
  195. defer comRelease(acc)
  196. // ── Start ────────────────────────────────────────────────────────────────
  197. if hr, _, _ := syscall.Syscall(procAt(ac, 10), 1, ac, 0, 0); hr != 0 {
  198. return fmt.Errorf("IAudioClient::Start: 0x%08X", hr)
  199. }
  200. defer syscall.Syscall(procAt(ac, 11), 1, ac, 0, 0) // Stop
  201. // ── Capture loop ─────────────────────────────────────────────────────────
  202. buf := make([]float64, 0, fftN*2)
  203. smooth := make([]float32, NumBars)
  204. tick := time.NewTicker(10 * time.Millisecond)
  205. defer tick.Stop()
  206. for {
  207. select {
  208. case <-ctx.Done():
  209. return nil
  210. case <-tick.C:
  211. buf = drainLoopback(acc, channels, buf)
  212. for len(buf) >= fftN {
  213. bars := spectrum(buf[:fftN], sampleRate, smooth)
  214. copy(smooth, bars)
  215. select {
  216. case c.C <- bars:
  217. default:
  218. }
  219. buf = buf[fftN:]
  220. }
  221. }
  222. }
  223. }
  224. // drainLoopback reads all pending audio frames into buf and returns it.
  225. func drainLoopback(acc uintptr, channels int, buf []float64) []float64 {
  226. for {
  227. // GetNextPacketSize
  228. var packetFrames uint32
  229. if hr, _, _ := syscall.Syscall(
  230. procAt(acc, 5), 2,
  231. acc, uintptr(unsafe.Pointer(&packetFrames)), 0,
  232. ); hr != 0 || packetFrames == 0 {
  233. break
  234. }
  235. // GetBuffer(ppData, &numFrames, &flags, NULL, NULL) — 6 args
  236. var dataPtr uintptr
  237. var numFrames uint32
  238. var flags uint32
  239. if hr, _, _ := syscall.Syscall6(
  240. procAt(acc, 3), 6,
  241. acc,
  242. uintptr(unsafe.Pointer(&dataPtr)),
  243. uintptr(unsafe.Pointer(&numFrames)),
  244. uintptr(unsafe.Pointer(&flags)),
  245. 0, 0,
  246. ); hr != 0 {
  247. break
  248. }
  249. if flags&audclntBufferFlagsSilent == 0 && dataPtr != 0 && numFrames > 0 {
  250. samples := unsafe.Slice((*float32)(unsafe.Pointer(dataPtr)), int(numFrames)*channels)
  251. for i := 0; i < int(numFrames); i++ {
  252. var mono float64
  253. for ch := 0; ch < channels; ch++ {
  254. mono += float64(samples[i*channels+ch])
  255. }
  256. buf = append(buf, mono/float64(channels))
  257. }
  258. }
  259. // ReleaseBuffer
  260. syscall.Syscall(procAt(acc, 4), 2, acc, uintptr(numFrames), 0)
  261. }
  262. return buf
  263. }
  264. // ── Spectrum analysis ─────────────────────────────────────────────────────────
  265. // spectrum applies a Hanning window, runs the FFT, maps to NumBars
  266. // log-spaced frequency bins, and applies fast-attack/slow-decay smoothing.
  267. func spectrum(samples []float64, sampleRate int, prev []float32) []float32 {
  268. n := len(samples)
  269. // Hanning window
  270. cx := make([]complex128, n)
  271. for i, s := range samples {
  272. w := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(n-1)))
  273. cx[i] = complex(s*w, 0)
  274. }
  275. ditFFT(cx)
  276. // Magnitude of positive frequencies, normalised
  277. bins := make([]float64, n/2)
  278. scale := 2.0 / float64(n)
  279. for i := range bins {
  280. bins[i] = cmplx.Abs(cx[i]) * scale
  281. }
  282. // Log-spaced output bars: 40 Hz → 20 kHz
  283. const fMin, fMax = 40.0, 20_000.0
  284. freqRes := float64(sampleRate) / float64(n)
  285. bars := make([]float32, NumBars)
  286. for b := 0; b < NumBars; b++ {
  287. t := float64(b) / float64(NumBars-1)
  288. f := fMin * math.Pow(fMax/fMin, t)
  289. var fNext float64
  290. if b < NumBars-1 {
  291. t2 := float64(b+1) / float64(NumBars-1)
  292. fNext = fMin * math.Pow(fMax/fMin, t2)
  293. } else {
  294. fNext = fMax
  295. }
  296. lo := clamp(int(f/freqRes), 0, len(bins)-1)
  297. hi := clamp(int(fNext/freqRes), lo+1, len(bins))
  298. var sum float64
  299. for i := lo; i < hi; i++ {
  300. sum += bins[i]
  301. }
  302. avg := sum / float64(hi-lo)
  303. // dB → [0, 1]
  304. dB := 20 * math.Log10(avg+1e-9)
  305. norm := float32((dB + 80) / 80)
  306. if norm < 0 {
  307. norm = 0
  308. }
  309. if norm > 1 {
  310. norm = 1
  311. }
  312. // Fast attack, slow decay
  313. if norm > prev[b] {
  314. bars[b] = norm
  315. } else {
  316. bars[b] = prev[b] * 0.88
  317. }
  318. }
  319. return bars
  320. }
  321. func clamp(v, lo, hi int) int {
  322. if v < lo {
  323. return lo
  324. }
  325. if v > hi {
  326. return hi
  327. }
  328. return v
  329. }
  330. // ── Cooley-Tukey FFT ─────────────────────────────────────────────────────────
  331. // ditFFT is an in-place, decimation-in-time FFT. len(x) must be a power of 2.
  332. func ditFFT(x []complex128) {
  333. n := len(x)
  334. // Bit-reversal permutation
  335. j := 0
  336. for i := 1; i < n; i++ {
  337. bit := n >> 1
  338. for j&bit != 0 {
  339. j ^= bit
  340. bit >>= 1
  341. }
  342. j ^= bit
  343. if i < j {
  344. x[i], x[j] = x[j], x[i]
  345. }
  346. }
  347. // Butterfly stages
  348. for length := 2; length <= n; length <<= 1 {
  349. half := length >> 1
  350. wStep := cmplx.Exp(complex(0, -math.Pi/float64(half)))
  351. for i := 0; i < n; i += length {
  352. w := complex(1, 0)
  353. for k := 0; k < half; k++ {
  354. u := x[i+k]
  355. v := x[i+k+half] * w
  356. x[i+k] = u + v
  357. x[i+k+half] = u - v
  358. w *= wStep
  359. }
  360. }
  361. }
  362. }