|
- //go:build windows
-
- // Package viz captures the Windows audio loopback via WASAPI and emits
- // FFT spectrum data for visualisation in the web frontend.
- package viz
-
import (
	"context"
	"fmt"
	"log"
	"math"
	"math/cmplx"
	"runtime"
	"syscall"
	"time"
	"unsafe"

	"golang.org/x/sys/windows"
)
-
// NumBars is how many frequency bars every emitted frame contains.
const NumBars = 64

// fftN is the FFT window length in samples; it has to remain a power of two.
const fftN = 1 << 11

// WASAPI values handed to IAudioClient::Initialize or tested against the
// flags reported by IAudioCaptureClient::GetBuffer.
const (
	audclntShareModeShared     = 0       // AUDCLNT_SHAREMODE_SHARED
	audclntStreamFlagsLoopback = 1 << 17 // AUDCLNT_STREAMFLAGS_LOOPBACK (0x00020000)
	audclntBufferFlagsSilent   = 1 << 1  // AUDCLNT_BUFFERFLAGS_SILENT (0x2)

	// bufDuration is the requested buffer length: 100 ms expressed in
	// 100-ns units (10 000 units per millisecond).
	bufDuration = 100 * 10_000
)

// Values of the format-tag field of a wave format header.
const (
	waveFormatPCM           = 1
	waveFormatFloat         = 3
	waveFormatExtensibleTag = 0xFFFE
)
-
- // ── GUIDs ─────────────────────────────────────────────────────────────────────
-
- var (
- clsidMMDeviceEnumerator = windows.GUID{
- Data1: 0xBCDE0395, Data2: 0xE52F, Data3: 0x467C,
- Data4: [8]byte{0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E},
- }
- iidIMMDeviceEnumerator = windows.GUID{
- Data1: 0xA95664D2, Data2: 0x9614, Data3: 0x4F35,
- Data4: [8]byte{0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6},
- }
- iidIAudioClient = windows.GUID{
- Data1: 0x1CB9AD4C, Data2: 0xDBFA, Data3: 0x4c32,
- Data4: [8]byte{0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2},
- }
- iidIAudioCaptureClient = windows.GUID{
- Data1: 0xC8ADBD64, Data2: 0xE71E, Data3: 0x48a0,
- Data4: [8]byte{0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17},
- }
- subFormatFloat = windows.GUID{
- Data1: 0x00000003, Data2: 0x0000, Data3: 0x0010,
- Data4: [8]byte{0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71},
- }
- )
-
- // ── WAVEFORMAT structs ────────────────────────────────────────────────────────
-
// waveFormatEx overlays the header of the wave-format structure that
// IAudioClient::GetMixFormat hands back. Its seven fields occupy 18 bytes;
// Go rounds the struct size up to 20 because of the uint32 members, so the
// last two bytes of the Go value are padding.
type waveFormatEx struct {
	FormatTag, Channels           uint16 // format tag (see waveFormat* constants); channel count
	SamplesPerSec, AvgBytesPerSec uint32 // sample rate in Hz; average byte rate
	BlockAlign, BitsPerSample     uint16 // bytes per frame; bits per single sample
	Size                          uint16 // number of extra bytes following this header
}
-
- type waveFormatExtensibleEx struct {
- Format waveFormatEx
- Samples uint16
- ChannelMask uint32
- SubFormat windows.GUID
- }
-
- // ── DLL procs ─────────────────────────────────────────────────────────────────
-
- var (
- ole32 = windows.NewLazySystemDLL("ole32.dll")
- coInitializeEx = ole32.NewProc("CoInitializeEx")
- coUninitialize = ole32.NewProc("CoUninitialize")
- coCreateInstance = ole32.NewProc("CoCreateInstance")
- coTaskMemFree = ole32.NewProc("CoTaskMemFree")
- )
-
- // ── COM vtable helpers ────────────────────────────────────────────────────────
-
- var ptrSize = unsafe.Sizeof(uintptr(0))
-
- func procAt(comObj uintptr, methodIdx int) uintptr {
- vtbl := *(*uintptr)(unsafe.Pointer(comObj))
- return *(*uintptr)(unsafe.Pointer(vtbl + uintptr(methodIdx)*ptrSize))
- }
-
- func comRelease(p uintptr) {
- if p != 0 {
- syscall.Syscall(procAt(p, 2), 1, p, 0, 0)
- }
- }
-
- // ── Capturer ──────────────────────────────────────────────────────────────────
-
// Capturer publishes FFT spectrum frames computed from the system audio
// loopback.
type Capturer struct {
	// C carries one []float32 of length NumBars per analysed window, every
	// value lying in [0.0, 1.0]. One frame is produced for each fftN captured
	// samples, so the frame rate follows the device sample rate (roughly
	// 23 frames per second at 48 kHz). Sends never block: if the channel
	// buffer is full, the frame is dropped.
	C chan []float32
}

// NewCapturer returns a Capturer whose channel can buffer four frames.
// Nothing is captured until Start is called.
func NewCapturer() *Capturer {
	frames := make(chan []float32, 4)
	return &Capturer{C: frames}
}
-
- // Start begins the capture loop; blocks until ctx is cancelled.
- // Errors are logged but never fatal — the channel simply stays empty.
- func (c *Capturer) Start(ctx context.Context) {
- if err := c.run(ctx); err != nil {
- log.Printf("viz: %v", err)
- }
- }
-
- func (c *Capturer) run(ctx context.Context) error {
- coInitializeEx.Call(0, 0) // COINIT_MULTITHREADED
- defer coUninitialize.Call()
-
- // ── IMMDeviceEnumerator ──────────────────────────────────────────────────
- var enumerator uintptr
- if hr, _, _ := coCreateInstance.Call(
- uintptr(unsafe.Pointer(&clsidMMDeviceEnumerator)), 0, 0x17,
- uintptr(unsafe.Pointer(&iidIMMDeviceEnumerator)),
- uintptr(unsafe.Pointer(&enumerator)),
- ); hr != 0 {
- return fmt.Errorf("CoCreateInstance(MMDeviceEnumerator): 0x%08X", hr)
- }
- defer comRelease(enumerator)
-
- // ── Default render device ────────────────────────────────────────────────
- // GetDefaultAudioEndpoint(eRender, eConsole, &device) — vtable index 4, 4 args
- var device uintptr
- if hr, _, _ := syscall.Syscall6(
- procAt(enumerator, 4), 4,
- enumerator, 0, 0, uintptr(unsafe.Pointer(&device)), 0, 0,
- ); hr != 0 {
- return fmt.Errorf("GetDefaultAudioEndpoint: 0x%08X", hr)
- }
- defer comRelease(device)
-
- // ── IAudioClient ────────────────────────────────────────────────────────
- // IMMDevice::Activate(riid, clsCtx, pParams, &ppv) — vtable index 3, 5 args
- var ac uintptr
- if hr, _, _ := syscall.Syscall6(
- procAt(device, 3), 5,
- device, uintptr(unsafe.Pointer(&iidIAudioClient)), 0x17, 0,
- uintptr(unsafe.Pointer(&ac)), 0,
- ); hr != 0 {
- return fmt.Errorf("Activate(IAudioClient): 0x%08X", hr)
- }
- defer comRelease(ac)
-
- // ── Mix format ──────────────────────────────────────────────────────────
- var fmtPtr uintptr
- if hr, _, _ := syscall.Syscall(
- procAt(ac, 8), 2, // GetMixFormat
- ac, uintptr(unsafe.Pointer(&fmtPtr)), 0,
- ); hr != 0 {
- return fmt.Errorf("GetMixFormat: 0x%08X", hr)
- }
- defer coTaskMemFree.Call(fmtPtr)
-
- wfx := (*waveFormatEx)(unsafe.Pointer(fmtPtr))
- sampleRate := int(wfx.SamplesPerSec)
- channels := int(wfx.Channels)
- isFloat := wfx.FormatTag == waveFormatFloat
- if wfx.FormatTag == waveFormatExtensibleTag && wfx.Size >= 22 {
- ext := (*waveFormatExtensibleEx)(unsafe.Pointer(fmtPtr))
- isFloat = ext.SubFormat == subFormatFloat
- }
- log.Printf("viz: loopback format %d Hz, %d ch, %d bit, float=%v",
- sampleRate, channels, wfx.BitsPerSample, isFloat)
-
- if !isFloat || wfx.BitsPerSample != 32 {
- return fmt.Errorf("viz: unsupported format (need float32); got tag=%04X bits=%d",
- wfx.FormatTag, wfx.BitsPerSample)
- }
-
- // ── Initialize loopback ──────────────────────────────────────────────────
- if hr, _, _ := syscall.Syscall9(
- procAt(ac, 3), 7, // IAudioClient::Initialize
- ac,
- audclntShareModeShared,
- audclntStreamFlagsLoopback,
- uintptr(bufDuration), 0, // hnsBufferDuration, hnsPeriodicity
- fmtPtr, 0, // pFormat, AudioSessionGuid
- 0, 0,
- ); hr != 0 {
- return fmt.Errorf("IAudioClient::Initialize: 0x%08X", hr)
- }
-
- // ── IAudioCaptureClient ──────────────────────────────────────────────────
- var acc uintptr
- if hr, _, _ := syscall.Syscall(
- procAt(ac, 14), 3, // GetService
- ac,
- uintptr(unsafe.Pointer(&iidIAudioCaptureClient)),
- uintptr(unsafe.Pointer(&acc)),
- ); hr != 0 {
- return fmt.Errorf("GetService(IAudioCaptureClient): 0x%08X", hr)
- }
- defer comRelease(acc)
-
- // ── Start ────────────────────────────────────────────────────────────────
- if hr, _, _ := syscall.Syscall(procAt(ac, 10), 1, ac, 0, 0); hr != 0 {
- return fmt.Errorf("IAudioClient::Start: 0x%08X", hr)
- }
- defer syscall.Syscall(procAt(ac, 11), 1, ac, 0, 0) // Stop
-
- // ── Capture loop ─────────────────────────────────────────────────────────
- buf := make([]float64, 0, fftN*2)
- smooth := make([]float32, NumBars)
- tick := time.NewTicker(10 * time.Millisecond)
- defer tick.Stop()
-
- for {
- select {
- case <-ctx.Done():
- return nil
- case <-tick.C:
- buf = drainLoopback(acc, channels, buf)
- for len(buf) >= fftN {
- bars := spectrum(buf[:fftN], sampleRate, smooth)
- copy(smooth, bars)
- select {
- case c.C <- bars:
- default:
- }
- buf = buf[fftN:]
- }
- }
- }
- }
-
- // drainLoopback reads all pending audio frames into buf and returns it.
- func drainLoopback(acc uintptr, channels int, buf []float64) []float64 {
- for {
- // GetNextPacketSize
- var packetFrames uint32
- if hr, _, _ := syscall.Syscall(
- procAt(acc, 5), 2,
- acc, uintptr(unsafe.Pointer(&packetFrames)), 0,
- ); hr != 0 || packetFrames == 0 {
- break
- }
-
- // GetBuffer(ppData, &numFrames, &flags, NULL, NULL) — 6 args
- var dataPtr uintptr
- var numFrames uint32
- var flags uint32
- if hr, _, _ := syscall.Syscall6(
- procAt(acc, 3), 6,
- acc,
- uintptr(unsafe.Pointer(&dataPtr)),
- uintptr(unsafe.Pointer(&numFrames)),
- uintptr(unsafe.Pointer(&flags)),
- 0, 0,
- ); hr != 0 {
- break
- }
-
- if flags&audclntBufferFlagsSilent == 0 && dataPtr != 0 && numFrames > 0 {
- samples := unsafe.Slice((*float32)(unsafe.Pointer(dataPtr)), int(numFrames)*channels)
- for i := 0; i < int(numFrames); i++ {
- var mono float64
- for ch := 0; ch < channels; ch++ {
- mono += float64(samples[i*channels+ch])
- }
- buf = append(buf, mono/float64(channels))
- }
- }
-
- // ReleaseBuffer
- syscall.Syscall(procAt(acc, 4), 2, acc, uintptr(numFrames), 0)
- }
- return buf
- }
-
- // ── Spectrum analysis ─────────────────────────────────────────────────────────
-
- // spectrum applies a Hanning window, runs the FFT, maps to NumBars
- // log-spaced frequency bins, and applies fast-attack/slow-decay smoothing.
- func spectrum(samples []float64, sampleRate int, prev []float32) []float32 {
- n := len(samples)
-
- // Hanning window
- cx := make([]complex128, n)
- for i, s := range samples {
- w := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(n-1)))
- cx[i] = complex(s*w, 0)
- }
-
- ditFFT(cx)
-
- // Magnitude of positive frequencies, normalised
- bins := make([]float64, n/2)
- scale := 2.0 / float64(n)
- for i := range bins {
- bins[i] = cmplx.Abs(cx[i]) * scale
- }
-
- // Log-spaced output bars: 40 Hz → 20 kHz
- const fMin, fMax = 40.0, 20_000.0
- freqRes := float64(sampleRate) / float64(n)
- bars := make([]float32, NumBars)
-
- for b := 0; b < NumBars; b++ {
- t := float64(b) / float64(NumBars-1)
- f := fMin * math.Pow(fMax/fMin, t)
-
- var fNext float64
- if b < NumBars-1 {
- t2 := float64(b+1) / float64(NumBars-1)
- fNext = fMin * math.Pow(fMax/fMin, t2)
- } else {
- fNext = fMax
- }
-
- lo := clamp(int(f/freqRes), 0, len(bins)-1)
- hi := clamp(int(fNext/freqRes), lo+1, len(bins))
-
- var sum float64
- for i := lo; i < hi; i++ {
- sum += bins[i]
- }
- avg := sum / float64(hi-lo)
-
- // dB → [0, 1]
- dB := 20 * math.Log10(avg+1e-9)
- norm := float32((dB + 80) / 80)
- if norm < 0 {
- norm = 0
- }
- if norm > 1 {
- norm = 1
- }
-
- // Fast attack, slow decay
- if norm > prev[b] {
- bars[b] = norm
- } else {
- bars[b] = prev[b] * 0.88
- }
- }
- return bars
- }
-
// clamp limits v to the closed interval [lo, hi]. The lower bound is tested
// first, so when lo > hi any v below lo yields lo.
func clamp(v, lo, hi int) int {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
-
- // ── Cooley-Tukey FFT ─────────────────────────────────────────────────────────
-
// ditFFT replaces x with its discrete Fourier transform, computed in place
// with an iterative radix-2 decimation-in-time scheme. len(x) must be a power
// of two; slices of length 0 or 1 are left untouched. The result is not
// scaled.
func ditFFT(x []complex128) {
	size := len(x)

	// Step 1: reorder the elements into bit-reversed index order. dst tracks
	// the bit-reversed counterpart of src by incrementing from the top bit.
	for src, dst := 1, 0; src < size; src++ {
		mask := size >> 1
		for ; dst&mask != 0; mask >>= 1 {
			dst ^= mask
		}
		dst ^= mask
		if src < dst {
			x[src], x[dst] = x[dst], x[src]
		}
	}

	// Step 2: butterfly passes over spans of 2, 4, 8, … elements.
	for span := 2; span <= size; span *= 2 {
		half := span / 2
		// Per-step rotation of the twiddle factor within one span.
		rot := cmplx.Exp(complex(0, -math.Pi/float64(half)))
		for base := 0; base < size; base += span {
			twiddle := complex(1, 0)
			for lo, hi := base, base+half; lo < base+half; lo, hi = lo+1, hi+1 {
				a, b := x[lo], x[hi]*twiddle
				x[lo], x[hi] = a+b, a-b
				twiddle *= rot
			}
		}
	}
}
|