//go:build windows

// Package viz captures the Windows audio loopback via WASAPI and emits
// FFT spectrum data for visualisation in the web frontend.
package viz

import (
	"context"
	"fmt"
	"log"
	"math"
	"math/cmplx"
	"syscall"
	"time"
	"unsafe"

	"golang.org/x/sys/windows"
)

// NumBars is the number of frequency bars emitted per frame.
const NumBars = 64

const (
	// fftN is the number of mono samples consumed per analysis window.
	// ditFFT requires a power of two.
	fftN = 2048 // FFT window size (power of 2)

	// WASAPI
	// Share mode and stream flags are passed to IAudioClient::Initialize in
	// run; the buffer flag is tested against the flags word that
	// IAudioCaptureClient::GetBuffer fills in (see drainLoopback).
	audclntShareModeShared     = 0
	audclntStreamFlagsLoopback = 0x00020000
	audclntBufferFlagsSilent   = 0x2
	bufDuration                = 1_000_000 // 100 ms in 100-ns units

	// Wave format tags
	// Compared against waveFormatEx.FormatTag in run. waveFormatPCM is not
	// referenced by the code visible in this file.
	waveFormatPCM           = 1
	waveFormatFloat         = 3
	waveFormatExtensibleTag = 0xFFFE
)

// ── GUIDs ─────────────────────────────────────────────────────────────────────

// COM class/interface identifiers handed to CoCreateInstance, Activate and
// GetService in run, plus the extensible-format sub-type that run compares
// against to recognise float samples.
var (
	clsidMMDeviceEnumerator = windows.GUID{
		Data1: 0xBCDE0395, Data2: 0xE52F, Data3: 0x467C,
		Data4: [8]byte{0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E},
	}
	iidIMMDeviceEnumerator = windows.GUID{
		Data1: 0xA95664D2, Data2: 0x9614, Data3: 0x4F35,
		Data4: [8]byte{0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6},
	}
	iidIAudioClient = windows.GUID{
		Data1: 0x1CB9AD4C, Data2: 0xDBFA, Data3: 0x4c32,
		Data4: [8]byte{0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2},
	}
	iidIAudioCaptureClient = windows.GUID{
		Data1: 0xC8ADBD64, Data2: 0xE71E, Data3: 0x48a0,
		Data4: [8]byte{0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17},
	}
	subFormatFloat = windows.GUID{
		Data1: 0x00000003, Data2: 0x0000, Data3: 0x0010,
		Data4: [8]byte{0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71},
	}
)

// ── WAVEFORMAT structs ────────────────────────────────────────────────────────

// waveFormatEx overlays the start of the format block returned by
// IAudioClient::GetMixFormat; run reads the tag, channel count, sample rate,
// bit depth and extension size through it.
type waveFormatEx struct {
	FormatTag      uint16
	Channels       uint16
	SamplesPerSec  uint32
	AvgBytesPerSec uint32
	BlockAlign     uint16
	BitsPerSample  uint16
	Size           uint16
}

// waveFormatExtensibleEx is a flat representation of WAVEFORMATEXTENSIBLE.
// We cannot embed waveFormatEx because Go pads the struct to 20 bytes // (alignment of largest field = uint32), but the C layout is 18 bytes — // so SubFormat would land at the wrong offset if we used struct embedding. type waveFormatExtensibleEx struct { FormatTag uint16 Channels uint16 SamplesPerSec uint32 AvgBytesPerSec uint32 BlockAlign uint16 BitsPerSample uint16 Size uint16 Samples uint16 // wValidBitsPerSample / wSamplesPerBlock ChannelMask uint32 SubFormat windows.GUID // 16 bytes → total 40 bytes, matches C layout } // ── DLL procs ───────────────────────────────────────────────────────────────── var ( ole32 = windows.NewLazySystemDLL("ole32.dll") coInitializeEx = ole32.NewProc("CoInitializeEx") coUninitialize = ole32.NewProc("CoUninitialize") coCreateInstance = ole32.NewProc("CoCreateInstance") coTaskMemFree = ole32.NewProc("CoTaskMemFree") ) // ── COM vtable helpers ──────────────────────────────────────────────────────── var ptrSize = unsafe.Sizeof(uintptr(0)) func procAt(comObj uintptr, methodIdx int) uintptr { vtbl := *(*uintptr)(unsafe.Pointer(comObj)) return *(*uintptr)(unsafe.Pointer(vtbl + uintptr(methodIdx)*ptrSize)) } func comRelease(p uintptr) { if p != 0 { syscall.Syscall(procAt(p, 2), 1, p, 0, 0) } } // ── Capturer ────────────────────────────────────────────────────────────────── // Capturer streams FFT spectrum bars from the system audio loopback. type Capturer struct { // C receives slices of NumBars float32 values in [0.0, 1.0] at ~30 fps. // Slow consumers cause frames to be dropped (non-blocking send). C chan []float32 } // NewCapturer creates a Capturer ready to Start. func NewCapturer() *Capturer { return &Capturer{C: make(chan []float32, 4)} } // Start begins the capture loop; blocks until ctx is cancelled. // Errors are logged but never fatal — the channel simply stays empty. 
func (c *Capturer) Start(ctx context.Context) { if err := c.run(ctx); err != nil { log.Printf("viz: %v", err) } } func (c *Capturer) run(ctx context.Context) error { coInitializeEx.Call(0, 0) // COINIT_MULTITHREADED defer coUninitialize.Call() // ── IMMDeviceEnumerator ────────────────────────────────────────────────── var enumerator uintptr if hr, _, _ := coCreateInstance.Call( uintptr(unsafe.Pointer(&clsidMMDeviceEnumerator)), 0, 0x17, uintptr(unsafe.Pointer(&iidIMMDeviceEnumerator)), uintptr(unsafe.Pointer(&enumerator)), ); hr != 0 { return fmt.Errorf("CoCreateInstance(MMDeviceEnumerator): 0x%08X", hr) } defer comRelease(enumerator) // ── Default render device ──────────────────────────────────────────────── // GetDefaultAudioEndpoint(eRender, eConsole, &device) — vtable index 4, 4 args var device uintptr if hr, _, _ := syscall.Syscall6( procAt(enumerator, 4), 4, enumerator, 0, 0, uintptr(unsafe.Pointer(&device)), 0, 0, ); hr != 0 { return fmt.Errorf("GetDefaultAudioEndpoint: 0x%08X", hr) } defer comRelease(device) // ── IAudioClient ──────────────────────────────────────────────────────── // IMMDevice::Activate(riid, clsCtx, pParams, &ppv) — vtable index 3, 5 args var ac uintptr if hr, _, _ := syscall.Syscall6( procAt(device, 3), 5, device, uintptr(unsafe.Pointer(&iidIAudioClient)), 0x17, 0, uintptr(unsafe.Pointer(&ac)), 0, ); hr != 0 { return fmt.Errorf("Activate(IAudioClient): 0x%08X", hr) } defer comRelease(ac) // ── Mix format ────────────────────────────────────────────────────────── var fmtPtr uintptr if hr, _, _ := syscall.Syscall( procAt(ac, 8), 2, // GetMixFormat ac, uintptr(unsafe.Pointer(&fmtPtr)), 0, ); hr != 0 { return fmt.Errorf("GetMixFormat: 0x%08X", hr) } defer coTaskMemFree.Call(fmtPtr) wfx := (*waveFormatEx)(unsafe.Pointer(fmtPtr)) sampleRate := int(wfx.SamplesPerSec) channels := int(wfx.Channels) isFloat := wfx.FormatTag == waveFormatFloat if wfx.FormatTag == waveFormatExtensibleTag && wfx.Size >= 22 { ext := 
(*waveFormatExtensibleEx)(unsafe.Pointer(fmtPtr)) isFloat = ext.SubFormat == subFormatFloat } log.Printf("viz: loopback format %d Hz, %d ch, %d bit, float=%v", sampleRate, channels, wfx.BitsPerSample, isFloat) if !isFloat || wfx.BitsPerSample != 32 { return fmt.Errorf("viz: unsupported format (need float32); got tag=%04X bits=%d", wfx.FormatTag, wfx.BitsPerSample) } // ── Initialize loopback ────────────────────────────────────────────────── if hr, _, _ := syscall.Syscall9( procAt(ac, 3), 7, // IAudioClient::Initialize ac, audclntShareModeShared, audclntStreamFlagsLoopback, uintptr(bufDuration), 0, // hnsBufferDuration, hnsPeriodicity fmtPtr, 0, // pFormat, AudioSessionGuid 0, 0, ); hr != 0 { return fmt.Errorf("IAudioClient::Initialize: 0x%08X", hr) } // ── IAudioCaptureClient ────────────────────────────────────────────────── var acc uintptr if hr, _, _ := syscall.Syscall( procAt(ac, 14), 3, // GetService ac, uintptr(unsafe.Pointer(&iidIAudioCaptureClient)), uintptr(unsafe.Pointer(&acc)), ); hr != 0 { return fmt.Errorf("GetService(IAudioCaptureClient): 0x%08X", hr) } defer comRelease(acc) // ── Start ──────────────────────────────────────────────────────────────── if hr, _, _ := syscall.Syscall(procAt(ac, 10), 1, ac, 0, 0); hr != 0 { return fmt.Errorf("IAudioClient::Start: 0x%08X", hr) } defer syscall.Syscall(procAt(ac, 11), 1, ac, 0, 0) // Stop // ── Capture loop ───────────────────────────────────────────────────────── buf := make([]float64, 0, fftN*2) smooth := make([]float32, NumBars) tick := time.NewTicker(10 * time.Millisecond) defer tick.Stop() for { select { case <-ctx.Done(): return nil case <-tick.C: buf = drainLoopback(acc, channels, buf) for len(buf) >= fftN { bars := spectrum(buf[:fftN], sampleRate, smooth) copy(smooth, bars) select { case c.C <- bars: default: } buf = buf[fftN:] } } } } // drainLoopback reads all pending audio frames into buf and returns it. 
func drainLoopback(acc uintptr, channels int, buf []float64) []float64 { for { // GetNextPacketSize var packetFrames uint32 if hr, _, _ := syscall.Syscall( procAt(acc, 5), 2, acc, uintptr(unsafe.Pointer(&packetFrames)), 0, ); hr != 0 || packetFrames == 0 { break } // GetBuffer(ppData, &numFrames, &flags, NULL, NULL) — 6 args var dataPtr uintptr var numFrames uint32 var flags uint32 if hr, _, _ := syscall.Syscall6( procAt(acc, 3), 6, acc, uintptr(unsafe.Pointer(&dataPtr)), uintptr(unsafe.Pointer(&numFrames)), uintptr(unsafe.Pointer(&flags)), 0, 0, ); hr != 0 { break } if flags&audclntBufferFlagsSilent == 0 && dataPtr != 0 && numFrames > 0 { samples := unsafe.Slice((*float32)(unsafe.Pointer(dataPtr)), int(numFrames)*channels) for i := 0; i < int(numFrames); i++ { var mono float64 for ch := 0; ch < channels; ch++ { mono += float64(samples[i*channels+ch]) } buf = append(buf, mono/float64(channels)) } } // ReleaseBuffer syscall.Syscall(procAt(acc, 4), 2, acc, uintptr(numFrames), 0) } return buf } // ── Spectrum analysis ───────────────────────────────────────────────────────── // spectrum applies a Hanning window, runs the FFT, maps to NumBars // log-spaced frequency bins, and applies fast-attack/slow-decay smoothing. 
func spectrum(samples []float64, sampleRate int, prev []float32) []float32 { n := len(samples) // Hanning window cx := make([]complex128, n) for i, s := range samples { w := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(n-1))) cx[i] = complex(s*w, 0) } ditFFT(cx) // Magnitude of positive frequencies, normalised bins := make([]float64, n/2) scale := 2.0 / float64(n) for i := range bins { bins[i] = cmplx.Abs(cx[i]) * scale } // Log-spaced output bars: 40 Hz → 20 kHz const fMin, fMax = 40.0, 20_000.0 freqRes := float64(sampleRate) / float64(n) bars := make([]float32, NumBars) for b := 0; b < NumBars; b++ { t := float64(b) / float64(NumBars-1) f := fMin * math.Pow(fMax/fMin, t) var fNext float64 if b < NumBars-1 { t2 := float64(b+1) / float64(NumBars-1) fNext = fMin * math.Pow(fMax/fMin, t2) } else { fNext = fMax } lo := clamp(int(f/freqRes), 0, len(bins)-1) hi := clamp(int(fNext/freqRes), lo+1, len(bins)) var sum float64 for i := lo; i < hi; i++ { sum += bins[i] } avg := sum / float64(hi-lo) // dB → [0, 1] dB := 20 * math.Log10(avg+1e-9) norm := float32((dB + 80) / 80) if norm < 0 { norm = 0 } if norm > 1 { norm = 1 } // Fast attack, slow decay if norm > prev[b] { bars[b] = norm } else { bars[b] = prev[b] * 0.88 } } return bars } func clamp(v, lo, hi int) int { if v < lo { return lo } if v > hi { return hi } return v } // ── Cooley-Tukey FFT ───────────────────────────────────────────────────────── // ditFFT is an in-place, decimation-in-time FFT. len(x) must be a power of 2. 
//
// The forward transform (negative exponent) is computed without any
// normalisation and overwrites x. Slices of length 0 or 1 are left untouched.
func ditFFT(x []complex128) {
	size := len(x)

	// Reorder the input into bit-reversed index order. rev tracks the
	// bit-reversed counterpart of idx and is advanced like a counter whose
	// carry runs from the most significant bit downwards.
	rev := 0
	for idx := 1; idx < size; idx++ {
		mask := size >> 1
		for ; rev&mask != 0; mask >>= 1 {
			rev ^= mask
		}
		rev ^= mask
		if idx < rev {
			x[idx], x[rev] = x[rev], x[idx]
		}
	}

	// Merge transforms of doubling span with radix-2 butterflies.
	for span := 2; span <= size; span *= 2 {
		half := span / 2
		step := cmplx.Exp(complex(0, -math.Pi/float64(half)))
		for base := 0; base < size; base += span {
			twiddle := complex(1, 0)
			for lo, hi := base, base+half; lo < base+half; lo, hi = lo+1, hi+1 {
				a := x[lo]
				b := x[hi] * twiddle
				x[lo], x[hi] = a+b, a-b
				twiddle *= step
			}
		}
	}
}