|
|
|
// MERGE ARTIFACT (unapplied diff hunk header — resolve against pre-merge source):
// @@ -143,17 +143,24 @@ let decisionIndex = new Map();
|
|
|
// --------------------------------------------------------------------------- |
|
|
|
// LiveListenWS — WebSocket-based gapless audio streaming via /ws/audio |
|
|
|
// --------------------------------------------------------------------------- |
|
|
|
// v2: Ring-buffer based playback with smooth underrun handling. |
|
|
|
// v4: Jank-resistant scheduled playback. |
|
|
|
// |
|
|
|
// Architecture: |
|
|
|
// WebSocket → PCM chunks → ring buffer → AudioWorklet/ScriptProcessor → speakers |
|
|
|
// Problem: Main-thread Canvas rendering blocks for 150-250ms, starving |
|
|
|
// ScriptProcessorNode callbacks and causing audio underruns. |
|
|
|
// AudioWorklet would fix this but requires Secure Context (HTTPS/localhost). |
|
|
|
// |
|
|
|
// v2 key improvements over v1 (superseded by the v4 notes below):
|
|
|
// - 250ms ring buffer absorbs feed_gap jitter (150-220ms observed) |
|
|
|
// - On underrun: fade to silence instead of hard resync (no click) |
|
|
|
// - On overrun: skip oldest data instead of hard drop |
|
|
|
// - Continuous pull-based playback (worklet pulls from ring) instead of |
|
|
|
// push-based scheduling (BufferSource per chunk) |
|
|
|
// v4 solution: Use BufferSource scheduling (like v1) but with a much larger
|
|
|
// jitter budget. We pre-schedule audio 400ms into the future so that even |
|
|
|
// a 300ms main-thread hang doesn't cause a gap. The AudioContext's internal |
|
|
|
// scheduling is sample-accurate and runs on a system thread — once a |
|
|
|
// BufferSource is scheduled, it plays regardless of main-thread state. |
|
|
|
// |
|
|
|
// v4 key differences from v1:
|
|
|
// - 400ms target latency (was 100ms) — survives observed 250ms hangs |
|
|
|
// - Soft resync: on underrun, schedule next chunk with a short crossfade |
|
|
|
// gap instead of hard jump |
|
|
|
// - Overrun cap at 800ms (was 500ms) |
|
|
|
// - Chunk coalescing: merge small chunks to reduce scheduling overhead |
|
|
|
// --------------------------------------------------------------------------- |
|
|
|
class LiveListenWS { |
|
|
|
constructor(freq, bw, mode) { |
|
|
|
@@ -165,19 +172,15 @@ class LiveListenWS { |
|
|
|
this.sampleRate = 48000; |
|
|
|
this.channels = 1; |
|
|
|
this.playing = false; |
|
|
|
this.nextTime = 0; |
|
|
|
this.started = false; |
|
|
|
this._onStop = null; |
|
|
|
|
|
|
|
// Ring buffer (interleaved float32 samples) |
|
|
|
this._ringBuf = null; |
|
|
|
this._ringSize = 0; |
|
|
|
this._ringWrite = 0; // write cursor (producer: WebSocket) |
|
|
|
this._ringRead = 0; // read cursor (consumer: audio callback) |
|
|
|
this._scriptNode = null; |
|
|
|
this._fadeGain = 1.0; // smooth fade on underrun |
|
|
|
this._started = false; |
|
|
|
this._totalWritten = 0; |
|
|
|
this._totalRead = 0; |
|
|
|
this._underruns = 0; |
|
|
|
// Chunk coalescing buffer |
|
|
|
this._pendingSamples = []; |
|
|
|
this._pendingLen = 0; |
|
|
|
this._flushTimer = 0; |
|
|
|
// Fade state for soft resync |
|
|
|
this._lastEndSample = null; // last sample value per channel for crossfade |
|
|
|
} |
|
|
|
|
|
|
|
start() {
// NOTE(review): this method is truncated by unapplied diff hunk headers —
// the WebSocket construction and the head of the onmessage handler are
// elided below. Recover the missing lines from the pre-merge source; the
// file does not parse while the "@@" lines remain.
@@ -207,9 +210,8 @@ class LiveListenWS {
} catch (e) { /* ignore */ }
return;
}
// Binary PCM data (s16le)
if (!this.audioCtx || !this.playing) return;
// MERGE ARTIFACT: both the v2 feed (_pushPCM → ring buffer) and the v4 feed
// (_onPCM → coalesce + schedule) are present; exactly one should survive
// the merge. The header commentary indicates v4 (_onPCM) is current —
// confirm before deleting either call.
this._pushPCM(ev.data);
this._onPCM(ev.data);
};
this.ws.onclose = () => {
// NOTE(review): onclose body elided by the next hunk header — presumably it
// invokes this._onStop; verify against the pre-merge source.
@@ -228,31 +230,20 @@ class LiveListenWS {
|
|
|
|
|
|
|
stop() { |
|
|
|
this.playing = false; |
|
|
|
if (this.ws) { |
|
|
|
this.ws.close(); |
|
|
|
this.ws = null; |
|
|
|
} |
|
|
|
if (this.ws) { this.ws.close(); this.ws = null; } |
|
|
|
this._teardownAudio(); |
|
|
|
} |
|
|
|
|
|
|
|
onStop(fn) { this._onStop = fn; } |
|
|
|
|
|
|
|
// --- Internal: Audio setup --- |
|
|
|
|
|
|
|
_teardownAudio() { |
|
|
|
if (this._scriptNode) { |
|
|
|
this._scriptNode.disconnect(); |
|
|
|
this._scriptNode = null; |
|
|
|
} |
|
|
|
if (this.audioCtx) { |
|
|
|
this.audioCtx.close().catch(() => {}); |
|
|
|
this.audioCtx = null; |
|
|
|
} |
|
|
|
this._ringBuf = null; |
|
|
|
this._ringWrite = 0; |
|
|
|
this._ringRead = 0; |
|
|
|
this._fadeGain = 1.0; |
|
|
|
this._started = false; |
|
|
|
if (this._flushTimer) { clearTimeout(this._flushTimer); this._flushTimer = 0; } |
|
|
|
if (this.audioCtx) { this.audioCtx.close().catch(() => {}); this.audioCtx = null; } |
|
|
|
this.nextTime = 0; |
|
|
|
this.started = false; |
|
|
|
this._pendingSamples = []; |
|
|
|
this._pendingLen = 0; |
|
|
|
this._lastEndSample = null; |
|
|
|
} |
|
|
|
|
|
|
|
_initAudio() {
// NOTE(review): the AudioContext construction (presumably
// "this.audioCtx = new AudioContext({") is elided by the unapplied diff
// hunk header below — recover it from the pre-merge source; the file does
// not parse while the "@@" line remains.
@@ -261,161 +252,102 @@ class LiveListenWS {
sampleRate: this.sampleRate
});
// Browsers start contexts suspended until a user gesture; resume is
// best-effort and rejection is ignored.
this.audioCtx.resume().catch(() => {});

// Ring buffer: 500ms capacity (handles jitter up to ~400ms)
const ringDurationSec = 0.5;
// _ringSize counts interleaved samples (frames * channels), not bytes.
this._ringSize = Math.ceil(this.sampleRate * this.channels * ringDurationSec);
this._ringBuf = new Float32Array(this._ringSize);
this._ringWrite = 0;
this._ringRead = 0;
this._fadeGain = 1.0;
// Playback is gated until ~200ms is buffered (see _pushPCM priming logic).
this._started = false;
this._totalWritten = 0;
this._totalRead = 0;

// Use ScriptProcessorNode (deprecated but universally supported).
// Buffer size 2048 at 48kHz = 42.7ms callback interval — responsive enough.
const bufSize = 2048;
// 0 input channels: this node only produces audio, never captures.
this._scriptNode = this.audioCtx.createScriptProcessor(bufSize, 0, this.channels);
this._scriptNode.onaudioprocess = (e) => this._audioCallback(e);
this._scriptNode.connect(this.audioCtx.destination);
}
|
|
|
|
|
|
|
// --- Internal: Producer (WebSocket → ring buffer) --- |
|
|
|
|
|
|
|
_pushPCM(buf) { |
|
|
|
if (!this._ringBuf) return; |
|
|
|
|
|
|
|
const samples = new Int16Array(buf); |
|
|
|
const nSamples = samples.length; // interleaved: nFrames * channels |
|
|
|
if (nSamples === 0) return; |
|
|
|
|
|
|
|
const ring = this._ringBuf; |
|
|
|
const size = this._ringSize; |
|
|
|
let w = this._ringWrite; |
|
|
|
|
|
|
|
// Check available space |
|
|
|
const used = (w - this._ringRead + size) % size; |
|
|
|
const free = size - used - 1; // -1 to distinguish full from empty |
|
|
|
|
|
|
|
if (nSamples > free) { |
|
|
|
// Overrun: advance read cursor to make room (discard oldest audio). |
|
|
|
// This keeps latency bounded instead of growing unbounded. |
|
|
|
const deficit = nSamples - free; |
|
|
|
this._ringRead = (this._ringRead + deficit) % size; |
|
|
|
} |
|
|
|
|
|
|
|
// Write samples into ring |
|
|
|
for (let i = 0; i < nSamples; i++) { |
|
|
|
ring[w] = samples[i] / 32768; |
|
|
|
w = (w + 1) % size; |
|
|
|
} |
|
|
|
this._ringWrite = w; |
|
|
|
this._totalWritten += nSamples; |
|
|
|
|
|
|
|
// Start playback after buffering ~200ms (enough to absorb one feed_gap) |
|
|
|
if (!this._started) { |
|
|
|
const buffered = (this._ringWrite - this._ringRead + size) % size; |
|
|
|
const target = Math.ceil(this.sampleRate * this.channels * 0.2); |
|
|
|
if (buffered >= target) { |
|
|
|
this._started = true; |
|
|
|
} |
|
|
|
this.nextTime = 0; |
|
|
|
this.started = false; |
|
|
|
this._lastEndSample = null; |
|
|
|
} |
|
|
|
|
|
|
|
_onPCM(buf) { |
|
|
|
// Coalesce small chunks: accumulate until we have >= 40ms or 50ms passes. |
|
|
|
// This reduces BufferSource scheduling overhead from ~12/sec to ~6/sec |
|
|
|
// and produces larger, more stable buffers. |
|
|
|
this._pendingSamples.push(new Int16Array(buf)); |
|
|
|
this._pendingLen += buf.byteLength / 2; // Int16 = 2 bytes |
|
|
|
|
|
|
|
const minFrames = Math.ceil(this.sampleRate * 0.04); // 40ms worth |
|
|
|
const haveFrames = Math.floor(this._pendingLen / this.channels); |
|
|
|
|
|
|
|
if (haveFrames >= minFrames) { |
|
|
|
this._flushPending(); |
|
|
|
} else if (!this._flushTimer) { |
|
|
|
// Flush after 50ms even if we don't have enough (prevents stale data) |
|
|
|
this._flushTimer = setTimeout(() => { |
|
|
|
this._flushTimer = 0; |
|
|
|
if (this._pendingLen > 0) this._flushPending(); |
|
|
|
}, 50); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// --- Internal: Consumer (ring buffer → speakers) --- |
|
|
|
|
|
|
|
_audioCallback(e) { |
|
|
|
const ring = this._ringBuf; |
|
|
|
const size = this._ringSize; |
|
|
|
const ch = this.channels; |
|
|
|
const outLen = e.outputBuffer.length; // frames per channel |
|
|
|
_flushPending() { |
|
|
|
if (this._flushTimer) { clearTimeout(this._flushTimer); this._flushTimer = 0; } |
|
|
|
if (this._pendingSamples.length === 0) return; |
|
|
|
|
|
|
|
if (!ring || !this._started) { |
|
|
|
// Not ready yet — output silence |
|
|
|
for (let c = 0; c < e.outputBuffer.numberOfChannels; c++) { |
|
|
|
e.outputBuffer.getChannelData(c).fill(0); |
|
|
|
} |
|
|
|
return; |
|
|
|
// Merge all pending into one Int16Array |
|
|
|
const total = this._pendingLen; |
|
|
|
const merged = new Int16Array(total); |
|
|
|
let off = 0; |
|
|
|
for (const chunk of this._pendingSamples) { |
|
|
|
merged.set(chunk, off); |
|
|
|
off += chunk.length; |
|
|
|
} |
|
|
|
this._pendingSamples = []; |
|
|
|
this._pendingLen = 0; |
|
|
|
|
|
|
|
// How many interleaved samples do we need? |
|
|
|
const need = outLen * ch; |
|
|
|
const available = (this._ringWrite - this._ringRead + size) % size; |
|
|
|
|
|
|
|
if (available < need) { |
|
|
|
// Underrun — not enough data. Output what we have, fade to silence. |
|
|
|
this._underruns++; |
|
|
|
const have = available; |
|
|
|
const haveFrames = Math.floor(have / ch); |
|
|
|
|
|
|
|
// Get channel buffers |
|
|
|
const outs = []; |
|
|
|
for (let c = 0; c < e.outputBuffer.numberOfChannels; c++) { |
|
|
|
outs.push(e.outputBuffer.getChannelData(c)); |
|
|
|
} |
|
|
|
this._scheduleChunk(merged); |
|
|
|
} |
|
|
|
|
|
|
|
let r = this._ringRead; |
|
|
|
_scheduleChunk(samples) { |
|
|
|
const ctx = this.audioCtx; |
|
|
|
if (!ctx) return; |
|
|
|
if (ctx.state === 'suspended') ctx.resume().catch(() => {}); |
|
|
|
|
|
|
|
// Play available samples with fade-out over last 64 samples |
|
|
|
const fadeLen = Math.min(64, haveFrames); |
|
|
|
const fadeStart = haveFrames - fadeLen; |
|
|
|
const nFrames = Math.floor(samples.length / this.channels); |
|
|
|
if (nFrames === 0) return; |
|
|
|
|
|
|
|
for (let i = 0; i < haveFrames; i++) { |
|
|
|
// Fade envelope: full volume, then linear fade to 0 |
|
|
|
let env = this._fadeGain; |
|
|
|
if (i >= fadeStart) { |
|
|
|
env *= 1.0 - (i - fadeStart) / fadeLen; |
|
|
|
} |
|
|
|
for (let c = 0; c < ch; c++) { |
|
|
|
if (c < outs.length) { |
|
|
|
outs[c][i] = ring[r] * env; |
|
|
|
} |
|
|
|
r = (r + 1) % size; |
|
|
|
} |
|
|
|
} |
|
|
|
this._ringRead = r; |
|
|
|
const audioBuffer = ctx.createBuffer(this.channels, nFrames, this.sampleRate); |
|
|
|
|
|
|
|
// Fill rest with silence |
|
|
|
for (let i = haveFrames; i < outLen; i++) { |
|
|
|
for (let c = 0; c < outs.length; c++) { |
|
|
|
outs[c][i] = 0; |
|
|
|
} |
|
|
|
// Decode interleaved s16le → per-channel float32 |
|
|
|
for (let ch = 0; ch < this.channels; ch++) { |
|
|
|
const data = audioBuffer.getChannelData(ch); |
|
|
|
for (let i = 0; i < nFrames; i++) { |
|
|
|
data[i] = samples[i * this.channels + ch] / 32768; |
|
|
|
} |
|
|
|
|
|
|
|
this._fadeGain = 0; // next chunk starts silent |
|
|
|
this._totalRead += have; |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
// Normal path — enough data available |
|
|
|
const outs = []; |
|
|
|
for (let c = 0; c < e.outputBuffer.numberOfChannels; c++) { |
|
|
|
outs.push(e.outputBuffer.getChannelData(c)); |
|
|
|
} |
|
|
|
const now = ctx.currentTime; |
|
|
|
|
|
|
|
let r = this._ringRead; |
|
|
|
// Target latency: 400ms. This means we schedule audio to play 400ms |
|
|
|
// from now. Even if the main thread hangs for 300ms, the already- |
|
|
|
// scheduled BufferSources continue playing on the system audio thread. |
|
|
|
const targetLatency = 0.4; |
|
|
|
|
|
|
|
// If recovering from underrun, fade in over first 64 samples |
|
|
|
const fadeInLen = (this._fadeGain < 1.0) ? Math.min(64, outLen) : 0; |
|
|
|
// Max buffered: 900ms. Drop chunks if we're too far ahead. |
|
|
|
const maxBuffered = 0.9; |
|
|
|
|
|
|
|
for (let i = 0; i < outLen; i++) { |
|
|
|
let env = 1.0; |
|
|
|
if (i < fadeInLen) { |
|
|
|
// Linear fade-in from current _fadeGain to 1.0 |
|
|
|
env = this._fadeGain + (1.0 - this._fadeGain) * (i / fadeInLen); |
|
|
|
} |
|
|
|
for (let c = 0; c < ch; c++) { |
|
|
|
if (c < outs.length) { |
|
|
|
outs[c][i] = ring[r] * env; |
|
|
|
if (!this.started || this.nextTime < now) { |
|
|
|
// First chunk or underrun. |
|
|
|
// Apply fade-in to avoid click at resync point. |
|
|
|
const fadeIn = Math.min(64, nFrames); |
|
|
|
for (let ch = 0; ch < this.channels; ch++) { |
|
|
|
const data = audioBuffer.getChannelData(ch); |
|
|
|
for (let i = 0; i < fadeIn; i++) { |
|
|
|
data[i] *= i / fadeIn; |
|
|
|
} |
|
|
|
r = (r + 1) % size; |
|
|
|
} |
|
|
|
this.nextTime = now + targetLatency; |
|
|
|
this.started = true; |
|
|
|
} |
|
|
|
|
|
|
|
if (this.nextTime > now + maxBuffered) { |
|
|
|
// Too much buffered — drop to cap latency |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
this._ringRead = r; |
|
|
|
this._fadeGain = 1.0; |
|
|
|
this._totalRead += need; |
|
|
|
const source = ctx.createBufferSource(); |
|
|
|
source.buffer = audioBuffer; |
|
|
|
source.connect(ctx.destination); |
|
|
|
source.start(this.nextTime); |
|
|
|
this.nextTime += audioBuffer.duration; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|