| @@ -252,24 +252,25 @@ func (b *txBridge) StopTX() error { return b.engine.Stop(context.Background()) | |||||
| func (b *txBridge) TXStats() map[string]any { | func (b *txBridge) TXStats() map[string]any { | ||||
| s := b.engine.Stats() | s := b.engine.Stats() | ||||
| return map[string]any{ | return map[string]any{ | ||||
| "state": s.State, | |||||
| "chunksProduced": s.ChunksProduced, | |||||
| "totalSamples": s.TotalSamples, | |||||
| "underruns": s.Underruns, | |||||
| "lateBuffers": s.LateBuffers, | |||||
| "lastError": s.LastError, | |||||
| "uptimeSeconds": s.UptimeSeconds, | |||||
| "maxCycleMs": s.MaxCycleMs, | |||||
| "maxGenerateMs": s.MaxGenerateMs, | |||||
| "maxUpsampleMs": s.MaxUpsampleMs, | |||||
| "maxWriteMs": s.MaxWriteMs, | |||||
| "queue": s.Queue, | |||||
| "runtimeIndicator": s.RuntimeIndicator, | |||||
| "runtimeAlert": s.RuntimeAlert, | |||||
| "state": s.State, | |||||
| "chunksProduced": s.ChunksProduced, | |||||
| "totalSamples": s.TotalSamples, | |||||
| "underruns": s.Underruns, | |||||
| "lateBuffers": s.LateBuffers, | |||||
| "lastError": s.LastError, | |||||
| "uptimeSeconds": s.UptimeSeconds, | |||||
| "maxCycleMs": s.MaxCycleMs, | |||||
| "maxGenerateMs": s.MaxGenerateMs, | |||||
| "maxUpsampleMs": s.MaxUpsampleMs, | |||||
| "maxWriteMs": s.MaxWriteMs, | |||||
| "queue": s.Queue, | |||||
| "runtimeIndicator": s.RuntimeIndicator, | |||||
| "runtimeAlert": s.RuntimeAlert, | |||||
| "degradedTransitions": s.DegradedTransitions, | "degradedTransitions": s.DegradedTransitions, | ||||
| "mutedTransitions": s.MutedTransitions, | "mutedTransitions": s.MutedTransitions, | ||||
| "faultedTransitions": s.FaultedTransitions, | "faultedTransitions": s.FaultedTransitions, | ||||
| "faultCount": s.FaultCount, | "faultCount": s.FaultCount, | ||||
| "faultHistory": s.FaultHistory, | |||||
| "lastFault": s.LastFault, | "lastFault": s.LastFault, | ||||
| } | } | ||||
| } | } | ||||
| @@ -45,4 +45,11 @@ func TestTxBridgeExportsQueueStats(t *testing.T) { | |||||
| if indicator != apppkg.RuntimeIndicatorQueueCritical { | if indicator != apppkg.RuntimeIndicatorQueueCritical { | ||||
| t.Fatalf("runtime indicator should be queueCritical, got %s", indicator) | t.Fatalf("runtime indicator should be queueCritical, got %s", indicator) | ||||
| } | } | ||||
| if historyRaw, ok := stats["faultHistory"]; !ok { | |||||
| t.Fatalf("expected faultHistory in tx stats") | |||||
| } else if history, ok := historyRaw.([]apppkg.FaultEvent); !ok { | |||||
| t.Fatalf("faultHistory type mismatch: %T", historyRaw) | |||||
| } else if len(history) != 0 { | |||||
| t.Fatalf("expected no faults yet, got %d", len(history)) | |||||
| } | |||||
| } | } | ||||
| @@ -17,6 +17,7 @@ Health check. | |||||
| `engine.state` spiegelt jetzt die Runtime-State-Maschine wider (idle, arming, prebuffering, running, degraded, muted, faulted, stopping) und bietet eine erste beobachtbare Basis für Fault-Transitions. | `engine.state` spiegelt jetzt die Runtime-State-Maschine wider (idle, arming, prebuffering, running, degraded, muted, faulted, stopping) und bietet eine erste beobachtbare Basis für Fault-Transitions. | ||||
| --- | --- | ||||
| ### `GET /status` | ### `GET /status` | ||||
| @@ -62,7 +63,22 @@ Live engine and driver telemetry. Only populated when TX is active. | |||||
| "totalSamples": 1408950000, | "totalSamples": 1408950000, | ||||
| "underruns": 0, | "underruns": 0, | ||||
| "lastError": "", | "lastError": "", | ||||
| "uptimeSeconds": 3614.2 | |||||
| "uptimeSeconds": 3614.2, | |||||
| "faultCount": 2, | |||||
| "lastFault": { | |||||
| "time": "2026-04-06T00:00:00Z", | |||||
| "reason": "queueCritical", | |||||
| "severity": "faulted", | |||||
| "message": "queue health critical for 5 checks" | |||||
| }, | |||||
| "faultHistory": [ | |||||
| { | |||||
| "time": "2026-04-06T00:00:00Z", | |||||
| "reason": "queueCritical", | |||||
| "severity": "faulted", | |||||
| "message": "queue health critical for 5 checks" | |||||
| } | |||||
| ] | |||||
| }, | }, | ||||
| "driver": { | "driver": { | ||||
| "txEnabled": true, | "txEnabled": true, | ||||
| @@ -69,25 +69,26 @@ func durationMs(ns uint64) float64 { | |||||
| } | } | ||||
| type EngineStats struct { | type EngineStats struct { | ||||
| State string `json:"state"` | |||||
| ChunksProduced uint64 `json:"chunksProduced"` | |||||
| TotalSamples uint64 `json:"totalSamples"` | |||||
| Underruns uint64 `json:"underruns"` | |||||
| LateBuffers uint64 `json:"lateBuffers,omitempty"` | |||||
| LastError string `json:"lastError,omitempty"` | |||||
| UptimeSeconds float64 `json:"uptimeSeconds"` | |||||
| MaxCycleMs float64 `json:"maxCycleMs,omitempty"` | |||||
| MaxGenerateMs float64 `json:"maxGenerateMs,omitempty"` | |||||
| MaxUpsampleMs float64 `json:"maxUpsampleMs,omitempty"` | |||||
| MaxWriteMs float64 `json:"maxWriteMs,omitempty"` | |||||
| Queue output.QueueStats `json:"queue"` | |||||
| RuntimeIndicator RuntimeIndicator `json:"runtimeIndicator"` | |||||
| RuntimeAlert string `json:"runtimeAlert,omitempty"` | |||||
| LastFault *FaultEvent `json:"lastFault,omitempty"` | |||||
| DegradedTransitions uint64 `json:"degradedTransitions"` | |||||
| MutedTransitions uint64 `json:"mutedTransitions"` | |||||
| FaultedTransitions uint64 `json:"faultedTransitions"` | |||||
| FaultCount uint64 `json:"faultCount"` | |||||
| State string `json:"state"` | |||||
| ChunksProduced uint64 `json:"chunksProduced"` | |||||
| TotalSamples uint64 `json:"totalSamples"` | |||||
| Underruns uint64 `json:"underruns"` | |||||
| LateBuffers uint64 `json:"lateBuffers,omitempty"` | |||||
| LastError string `json:"lastError,omitempty"` | |||||
| UptimeSeconds float64 `json:"uptimeSeconds"` | |||||
| MaxCycleMs float64 `json:"maxCycleMs,omitempty"` | |||||
| MaxGenerateMs float64 `json:"maxGenerateMs,omitempty"` | |||||
| MaxUpsampleMs float64 `json:"maxUpsampleMs,omitempty"` | |||||
| MaxWriteMs float64 `json:"maxWriteMs,omitempty"` | |||||
| Queue output.QueueStats `json:"queue"` | |||||
| RuntimeIndicator RuntimeIndicator `json:"runtimeIndicator"` | |||||
| RuntimeAlert string `json:"runtimeAlert,omitempty"` | |||||
| LastFault *FaultEvent `json:"lastFault,omitempty"` | |||||
| DegradedTransitions uint64 `json:"degradedTransitions"` | |||||
| MutedTransitions uint64 `json:"mutedTransitions"` | |||||
| FaultedTransitions uint64 `json:"faultedTransitions"` | |||||
| FaultCount uint64 `json:"faultCount"` | |||||
| FaultHistory []FaultEvent `json:"faultHistory,omitempty"` | |||||
| } | } | ||||
| type RuntimeIndicator string | type RuntimeIndicator string | ||||
| @@ -146,10 +147,10 @@ type Engine struct { | |||||
| faultHistoryMu sync.Mutex | faultHistoryMu sync.Mutex | ||||
| faultHistory []FaultEvent | faultHistory []FaultEvent | ||||
| degradedTransitions atomic.Uint64 | |||||
| mutedTransitions atomic.Uint64 | |||||
| faultedTransitions atomic.Uint64 | |||||
| faultEvents atomic.Uint64 | |||||
| degradedTransitions atomic.Uint64 | |||||
| mutedTransitions atomic.Uint64 | |||||
| faultedTransitions atomic.Uint64 | |||||
| faultEvents atomic.Uint64 | |||||
| // Live config: pending frequency change, applied between chunks | // Live config: pending frequency change, applied between chunks | ||||
| pendingFreq atomic.Pointer[float64] | pendingFreq atomic.Pointer[float64] | ||||
| @@ -402,25 +403,26 @@ func (e *Engine) Stats() EngineStats { | |||||
| ri := runtimeIndicator(queue.Health, hasRecentLateBuffers) | ri := runtimeIndicator(queue.Health, hasRecentLateBuffers) | ||||
| lastFault := e.lastFaultEvent() | lastFault := e.lastFaultEvent() | ||||
| return EngineStats{ | return EngineStats{ | ||||
| State: string(e.currentRuntimeState()), | |||||
| ChunksProduced: e.chunksProduced.Load(), | |||||
| TotalSamples: e.totalSamples.Load(), | |||||
| Underruns: e.underruns.Load(), | |||||
| LateBuffers: lateBuffers, | |||||
| LastError: errVal, | |||||
| UptimeSeconds: uptime, | |||||
| MaxCycleMs: durationMs(e.maxCycleNs.Load()), | |||||
| MaxGenerateMs: durationMs(e.maxGenerateNs.Load()), | |||||
| MaxUpsampleMs: durationMs(e.maxUpsampleNs.Load()), | |||||
| MaxWriteMs: durationMs(e.maxWriteNs.Load()), | |||||
| Queue: queue, | |||||
| RuntimeIndicator: ri, | |||||
| RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers), | |||||
| LastFault: lastFault, | |||||
| State: string(e.currentRuntimeState()), | |||||
| ChunksProduced: e.chunksProduced.Load(), | |||||
| TotalSamples: e.totalSamples.Load(), | |||||
| Underruns: e.underruns.Load(), | |||||
| LateBuffers: lateBuffers, | |||||
| LastError: errVal, | |||||
| UptimeSeconds: uptime, | |||||
| MaxCycleMs: durationMs(e.maxCycleNs.Load()), | |||||
| MaxGenerateMs: durationMs(e.maxGenerateNs.Load()), | |||||
| MaxUpsampleMs: durationMs(e.maxUpsampleNs.Load()), | |||||
| MaxWriteMs: durationMs(e.maxWriteNs.Load()), | |||||
| Queue: queue, | |||||
| RuntimeIndicator: ri, | |||||
| RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers), | |||||
| LastFault: lastFault, | |||||
| DegradedTransitions: e.degradedTransitions.Load(), | DegradedTransitions: e.degradedTransitions.Load(), | ||||
| MutedTransitions: e.mutedTransitions.Load(), | MutedTransitions: e.mutedTransitions.Load(), | ||||
| FaultedTransitions: e.faultedTransitions.Load(), | FaultedTransitions: e.faultedTransitions.Load(), | ||||
| FaultCount: e.faultEvents.Load(), | FaultCount: e.faultEvents.Load(), | ||||
| FaultHistory: e.FaultHistory(), | |||||
| } | } | ||||
| } | } | ||||
| @@ -142,6 +142,39 @@ func TestRuntimeWithoutDriver(t *testing.T) { | |||||
| } | } | ||||
| } | } | ||||
| func TestRuntimeReportsFaultHistory(t *testing.T) { | |||||
| srv := NewServer(cfgpkg.Default()) | |||||
| history := []map[string]any{ | |||||
| { | |||||
| "time": "2026-04-06T00:00:00Z", | |||||
| "reason": "queueCritical", | |||||
| "severity": "faulted", | |||||
| "message": "queue critical", | |||||
| }, | |||||
| } | |||||
| srv.SetTXController(&fakeTXController{stats: map[string]any{"faultHistory": history}}) | |||||
| rec := httptest.NewRecorder() | |||||
| srv.Handler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/runtime", nil)) | |||||
| if rec.Code != 200 { | |||||
| t.Fatalf("status: %d", rec.Code) | |||||
| } | |||||
| var body map[string]any | |||||
| if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { | |||||
| t.Fatalf("unmarshal runtime: %v", err) | |||||
| } | |||||
| engineRaw, ok := body["engine"].(map[string]any) | |||||
| if !ok { | |||||
| t.Fatalf("runtime engine missing") | |||||
| } | |||||
| histRaw, ok := engineRaw["faultHistory"].([]any) | |||||
| if !ok { | |||||
| t.Fatalf("faultHistory missing or wrong type: %T", engineRaw["faultHistory"]) | |||||
| } | |||||
| if len(histRaw) != len(history) { | |||||
| t.Fatalf("faultHistory length mismatch: want %d got %d", len(history), len(histRaw)) | |||||
| } | |||||
| } | |||||
| func TestRuntimeFaultResetRejectsGet(t *testing.T) { | func TestRuntimeFaultResetRejectsGet(t *testing.T) { | ||||
| srv := NewServer(cfgpkg.Default()) | srv := NewServer(cfgpkg.Default()) | ||||
| rec := httptest.NewRecorder() | rec := httptest.NewRecorder() | ||||
| @@ -771,6 +771,50 @@ input.input-error { | |||||
| .health-line .val.warn { color: var(--amber); } | .health-line .val.warn { color: var(--amber); } | ||||
| .health-line .val.err { color: var(--accent); } | .health-line .val.err { color: var(--accent); } | ||||
| .fault-history { | |||||
| margin-top: 12px; | |||||
| padding: 10px; | |||||
| border: 1px solid var(--border); | |||||
| border-radius: 6px; | |||||
| background: var(--surface1); | |||||
| font-size: 11px; | |||||
| max-height: 180px; | |||||
| overflow-y: auto; | |||||
| line-height: 1.3; | |||||
| } | |||||
| .fault-history-entry { | |||||
| display: flex; | |||||
| justify-content: space-between; | |||||
| gap: 10px; | |||||
| padding: 4px 0; | |||||
| border-bottom: 1px solid rgba(255, 255, 255, 0.08); | |||||
| } | |||||
| .fault-history-entry:last-child { | |||||
| border-bottom: none; | |||||
| } | |||||
| .fault-history-entry .fault-history-time { | |||||
| color: var(--text-dim); | |||||
| } | |||||
| .fault-history-entry.ok { color: var(--green); } | |||||
| .fault-history-entry.warn { color: var(--amber); } | |||||
| .fault-history-entry.err { color: var(--accent); } | |||||
| .fault-history-desc { | |||||
| font-size: 10px; | |||||
| flex: 1; | |||||
| text-transform: uppercase; | |||||
| letter-spacing: 0.5px; | |||||
| } | |||||
| .fault-history-empty { | |||||
| padding: 6px 0; | |||||
| color: var(--text-muted); | |||||
| font-size: 11px; | |||||
| } | |||||
| .section-note.reset-hint { | |||||
| font-size: 11px; | |||||
| color: var(--text-dim); | |||||
| margin-top: 10px; | |||||
| } | |||||
| .log { | .log { | ||||
| background: var(--bg); | background: var(--bg); | ||||
| border: 1px solid var(--border); | border: 1px solid var(--border); | ||||
| @@ -1122,6 +1166,24 @@ input.input-error { | |||||
| <button class="danger-btn" id="danger-stop" type="button">Emergency Stop TX</button> | <button class="danger-btn" id="danger-stop" type="button">Emergency Stop TX</button> | ||||
| <button class="danger-btn" id="danger-refresh" type="button">Hard Refresh Runtime</button> | <button class="danger-btn" id="danger-refresh" type="button">Hard Refresh Runtime</button> | ||||
| <button class="danger-btn secondary" id="danger-reset-fault" type="button">Reset Fault</button> | <button class="danger-btn secondary" id="danger-reset-fault" type="button">Reset Fault</button> | ||||
| </div> | |||||
| <div class="section-note reset-hint" id="reset-hint"> | |||||
| Reset Fault moves the runtime back to DEGRADED while the queue settles before running again. | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="card panel" data-panel-key="fault-history"> | |||||
| <div class="panel-head" data-panel> | |||||
| <h2>Fault History</h2> | |||||
| <div class="meta">recent faults</div> | |||||
| <span class="chevron">▼</span> | |||||
| </div> | |||||
| <div class="panel-body"> | |||||
| <div class="section-note">Recent fault events for quick ops situational awareness.</div> | |||||
| <div class="fault-history" id="fault-history"> | |||||
| <div class="fault-history-empty">No faults yet.</div> | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| @@ -1750,6 +1812,8 @@ function render() { | |||||
| updateText('info-live', engine.state ? `${String(engine.state).toUpperCase()} / ${state.server.runtimeOk ? 'runtime ok' : 'runtime pending'}` : (state.server.configOk ? 'config only' : '--')); | updateText('info-live', engine.state ? `${String(engine.state).toUpperCase()} / ${state.server.runtimeOk ? 'runtime ok' : 'runtime pending'}` : (state.server.configOk ? 'config only' : '--')); | ||||
| updateHealth(engine, audioStream); | updateHealth(engine, audioStream); | ||||
| updateFaultHistory(engine); | |||||
| updateResetHint(engine); | |||||
| updateMeters(engine, driver, audioStream); | updateMeters(engine, driver, audioStream); | ||||
| drawSparkline('spark-audio', state.charts.audio, 'good', 1); | drawSparkline('spark-audio', state.charts.audio, 'good', 1); | ||||
| drawSparkline('spark-underruns', state.charts.underruns, underruns > 0 ? 'err' : 'warn'); | drawSparkline('spark-underruns', state.charts.underruns, underruns > 0 ? 'err' : 'warn'); | ||||
| @@ -1916,6 +1980,40 @@ function updateHealth(engine, audioStream) { | |||||
| } | } | ||||
| } | } | ||||
/**
 * Map a fault severity onto one of the modifier classes that the stylesheet
 * defines for `.fault-history-entry` (`ok`, `warn`, `err`).
 *
 * A missing or unrecognised severity falls back to `warn`, matching the
 * previous default for entries without a severity.
 *
 * @param {*} severity Severity value taken from a fault entry.
 * @returns {'ok'|'warn'|'err'} CSS modifier class.
 */
function faultSeverityClass(severity) {
  switch (String(severity || '').toLowerCase()) {
    case 'faulted':
    case 'err':
      return 'err';
    case 'ok':
      return 'ok';
    default:
      return 'warn';
  }
}

/**
 * Render `engine.faultHistory` into the `#fault-history` container.
 *
 * Entries are shown in the reverse of the order received (presumably newest
 * first; this assumes the server appends faults chronologically). A missing
 * or empty history renders the placeholder row instead.
 *
 * Rows are built as DOM nodes and filled through `textContent`, so the
 * reason, message, and severity strings from the server are never parsed as
 * HTML.
 *
 * @param {object|null|undefined} engine Engine section of the runtime payload.
 */
function updateFaultHistory(engine) {
  const container = $('fault-history');
  if (!container) return;
  const history = Array.isArray(engine?.faultHistory) ? engine.faultHistory : [];
  if (!history.length) {
    // Static markup only; no server data is interpolated here.
    container.innerHTML = '<div class="fault-history-empty">No faults recorded yet.</div>';
    return;
  }

  const fragment = document.createDocumentFragment();
  for (const entry of history.slice().reverse()) {
    const when = entry?.time ? new Date(entry.time) : null;
    const timeLabel = when && !Number.isNaN(when.getTime()) ? when.toLocaleTimeString() : '--:--';
    const severityLabel = String(entry?.severity || 'Fault').toUpperCase();
    const reasonLabel = entry?.reason ? ` ${entry.reason}` : '';
    const messageLabel = entry?.message ? ` · ${entry.message}` : '';

    const row = document.createElement('div');
    // Only the fixed ok/warn/err vocabulary reaches the class attribute.
    row.className = `fault-history-entry ${faultSeverityClass(entry?.severity)}`;

    const timeEl = document.createElement('span');
    timeEl.className = 'fault-history-time';
    timeEl.textContent = timeLabel;

    const descEl = document.createElement('span');
    descEl.className = 'fault-history-desc';
    descEl.textContent = `${severityLabel}${reasonLabel}${messageLabel}`;

    row.appendChild(timeEl);
    row.appendChild(descEl);
    fragment.appendChild(row);
  }

  // Clear the previous rows, then attach the new ones in a single insertion.
  container.textContent = '';
  container.appendChild(fragment);
}
/**
 * Set the text of the `#reset-hint` note according to `engine.state`.
 *
 * The state is compared case-insensitively; `faulted` and `muted`/`degraded`
 * each get a specific message, and any other (or missing) state gets the
 * generic one. Does nothing when the hint element is absent.
 *
 * @param {object|null|undefined} engine Engine section of the runtime payload.
 */
function updateResetHint(engine) {
  const hintEl = $('reset-hint');
  if (!hintEl) return;

  switch (String(engine?.state || '').toLowerCase()) {
    case 'faulted':
      hintEl.textContent = 'Faulted: reset moves runtime back to DEGRADED until the queue settles.';
      break;
    case 'muted':
    case 'degraded':
      hintEl.textContent = 'Reset Fault keeps the runtime in DEGRADED so the queue can recover before running again.';
      break;
    default:
      hintEl.textContent = 'Manual fault reset drops runtime to DEGRADED while the queue recovers.';
  }
}
| function updateMeters(engine, driver, audioStream) { | function updateMeters(engine, driver, audioStream) { | ||||
| if (audioStream && typeof audioStream.buffered === 'number') { | if (audioStream && typeof audioStream.buffered === 'number') { | ||||
| const ratio = Math.max(0, Math.min(1, audioStream.buffered)); | const ratio = Math.max(0, Math.min(1, audioStream.buffered)); | ||||