| @@ -287,3 +287,7 @@ func (b *txBridge) UpdateConfig(lp ctrlpkg.LivePatch) error { | |||||
| RadioText: lp.RadioText, | RadioText: lp.RadioText, | ||||
| }) | }) | ||||
| } | } | ||||
| // ResetFault forwards a manual fault reset to the bridged engine and hands | |||||
| // back whatever error the engine reports. | |||||
| func (b *txBridge) ResetFault() error { | |||||
| err := b.engine.ResetFault() | |||||
| return err | |||||
| } | |||||
| @@ -79,6 +79,22 @@ Live engine and driver telemetry. Only populated when TX is active. | |||||
| --- | --- | ||||
| ### `POST /runtime/fault/reset` | |||||
| Manually acknowledge a `faulted` runtime state so the supervisor can re-enter the recovery path (the engine moves back to `degraded` once the reset succeeds). | |||||
| **Response:** | |||||
| ```json | |||||
| {"ok": true} | |||||
| ``` | |||||
| **Errors:** | |||||
| - `405 Method Not Allowed` if the request is not a POST | |||||
| - `503 Service Unavailable` when no TX controller is attached (`--tx` mode not active) | |||||
| - `409 Conflict` when the engine is not currently `faulted` or the controller otherwise rejects the reset; the response body carries the controller's error message | |||||
| --- | |||||
| ### `GET /config` | ### `GET /config` | ||||
| Full current configuration (all fields, including non-patchable). | Full current configuration (all fields, including non-patchable). | ||||
| @@ -278,6 +278,7 @@ Einführen eines klaren Betriebsmodells mit Fault-, Recovery- und Muted-Zuständ | |||||
| - Persistent queue-critical streaks while `muted` now escalate to `faulted` with `FaultSeverityFaulted`, keeping `RuntimeStateFaulted` observable. | - Persistent queue-critical streaks while `muted` now escalate to `faulted` with `FaultSeverityFaulted`, keeping `RuntimeStateFaulted` observable. | ||||
| - `EngineStats` and `txBridge` now expose transition/fault counters plus `lastFault`, surfacing the new telemetry through `/runtime`. | - `EngineStats` and `txBridge` now expose transition/fault counters plus `lastFault`, surfacing the new telemetry through `/runtime`. | ||||
| - Control-plane UI now renders those WS-02 transition counters, fault count, and last-fault summary so operators can watch runtime escalations without digging through logs. | - Control-plane UI now renders those WS-02 transition counters, fault count, and last-fault summary so operators can watch runtime escalations without digging through logs. | ||||
| - Control-plane now exposes `POST /runtime/fault/reset` so operators can acknowledge `faulted` state; `TestRuntimeFaultReset*` covers the new HTTP path. | |||||
| ## Zielzustände laut Konzept | ## Zielzustände laut Konzept | ||||
| @@ -327,11 +328,13 @@ Einführen eines klaren Betriebsmodells mit Fault-, Recovery- und Muted-Zuständ | |||||
| | Datum | Entscheidung | Notiz | | | Datum | Entscheidung | Notiz | | ||||
| |---|---|---| | |---|---|---| | ||||
| | 2026-04-05 | Faulted escalation on persistent critical queue | `muted` now surfaces `RuntimeStateFaulted` when queue health stays critical and metrics capture every transition. | | | 2026-04-05 | Faulted escalation on persistent critical queue | `muted` now surfaces `RuntimeStateFaulted` when queue health stays critical and metrics capture every transition. | | ||||
| | 2026-04-05 | Manual fault reset endpoint | Added `POST /runtime/fault/reset` so operators can acknowledge `faulted` before the supervisor re-enters recovery. | | |||||
| ## WS-02 Verifikation | ## WS-02 Verifikation | ||||
| | Datum | Fokus | Ergebnis | | | Datum | Fokus | Ergebnis | | ||||
| |---|---|---| | |---|---|---| | ||||
| | 2026-04-05 | Faulted path + transition counters | `go test ./...` exercises `TestEngineFaultsAfterMutedCriticalStreak` and `TestRuntimeTransitionCounters`, while `/runtime` now surfaces `engine.degradedTransitions`, `engine.mutedTransitions`, `engine.faultedTransitions`, `engine.faultCount`, and the last fault via `txBridge`. | | | 2026-04-05 | Faulted path + transition counters | `go test ./...` exercises `TestEngineFaultsAfterMutedCriticalStreak` and `TestRuntimeTransitionCounters`, while `/runtime` now surfaces `engine.degradedTransitions`, `engine.mutedTransitions`, `engine.faultedTransitions`, `engine.faultCount`, and the last fault via `txBridge`. | | ||||
| | 2026-04-05 | Runtime fault reset API | `go test ./...` now runs `TestRuntimeFaultReset*`, verifying the new HTTP path and controller error scenarios. | | |||||
| --- | --- | ||||
| @@ -744,3 +744,17 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo | |||||
| } | } | ||||
| e.setRuntimeState(RuntimeStateRunning) | e.setRuntimeState(RuntimeStateRunning) | ||||
| } | } | ||||
| // ResetFault acknowledges a faulted engine. When the current runtime state is | |||||
| // RuntimeStateFaulted it zeroes criticalStreak, mutedRecoveryStreak and | |||||
| // mutedFaultStreak and sets the state to RuntimeStateDegraded. In any other | |||||
| // state nothing is changed and an error naming the current state is returned. | |||||
| func (e *Engine) ResetFault() error { | |||||
| switch current := e.currentRuntimeState(); current { | |||||
| case RuntimeStateFaulted: | |||||
| // Clear the streak counters before leaving the faulted state. | |||||
| e.criticalStreak.Store(0) | |||||
| e.mutedRecoveryStreak.Store(0) | |||||
| e.mutedFaultStreak.Store(0) | |||||
| e.setRuntimeState(RuntimeStateDegraded) | |||||
| return nil | |||||
| default: | |||||
| return fmt.Errorf("engine not in faulted state (current=%s)", current) | |||||
| } | |||||
| } | |||||
| @@ -179,3 +179,37 @@ func TestRuntimeTransitionCounters(t *testing.T) { | |||||
| t.Fatalf("expected one recorded fault, got %d", got) | t.Fatalf("expected one recorded fault, got %d", got) | ||||
| } | } | ||||
| } | } | ||||
| // TestEngineResetFaultRequiresFaultedState expects ResetFault to return an | |||||
| // error on a newly constructed engine that was never put into the faulted state. | |||||
| func TestEngineResetFaultRequiresFaultedState(t *testing.T) { | |||||
| engine := NewEngine(cfgpkg.Default(), platform.NewSimulatedDriver(nil)) | |||||
| err := engine.ResetFault() | |||||
| if err != nil { | |||||
| return | |||||
| } | |||||
| t.Fatal("expected error when resetting non-faulted state") | |||||
| } | |||||
| // TestEngineResetFaultTransitionsToDegraded seeds non-zero streak counters, | |||||
| // forces the faulted state, and then checks that ResetFault succeeds, leaves | |||||
| // the engine degraded with all three streaks at zero, and refuses a second reset. | |||||
| func TestEngineResetFaultTransitionsToDegraded(t *testing.T) { | |||||
| engine := NewEngine(cfgpkg.Default(), platform.NewSimulatedDriver(nil)) | |||||
| // Arrange: non-zero streaks so the reset has something to clear. | |||||
| engine.criticalStreak.Store(7) | |||||
| engine.mutedRecoveryStreak.Store(3) | |||||
| engine.mutedFaultStreak.Store(1) | |||||
| engine.setRuntimeState(RuntimeStateFaulted) | |||||
| resetErr := engine.ResetFault() | |||||
| if resetErr != nil { | |||||
| t.Fatalf("reset fault failed: %v", resetErr) | |||||
| } | |||||
| if state := engine.currentRuntimeState(); state != RuntimeStateDegraded { | |||||
| t.Fatalf("expected degraded after reset, got %s", state) | |||||
| } | |||||
| if n := engine.criticalStreak.Load(); n != 0 { | |||||
| t.Fatalf("expected critical streak reset, got %d", n) | |||||
| } | |||||
| if n := engine.mutedRecoveryStreak.Load(); n != 0 { | |||||
| t.Fatalf("expected mute recovery streak reset, got %d", n) | |||||
| } | |||||
| if n := engine.mutedFaultStreak.Load(); n != 0 { | |||||
| t.Fatalf("expected mute fault streak reset, got %d", n) | |||||
| } | |||||
| // The engine is degraded now, so a second reset must be rejected. | |||||
| if again := engine.ResetFault(); again == nil { | |||||
| t.Fatal("expected error when resetting after recovery") | |||||
| } | |||||
| } | |||||
| @@ -23,6 +23,7 @@ type TXController interface { | |||||
| StopTX() error | StopTX() error | ||||
| TXStats() map[string]any | TXStats() map[string]any | ||||
| UpdateConfig(patch LivePatch) error | UpdateConfig(patch LivePatch) error | ||||
| ResetFault() error | |||||
| } | } | ||||
| // LivePatch mirrors the patchable fields from ConfigPatch for the engine. | // LivePatch mirrors the patchable fields from ConfigPatch for the engine. | ||||
| @@ -95,6 +96,7 @@ func (s *Server) Handler() http.Handler { | |||||
| mux.HandleFunc("/dry-run", s.handleDryRun) | mux.HandleFunc("/dry-run", s.handleDryRun) | ||||
| mux.HandleFunc("/config", s.handleConfig) | mux.HandleFunc("/config", s.handleConfig) | ||||
| mux.HandleFunc("/runtime", s.handleRuntime) | mux.HandleFunc("/runtime", s.handleRuntime) | ||||
| mux.HandleFunc("/runtime/fault/reset", s.handleRuntimeFaultReset) | |||||
| mux.HandleFunc("/tx/start", s.handleTXStart) | mux.HandleFunc("/tx/start", s.handleTXStart) | ||||
| mux.HandleFunc("/tx/stop", s.handleTXStop) | mux.HandleFunc("/tx/stop", s.handleTXStop) | ||||
| mux.HandleFunc("/audio/stream", s.handleAudioStream) | mux.HandleFunc("/audio/stream", s.handleAudioStream) | ||||
| @@ -171,6 +173,26 @@ func (s *Server) handleRuntime(w http.ResponseWriter, _ *http.Request) { | |||||
| _ = json.NewEncoder(w).Encode(result) | _ = json.NewEncoder(w).Encode(result) | ||||
| } | } | ||||
| // handleRuntimeFaultReset serves POST /runtime/fault/reset. It asks the | |||||
| // attached TX controller to reset a fault and answers {"ok": true} on success. | |||||
| // | |||||
| // Responses: | |||||
| //   - 405 with an "Allow: POST" header for any other method | |||||
| //   - 503 when no TX controller is attached | |||||
| //   - 409 carrying the controller's error text when ResetFault fails | |||||
| //   - 200 with a JSON body otherwise | |||||
| func (s *Server) handleRuntimeFaultReset(w http.ResponseWriter, r *http.Request) { | |||||
| if r.Method != http.MethodPost { | |||||
| // RFC 9110 requires a 405 response to list the supported methods. | |||||
| w.Header().Set("Allow", http.MethodPost) | |||||
| http.Error(w, "method not allowed", http.StatusMethodNotAllowed) | |||||
| return | |||||
| } | |||||
| // Copy the controller under the read lock; the reset runs without s.mu held. | |||||
| s.mu.RLock() | |||||
| tx := s.tx | |||||
| s.mu.RUnlock() | |||||
| if tx == nil { | |||||
| http.Error(w, "tx controller not available", http.StatusServiceUnavailable) | |||||
| return | |||||
| } | |||||
| if err := tx.ResetFault(); err != nil { | |||||
| http.Error(w, err.Error(), http.StatusConflict) | |||||
| return | |||||
| } | |||||
| w.Header().Set("Content-Type", "application/json") | |||||
| // The encode error is ignored on purpose: a failed write means the client | |||||
| // is gone and there is nothing left to report to it. | |||||
| _ = json.NewEncoder(w).Encode(map[string]any{"ok": true}) | |||||
| } | |||||
| // handleAudioStream accepts raw S16LE stereo PCM via HTTP POST and pushes | // handleAudioStream accepts raw S16LE stereo PCM via HTTP POST and pushes | ||||
| // it into the live audio ring buffer. Use with: | // it into the live audio ring buffer. Use with: | ||||
| // curl -X POST --data-binary @- http://host:8088/audio/stream < audio.raw | // curl -X POST --data-binary @- http://host:8088/audio/stream < audio.raw | ||||
| @@ -125,6 +125,55 @@ func TestRuntimeWithoutDriver(t *testing.T) { | |||||
| } | } | ||||
| } | } | ||||
| // TestRuntimeFaultResetRejectsGet checks that a GET on the fault reset | |||||
| // endpoint is answered with 405 Method Not Allowed. | |||||
| func TestRuntimeFaultResetRejectsGet(t *testing.T) { | |||||
| handler := NewServer(cfgpkg.Default()).Handler() | |||||
| resp := httptest.NewRecorder() | |||||
| handler.ServeHTTP(resp, httptest.NewRequest(http.MethodGet, "/runtime/fault/reset", nil)) | |||||
| if got := resp.Code; got != http.StatusMethodNotAllowed { | |||||
| t.Fatalf("expected 405 for fault reset GET, got %d", got) | |||||
| } | |||||
| } | |||||
| // TestRuntimeFaultResetRequiresController checks that a POST without an | |||||
| // attached TX controller is answered with 503 Service Unavailable. | |||||
| func TestRuntimeFaultResetRequiresController(t *testing.T) { | |||||
| handler := NewServer(cfgpkg.Default()).Handler() | |||||
| resp := httptest.NewRecorder() | |||||
| handler.ServeHTTP(resp, httptest.NewRequest(http.MethodPost, "/runtime/fault/reset", nil)) | |||||
| if got := resp.Code; got != http.StatusServiceUnavailable { | |||||
| t.Fatalf("expected 503 without controller, got %d", got) | |||||
| } | |||||
| } | |||||
| // TestRuntimeFaultResetControllerError checks that an error returned by the | |||||
| // controller's ResetFault is reported to the client as 409 Conflict. | |||||
| func TestRuntimeFaultResetControllerError(t *testing.T) { | |||||
| server := NewServer(cfgpkg.Default()) | |||||
| failing := &fakeTXController{resetErr: errors.New("boom")} | |||||
| server.SetTXController(failing) | |||||
| resp := httptest.NewRecorder() | |||||
| server.Handler().ServeHTTP(resp, httptest.NewRequest(http.MethodPost, "/runtime/fault/reset", nil)) | |||||
| if got := resp.Code; got != http.StatusConflict { | |||||
| t.Fatalf("expected 409 when controller rejects, got %d", got) | |||||
| } | |||||
| } | |||||
| // TestRuntimeFaultResetSuccess verifies that a POST with an accepting | |||||
| // controller yields 200, a JSON content type, and a body whose "ok" field is true. | |||||
| func TestRuntimeFaultResetSuccess(t *testing.T) { | |||||
| srv := NewServer(cfgpkg.Default()) | |||||
| srv.SetTXController(&fakeTXController{}) | |||||
| rec := httptest.NewRecorder() | |||||
| req := httptest.NewRequest(http.MethodPost, "/runtime/fault/reset", nil) | |||||
| srv.Handler().ServeHTTP(rec, req) | |||||
| // Use the named constant, matching the other status checks in this file. | |||||
| if rec.Code != http.StatusOK { | |||||
| t.Fatalf("expected 200 on success, got %d", rec.Code) | |||||
| } | |||||
| // The handler declares a JSON body; pin that so clients can rely on it. | |||||
| if ct := rec.Header().Get("Content-Type"); ct != "application/json" { | |||||
| t.Fatalf("expected application/json content type, got %q", ct) | |||||
| } | |||||
| var body map[string]any | |||||
| if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { | |||||
| t.Fatalf("unmarshal response: %v", err) | |||||
| } | |||||
| if ok, _ := body["ok"].(bool); !ok { | |||||
| t.Fatalf("expected ok true, got %v", body["ok"]) | |||||
| } | |||||
| } | |||||
| func TestAudioStreamRequiresSource(t *testing.T) { | func TestAudioStreamRequiresSource(t *testing.T) { | ||||
| srv := NewServer(cfgpkg.Default()) | srv := NewServer(cfgpkg.Default()) | ||||
| rec := httptest.NewRecorder() | rec := httptest.NewRecorder() | ||||
| @@ -239,6 +288,7 @@ func TestConfigPatchEngineRejectsDoesNotUpdateSnapshot(t *testing.T) { | |||||
| // fakeTXController is a test double handed to Server.SetTXController; its | |||||
| // fields hold the canned results its methods return. | |||||
| type fakeTXController struct { | type fakeTXController struct { | ||||
| // updateErr is the error UpdateConfig returns (nil means success). | |||||
| updateErr error | updateErr error | ||||
| // resetErr is the error ResetFault returns (nil means success). | |||||
| resetErr error | |||||
| // stats is presumably the canned TXStats payload; only the empty-map | |||||
| // fallback of TXStats is visible here — TODO confirm. | |||||
| stats map[string]any | stats map[string]any | ||||
| } | } | ||||
| @@ -251,3 +301,4 @@ func (f *fakeTXController) TXStats() map[string]any { | |||||
| return map[string]any{} | return map[string]any{} | ||||
| } | } | ||||
| // UpdateConfig ignores the patch and returns the configured updateErr. | |||||
| func (f *fakeTXController) UpdateConfig(_ LivePatch) error { return f.updateErr } | func (f *fakeTXController) UpdateConfig(_ LivePatch) error { return f.updateErr } | ||||
| // ResetFault returns the configured resetErr, letting a test choose whether | |||||
| // the reset succeeds (nil) or is rejected. | |||||
| func (f *fakeTXController) ResetFault() error { | |||||
| return f.resetErr | |||||
| } | |||||