| @@ -273,6 +273,7 @@ Einführen eines klaren Betriebsmodells mit Fault-, Recovery- und Muted-Zuständ | |||||
| ## Fortschritt | ## Fortschritt | ||||
| - EngineStats liefert das Runtime-State-Feld (`idle`, `arming`, `prebuffering`, `running`) und reagiert nun auf Queue-Gesundheit bzw. späte Buffer, indem es bei `low`/`critical` oder späten Buffern in `degraded` wechselt und sonst auf `running` zurückkehrt. | - EngineStats liefert das Runtime-State-Feld (`idle`, `arming`, `prebuffering`, `running`) und reagiert nun auf Queue-Gesundheit bzw. späte Buffer, indem es bei `low`/`critical` oder späten Buffern in `degraded` wechselt und sonst auf `running` zurückkehrt. | ||||
| - `evaluateRuntimeState` escalates persistent `critical` queues from `degraded` to `muted`, while `FaultReasonQueueCritical` surfaces `muted` severity so the mute transition stays observable. | |||||
| ## Zielzustände laut Konzept | ## Zielzustände laut Konzept | ||||
| - `idle` | - `idle` | ||||
| @@ -97,6 +97,7 @@ const ( | |||||
const (
	// lateBufferIndicatorWindow is presumably how long a late-buffer event
	// keeps the late indicator active — TODO confirm against the indicator
	// bookkeeping, which is outside this hunk.
	lateBufferIndicatorWindow = 5 * time.Second
	// queueCriticalStreakThreshold is the number of consecutive critical
	// queue-health checks before the engine records a degraded fault and
	// transitions to the degraded runtime state.
	queueCriticalStreakThreshold = 3
	// queueMutedStreakThreshold is the consecutive-critical count at which
	// the engine escalates from degraded to muted; kept as a multiple of
	// the degraded threshold so the two stay in proportion.
	queueMutedStreakThreshold = queueCriticalStreakThreshold * 2
	// faultRepeatWindow presumably suppresses duplicate fault records
	// within this span — verify against recordFault, not visible here.
	faultRepeatWindow = 1 * time.Second
	// faultHistoryCapacity bounds the retained fault history.
	faultHistoryCapacity = 8
)
| @@ -666,7 +667,14 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo | |||||
| } | } | ||||
| critical := queue.Health == output.QueueHealthCritical | critical := queue.Health == output.QueueHealthCritical | ||||
| if critical { | if critical { | ||||
| if e.criticalStreak.Add(1) >= queueCriticalStreakThreshold { | |||||
| count := e.criticalStreak.Add(1) | |||||
| if count >= queueMutedStreakThreshold { | |||||
| e.recordFault(FaultReasonQueueCritical, FaultSeverityMuted, | |||||
| fmt.Sprintf("queue health critical for %d consecutive checks (depth=%d)", count, queue.Depth)) | |||||
| e.setRuntimeState(RuntimeStateMuted) | |||||
| return | |||||
| } | |||||
| if count >= queueCriticalStreakThreshold { | |||||
| e.recordFault(FaultReasonQueueCritical, FaultSeverityDegraded, | e.recordFault(FaultReasonQueueCritical, FaultSeverityDegraded, | ||||
| fmt.Sprintf("queue health critical (depth=%d)", queue.Depth)) | fmt.Sprintf("queue health critical (depth=%d)", queue.Depth)) | ||||
| e.setRuntimeState(RuntimeStateDegraded) | e.setRuntimeState(RuntimeStateDegraded) | ||||
| @@ -55,3 +55,28 @@ func TestEngineRuntimeStateTransitions(t *testing.T) { | |||||
| t.Fatalf("expected degraded when late buffers seen, got %s", got) | t.Fatalf("expected degraded when late buffers seen, got %s", got) | ||||
| } | } | ||||
| } | } | ||||
| func TestEngineRuntimeStateMuteOnPersistentQueueCritical(t *testing.T) { | |||||
| e := NewEngine(cfgpkg.Default(), platform.NewSimulatedDriver(nil)) | |||||
| e.setRuntimeState(RuntimeStateRunning) | |||||
| queue := output.QueueStats{Depth: 1, Health: output.QueueHealthCritical} | |||||
| for i := 0; i < queueMutedStreakThreshold; i++ { | |||||
| e.evaluateRuntimeState(queue, false) | |||||
| } | |||||
| if got := e.currentRuntimeState(); got != RuntimeStateMuted { | |||||
| t.Fatalf("expected muted after prolonged queue critical, got %s", got) | |||||
| } | |||||
| last := e.LastFault() | |||||
| if last == nil { | |||||
| t.Fatal("expected fault recorded for the mute transition") | |||||
| } | |||||
| if last.Reason != FaultReasonQueueCritical { | |||||
| t.Fatalf("expected queue critical reason, got %s", last.Reason) | |||||
| } | |||||
| if last.Severity != FaultSeverityMuted { | |||||
| t.Fatalf("expected muted severity, got %s", last.Severity) | |||||
| } | |||||
| } | |||||