Преглед изворни кода

Add muted transition for persistent queue-critical

tags/v0.9.0
Jan Svabenik пре 1 месец
родитељ
комит
5d60f20f91
3 измењених фајлова са 35 додато и 1 уклоњено
  1. +1
    -0
      docs/pro-runtime-hardening-workboard.md
  2. +9
    -1
      internal/app/engine.go
  3. +25
    -0
      internal/app/runtime_state_test.go

+ 1
- 0
docs/pro-runtime-hardening-workboard.md Прегледај датотеку

@@ -273,6 +273,7 @@ Einführen eines klaren Betriebsmodells mit Fault-, Recovery- und Muted-Zuständ

## Fortschritt
- EngineStats liefert das Runtime-State-Feld (`idle`, `arming`, `prebuffering`, `running`) und reagiert nun auf Queue-Gesundheit bzw. späte Buffers, indem es bei `low`/`critical` oder späten Buffern in `degraded` wechselt und sonst auf `running` zurückkehrt.
- `evaluateRuntimeState` escalates persistent `critical` queues from `degraded` to `muted`, while `FaultReasonQueueCritical` surfaces `muted` severity so the mute transition stays observable.

## Zielzustände laut Konzept
- `idle`


+ 9
- 1
internal/app/engine.go Прегледај датотеку

@@ -97,6 +97,7 @@ const (
// Tuning constants for runtime-state evaluation and fault bookkeeping.
const (
// NOTE(review): usage is not visible in this hunk; presumably the time span
// during which a late buffer still counts as "recently seen" — confirm.
lateBufferIndicatorWindow = 5 * time.Second
// Number of consecutive critical queue checks at which evaluateRuntimeState
// records a degraded-severity queue-critical fault and switches the engine
// to RuntimeStateDegraded.
queueCriticalStreakThreshold = 3
// Number of consecutive critical queue checks at which evaluateRuntimeState
// escalates to RuntimeStateMuted with a muted-severity fault. Defined as
// twice the degraded threshold, so the muted check fires strictly later.
queueMutedStreakThreshold = queueCriticalStreakThreshold * 2
// NOTE(review): usage is not visible in this hunk; presumably the window in
// which a repeated fault is treated as a duplicate — confirm.
faultRepeatWindow = 1 * time.Second
// NOTE(review): usage is not visible in this hunk; presumably the maximum
// number of fault records kept in history — confirm.
faultHistoryCapacity = 8
)
@@ -666,7 +667,14 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo
}
critical := queue.Health == output.QueueHealthCritical
if critical {
if e.criticalStreak.Add(1) >= queueCriticalStreakThreshold {
count := e.criticalStreak.Add(1)
if count >= queueMutedStreakThreshold {
e.recordFault(FaultReasonQueueCritical, FaultSeverityMuted,
fmt.Sprintf("queue health critical for %d consecutive checks (depth=%d)", count, queue.Depth))
e.setRuntimeState(RuntimeStateMuted)
return
}
if count >= queueCriticalStreakThreshold {
e.recordFault(FaultReasonQueueCritical, FaultSeverityDegraded,
fmt.Sprintf("queue health critical (depth=%d)", queue.Depth))
e.setRuntimeState(RuntimeStateDegraded)


+ 25
- 0
internal/app/runtime_state_test.go Прегледај датотеку

@@ -55,3 +55,28 @@ func TestEngineRuntimeStateTransitions(t *testing.T) {
t.Fatalf("expected degraded when late buffers seen, got %s", got)
}
}

// TestEngineRuntimeStateMuteOnPersistentQueueCritical starts from the running
// state, feeds queueMutedStreakThreshold consecutive critical queue samples
// into evaluateRuntimeState, and expects the engine to end up muted with the
// most recent fault carrying the queue-critical reason at muted severity.
func TestEngineRuntimeStateMuteOnPersistentQueueCritical(t *testing.T) {
	eng := NewEngine(cfgpkg.Default(), platform.NewSimulatedDriver(nil))
	eng.setRuntimeState(RuntimeStateRunning)

	// The same critical sample is replayed on every check; no late buffers
	// are reported, so only the queue health drives the transition.
	criticalSample := output.QueueStats{Depth: 1, Health: output.QueueHealthCritical}
	for remaining := queueMutedStreakThreshold; remaining > 0; remaining-- {
		eng.evaluateRuntimeState(criticalSample, false)
	}

	state := eng.currentRuntimeState()
	if state != RuntimeStateMuted {
		t.Fatalf("expected muted after prolonged queue critical, got %s", state)
	}

	// Checks run in order and stop at the first failure, so the nil guard
	// always precedes any field access on the fault.
	fault := eng.LastFault()
	switch {
	case fault == nil:
		t.Fatal("expected fault recorded for the mute transition")
	case fault.Reason != FaultReasonQueueCritical:
		t.Fatalf("expected queue critical reason, got %s", fault.Reason)
	case fault.Severity != FaultSeverityMuted:
		t.Fatalf("expected muted severity, got %s", fault.Severity)
	}
}

Loading…
Откажи
Сачувај