|
|
|
@@ -97,6 +97,7 @@ const ( |
|
|
|
// Tuning constants: two time windows, two queue-health streak thresholds, and
// one capacity. Only the two streak thresholds have uses visible in this
// excerpt (in evaluateRuntimeState); the notes on the others are assumptions
// to be confirmed against their call sites.
const ( |
|
|
|
// lateBufferIndicatorWindow is a 5-second duration.
// NOTE(review): presumably how long a late-buffer indication is kept active;
// its use is not visible in this excerpt — confirm.
lateBufferIndicatorWindow = 5 * time.Second |
|
|
|
// queueCriticalStreakThreshold is the critical-streak count (3) at or above
// which evaluateRuntimeState records a FaultSeverityDegraded queue-critical
// fault and sets RuntimeStateDegraded.
queueCriticalStreakThreshold = 3 |
|
|
|
// queueMutedStreakThreshold is twice queueCriticalStreakThreshold (6). At or
// above this streak count, evaluateRuntimeState records a FaultSeverityMuted
// queue-critical fault, sets RuntimeStateMuted, and returns before the
// degraded check is reached.
queueMutedStreakThreshold = queueCriticalStreakThreshold * 2 |
|
|
|
// faultRepeatWindow is a 1-second duration.
// NOTE(review): presumably the window within which a repeated fault is
// coalesced or suppressed; its use is not visible in this excerpt — confirm.
faultRepeatWindow = 1 * time.Second |
|
|
|
// faultHistoryCapacity is 8.
// NOTE(review): presumably the maximum number of fault records retained;
// its use is not visible in this excerpt — confirm.
faultHistoryCapacity = 8 |
|
|
|
) |
|
|
|
@@ -666,7 +667,14 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo |
|
|
|
} |
|
|
|
critical := queue.Health == output.QueueHealthCritical |
|
|
|
if critical { |
|
|
|
if e.criticalStreak.Add(1) >= queueCriticalStreakThreshold { |
|
|
|
count := e.criticalStreak.Add(1) |
|
|
|
if count >= queueMutedStreakThreshold { |
|
|
|
e.recordFault(FaultReasonQueueCritical, FaultSeverityMuted, |
|
|
|
fmt.Sprintf("queue health critical for %d consecutive checks (depth=%d)", count, queue.Depth)) |
|
|
|
e.setRuntimeState(RuntimeStateMuted) |
|
|
|
return |
|
|
|
} |
|
|
|
if count >= queueCriticalStreakThreshold { |
|
|
|
e.recordFault(FaultReasonQueueCritical, FaultSeverityDegraded, |
|
|
|
fmt.Sprintf("queue health critical (depth=%d)", queue.Depth)) |
|
|
|
e.setRuntimeState(RuntimeStateDegraded) |
|
|
|
|