| @@ -113,13 +113,14 @@ type RuntimeTransition struct { | |||||
| } | } | ||||
| const ( | const ( | ||||
| lateBufferIndicatorWindow = 5 * time.Second | |||||
| writeLateTolerance = 1 * time.Millisecond | |||||
| lateBufferIndicatorWindow = 2 * time.Second | |||||
| writeLateTolerance = 10 * time.Millisecond | |||||
| queueCriticalStreakThreshold = 3 | queueCriticalStreakThreshold = 3 | ||||
| queueMutedStreakThreshold = queueCriticalStreakThreshold * 2 | queueMutedStreakThreshold = queueCriticalStreakThreshold * 2 | ||||
| queueMutedRecoveryThreshold = queueCriticalStreakThreshold | queueMutedRecoveryThreshold = queueCriticalStreakThreshold | ||||
| queueFaultedStreakThreshold = queueCriticalStreakThreshold | queueFaultedStreakThreshold = queueCriticalStreakThreshold | ||||
| faultRepeatWindow = 1 * time.Second | faultRepeatWindow = 1 * time.Second | ||||
| lateBufferStreakThreshold = 3 // consecutive late writes required before alerting | |||||
| faultHistoryCapacity = 8 | faultHistoryCapacity = 8 | ||||
| runtimeTransitionHistoryCapacity = 8 | runtimeTransitionHistoryCapacity = 8 | ||||
| ) | ) | ||||
| @@ -150,6 +151,7 @@ type Engine struct { | |||||
| underruns atomic.Uint64 | underruns atomic.Uint64 | ||||
| lateBuffers atomic.Uint64 | lateBuffers atomic.Uint64 | ||||
| lateBufferAlertAt atomic.Uint64 | lateBufferAlertAt atomic.Uint64 | ||||
| lateBufferStreak atomic.Uint64 // consecutive late writes; reset on clean write | |||||
| criticalStreak atomic.Uint64 | criticalStreak atomic.Uint64 | ||||
| mutedRecoveryStreak atomic.Uint64 | mutedRecoveryStreak atomic.Uint64 | ||||
| mutedFaultStreak atomic.Uint64 | mutedFaultStreak atomic.Uint64 | ||||
| @@ -604,12 +606,23 @@ func (e *Engine) writerLoop(ctx context.Context) { | |||||
| lateOver := writeDur - e.chunkDuration | lateOver := writeDur - e.chunkDuration | ||||
| if lateOver > writeLateTolerance { | if lateOver > writeLateTolerance { | ||||
| streak := e.lateBufferStreak.Add(1) | |||||
| late := e.lateBuffers.Add(1) | late := e.lateBuffers.Add(1) | ||||
| e.lateBufferAlertAt.Store(uint64(time.Now().UnixNano())) | |||||
| // Only arm the alert window once the streak threshold is reached. | |||||
| // Isolated OS-scheduling or USB jitter spikes (single late writes) | |||||
| // are normal on a loaded system and must not trigger degraded state. | |||||
| // This mirrors the queue-health streak logic. | |||||
| if streak >= lateBufferStreakThreshold { | |||||
| e.lateBufferAlertAt.Store(uint64(time.Now().UnixNano())) | |||||
| } | |||||
| if late <= 5 || late%20 == 0 { | if late <= 5 || late%20 == 0 { | ||||
| log.Printf("TX LATE: write=%s budget=%s over=%s tolerance=%s queueResidence=%s pipeline=%s", | |||||
| writeDur, e.chunkDuration, lateOver, writeLateTolerance, queueResidence, pipelineLatency) | |||||
| log.Printf("TX LATE [streak=%d]: write=%s budget=%s over=%s tolerance=%s queueResidence=%s pipeline=%s", | |||||
| streak, writeDur, e.chunkDuration, lateOver, writeLateTolerance, queueResidence, pipelineLatency) | |||||
| } | } | ||||
| } else { | |||||
| // Clean write — reset the consecutive streak so isolated spikes | |||||
| // never accumulate toward the threshold. | |||||
| e.lateBufferStreak.Store(0) | |||||
| } | } | ||||
| if err != nil { | if err != nil { | ||||