diff --git a/docs/API.md b/docs/API.md index b8513f3..97742f3 100644 --- a/docs/API.md +++ b/docs/API.md @@ -43,7 +43,7 @@ Current transmitter status (read-only snapshot). Runtime indicator, alert, and q } ``` -`runtimeIndicator` is derived from the engine queue health plus any late buffers and can be "normal", "degraded", or "queueCritical". `runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal", otherwise it stays empty. +`runtimeIndicator` is derived from the engine queue health plus any late buffers observed in the last 5 seconds and can be "normal", "degraded", or "queueCritical". `runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal"; otherwise it stays empty. Late-buffer alerts expire 5 seconds after the most recent late cycle, so the signal doesn't stay stuck on "degraded" once cycle times settle. The cumulative `lateBuffers` counter returned by `/runtime` still shows how many late cycles have occurred since start for post-mortem diagnosis. --- diff --git a/internal/app/engine.go b/internal/app/engine.go index 008616d..a4836e9 100644 --- a/internal/app/engine.go +++ b/internal/app/engine.go @@ -80,6 +80,8 @@ const ( RuntimeIndicatorQueueCritical RuntimeIndicator = "queueCritical" ) +const lateBufferIndicatorWindow = 5 * time.Second + // Engine is the continuous TX loop. It generates composite IQ in chunks, // resamples to device rate, and pushes to hardware in a tight loop. 
// The hardware buffer_push call is blocking — it returns when the hardware @@ -100,15 +102,16 @@ type Engine struct { startedAt time.Time wg sync.WaitGroup - chunksProduced atomic.Uint64 - totalSamples atomic.Uint64 - underruns atomic.Uint64 - lateBuffers atomic.Uint64 - maxCycleNs atomic.Uint64 - maxGenerateNs atomic.Uint64 - maxUpsampleNs atomic.Uint64 - maxWriteNs atomic.Uint64 - lastError atomic.Value // string + chunksProduced atomic.Uint64 + totalSamples atomic.Uint64 + underruns atomic.Uint64 + lateBuffers atomic.Uint64 + lateBufferAlertAt atomic.Uint64 + maxCycleNs atomic.Uint64 + maxGenerateNs atomic.Uint64 + maxUpsampleNs atomic.Uint64 + maxWriteNs atomic.Uint64 + lastError atomic.Value // string // Live config: pending frequency change, applied between chunks pendingFreq atomic.Pointer[float64] @@ -351,7 +354,10 @@ func (e *Engine) Stats() EngineStats { queue := e.frameQueue.Stats() lateBuffers := e.lateBuffers.Load() - ri := runtimeIndicator(queue.Health, lateBuffers) + now := time.Now() + lateAlertAt := e.lateBufferAlertAt.Load() + hasRecentLateBuffers := lateAlertAt > 0 && now.Sub(time.Unix(0, int64(lateAlertAt))) <= lateBufferIndicatorWindow + ri := runtimeIndicator(queue.Health, hasRecentLateBuffers) return EngineStats{ State: state.String(), ChunksProduced: e.chunksProduced.Load(), @@ -366,26 +372,26 @@ func (e *Engine) Stats() EngineStats { MaxWriteMs: durationMs(e.maxWriteNs.Load()), Queue: queue, RuntimeIndicator: ri, - RuntimeAlert: runtimeAlert(queue.Health, lateBuffers), + RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers), } } -func runtimeIndicator(queueHealth output.QueueHealth, lateBuffers uint64) RuntimeIndicator { +func runtimeIndicator(queueHealth output.QueueHealth, recentLateBuffers bool) RuntimeIndicator { switch { case queueHealth == output.QueueHealthCritical: return RuntimeIndicatorQueueCritical - case queueHealth == output.QueueHealthLow || lateBuffers > 0: + case queueHealth == output.QueueHealthLow || 
recentLateBuffers: return RuntimeIndicatorDegraded default: return RuntimeIndicatorNormal } } -func runtimeAlert(queueHealth output.QueueHealth, lateBuffers uint64) string { +func runtimeAlert(queueHealth output.QueueHealth, recentLateBuffers bool) string { switch { case queueHealth == output.QueueHealthCritical: return "queue health critical" - case lateBuffers > 0: + case recentLateBuffers: return "late buffers" case queueHealth == output.QueueHealthLow: return "queue health low" @@ -484,6 +490,7 @@ func (e *Engine) writerLoop(ctx context.Context) { if cycleDur > e.chunkDuration { late := e.lateBuffers.Add(1) + e.lateBufferAlertAt.Store(uint64(time.Now().UnixNano())) if late <= 5 || late%20 == 0 { log.Printf("TX LATE: cycle=%s budget=%s write=%s over=%s", cycleDur, e.chunkDuration, writeDur, cycleDur-e.chunkDuration) diff --git a/internal/app/runtime_indicator_test.go b/internal/app/runtime_indicator_test.go index 22ca93b..c27eb63 100644 --- a/internal/app/runtime_indicator_test.go +++ b/internal/app/runtime_indicator_test.go @@ -10,35 +10,39 @@ func TestRuntimeIndicatorAndAlert(t *testing.T) { cases := []struct { name string health output.QueueHealth - lateBuffers uint64 + recentLate bool wantIndicator RuntimeIndicator wantAlert string }{ { name: "queue critical", health: output.QueueHealthCritical, - lateBuffers: 0, wantIndicator: RuntimeIndicatorQueueCritical, wantAlert: "queue health critical", }, { name: "queue low", health: output.QueueHealthLow, - lateBuffers: 0, wantIndicator: RuntimeIndicatorDegraded, wantAlert: "queue health low", }, { name: "late buffers", health: output.QueueHealthNormal, - lateBuffers: 2, + recentLate: true, + wantIndicator: RuntimeIndicatorDegraded, + wantAlert: "late buffers", + }, + { + name: "late buffers override queue low", + health: output.QueueHealthLow, + recentLate: true, wantIndicator: RuntimeIndicatorDegraded, wantAlert: "late buffers", }, { name: "normal", health: output.QueueHealthNormal, - lateBuffers: 0, 
wantIndicator: RuntimeIndicatorNormal, wantAlert: "", }, @@ -48,11 +52,11 @@ func TestRuntimeIndicatorAndAlert(t *testing.T) { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() - got := runtimeIndicator(tc.health, tc.lateBuffers) + got := runtimeIndicator(tc.health, tc.recentLate) if got != tc.wantIndicator { t.Fatalf("indicator: expected %s, got %s", tc.wantIndicator, got) } - alert := runtimeAlert(tc.health, tc.lateBuffers) + alert := runtimeAlert(tc.health, tc.recentLate) if alert != tc.wantAlert { t.Fatalf("alert: expected %q, got %q", tc.wantAlert, alert) }