Pārlūkot izejas kodu

Make runtime indicator drop stale late alerts

tags/v0.9.0
Jan Svabenik pirms 1 mēnesi
vecāks
revīzija
38a6cf3d70
3 mainītis faili ar 34 papildinājumiem un 23 dzēšanām
  1. +1
    -1
      docs/API.md
  2. +22
    -15
      internal/app/engine.go
  3. +11
    -7
      internal/app/runtime_indicator_test.go

+ 1
- 1
docs/API.md Parādīt failu

@@ -43,7 +43,7 @@ Current transmitter status (read-only snapshot). Runtime indicator, alert, and q
}
```

`runtimeIndicator` is derived from the engine queue health plus any late buffers and can be "normal", "degraded", or "queueCritical". `runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal", otherwise it stays empty.
`runtimeIndicator` is derived from the engine queue health plus any late buffers observed in the last 5 seconds and can be "normal", "degraded", or "queueCritical". `runtimeAlert` surfaces a short reason (e.g. "queue health low" or "late buffers") when the indicator is not "normal"; a late-buffer alert expires 5 seconds after the last late buffer, so once cycle times settle the signal does not stay stuck on "degraded". The cumulative `lateBuffers` counter returned by `/runtime` still shows how many late cycles have occurred since start, for post-mortem diagnosis.

---



+ 22
- 15
internal/app/engine.go Parādīt failu

@@ -80,6 +80,8 @@ const (
RuntimeIndicatorQueueCritical RuntimeIndicator = "queueCritical"
)

const lateBufferIndicatorWindow = 5 * time.Second

// Engine is the continuous TX loop. It generates composite IQ in chunks,
// resamples to device rate, and pushes to hardware in a tight loop.
// The hardware buffer_push call is blocking — it returns when the hardware
@@ -100,15 +102,16 @@ type Engine struct {
startedAt time.Time
wg sync.WaitGroup

chunksProduced atomic.Uint64
totalSamples atomic.Uint64
underruns atomic.Uint64
lateBuffers atomic.Uint64
maxCycleNs atomic.Uint64
maxGenerateNs atomic.Uint64
maxUpsampleNs atomic.Uint64
maxWriteNs atomic.Uint64
lastError atomic.Value // string
chunksProduced atomic.Uint64
totalSamples atomic.Uint64
underruns atomic.Uint64
lateBuffers atomic.Uint64
lateBufferAlertAt atomic.Uint64
maxCycleNs atomic.Uint64
maxGenerateNs atomic.Uint64
maxUpsampleNs atomic.Uint64
maxWriteNs atomic.Uint64
lastError atomic.Value // string

// Live config: pending frequency change, applied between chunks
pendingFreq atomic.Pointer[float64]
@@ -351,7 +354,10 @@ func (e *Engine) Stats() EngineStats {

queue := e.frameQueue.Stats()
lateBuffers := e.lateBuffers.Load()
ri := runtimeIndicator(queue.Health, lateBuffers)
now := time.Now()
lateAlertAt := e.lateBufferAlertAt.Load()
hasRecentLateBuffers := lateAlertAt > 0 && now.Sub(time.Unix(0, int64(lateAlertAt))) <= lateBufferIndicatorWindow
ri := runtimeIndicator(queue.Health, hasRecentLateBuffers)
return EngineStats{
State: state.String(),
ChunksProduced: e.chunksProduced.Load(),
@@ -366,26 +372,26 @@ func (e *Engine) Stats() EngineStats {
MaxWriteMs: durationMs(e.maxWriteNs.Load()),
Queue: queue,
RuntimeIndicator: ri,
RuntimeAlert: runtimeAlert(queue.Health, lateBuffers),
RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers),
}
}

func runtimeIndicator(queueHealth output.QueueHealth, lateBuffers uint64) RuntimeIndicator {
func runtimeIndicator(queueHealth output.QueueHealth, recentLateBuffers bool) RuntimeIndicator {
switch {
case queueHealth == output.QueueHealthCritical:
return RuntimeIndicatorQueueCritical
case queueHealth == output.QueueHealthLow || lateBuffers > 0:
case queueHealth == output.QueueHealthLow || recentLateBuffers:
return RuntimeIndicatorDegraded
default:
return RuntimeIndicatorNormal
}
}

func runtimeAlert(queueHealth output.QueueHealth, lateBuffers uint64) string {
func runtimeAlert(queueHealth output.QueueHealth, recentLateBuffers bool) string {
switch {
case queueHealth == output.QueueHealthCritical:
return "queue health critical"
case lateBuffers > 0:
case recentLateBuffers:
return "late buffers"
case queueHealth == output.QueueHealthLow:
return "queue health low"
@@ -484,6 +490,7 @@ func (e *Engine) writerLoop(ctx context.Context) {

if cycleDur > e.chunkDuration {
late := e.lateBuffers.Add(1)
e.lateBufferAlertAt.Store(uint64(time.Now().UnixNano()))
if late <= 5 || late%20 == 0 {
log.Printf("TX LATE: cycle=%s budget=%s write=%s over=%s",
cycleDur, e.chunkDuration, writeDur, cycleDur-e.chunkDuration)


+ 11
- 7
internal/app/runtime_indicator_test.go Parādīt failu

@@ -10,35 +10,39 @@ func TestRuntimeIndicatorAndAlert(t *testing.T) {
cases := []struct {
name string
health output.QueueHealth
lateBuffers uint64
recentLate bool
wantIndicator RuntimeIndicator
wantAlert string
}{
{
name: "queue critical",
health: output.QueueHealthCritical,
lateBuffers: 0,
wantIndicator: RuntimeIndicatorQueueCritical,
wantAlert: "queue health critical",
},
{
name: "queue low",
health: output.QueueHealthLow,
lateBuffers: 0,
wantIndicator: RuntimeIndicatorDegraded,
wantAlert: "queue health low",
},
{
name: "late buffers",
health: output.QueueHealthNormal,
lateBuffers: 2,
recentLate: true,
wantIndicator: RuntimeIndicatorDegraded,
wantAlert: "late buffers",
},
{
name: "late buffers override queue low",
health: output.QueueHealthLow,
recentLate: true,
wantIndicator: RuntimeIndicatorDegraded,
wantAlert: "late buffers",
},
{
name: "normal",
health: output.QueueHealthNormal,
lateBuffers: 0,
wantIndicator: RuntimeIndicatorNormal,
wantAlert: "",
},
@@ -48,11 +52,11 @@ func TestRuntimeIndicatorAndAlert(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := runtimeIndicator(tc.health, tc.lateBuffers)
got := runtimeIndicator(tc.health, tc.recentLate)
if got != tc.wantIndicator {
t.Fatalf("indicator: expected %s, got %s", tc.wantIndicator, got)
}
alert := runtimeAlert(tc.health, tc.lateBuffers)
alert := runtimeAlert(tc.health, tc.recentLate)
if alert != tc.wantAlert {
t.Fatalf("alert: expected %q, got %q", tc.wantAlert, alert)
}


Notiek ielāde…
Atcelt
Saglabāt