|
|
@@ -83,6 +83,7 @@ type EngineStats struct { |
|
|
Queue output.QueueStats `json:"queue"` |
|
|
Queue output.QueueStats `json:"queue"` |
|
|
RuntimeIndicator RuntimeIndicator `json:"runtimeIndicator"` |
|
|
RuntimeIndicator RuntimeIndicator `json:"runtimeIndicator"` |
|
|
RuntimeAlert string `json:"runtimeAlert,omitempty"` |
|
|
RuntimeAlert string `json:"runtimeAlert,omitempty"` |
|
|
|
|
|
LastFault *FaultEvent `json:"lastFault,omitempty"` |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// RuntimeIndicator is a coarse runtime-health flag surfaced in EngineStats
// (see the RuntimeIndicator field and constants such as
// RuntimeIndicatorQueueCritical).
type RuntimeIndicator string
|
|
type RuntimeIndicator string |
|
|
@@ -93,8 +94,12 @@ const ( |
|
|
RuntimeIndicatorQueueCritical RuntimeIndicator = "queueCritical" |
|
|
RuntimeIndicatorQueueCritical RuntimeIndicator = "queueCritical" |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
const lateBufferIndicatorWindow = 5 * time.Second |
|
|
|
|
|
const queueCriticalStreakThreshold = 3 |
|
|
|
|
|
|
|
|
const (
	// lateBufferIndicatorWindow is how long after the last late-buffer
	// alert hasRecentLateBuffers keeps reporting true.
	lateBufferIndicatorWindow = 5 * time.Second

	// queueCriticalStreakThreshold is the number of consecutive critical
	// queue-health observations before the engine degrades its runtime state.
	queueCriticalStreakThreshold = 3

	// faultRepeatWindow suppresses duplicate fault events: a fault with the
	// same reason and severity arriving within this window is dropped.
	faultRepeatWindow = 1 * time.Second

	// faultHistoryCapacity bounds the in-memory fault ring kept by
	// appendFaultHistory; the oldest entry is evicted when full.
	faultHistoryCapacity = 8
)
|
|
|
|
|
|
|
|
// Engine is the continuous TX loop. It generates composite IQ in chunks, |
|
|
// Engine is the continuous TX loop. It generates composite IQ in chunks, |
|
|
// resamples to device rate, and pushes to hardware in a tight loop. |
|
|
// resamples to device rate, and pushes to hardware in a tight loop. |
|
|
@@ -128,6 +133,9 @@ type Engine struct { |
|
|
maxUpsampleNs atomic.Uint64 |
|
|
maxUpsampleNs atomic.Uint64 |
|
|
maxWriteNs atomic.Uint64 |
|
|
maxWriteNs atomic.Uint64 |
|
|
lastError atomic.Value // string |
|
|
lastError atomic.Value // string |
|
|
|
|
|
lastFault atomic.Value // *FaultEvent |
|
|
|
|
|
faultHistoryMu sync.Mutex |
|
|
|
|
|
faultHistory []FaultEvent |
|
|
|
|
|
|
|
|
// Live config: pending frequency change, applied between chunks |
|
|
// Live config: pending frequency change, applied between chunks |
|
|
pendingFreq atomic.Pointer[float64] |
|
|
pendingFreq atomic.Pointer[float64] |
|
|
@@ -202,6 +210,7 @@ func NewEngine(cfg cfgpkg.Config, driver platform.SoapyDriver) *Engine { |
|
|
deviceRate: deviceRate, |
|
|
deviceRate: deviceRate, |
|
|
state: EngineIdle, |
|
|
state: EngineIdle, |
|
|
frameQueue: output.NewFrameQueue(cfg.Runtime.FrameQueueCapacity), |
|
|
frameQueue: output.NewFrameQueue(cfg.Runtime.FrameQueueCapacity), |
|
|
|
|
|
faultHistory: make([]FaultEvent, 0, faultHistoryCapacity), |
|
|
} |
|
|
} |
|
|
engine.setRuntimeState(RuntimeStateIdle) |
|
|
engine.setRuntimeState(RuntimeStateIdle) |
|
|
return engine |
|
|
return engine |
|
|
@@ -377,6 +386,7 @@ func (e *Engine) Stats() EngineStats { |
|
|
lateBuffers := e.lateBuffers.Load() |
|
|
lateBuffers := e.lateBuffers.Load() |
|
|
hasRecentLateBuffers := e.hasRecentLateBuffers() |
|
|
hasRecentLateBuffers := e.hasRecentLateBuffers() |
|
|
ri := runtimeIndicator(queue.Health, hasRecentLateBuffers) |
|
|
ri := runtimeIndicator(queue.Health, hasRecentLateBuffers) |
|
|
|
|
|
lastFault := e.lastFaultEvent() |
|
|
return EngineStats{ |
|
|
return EngineStats{ |
|
|
State: string(e.currentRuntimeState()), |
|
|
State: string(e.currentRuntimeState()), |
|
|
ChunksProduced: e.chunksProduced.Load(), |
|
|
ChunksProduced: e.chunksProduced.Load(), |
|
|
@@ -392,6 +402,7 @@ func (e *Engine) Stats() EngineStats { |
|
|
Queue: queue, |
|
|
Queue: queue, |
|
|
RuntimeIndicator: ri, |
|
|
RuntimeIndicator: ri, |
|
|
RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers), |
|
|
RuntimeAlert: runtimeAlert(queue.Health, hasRecentLateBuffers), |
|
|
|
|
|
LastFault: lastFault, |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@@ -576,6 +587,71 @@ func (e *Engine) hasRecentLateBuffers() bool { |
|
|
return time.Since(time.Unix(0, int64(lateAlertAt))) <= lateBufferIndicatorWindow |
|
|
return time.Since(time.Unix(0, int64(lateAlertAt))) <= lateBufferIndicatorWindow |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (e *Engine) lastFaultEvent() *FaultEvent { |
|
|
|
|
|
return copyFaultEvent(e.loadLastFault()) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// LastFault exposes the most recent captured fault, if any. |
|
|
|
|
|
func (e *Engine) LastFault() *FaultEvent { |
|
|
|
|
|
return e.lastFaultEvent() |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (e *Engine) FaultHistory() []FaultEvent { |
|
|
|
|
|
e.faultHistoryMu.Lock() |
|
|
|
|
|
defer e.faultHistoryMu.Unlock() |
|
|
|
|
|
history := make([]FaultEvent, len(e.faultHistory)) |
|
|
|
|
|
copy(history, e.faultHistory) |
|
|
|
|
|
return history |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (e *Engine) recordFault(reason FaultReason, severity FaultSeverity, message string) { |
|
|
|
|
|
if reason == "" { |
|
|
|
|
|
reason = FaultReasonUnknown |
|
|
|
|
|
} |
|
|
|
|
|
now := time.Now() |
|
|
|
|
|
if last := e.loadLastFault(); last != nil { |
|
|
|
|
|
if last.Reason == reason && last.Severity == severity && now.Sub(last.Time) < faultRepeatWindow { |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
ev := &FaultEvent{ |
|
|
|
|
|
Time: now, |
|
|
|
|
|
Reason: reason, |
|
|
|
|
|
Severity: severity, |
|
|
|
|
|
Message: message, |
|
|
|
|
|
} |
|
|
|
|
|
e.lastFault.Store(ev) |
|
|
|
|
|
e.appendFaultHistory(ev) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (e *Engine) loadLastFault() *FaultEvent { |
|
|
|
|
|
if v := e.lastFault.Load(); v != nil { |
|
|
|
|
|
if ev, ok := v.(*FaultEvent); ok { |
|
|
|
|
|
return ev |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
return nil |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func copyFaultEvent(source *FaultEvent) *FaultEvent { |
|
|
|
|
|
if source == nil { |
|
|
|
|
|
return nil |
|
|
|
|
|
} |
|
|
|
|
|
copy := *source |
|
|
|
|
|
return © |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func (e *Engine) appendFaultHistory(ev *FaultEvent) { |
|
|
|
|
|
e.faultHistoryMu.Lock() |
|
|
|
|
|
defer e.faultHistoryMu.Unlock() |
|
|
|
|
|
if len(e.faultHistory) >= faultHistoryCapacity { |
|
|
|
|
|
copy(e.faultHistory, e.faultHistory[1:]) |
|
|
|
|
|
e.faultHistory[len(e.faultHistory)-1] = *ev |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
e.faultHistory = append(e.faultHistory, *ev) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bool) { |
|
|
func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bool) { |
|
|
state := e.currentRuntimeState() |
|
|
state := e.currentRuntimeState() |
|
|
switch state { |
|
|
switch state { |
|
|
@@ -591,6 +667,8 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo |
|
|
critical := queue.Health == output.QueueHealthCritical |
|
|
critical := queue.Health == output.QueueHealthCritical |
|
|
if critical { |
|
|
if critical { |
|
|
if e.criticalStreak.Add(1) >= queueCriticalStreakThreshold { |
|
|
if e.criticalStreak.Add(1) >= queueCriticalStreakThreshold { |
|
|
|
|
|
e.recordFault(FaultReasonQueueCritical, FaultSeverityDegraded, |
|
|
|
|
|
fmt.Sprintf("queue health critical (depth=%d)", queue.Depth)) |
|
|
e.setRuntimeState(RuntimeStateDegraded) |
|
|
e.setRuntimeState(RuntimeStateDegraded) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
@@ -598,6 +676,8 @@ func (e *Engine) evaluateRuntimeState(queue output.QueueStats, hasLateBuffers bo |
|
|
e.criticalStreak.Store(0) |
|
|
e.criticalStreak.Store(0) |
|
|
} |
|
|
} |
|
|
if hasLateBuffers { |
|
|
if hasLateBuffers { |
|
|
|
|
|
e.recordFault(FaultReasonLateBuffers, FaultSeverityWarn, |
|
|
|
|
|
fmt.Sprintf("late buffers detected (health=%s)", queue.Health)) |
|
|
e.setRuntimeState(RuntimeStateDegraded) |
|
|
e.setRuntimeState(RuntimeStateDegraded) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
|