package main import ( "log" "math" "os" "sort" "strconv" "strings" "time" "sdr-wideband-suite/internal/config" "sdr-wideband-suite/internal/demod/gpudemod" "sdr-wideband-suite/internal/detector" "sdr-wideband-suite/internal/dsp" "sdr-wideband-suite/internal/logging" "sdr-wideband-suite/internal/telemetry" ) func mustParseDuration(raw string, fallback time.Duration) time.Duration { if raw == "" { return fallback } if d, err := time.ParseDuration(raw); err == nil { return d } return fallback } func buildDecoderMap(cfg config.Config) map[string]string { out := map[string]string{} if cfg.Decoder.FT8Cmd != "" { out["FT8"] = cfg.Decoder.FT8Cmd } if cfg.Decoder.WSPRCmd != "" { out["WSPR"] = cfg.Decoder.WSPRCmd } if cfg.Decoder.DMRCmd != "" { out["DMR"] = cfg.Decoder.DMRCmd } if cfg.Decoder.DStarCmd != "" { out["D-STAR"] = cfg.Decoder.DStarCmd } if cfg.Decoder.FSKCmd != "" { out["FSK"] = cfg.Decoder.FSKCmd } if cfg.Decoder.PSKCmd != "" { out["PSK"] = cfg.Decoder.PSKCmd } return out } func decoderKeys(cfg config.Config) []string { m := buildDecoderMap(cfg) keys := make([]string, 0, len(m)) for k := range m { keys = append(keys, k) } sort.Strings(keys) return keys } func (m *extractionManager) reset() { if m == nil { return } m.mu.Lock() defer m.mu.Unlock() if m.runner != nil { m.runner.Close() m.runner = nil } } func (m *extractionManager) get(sampleCount int, sampleRate int) *gpudemod.BatchRunner { if m == nil || sampleCount <= 0 || sampleRate <= 0 || !gpudemod.Available() { return nil } m.mu.Lock() defer m.mu.Unlock() if m.runner != nil && sampleCount > m.maxSamples { m.runner.Close() m.runner = nil } if m.runner == nil { // Allocate generously: enough for full allIQ (sampleRate/10 ≈ 100ms) // so the runner never needs re-allocation when used for both // classification (FFT-block ~65k) and streaming (allIQ ~273k+). allocSize := sampleCount generous := sampleRate/10 + 1024 // ~400k at 4MHz — covers any scenario if generous > allocSize { allocSize = generous } if r, err := gpudemod.NewBatchRunner(allocSize, sampleRate); err == nil { m.runner = r m.maxSamples = allocSize } else { log.Printf("gpudemod: batch runner init failed: %v", err) } return m.runner } return m.runner } func extractSignalIQ(iq []complex64, sampleRate int, centerHz float64, sigHz float64, bwHz float64) []complex64 { if len(iq) == 0 || sampleRate <= 0 { return nil } results, _ := extractSignalIQBatch(nil, iq, sampleRate, centerHz, []detector.Signal{{CenterHz: sigHz, BWHz: bwHz}}) if len(results) == 0 { return nil } return results[0] } func extractSignalIQBatch(extractMgr *extractionManager, iq []complex64, sampleRate int, centerHz float64, signals []detector.Signal) ([][]complex64, []int) { out := make([][]complex64, len(signals)) rates := make([]int, len(signals)) if len(iq) == 0 || sampleRate <= 0 || len(signals) == 0 { return out, rates } decimTarget := 200000 if decimTarget <= 0 { decimTarget = sampleRate } runner := extractMgr.get(len(iq), sampleRate) if runner != nil { jobs := make([]gpudemod.ExtractJob, len(signals)) for i, sig := range signals { bw := sig.BWHz sigMHz := sig.CenterHz / 1e6 isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || (sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO")) jobOutRate := decimTarget if isWFM { jobOutRate = wfmStreamOutRate } // Minimum extraction BW: ensure enough bandwidth for demod features // FM broadcast (87.5-108 MHz) needs >=250kHz for stereo pilot + RDS at 57kHz // Also widen for any signal classified as WFM (in case of re-extraction) if isWFM { if bw < wfmStreamMinBW { bw = wfmStreamMinBW } } else if bw < 20000 { bw = 20000 } jobs[i] = gpudemod.ExtractJob{OffsetHz: sig.CenterHz - centerHz, BW: bw, OutRate: jobOutRate} } if gpuOuts, gpuRates, err := runner.ShiftFilterDecimateBatch(iq, jobs); err == nil && len(gpuOuts) == len(signals) { // batch extraction OK (silent) for i := range gpuOuts { out[i] = gpuOuts[i] if i < len(gpuRates) { rates[i] = gpuRates[i] } } return out, rates } else if err != nil { log.Printf("gpudemod: batch extraction failed for %d signals: %v", len(signals), err) } } // CPU extraction fallback (silent — see batch extraction failed above if applicable) for i, sig := range signals { offset := sig.CenterHz - centerHz shifted := dsp.FreqShift(iq, sampleRate, offset) bw := sig.BWHz // FM broadcast (87.5-108 MHz) needs >=250kHz for stereo + RDS sigMHz := sig.CenterHz / 1e6 isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || (sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO")) if isWFM { if bw < wfmStreamMinBW { bw = wfmStreamMinBW } } else if bw < 20000 { bw = 20000 } cutoff := bw / 2 if cutoff < 200 { cutoff = 200 } if cutoff > float64(sampleRate)/2-1 { cutoff = float64(sampleRate)/2 - 1 } taps := dsp.LowpassFIR(cutoff, sampleRate, 101) filtered := dsp.ApplyFIR(shifted, taps) decim := sampleRate / decimTarget if decim < 1 { decim = 1 } out[i] = dsp.Decimate(filtered, decim) rates[i] = sampleRate / decim } return out, rates } func parseSince(raw string) (time.Time, error) { if raw == "" { return time.Time{}, nil } if ms, err := strconv.ParseInt(raw, 10, 64); err == nil { if ms > 1e12 { return time.UnixMilli(ms), nil } return time.Unix(ms, 0), nil } if t, err := time.Parse(time.RFC3339Nano, raw); err == nil { return t, nil } return time.Parse(time.RFC3339, raw) } // streamExtractState holds per-signal persistent state for phase-continuous // GPU extraction. Stored in the DSP loop, keyed by signal ID. type streamExtractState struct { phase float64 // FreqShift phase accumulator } // streamIQOverlap holds the tail of the previous allIQ for FIR halo prepend. type streamIQOverlap struct { tail []complex64 } // extractionConfig holds audio quality settings for signal extraction. type extractionConfig struct { firTaps int // AQ-3: FIR tap count (default 101) bwMult float64 // AQ-5: BW multiplier (default 1.2) } const streamOverlapLen = 512 // must be >= FIR tap count with margin const ( wfmStreamOutRate = 500000 wfmStreamMinBW = 250000 ) var forceCPUStreamExtract = func() bool { raw := strings.TrimSpace(os.Getenv("SDR_FORCE_CPU_STREAM_EXTRACT")) if raw == "" { return false } v, err := strconv.ParseBool(raw) if err != nil { return false } return v }() // extractForStreaming performs GPU-accelerated extraction with: // - Per-signal phase-continuous FreqShift (via PhaseStart in ExtractJob) // - IQ overlap prepended to allIQ so FIR kernel has real data in halo // // Returns extracted snippets with overlap trimmed, and updates phase state. func extractForStreaming( extractMgr *extractionManager, allIQ []complex64, sampleRate int, centerHz float64, signals []detector.Signal, phaseState map[int64]*streamExtractState, overlap *streamIQOverlap, aqCfg extractionConfig, ) ([][]complex64, []int) { out := make([][]complex64, len(signals)) rates := make([]int, len(signals)) if len(allIQ) == 0 || sampleRate <= 0 || len(signals) == 0 { return out, rates } // AQ-3: Use configured overlap length (must cover FIR taps) overlapNeeded := streamOverlapLen if aqCfg.firTaps > 0 && aqCfg.firTaps+64 > overlapNeeded { overlapNeeded = aqCfg.firTaps + 64 } // Prepend overlap from previous frame so FIR kernel has real halo data var gpuIQ []complex64 overlapLen := len(overlap.tail) logging.Debug("extract", "overlap", "len", overlapLen, "needed", overlapNeeded, "allIQ", len(allIQ)) if overlapLen > 0 { gpuIQ = make([]complex64, overlapLen+len(allIQ)) copy(gpuIQ, overlap.tail) copy(gpuIQ[overlapLen:], allIQ) } else { gpuIQ = allIQ overlapLen = 0 } // Save tail for next frame (sized to cover configured FIR taps) if len(allIQ) > overlapNeeded { overlap.tail = append(overlap.tail[:0], allIQ[len(allIQ)-overlapNeeded:]...) } else { overlap.tail = append(overlap.tail[:0], allIQ...) } decimTarget := 200000 // AQ-5: BW multiplier for extraction (wider = better S/N for weak signals) bwMult := aqCfg.bwMult if bwMult <= 0 { bwMult = 1.0 } // Build jobs with per-signal phase jobs := make([]gpudemod.ExtractJob, len(signals)) for i, sig := range signals { bw := sig.BWHz * bwMult // AQ-5: widen extraction BW sigMHz := sig.CenterHz / 1e6 isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || (sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO")) jobOutRate := decimTarget if isWFM { jobOutRate = wfmStreamOutRate if bw < wfmStreamMinBW { bw = wfmStreamMinBW } } else if bw < 20000 { bw = 20000 } ps := phaseState[sig.ID] if ps == nil { ps = &streamExtractState{} phaseState[sig.ID] = ps } // PhaseStart is where the NEW data begins. But gpuIQ has overlap // prepended, so the GPU kernel starts processing at the overlap. // We need to rewind the phase by overlapLen samples so that the // overlap region gets the correct phase, and the new data region // starts at ps.phase exactly. phaseInc := -2.0 * math.Pi * (sig.CenterHz - centerHz) / float64(sampleRate) gpuPhaseStart := ps.phase - phaseInc*float64(overlapLen) jobs[i] = gpudemod.ExtractJob{ OffsetHz: sig.CenterHz - centerHz, BW: bw, OutRate: jobOutRate, PhaseStart: gpuPhaseStart, } } // Try GPU BatchRunner with phase unless CPU-only debug is forced. var runner *gpudemod.BatchRunner if forceCPUStreamExtract { logging.Warn("boundary", "force_cpu_stream_extract", "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "signals", len(signals)) } else { runner = extractMgr.get(len(gpuIQ), sampleRate) } if runner != nil { results, err := runner.ShiftFilterDecimateBatchWithPhase(gpuIQ, jobs) if err == nil && len(results) == len(signals) { for i, res := range results { outRate := res.Rate if outRate <= 0 { outRate = decimTarget } sigMHz := signals[i].CenterHz / 1e6 isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || (signals[i].Class != nil && (signals[i].Class.ModType == "WFM" || signals[i].Class.ModType == "WFM_STEREO")) if isWFM { outRate = wfmStreamOutRate } decim := sampleRate / outRate if decim < 1 { decim = 1 } trimSamples := (overlapLen + decim - 1) / decim if i == 0 { logging.Debug("extract", "gpu_result", "rate", res.Rate, "outRate", outRate, "decim", decim, "trim", trimSamples) } // Update phase state — advance only by NEW data length, not overlap phaseInc := -2.0 * math.Pi * jobs[i].OffsetHz / float64(sampleRate) phaseState[signals[i].ID].phase += phaseInc * float64(len(allIQ)) // Normalize to [-π, π) to prevent float64 drift over long runs phaseState[signals[i].ID].phase = math.Remainder(phaseState[signals[i].ID].phase, 2*math.Pi) // Trim overlap from output iq := res.IQ rawLen := len(iq) if trimSamples > 0 && trimSamples < len(iq) { iq = iq[trimSamples:] } if i == 0 { logging.Debug("boundary", "extract_trim", "path", "gpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(iq), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID) logExtractorHeadComparison(signals[i].ID, "gpu", overlapLen, res.IQ, trimSamples, iq) } out[i] = iq rates[i] = res.Rate } return out, rates } else if err != nil { log.Printf("gpudemod: stream batch extraction failed: %v", err) } } // CPU fallback (with phase tracking) for i, sig := range signals { offset := sig.CenterHz - centerHz bw := jobs[i].BW ps := phaseState[sig.ID] // Phase-continuous FreqShift — rewind by overlap so new data starts at ps.phase shifted := make([]complex64, len(gpuIQ)) inc := -2.0 * math.Pi * offset / float64(sampleRate) phase := ps.phase - inc*float64(overlapLen) for k, v := range gpuIQ { phase += inc re := math.Cos(phase) im := math.Sin(phase) shifted[k] = complex( float32(float64(real(v))*re-float64(imag(v))*im), float32(float64(real(v))*im+float64(imag(v))*re), ) } // Advance phase by NEW data length only ps.phase += inc * float64(len(allIQ)) ps.phase = math.Remainder(ps.phase, 2*math.Pi) cutoff := bw / 2 if cutoff < 200 { cutoff = 200 } if cutoff > float64(sampleRate)/2-1 { cutoff = float64(sampleRate)/2 - 1 } firTaps := 101 if aqCfg.firTaps > 0 { firTaps = aqCfg.firTaps } taps := dsp.LowpassFIR(cutoff, sampleRate, firTaps) filtered := dsp.ApplyFIR(shifted, taps) sigMHz := sig.CenterHz / 1e6 isWFM := (sigMHz >= 87.5 && sigMHz <= 108.0) || (sig.Class != nil && (sig.Class.ModType == "WFM" || sig.Class.ModType == "WFM_STEREO")) outRate := decimTarget if isWFM { outRate = wfmStreamOutRate } decim := sampleRate / outRate if decim < 1 { decim = 1 } decimated := dsp.Decimate(filtered, decim) rates[i] = sampleRate / decim // Trim overlap — use ceil to ensure ALL overlap samples are removed. // Floor trim (overlapLen/decim) leaves a remainder for non-divisible // factors (e.g. 512/20=25 trims only 500 of 512 samples → 12 leak). trimSamples := (overlapLen + decim - 1) / decim if i == 0 { logging.Debug("extract", "cpu_result", "outRate", outRate, "decim", decim, "trim", trimSamples) } rawLen := len(decimated) if trimSamples > 0 && trimSamples < len(decimated) { decimated = decimated[trimSamples:] } if i == 0 { logging.Debug("boundary", "extract_trim", "path", "cpu", "raw_len", rawLen, "trim", trimSamples, "out_len", len(decimated), "overlap_len", overlapLen, "allIQ_len", len(allIQ), "gpuIQ_len", len(gpuIQ), "outRate", outRate, "signal", signals[i].ID) logExtractorHeadComparison(signals[i].ID, "cpu", overlapLen, decimated, trimSamples, decimated) } out[i] = decimated } return out, rates } type iqHeadStats struct { length int minMag float64 maxMag float64 meanMag float64 lowMag int maxStep float64 maxStepIdx int p95Step float64 headTail float64 headMinIdx int stepSamples []float64 } func computeIQHeadStats(iq []complex64, headLen int) iqHeadStats { stats := iqHeadStats{minMag: math.MaxFloat64, headMinIdx: -1, maxStepIdx: -1} if len(iq) == 0 { stats.minMag = 0 return stats } n := len(iq) if headLen > 0 && headLen < n { n = headLen } stats.length = n stats.stepSamples = make([]float64, 0, max(0, n-1)) sumMag := 0.0 headSum := 0.0 tailSum := 0.0 tailCount := 0 for i := 0; i < n; i++ { v := iq[i] mag := math.Hypot(float64(real(v)), float64(imag(v))) if mag < stats.minMag { stats.minMag = mag stats.headMinIdx = i } if mag > stats.maxMag { stats.maxMag = mag } sumMag += mag if mag < 0.05 { stats.lowMag++ } if i < min(16, n) { headSum += mag } if i >= max(0, n-16) { tailSum += mag tailCount++ } if i > 0 { p := iq[i-1] num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v)) den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v)) step := math.Abs(math.Atan2(num, den)) if step > stats.maxStep { stats.maxStep = step stats.maxStepIdx = i - 1 } stats.stepSamples = append(stats.stepSamples, step) } } stats.meanMag = sumMag / float64(n) if len(stats.stepSamples) > 0 { sorted := append([]float64(nil), stats.stepSamples...) sort.Float64s(sorted) idx := int(float64(len(sorted)-1) * 0.95) stats.p95Step = sorted[idx] } else { stats.p95Step = stats.maxStep } if headSum > 0 && tailCount > 0 { headMean := headSum / float64(min(16, n)) tailMean := tailSum / float64(tailCount) if tailMean > 0 { stats.headTail = headMean / tailMean } } return stats } func observeIQStats(coll *telemetry.Collector, stage string, iq []complex64, tags telemetry.Tags) { if coll == nil || len(iq) == 0 { return } stats := computeIQHeadStats(iq, len(iq)) stageTags := telemetry.TagsWith(tags, "stage", stage) coll.Observe("iq.magnitude.min", stats.minMag, stageTags) coll.Observe("iq.magnitude.max", stats.maxMag, stageTags) coll.Observe("iq.magnitude.mean", stats.meanMag, stageTags) coll.Observe("iq.phase_step.max", stats.maxStep, stageTags) coll.Observe("iq.phase_step.p95", stats.p95Step, stageTags) coll.Observe("iq.low_magnitude.count", float64(stats.lowMag), stageTags) coll.SetGauge("iq.length", float64(stats.length), stageTags) } func logExtractorHeadComparison(signalID int64, path string, overlapLen int, raw []complex64, trimSamples int, out []complex64) { rawStats := computeIQHeadStats(raw, 96) trimmedStats := computeIQHeadStats(out, 96) logging.Debug("boundary", "extract_head_compare", "signal", signalID, "path", path, "raw_len", len(raw), "trim", trimSamples, "out_len", len(out), "overlap_len", overlapLen, "raw_min_mag", rawStats.minMag, "raw_min_idx", rawStats.headMinIdx, "raw_max_step", rawStats.maxStep, "raw_max_step_idx", rawStats.maxStepIdx, "raw_head_tail", rawStats.headTail, "trimmed_min_mag", trimmedStats.minMag, "trimmed_min_idx", trimmedStats.headMinIdx, "trimmed_max_step", trimmedStats.maxStep, "trimmed_max_step_idx", trimmedStats.maxStepIdx, "trimmed_head_tail", trimmedStats.headTail, ) for _, off := range []int{2, 4, 8, 16} { if len(out) <= off+8 { continue } offStats := computeIQHeadStats(out[off:], 96) logging.Debug("boundary", "extract_head_offset_compare", "signal", signalID, "path", path, "offset", off, "base_min_mag", trimmedStats.minMag, "base_min_idx", trimmedStats.headMinIdx, "base_max_step", trimmedStats.maxStep, "base_max_step_idx", trimmedStats.maxStepIdx, "offset_min_mag", offStats.minMag, "offset_min_idx", offStats.headMinIdx, "offset_max_step", offStats.maxStep, "offset_max_step_idx", offStats.maxStepIdx, "offset_head_tail", offStats.headTail, ) } }