diff --git a/config.yaml b/config.yaml
index 6d8f0de..6d57eb6 100644
--- a/config.yaml
+++ b/config.yaml
@@ -248,10 +248,13 @@ decoder:
   dstar_cmd: tools/dsd-neo/bin/dsd-neo.exe -fd -i {audio} -s {sr} -o null
   fsk_cmd: tools/fsk/fsk_decoder --iq {iq} --sample-rate {sr}
   psk_cmd: tools/psk/psk_decoder --iq {iq} --sample-rate {sr}
+debug:
+  audio_dump_enabled: false
+  cpu_monitoring: false
 logging:
   level: debug
-  categories: [capture, extract, demod, resample, drop, ws, boundary]
-  rate_limit_ms: 500
+  categories: [gap, prefir, boundary]
+  rate_limit_ms: 100
   stdout: true
   stdout_color: true
   file: logs/trace.log
diff --git a/docs/audio-click-debug-notes-2026-03-24.md b/docs/audio-click-debug-notes-2026-03-24.md
new file mode 100644
index 0000000..a11c38c
--- /dev/null
+++ b/docs/audio-click-debug-notes-2026-03-24.md
@@ -0,0 +1,322 @@
+# Audio Click Debug Notes — 2026-03-24
+
+## Context
+
+This note captures the intermediate findings from the live/recording audio click investigation on `sdr-wideband-suite`.
+
+Goal: preserve the reasoning, experiments, false leads, and current best understanding so future work does not restart from scratch.
+
+---
+
+## High-level outcome so far
+
+We do **not** yet have the final root cause.
+
+But we now know substantially more about what the clicks are **not**, and we identified at least one real bug plus several strong behavioral constraints in the pipeline.
+
+---
+
+## What was tested
+
+### 1. Session/context recovery
+- Reconstructed prior debugging context from reset-session backup files.
+- Confirmed the relevant investigation was the persistent audio clicking bug in live audio / recordings.
+
+### 2. Codebase deep-read
+Reviewed in detail:
+- `cmd/sdrd/dsp_loop.go`
+- `cmd/sdrd/pipeline_runtime.go`
+- `cmd/sdrd/helpers.go`
+- `internal/recorder/streamer.go`
+- `internal/recorder/demod_live.go`
+- `internal/dsp/fir.go`
+- `internal/dsp/fir_stateful.go`
+- `internal/dsp/resample.go`
+- `internal/demod/fm.go`
+- `internal/demod/gpudemod/*`
+- `web/app.js`
+
+Main conclusion from static reading: the pipeline contains several stateful continuity mechanisms, so clicks are likely to emerge at boundaries or from phase/timing inconsistencies rather than from one obvious isolated bug.
+
+### 3. AM vs FM tests
+Observed by ear:
+- AM clicks too.
+- Therefore this is **not** an FM-only issue.
+- That shifted focus away from purely FM-specific explanations and toward shared-path / continuity / transport / demod-adjacent causes.
+
+### 4. Recording vs live path comparison
+Observed by ear:
+- Recordings click too.
+- Therefore browser/WebSocket/live playback is **not** the sole cause.
+- The root problem exists in the server-side audio pipeline before browser playback.
+
+### 5. Boundary instrumentation added
+Temporary diagnostics were added to inspect:
+- extract trimming
+- snippet lengths
+- demod path lengths
+- boundary click / intra-click detector
+- IQ continuity at various stages
+
+### 6. Discriminator-overlap hypothesis
+A test switch temporarily disabled the extra 1-sample discriminator overlap prepend in `streamer.go`.
+
+Result:
+- This extra overlap **was** a real problem.
+- It caused the downstream decimation phase to flip between blocks.
+- Removing it cleaned up the boundary model and was the correct change.
+
+However:
+- Removing it did **not** eliminate the audible clicks.
+- Therefore it was a real bug, but **not the main remaining root cause**.
+
+### 7. GPU vs CPU extraction test
+Forced CPU-only stream extraction.
+
+Result:
+- CPU-only made things dramatically worse in real time.
+- Large `feed_gap` values appeared.
+- Huge backlogs built up.
+- Therefore CPU-only is not a solution, and the GPU path is not the sole main problem.
+
+### 8. Fixed read-size test
+Forced a constant extraction read size (`389120`) instead of variable read sizing based on backlog.
+
+Result:
+- `allIQ`, `gpuIQ_len`, `raw_len`, and `out_len` became very stable.
+- This reduced pipeline variability and made logs much cleaner.
+- Subjectively, audio may have become slightly better, but clicks remained.
+- Therefore variable block sizing is likely a contributing factor, but not the full explanation.
+
+### 9. Multi-stage audio dump test
+Added optional debug dumping for:
+- demod audio (`*-demod.wav`)
+- final audio after resampler (`*-final.wav`)
+
+Observed by ear:
+- Clicks are present in **both** dump types.
+- Therefore the click is already present by the time demodulated audio exists.
+- Resampler/final audio path is not the primary origin.
+
+### 10. CPU monitoring
+A process-level CSV monitor was added and used.
+
+Result:
+- Overall process CPU usage was modest (not near full machine saturation).
+- This does **not** support “overall CPU is pegged” as the main explanation.
+- Caveat: this does not fully exclude a hot thread or scheduler issue, but gross total CPU overload is not the main story.
+
+---
+
+## What we now know with reasonable confidence
+
+### A. The issue is not primarily caused by:
+- Browser playback
+- WebSocket transport
+- Final PCM fanout only
+- Resampler alone
+- CPU-only vs GPU-only as the core dichotomy
+- The old extra discriminator overlap prepend (that was a bug, but not the remaining dominant one)
+- Purely variable block sizes alone
+- Gross whole-process CPU saturation
+
+### B. The issue is server-side and exists before final playback
+Because:
+- recordings click
+- demod dump clicks
+- final dump clicks
+
+### C. The issue is present by the demodulated audio stage
+This is one of the strongest current findings.
+
+### D. The WFM/FM-demod-adjacent path remains highly suspicious
+Current best area of suspicion:
+- decimated IQ may still contain subtle corruption/instability not fully captured by current metrics
+- OR the FM discriminator (`fmDiscrim`) is producing pathological output from otherwise “boundary-clean-looking” IQ
+
+---
+
+## Important runtime/pathology observations
+
+### 1. Backlog amplification is real
+Several debug runs showed severe buffer growth and drops:
+- large `buf=` values
+- growing `drop=` counts
+- repeated `audio_gap`
+
+This means some debug configurations can easily become self-distorting and produce additional artifacts that are not representative of the original bug.
+
+### 2. Too much debug output causes self-inflicted load
+At one point:
+- rate limiter was disabled (`rate_limit_ms: 0`)
+- aggressive boundary logging was enabled
+- many short WAV files were generated
+
+This clearly increased overhead and likely polluted some runs.
+
+### 3. Many short WAVs were a bad debug design
+That was replaced with a design intended to write one continuous window file instead of many micro-files.
+
+### 4. Total process CPU saturation does not appear to be the main cause
+A process-level CSV monitor was collected and showed only modest total CPU utilisation during the relevant tests.
+This does **not** support a simple “the machine is pegged” explanation.
+A hot thread / scheduling issue is still theoretically possible, but gross overall CPU overload is not the main signal.
+
+---
+
+## Current debug state in repo
+
+### Branch
+All current work is on:
+- `debug/audio-clicks`
+
+### Commits so far
+- `94c132d` — `debug: instrument audio click investigation`
+- `ffbc45d` — `debug: add advanced boundary metering`
+
+### Current config/logging state
+The active debug logging was trimmed down to:
+- `demod`
+- `discrim`
+- `gap`
+- `boundary`
+
+Rate limit is currently back to a nonzero value to avoid self-induced spam.
+
+### Dump/CPU debug state
+A `debug:` config section was added with:
+- `audio_dump_enabled: false`
+- `cpu_monitoring: false`
+
+Meaning:
+- heavy WAV dumping is now OFF by default
+- CPU monitoring is conceptually OFF by default (script still exists, but must be explicitly used)
+
+---
+
+## Most important code changes/findings to remember
+
+### 1. Removed the extra discriminator overlap prepend in `streamer.go`
+This was a correct fix.
+
+Reason:
+- it introduced a blockwise extra IQ sample
+- this shifted decimation phase between blocks
+- it created real boundary artifacts
+
+This should **not** be reintroduced casually.
+
+### 2. Fixed read-size test exists and is useful for investigation
+A temporary mechanism exists to force stable extraction block sizes.
+This is useful diagnostically because it removes one source of pipeline variability.
+
+### 3. FM discriminator metering exists
+`internal/demod/fm.go` now emits targeted discriminator stats under `discrim` logging, including:
+- min/max IQ magnitude
+- maximum absolute phase step
+- count of large phase steps
+
+This was useful to establish that large discriminator steps correlate with low IQ magnitude, but discriminator logging was later disabled from the active category list to reduce log spam.
+
+### 4. Strong `dec`-IQ findings before demod
+Additional metering in `streamer.go` showed:
+- repeated `dec_iq_head_dip`
+- repeated low magnitude near `min_idx ~= 25`
+- repeated early large local phase step near `max_step_idx ~= 24`
+- repeated `demod_boundary` and audible clicks shortly afterward
+
+This is the strongest currently known mechanism in the chain.
+
+### 5. Group delay observation
+For the current pre-demod FIR:
+- taps = `101`
+- FIR group delay = `(101 - 1) / 2 = 50` input samples
+- with `decim1 = 2`, this projects to about `25` output samples
+
+This matches the repeatedly observed problematic `dec` indices (~24-25) remarkably well.
+That strongly suggests the audible issue is connected to the FIR/decimation settling region at the beginning of the `dec` block.
+
+### 6. Pre-FIR vs post-FIR comparison
+A dedicated pre-FIR probe was added on `fullSnip` (the input to the pre-demod FIR) and compared against the existing `dec`-side probes.
+
+Observed pattern:
+- pre-FIR head probe usually looked relatively normal
+- no equally strong or equally reproducible hot spot appeared there
+- after FIR + decimation, the problematic dip/step repeatedly appeared near `dec` indices ~24-25
+
+Interpretation:
+- the strongest currently observed defect is **not already present in the same form before the FIR**
+- it is much more likely to emerge in the FIR/decimation section (or its settling behavior) than in the raw pre-FIR input
+
+### 7. Head-trim test results
+A debug head-trim on `dec` was tested.
+Subjective result:
+- `trim=32` sounded best among the tested values (`16/32/48/64`)
+- but it did **not** remove the clicks entirely
+
+Interpretation:
+- the early `dec` settling region is a real contributor
+- but it is probably not the only contributor, or trimming alone is not the final correct fix
+
+### 8. Current architectural conclusion
+The likely clean fix is **not** to keep trimming samples away.
+Instead, the likely correct direction is:
+- replace the current “stateful FIR, then separate decimation” handoff with a **stateful decimating FIR / polyphase decimator**
+- preserve phase and delay state explicitly
+- ensure the first emitted decimated samples are already truly valid for demodulation
+
+Important nuance:
+- the currently suspicious FIR + decimation section is already running in **Go/CPU** (`processSnippet`), not in CUDA
+- therefore the next correctness fix should be developed and validated in Go first
+
+---
+
+## Best current hypothesis
+
+The remaining audible clicks are most likely generated **at or immediately before FM demodulation**.
+
+Most plausible interpretations:
+1. The decimated IQ stream still contains subtle corruption/instability not fully captured by the earliest boundary metrics.
+2. The FM discriminator is reacting violently to short abnormal IQ behavior inside blocks, not just at block boundaries.
+3. The problematic region is likely a **very specific early decimated-IQ settling zone**, not broad corruption across the whole block.
+
+At this point, the most valuable next data is low-overhead IQ telemetry right before demod, plus carefully controlled demod-vs-final audio comparison.
+
+### Stronger updated working theory (later findings, same day)
+
+After discriminator-focused metering and targeted `dec`-IQ probes, the strongest current theory is:
+
+> A reproducible early defect in the `dec` IQ block appears around sample index **24-25**, where IQ magnitude dips sharply and the effective FM phase step becomes abnormally large. This then shows up as `demod_boundary` and audible clicks.
+
+Crucially:
+- this issue appears in `demod.wav`, so it exists before the final resampler/playback path
+- it is **not** spread uniformly across the whole `dec` block
+- it repeatedly appears near the same index
+- trimming the first ~32 samples subjectively reduces the click, but does not eliminate it entirely
+
+This strongly suggests a **settling/transient zone at the beginning of the decimated IQ block**.
+
+---
+
+## Recommended next steps
+
+1. Run with reduced logging only (`demod`, `gap`, `boundary`) unless discriminator logging is specifically needed again.
+2. Keep heavy dump features OFF unless explicitly needed.
+3. Treat the beginning of the `dec` block as the highest-priority investigation zone.
+4. Continue analysing whether the observed issue is:
+   - an expected FIR/decimation settling region being handled incorrectly, or
+   - evidence that corrupted IQ is already entering the pre-demod FIR
+5. When testing fixes, prefer low-overhead, theory-driven experiments over broad logging/dump spam.
+6. Only re-enable audio dump windows selectively and briefly.
+
+---
+
+## Meta note
+
+This investigation already disproved several plausible explanations. That is progress.
+
+The most important thing not to forget is:
+- the overlap prepend bug was real, but not sufficient
+- the click is already present in demod audio
+- whole-process CPU saturation is not the main explanation
+- excessive debug instrumentation can itself create misleading secondary problems
diff --git a/internal/config/config.go b/internal/config/config.go
index 1cd1fb7..5651873 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -96,6 +96,11 @@ type DecoderConfig struct {
 	PSKCmd   string `yaml:"psk_cmd" json:"psk_cmd"`
 }
 
+type DebugConfig struct {
+	AudioDumpEnabled bool `yaml:"audio_dump_enabled" json:"audio_dump_enabled"`
+	CPUMonitoring    bool `yaml:"cpu_monitoring" json:"cpu_monitoring"`
+}
+
 type PipelineGoalConfig struct {
 	Intent            string          `yaml:"intent" json:"intent"`
 	MonitorStartHz    float64         `yaml:"monitor_start_hz" json:"monitor_start_hz"`
@@ -169,6 +174,7 @@ type Config struct {
 	Detector       DetectorConfig     `yaml:"detector" json:"detector"`
 	Recorder       RecorderConfig     `yaml:"recorder" json:"recorder"`
 	Decoder        DecoderConfig      `yaml:"decoder" json:"decoder"`
+	Debug          DebugConfig        `yaml:"debug" json:"debug"`
 	Logging        LogConfig          `yaml:"logging" json:"logging"`
 	WebAddr        string             `yaml:"web_addr" json:"web_addr"`
 	EventPath      string             `yaml:"event_path" json:"event_path"`
@@ -421,6 +427,10 @@ func Default() Config {
 			ExtractionBwMult: 1.2,
 		},
 		Decoder:        DecoderConfig{},
+		Debug: DebugConfig{
+			AudioDumpEnabled: false,
+			CPUMonitoring:    false,
+		},
 		Logging: LogConfig{
 			Level:       "informal",
 			Categories:  []string{},
diff --git a/internal/demod/fm.go b/internal/demod/fm.go
index 6c1d153..7fe05dc 100644
--- a/internal/demod/fm.go
+++ b/internal/demod/fm.go
@@ -4,6 +4,7 @@ import (
 	"math"
 
 	"sdr-wideband-suite/internal/dsp"
+	"sdr-wideband-suite/internal/logging"
 )
 
 type NFM struct{}
@@ -45,12 +46,45 @@ func fmDiscrim(iq []complex64) []float32 {
 		return nil
 	}
 	out := make([]float32, len(iq)-1)
+	maxAbs := 0.0
+	maxIdx := 0
+	largeSteps := 0
+	minMag := math.MaxFloat64
+	maxMag := 0.0
 	for i := 1; i < len(iq); i++ {
 		p := iq[i-1]
 		c := iq[i]
+		pmag := math.Hypot(float64(real(p)), float64(imag(p)))
+		cmag := math.Hypot(float64(real(c)), float64(imag(c)))
+		if pmag < minMag {
+			minMag = pmag
+		}
+		if cmag < minMag {
+			minMag = cmag
+		}
+		if pmag > maxMag {
+			maxMag = pmag
+		}
+		if cmag > maxMag {
+			maxMag = cmag
+		}
 		num := float64(real(p))*float64(imag(c)) - float64(imag(p))*float64(real(c))
 		den := float64(real(p))*float64(real(c)) + float64(imag(p))*float64(imag(c))
-		out[i-1] = float32(math.Atan2(num, den))
+		step := math.Atan2(num, den)
+		if a := math.Abs(step); a > maxAbs {
+			maxAbs = a
+			maxIdx = i - 1
+		}
+		if math.Abs(step) > 1.5 {
+			largeSteps++
+		}
+		out[i-1] = float32(step)
+	}
+	if logging.EnabledCategory("discrim") {
+		logging.Debug("discrim", "fm_meter", "iq_len", len(iq), "audio_len", len(out), "min_mag", minMag, "max_mag", maxMag, "max_abs_step", maxAbs, "max_idx", maxIdx, "large_steps", largeSteps)
+		if largeSteps > 0 {
+			logging.Warn("discrim", "fm_large_steps", "iq_len", len(iq), "large_steps", largeSteps, "max_abs_step", maxAbs, "max_idx", maxIdx, "min_mag", minMag, "max_mag", maxMag)
+		}
 	}
 	return out
 }
diff --git a/internal/recorder/streamer.go b/internal/recorder/streamer.go
index 8afe7b7..d5147cc 100644
--- a/internal/recorder/streamer.go
+++ b/internal/recorder/streamer.go
@@ -10,6 +10,7 @@ import (
 	"math"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -37,6 +38,13 @@ type streamSession struct {
 	playbackMode string
 	stereoState  string
 	lastAudioTs  time.Time
+
+	debugDumpStart time.Time
+	debugDumpUntil time.Time
+	debugDumpBase  string
+
+	demodDump []float32
+	finalDump []float32
 	lastAudioL   float32
 	lastAudioR   float32
 	prevAudioL   float64 // second-to-last L sample for boundary transient detection
@@ -156,6 +164,54 @@ const (
 	resamplerTaps   = 32 // taps per polyphase arm — good quality
 )
 
+var debugDumpDelay = func() time.Duration {
+	raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_DUMP_DELAY_SECONDS"))
+	if raw == "" {
+		return 5 * time.Second
+	}
+	v, err := strconv.Atoi(raw)
+	if err != nil || v < 0 {
+		return 5 * time.Second
+	}
+	return time.Duration(v) * time.Second
+}()
+
+var debugDumpDuration = func() time.Duration {
+	raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_DUMP_DURATION_SECONDS"))
+	if raw == "" {
+		return 15 * time.Second
+	}
+	v, err := strconv.Atoi(raw)
+	if err != nil || v <= 0 {
+		return 15 * time.Second
+	}
+	return time.Duration(v) * time.Second
+}()
+
+var audioDumpEnabled = func() bool {
+	raw := strings.TrimSpace(os.Getenv("SDR_DEBUG_AUDIO_DUMP_ENABLED"))
+	if raw == "" {
+		return false
+	}
+	v, err := strconv.ParseBool(raw)
+	if err != nil {
+		return false
+	}
+	return v
+}()
+
+var decHeadTrimSamples = func() int {
+	raw := strings.TrimSpace(os.Getenv("SDR_DEC_HEAD_TRIM"))
+	if raw == "" {
+		return 0
+	}
+	v, err := strconv.Atoi(raw)
+	if err != nil || v < 0 {
+		return 0
+	}
+	return v
+}()
+
 // ---------------------------------------------------------------------------
 // Streamer — manages all active streaming sessions
 // ---------------------------------------------------------------------------
@@ -588,8 +644,19 @@ func (st *Streamer) attachPendingListeners(sess *streamSession) {
 			default:
 			}
 
+			if audioDumpEnabled {
+				now := time.Now()
+				sess.debugDumpStart = now.Add(debugDumpDelay)
+				sess.debugDumpUntil = sess.debugDumpStart.Add(debugDumpDuration)
+				sess.debugDumpBase = filepath.Join("debug", fmt.Sprintf("signal-%d-window-%s", sess.signalID, now.Format("20060102-150405")))
+				sess.demodDump = nil
+				sess.finalDump = nil
+			}
 			log.Printf("STREAM: attached pending listener %d to signal %d (%.1fMHz %s ch=%d)",
 				subID, sess.signalID, sess.centerHz/1e6, sess.demodName, sess.channels)
+			if audioDumpEnabled {
+				log.Printf("STREAM: debug dump armed signal=%d start=%s until=%s", sess.signalID, sess.debugDumpStart.Format(time.RFC3339), sess.debugDumpUntil.Format(time.RFC3339))
+			}
 		}
 	}
 }
@@ -673,9 +740,20 @@ func (st *Streamer) SubscribeAudio(freq float64, bw float64, mode string) (int64
 
 	if bestSess != nil && bestDist < 200000 {
 		bestSess.audioSubs = append(bestSess.audioSubs, audioSub{id: subID, ch: ch})
+		if audioDumpEnabled {
+			now := time.Now()
+			bestSess.debugDumpStart = now.Add(debugDumpDelay)
+			bestSess.debugDumpUntil = bestSess.debugDumpStart.Add(debugDumpDuration)
+			bestSess.debugDumpBase = filepath.Join("debug", fmt.Sprintf("signal-%d-window-%s", bestSess.signalID, now.Format("20060102-150405")))
+			bestSess.demodDump = nil
+			bestSess.finalDump = nil
+		}
 		info := bestSess.audioInfo()
 		log.Printf("STREAM: subscriber %d attached to signal %d (%.1fMHz %s)",
 			subID, bestSess.signalID, bestSess.centerHz/1e6, bestSess.demodName)
+		if audioDumpEnabled {
+			log.Printf("STREAM: debug dump armed signal=%d start=%s until=%s", bestSess.signalID, bestSess.debugDumpStart.Format(time.RFC3339), bestSess.debugDumpUntil.Format(time.RFC3339))
+		}
 		return subID, ch, info, nil
 	}
 
@@ -749,6 +827,42 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]
 	overlapApplied := false
 	prevTailValid := false
 
+	if logging.EnabledCategory("prefir") && len(fullSnip) > 0 {
+		probeN := 64
+		if len(fullSnip) < probeN {
+			probeN = len(fullSnip)
+		}
+		minPreMag := math.MaxFloat64
+		minPreIdx := 0
+		maxPreStep := 0.0
+		maxPreStepIdx := 0
+		for i := 0; i < probeN; i++ {
+			v := fullSnip[i]
+			mag := math.Hypot(float64(real(v)), float64(imag(v)))
+			if mag < minPreMag {
+				minPreMag = mag
+				minPreIdx = i
+			}
+			if i > 0 {
+				p := fullSnip[i-1]
+				num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v))
+				den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v))
+				step := math.Abs(math.Atan2(num, den))
+				if step > maxPreStep {
+					maxPreStep = step
+					maxPreStepIdx = i - 1
+				}
+			}
+		}
+		logging.Debug("prefir", "pre_fir_head_probe", "signal", sess.signalID, "probe_len", probeN, "min_mag", minPreMag, "min_idx", minPreIdx, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx, "snip_len", len(fullSnip))
+		if minPreMag < 0.18 {
+			logging.Warn("prefir", "pre_fir_head_dip", "signal", sess.signalID, "probe_len", probeN, "min_mag", minPreMag, "min_idx", minPreIdx, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx)
+		}
+		if maxPreStep > 1.5 {
+			logging.Warn("prefir", "pre_fir_head_step", "signal", sess.signalID, "probe_len", probeN, "max_step", maxPreStep, "max_step_idx", maxPreStepIdx, "min_mag", minPreMag, "min_idx", minPreIdx)
+		}
+	}
+
 	// --- Stateful anti-alias FIR + decimation to demod rate ---
 	demodRate := d.OutputSampleRate()
 	decim1 := int(math.Round(float64(snipRate) / float64(demodRate)))
@@ -794,6 +908,11 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]
 		dec = fullSnip
 	}
 
+	if decHeadTrimSamples > 0 && decHeadTrimSamples < len(dec) {
+		logging.Warn("boundary", "dec_head_trim_applied", "signal", sess.signalID, "trim", decHeadTrimSamples, "before_len", len(dec))
+		dec = dec[decHeadTrimSamples:]
+	}
+
 	if logging.EnabledCategory("boundary") && len(dec) > 0 {
 		first := dec[0]
 		if sess.lastDecIQSet {
@@ -849,6 +968,40 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]
 			}
 		}
 
+		probeN := 64
+		if len(dec) < probeN {
+			probeN = len(dec)
+		}
+		minHeadMag := math.MaxFloat64
+		minHeadIdx := 0
+		maxHeadStep := 0.0
+		maxHeadStepIdx := 0
+		for i := 0; i < probeN; i++ {
+			v := dec[i]
+			mag := math.Hypot(float64(real(v)), float64(imag(v)))
+			if mag < minHeadMag {
+				minHeadMag = mag
+				minHeadIdx = i
+			}
+			if i > 0 {
+				p := dec[i-1]
+				num := float64(real(p))*float64(imag(v)) - float64(imag(p))*float64(real(v))
+				den := float64(real(p))*float64(real(v)) + float64(imag(p))*float64(imag(v))
+				step := math.Abs(math.Atan2(num, den))
+				if step > maxHeadStep {
+					maxHeadStep = step
+					maxHeadStepIdx = i - 1
+				}
+			}
+		}
+		logging.Debug("boundary", "dec_iq_head_probe", "signal", sess.signalID, "probe_len", probeN, "min_mag", minHeadMag, "min_idx", minHeadIdx, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx)
+		if minHeadMag < 0.18 {
+			logging.Warn("boundary", "dec_iq_head_dip", "signal", sess.signalID, "probe_len", probeN, "min_mag", minHeadMag, "min_idx", minHeadIdx, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx)
+		}
+		if maxHeadStep > 1.5 {
+			logging.Warn("boundary", "dec_iq_head_step", "signal", sess.signalID, "probe_len", probeN, "max_step", maxHeadStep, "max_step_idx", maxHeadStepIdx, "min_mag", minHeadMag, "min_idx", minHeadIdx)
+		}
+
 		if len(dec) >= 2 {
 			sess.prevDecIQ = dec[len(dec)-2]
 			sess.lastDecIQ = dec[len(dec)-1]
@@ -890,6 +1043,15 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]
 	}
 	logging.Debug("boundary", "audio_path", "signal", sess.signalID, "demod", demodName, "actual_rate", actualDemodRate, "audio_len", len(audio), "channels", d.Channels(), "overlap_applied", overlapApplied, "prev_tail_valid", prevTailValid)
 
+	shouldDump := !sess.debugDumpStart.IsZero() && !sess.debugDumpUntil.IsZero()
+	if shouldDump {
+		now := time.Now()
+		shouldDump = !now.Before(sess.debugDumpStart) && now.Before(sess.debugDumpUntil)
+	}
+	if shouldDump {
+		sess.demodDump = append(sess.demodDump, audio...)
+	}
+
 	// --- Stateful stereo decode with conservative lock/hysteresis ---
 	channels := 1
 	if isWFMStereo {
@@ -977,6 +1139,24 @@ func (sess *streamSession) processSnippet(snippet []complex64, snipRate int) ([]
 		}
 	}
 
+	if shouldDump {
+		sess.finalDump = append(sess.finalDump, audio...)
+	} else if !sess.debugDumpUntil.IsZero() && time.Now().After(sess.debugDumpUntil) && sess.debugDumpBase != "" {
+		_ = os.MkdirAll(filepath.Dir(sess.debugDumpBase), 0o755)
+		if len(sess.demodDump) > 0 {
+			_ = writeWAVFile(sess.debugDumpBase+"-demod.wav", sess.demodDump, actualDemodRate, d.Channels())
+		}
+		if len(sess.finalDump) > 0 {
+			_ = writeWAVFile(sess.debugDumpBase+"-final.wav", sess.finalDump, streamAudioRate, channels)
+		}
+		logging.Warn("boundary", "debug_audio_dump_window", "signal", sess.signalID, "base", sess.debugDumpBase)
+		sess.debugDumpBase = ""
+		sess.demodDump = nil
+		sess.finalDump = nil
+		sess.debugDumpStart = time.Time{}
+		sess.debugDumpUntil = time.Time{}
+	}
+
 	return audio, streamAudioRate
 }
 
@@ -1520,6 +1700,15 @@ var ErrNoSession = errors.New("no active or pending session for this frequency")
 // WAV header helpers
 // ---------------------------------------------------------------------------
 
+func writeWAVFile(path string, audio []float32, sampleRate int, channels int) error {
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	return writeWAVTo(f, audio, sampleRate, channels)
+}
+
 func writeStreamWAVHeader(f *os.File, sampleRate int, channels int) error {
 	if channels <= 0 {
 		channels = 1