package audio

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
)

// wavChunkPrealloc caps how much memory is reserved up front for a chunk body.
// Chunk sizes come straight from the file header, so a corrupt or hostile file
// must not be able to force a multi-gigabyte allocation before any data has
// actually been read.
const wavChunkPrealloc = 1 << 20

// WAVSource loads a PCM WAV file into memory and provides frame-by-frame access.
type WAVSource struct {
	frames []Frame // decoded audio, one entry per sample frame
	index  int     // position of the frame NextFrame returns next

	SampleRate int // sample rate read from the "fmt " chunk
	Channels   int // channel count read from the "fmt " chunk (1 or 2)
}

// LoadWAVSource reads and decodes the WAV file at path.
//
// The decoder scans for the "fmt " and "data" chunks, skipping any other
// chunks (LIST, INFO, bext, etc.) that appear around them. Only 16-bit PCM
// mono or stereo files are accepted. The error from os.Open is returned
// unwrapped; every decoding error carries a short description of the step
// that failed.
func LoadWAVSource(path string) (*WAVSource, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	return decodeWAV(f)
}

// decodeWAV parses a RIFF/WAVE stream from r and decodes its samples.
func decodeWAV(r io.Reader) (*WAVSource, error) {
	// RIFF header: "RIFF" <size> "WAVE" (12 bytes). The size field is ignored.
	var riff [12]byte
	if _, err := io.ReadFull(r, riff[:]); err != nil {
		return nil, fmt.Errorf("read riff header: %w", err)
	}
	if string(riff[0:4]) != "RIFF" || string(riff[8:12]) != "WAVE" {
		return nil, fmt.Errorf("not a RIFF/WAVE file")
	}

	var (
		audioFormat   uint16
		channels      uint16
		sampleRate    uint32
		bitsPerSample uint16
		dataBytes     []byte
		fmtFound      bool
		dataFound     bool
	)

scan:
	for {
		var id, sz [4]byte
		if _, err := io.ReadFull(r, id[:]); err != nil {
			// Running out of input at a chunk boundary simply ends the scan;
			// the missing-chunk checks below report what was not found.
			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
				break scan
			}
			return nil, fmt.Errorf("read chunk id: %w", err)
		}
		if _, err := io.ReadFull(r, sz[:]); err != nil {
			return nil, fmt.Errorf("read chunk size: %w", err)
		}
		chunkSize := binary.LittleEndian.Uint32(sz[:])

		switch string(id[:]) {
		case "fmt ":
			if chunkSize < 16 {
				return nil, fmt.Errorf("fmt chunk too small: %d", chunkSize)
			}
			// Only the first 16 bytes are needed; read exactly those and
			// discard any extension bytes instead of allocating chunkSize.
			var hdr [16]byte
			if _, err := io.ReadFull(r, hdr[:]); err != nil {
				return nil, fmt.Errorf("read fmt chunk: %w", err)
			}
			if err := skipWAVBytes(r, int64(chunkSize)-16); err != nil {
				if errors.Is(err, io.EOF) {
					err = io.ErrUnexpectedEOF
				}
				return nil, fmt.Errorf("read fmt chunk: %w", err)
			}
			audioFormat = binary.LittleEndian.Uint16(hdr[0:2])
			channels = binary.LittleEndian.Uint16(hdr[2:4])
			sampleRate = binary.LittleEndian.Uint32(hdr[4:8])
			bitsPerSample = binary.LittleEndian.Uint16(hdr[14:16])
			fmtFound = true

		case "data":
			body, err := readWAVChunk(r, chunkSize)
			if err != nil {
				return nil, fmt.Errorf("read data chunk: %w", err)
			}
			dataBytes = body
			dataFound = true

		default:
			if err := skipWAVBytes(r, int64(chunkSize)); err != nil {
				// A truncated trailing chunk is tolerated: stop scanning.
				if errors.Is(err, io.EOF) {
					break scan
				}
				return nil, fmt.Errorf("skip %q chunk: %w", string(id[:]), err)
			}
		}

		if fmtFound && dataFound {
			break scan
		}

		// RIFF chunks are word-aligned: an odd-sized body is followed by one
		// pad byte that is not counted in chunkSize. This applies to every
		// chunk, including "fmt " and "data".
		if chunkSize%2 != 0 {
			if err := skipWAVBytes(r, 1); err != nil {
				if errors.Is(err, io.EOF) {
					break scan
				}
				return nil, fmt.Errorf("skip chunk padding: %w", err)
			}
		}
	}

	if !fmtFound {
		return nil, fmt.Errorf("no fmt chunk found")
	}
	if !dataFound {
		return nil, fmt.Errorf("no data chunk found")
	}
	if audioFormat != 1 {
		return nil, fmt.Errorf("only PCM wav supported (format=%d)", audioFormat)
	}
	if bitsPerSample != 16 {
		return nil, fmt.Errorf("only 16-bit PCM wav supported (bits=%d)", bitsPerSample)
	}
	if channels != 1 && channels != 2 {
		return nil, fmt.Errorf("only mono/stereo wav supported (channels=%d)", channels)
	}
	if sampleRate == 0 {
		return nil, fmt.Errorf("invalid wav sample rate")
	}

	return &WAVSource{
		frames:     framesFromPCM16(dataBytes, int(channels)),
		SampleRate: int(sampleRate),
		Channels:   int(channels),
	}, nil
}

// readWAVChunk reads exactly size bytes from r. The buffer grows as data
// actually arrives, so the size claimed by the header never drives a large
// allocation on its own. A short read yields io.EOF when nothing was read and
// io.ErrUnexpectedEOF otherwise, matching io.ReadFull.
func readWAVChunk(r io.Reader, size uint32) ([]byte, error) {
	var buf bytes.Buffer
	if size < wavChunkPrealloc {
		buf.Grow(int(size))
	} else {
		buf.Grow(wavChunkPrealloc)
	}

	n, err := io.CopyN(&buf, r, int64(size))
	if err != nil {
		if errors.Is(err, io.EOF) && n > 0 {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	return buf.Bytes(), nil
}

// skipWAVBytes discards n bytes from r. It returns io.EOF if r ends first.
func skipWAVBytes(r io.Reader, n int64) error {
	_, err := io.CopyN(io.Discard, r, n)
	return err
}

// framesFromPCM16 converts interleaved little-endian 16-bit PCM into frames.
// Mono input is duplicated onto both sides of the frame; trailing bytes that
// do not form a complete sample frame are ignored. channels must be 1 or 2.
func framesFromPCM16(data []byte, channels int) []Frame {
	step := channels * 2
	frames := make([]Frame, 0, len(data)/step)
	for i := 0; i+step <= len(data); i += step {
		l := pcm16ToSample(int16(binary.LittleEndian.Uint16(data[i : i+2])))
		r := l
		if channels == 2 {
			r = pcm16ToSample(int16(binary.LittleEndian.Uint16(data[i+2 : i+4])))
		}
		frames = append(frames, NewFrame(l, r))
	}
	return frames
}

// NextFrame returns the next audio frame, looping back to the first frame
// after the last one. A source with no frames yields NewFrame(0, 0).
func (s *WAVSource) NextFrame() Frame {
	if len(s.frames) == 0 {
		return NewFrame(0, 0)
	}
	frame := s.frames[s.index]
	s.index++
	if s.index >= len(s.frames) {
		s.index = 0
	}
	return frame
}

// pcm16ToSample scales a signed 16-bit PCM value by 1/32768 and clamps the
// result with Sample.Clamp.
func pcm16ToSample(v int16) Sample {
	return Sample(float64(v) / 32768.0).Clamp()
}