package extractor import ( "regexp" "sort" "strings" ) var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`) var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`) var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`) var xspfPattern = regexp.MustCompile(`(?i)([^<]+)`) // ExtractStreams returns the unique streaming URLs found in the provided HTML/text. func ExtractStreams(data string) []string { candidates := make(map[string]struct{}) add := func(raw string) { raw = strings.TrimSpace(raw) if raw == "" { return } if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) { return } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } normalized := strings.TrimRight(raw, "+") normalized = strings.ReplaceAll(normalized, `\\`, "") candidates[normalized] = struct{}{} } for _, match := range urlPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } streams := make([]string, 0, len(candidates)) for u := range candidates { if isStreamURL(u) { streams = append(streams, u) } } sort.Strings(streams) return streams } // ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json). func ExtractPlaylistLinks(data string) []string { candidates := make(map[string]struct{}) add := func(raw string) { raw = strings.TrimSpace(raw) if raw == "" { return } if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) { return } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } normalized := strings.TrimRight(raw, "+") normalized = strings.ReplaceAll(normalized, `\\`, "") if isPlaylistURL(normalized) { candidates[normalized] = struct{}{} } } for _, match := range urlPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } links := make([]string, 0, len(candidates)) for u := range candidates { links = append(links, u) } sort.Strings(links) return links } // ParsePlaylist extracts stream URLs from playlist content. func ParsePlaylist(content string, contentType string) []string { candidates := make(map[string]struct{}) add := func(raw string) { raw = strings.TrimSpace(raw) if raw == "" { return } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } if isStreamURL(raw) { candidates[raw] = struct{}{} } } lowerType := strings.ToLower(contentType) lines := strings.Split(content, "\n") if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "") { for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } } for _, match := range urlPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } for _, line := range lines { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue } if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") { parts := strings.SplitN(line, "=", 2) add(parts[1]) continue } if strings.Contains(line, "http") { for _, match := range urlPattern.FindAllStringSubmatch(line, -1) { add(match[1]) } } } if strings.Contains(lowerType, "json") { for _, match := range urlPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } } streams := make([]string, 0, len(candidates)) for u := range candidates { streams = append(streams, u) } sort.Strings(streams) return streams } func isStreamURL(u string) bool { lower := strings.ToLower(u) return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") || strings.Contains(lower, ".ogg") || strings.Contains(lower, ".opus") } func isPlaylistURL(u string) bool { lower := strings.ToLower(u) return strings.Contains(lower, ".m3u") || strings.Contains(lower, ".pls") || strings.Contains(lower, ".xspf") || strings.Contains(lower, ".json") }