package extractor import ( "regexp" "sort" "strings" ) var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`) var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`) var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`) var iframePattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) var scriptPattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) var audioPattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) var sourcePattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) var xspfPattern = regexp.MustCompile(`(?i)([^<]+)`) // ExtractStreams returns the unique streaming URLs found in the provided HTML/text. func ExtractStreams(data string) []string { candidates := make(map[string]struct{}) special := make(map[string]struct{}) add := func(raw string) { if normalized, ok := normalizeCandidate(raw); ok { candidates[normalized] = struct{}{} } } addSpecial := func(raw string) { if normalized, ok := normalizeCandidate(raw); ok { candidates[normalized] = struct{}{} special[normalized] = struct{}{} } } for _, match := range urlPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } for _, match := range audioPattern.FindAllStringSubmatch(data, -1) { addSpecial(match[1]) } for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) { addSpecial(match[1]) } streams := make([]string, 0, len(candidates)) for u := range candidates { if isStreamURL(u) { streams = append(streams, u) continue } if _, ok := special[u]; ok { streams = append(streams, u) } } sort.Strings(streams) return streams } // ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json). func ExtractPlaylistLinks(data string) []string { candidates := make(map[string]struct{}) add := func(raw string) { if normalized, ok := normalizeCandidate(raw); ok && isPlaylistURL(normalized) { candidates[normalized] = struct{}{} } } for _, match := range urlPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { add(match[1]) } links := make([]string, 0, len(candidates)) for u := range candidates { links = append(links, u) } sort.Strings(links) return links } // ExtractEmbedURLs returns URLs found in iframe embeds. func ExtractEmbedURLs(data string) []string { return extractURLs(iframePattern, data) } // ExtractScriptURLs returns URLs referenced by script tags. func ExtractScriptURLs(data string) []string { return extractURLs(scriptPattern, data) } // ParsePlaylist extracts stream URLs from playlist content. func ParsePlaylist(content string, contentType string) []string { candidates := make(map[string]struct{}) add := func(raw string) { raw = strings.TrimSpace(raw) if raw == "" { return } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } if isStreamURL(raw) { candidates[raw] = struct{}{} } } addForce := func(raw string) { raw = strings.TrimSpace(raw) if raw == "" { return } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } candidates[raw] = struct{}{} } lowerType := strings.ToLower(contentType) lines := strings.Split(content, "\n") if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "") { for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } } for _, match := range urlPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } for _, line := range lines { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue } if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") { parts := strings.SplitN(line, "=", 2) add(parts[1]) continue } if strings.Contains(line, "http") { matched := false for _, match := range urlPattern.FindAllStringSubmatch(line, -1) { add(match[1]) matched = true } if !matched { addForce(line) } } } if strings.Contains(lowerType, "json") { for _, match := range urlPattern.FindAllStringSubmatch(content, -1) { add(match[1]) } } streams := make([]string, 0, len(candidates)) for u := range candidates { streams = append(streams, u) } sort.Strings(streams) return streams } func extractURLs(pattern *regexp.Regexp, data string) []string { candidates := make(map[string]struct{}) for _, match := range pattern.FindAllStringSubmatch(data, -1) { if normalized, ok := normalizeCandidate(match[1]); ok { candidates[normalized] = struct{}{} } } urls := make([]string, 0, len(candidates)) for u := range candidates { urls = append(urls, u) } sort.Strings(urls) return urls } func normalizeCandidate(raw string) (string, bool) { raw = strings.TrimSpace(raw) if raw == "" { return "", false } if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//") || strings.HasPrefix(raw, "/")) { return "", false } if strings.HasPrefix(raw, "//") { raw = "https:" + raw } normalized := strings.TrimRight(raw, "+") normalized = strings.ReplaceAll(normalized, `\\`, "") if normalized == "" { return "", false } return normalized, true } func isStreamURL(u string) bool { lower := strings.ToLower(u) return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") || strings.Contains(lower, ".ogg") || strings.Contains(lower, ".opus") } func isPlaylistURL(u string) bool { lower := strings.ToLower(u) return strings.Contains(lower, ".m3u") || strings.Contains(lower, ".pls") || strings.Contains(lower, ".xspf") || strings.Contains(lower, ".json") }