package extractor
import (
"regexp"
"sort"
"strings"
)
// Patterns used to locate candidate URLs in HTML, script and playlist text.
// All of them are case-insensitive.
var (
	// urlPattern matches an absolute (http/https) or protocol-relative ("//")
	// URL that ends in one of the listed stream or playlist extensions.
	// Group 1 is the URL, group 2 the extension without the dot.
	urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)

	// attrPattern matches `key: "value"` / `key = 'value'` pairs for a fixed
	// set of stream-related keys. Group 1 is the key, group 2 the quoted value.
	attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)

	// srcPattern matches `src="value"` assignments. It is not anchored on a
	// word boundary, so it also fires on longer names that end in "src".
	// Group 1 is the quoted value.
	srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)

	// xspfPattern matches an XSPF <location> element. Group 1 is the element
	// text with surrounding whitespace removed. The pattern is anchored on the
	// tags so that ordinary text between other elements (titles, comments,
	// whole M3U bodies) is never captured as a location.
	xspfPattern = regexp.MustCompile(`(?i)<location(?:\s[^>]*)?>\s*([^<]+?)\s*</location>`)
)
// ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
//
// Candidates are collected from three sources: URLs matched by urlPattern,
// quoted values of the keys matched by attrPattern, and quoted src="..."
// values matched by srcPattern. Each candidate is normalized (see below),
// de-duplicated, and kept only if isStreamURL accepts it. The result is
// sorted and is never nil.
func ExtractStreams(data string) []string {
	seen := make(map[string]struct{})

	// add normalizes one raw candidate and records it in seen.
	add := func(raw string) {
		raw = strings.TrimSpace(raw)
		// URLs embedded in JS/JSON are commonly escaped as "https:\/\/host\/x".
		// Backslashes are not valid in URLs, so drop every one of them. This is
		// done before the scheme checks so escaped "\/\/host" is recognised too.
		raw = strings.ReplaceAll(raw, "\\", "")
		if raw == "" {
			return
		}
		switch {
		case strings.HasPrefix(raw, "//"):
			// Protocol-relative URL: assume https.
			raw = "https:" + raw
		case !strings.Contains(strings.ToLower(raw), "http"):
			// The patterns are case-insensitive, so the scheme check must be
			// too; otherwise "HTTP://..." matches would be thrown away here.
			return
		}
		// A trailing '+' is left over from string concatenation in scripts.
		seen[strings.TrimRight(raw, "+")] = struct{}{}
	}

	for _, m := range urlPattern.FindAllStringSubmatch(data, -1) {
		add(m[1])
	}
	for _, m := range attrPattern.FindAllStringSubmatch(data, -1) {
		add(m[2])
	}
	for _, m := range srcPattern.FindAllStringSubmatch(data, -1) {
		add(m[1])
	}

	streams := make([]string, 0, len(seen))
	for u := range seen {
		if isStreamURL(u) {
			streams = append(streams, u)
		}
	}
	// Map iteration order is random; sort for deterministic output.
	sort.Strings(streams)
	return streams
}
// ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json).
//
// It scans data with urlPattern, attrPattern and srcPattern, normalizes every
// hit, and keeps the ones isPlaylistURL accepts. The result is de-duplicated,
// sorted, and never nil.
func ExtractPlaylistLinks(data string) []string {
	found := make(map[string]struct{})

	// add normalizes one raw candidate and records it if it looks like a playlist.
	add := func(raw string) {
		// Strip JS/JSON escaping ("https:\/\/host\/list.m3u") first so the
		// scheme checks below see the real URL. A URL never legitimately
		// contains a backslash, so removing all of them is safe.
		link := strings.ReplaceAll(strings.TrimSpace(raw), "\\", "")
		if link == "" {
			return
		}
		if strings.HasPrefix(link, "//") {
			// Protocol-relative URL: assume https.
			link = "https:" + link
		} else if !strings.Contains(strings.ToLower(link), "http") {
			// Compare case-insensitively, matching the (?i) patterns that
			// produced the candidate.
			return
		}
		// A trailing '+' is left over from string concatenation in scripts.
		link = strings.TrimRight(link, "+")
		if isPlaylistURL(link) {
			found[link] = struct{}{}
		}
	}

	for _, m := range urlPattern.FindAllStringSubmatch(data, -1) {
		add(m[1])
	}
	for _, m := range attrPattern.FindAllStringSubmatch(data, -1) {
		add(m[2])
	}
	for _, m := range srcPattern.FindAllStringSubmatch(data, -1) {
		add(m[1])
	}

	links := make([]string, 0, len(found))
	for u := range found {
		links = append(links, u)
	}
	// Map iteration order is random; sort for deterministic output.
	sort.Strings(links)
	return links
}
// xspfLocationPattern captures the whitespace-trimmed text of each XSPF
// <location> element (group 1). It is anchored on the tags so that free text
// elsewhere in the document is never mistaken for a location.
var xspfLocationPattern = regexp.MustCompile(`(?i)<location(?:\s[^>]*)?>\s*([^<]+?)\s*</location>`)

// ParsePlaylist extracts stream URLs from playlist content.
//
// content is the playlist body and contentType its MIME type (may be empty).
// Three extraction passes feed one de-duplicated set:
//
//   - XSPF: the text of every <location> element, when contentType mentions
//     "xspf" or the body contains a <location tag.
//   - Generic: every URL matched by urlPattern anywhere in the body. This
//     covers M3U entries and URLs embedded in JSON.
//   - PLS: the value of every "FileN=value" line.
//
// Protocol-relative values ("//host/...") are given an https scheme. Only
// values accepted by isStreamURL are returned. The result is sorted and is
// never nil.
func ParsePlaylist(content string, contentType string) []string {
	seen := make(map[string]struct{})

	// add records one candidate if it looks like a stream URL.
	add := func(raw string) {
		raw = strings.TrimSpace(raw)
		if raw == "" {
			return
		}
		if strings.HasPrefix(raw, "//") {
			raw = "https:" + raw
		}
		if isStreamURL(raw) {
			seen[raw] = struct{}{}
		}
	}

	// XSPF pass. The gate must test for a real marker: Contains(s, "") is
	// always true and would run this pass on every kind of playlist.
	isXSPF := strings.Contains(strings.ToLower(contentType), "xspf") ||
		strings.Contains(strings.ToLower(content), "<location")
	if isXSPF {
		for _, m := range xspfLocationPattern.FindAllStringSubmatch(content, -1) {
			add(m[1])
		}
	}

	// Generic pass. urlPattern cannot match across whitespace, so scanning the
	// whole body once finds everything a per-line or JSON-specific scan would.
	for _, m := range urlPattern.FindAllStringSubmatch(content, -1) {
		add(m[1])
	}

	// PLS pass: "File1=http://host/stream.mp3". Values here need not match
	// urlPattern (e.g. they may carry a query string), so take them verbatim.
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line) // also drops the '\r' of CRLF files
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if !strings.HasPrefix(strings.ToLower(line), "file") {
			continue
		}
		if eq := strings.Index(line, "="); eq >= 0 {
			add(line[eq+1:])
		}
	}

	streams := make([]string, 0, len(seen))
	for u := range seen {
		streams = append(streams, u)
	}
	// Map iteration order is random; sort for deterministic output.
	sort.Strings(streams)
	return streams
}
// isStreamURL reports whether u contains, case-insensitively, one of the
// audio stream extensions .mp3, .aac, .m3u8, .ogg or .opus. The extension may
// appear anywhere in u, not only at the end.
func isStreamURL(u string) bool {
	candidate := strings.ToLower(u)
	for _, ext := range []string{".mp3", ".aac", ".m3u8", ".ogg", ".opus"} {
		if strings.Contains(candidate, ext) {
			return true
		}
	}
	return false
}
// isPlaylistURL reports whether u contains, case-insensitively, one of the
// playlist extensions .m3u, .pls, .xspf or .json. The match is a substring
// test, so ".m3u8" URLs are accepted as well.
func isPlaylistURL(u string) bool {
	candidate := strings.ToLower(u)
	for _, ext := range []string{".m3u", ".pls", ".xspf", ".json"} {
		if strings.Contains(candidate, ext) {
			return true
		}
	}
	return false
}