|
- package extractor
-
- import (
- "regexp"
- "sort"
- "strings"
- )
-
// Patterns used to pull candidate URLs out of HTML, script and playlist text.
// All of them are case-insensitive.
var (
	// urlPattern matches an http(s) or protocol-relative ("//") URL that runs
	// up to a known media/playlist extension. Group 1 is the URL, group 2 the
	// extension. Anything following the extension (for example a query string)
	// is not part of the match.
	urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)

	// attrPattern matches a stream-related key followed by ':' or '=' and a
	// quoted value. Group 1 is the key, group 2 the unquoted value.
	attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)

	// srcPattern matches a quoted src attribute; group 1 is its value.
	srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)

	// xspfPattern matches an XSPF <location> element; group 1 is its text.
	xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)
)
-
- // ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
- func ExtractStreams(data string) []string {
- candidates := make(map[string]struct{})
- add := func(raw string) {
- raw = strings.TrimSpace(raw)
- if raw == "" {
- return
- }
- if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
- return
- }
- if strings.HasPrefix(raw, "//") {
- raw = "https:" + raw
- }
- normalized := strings.TrimRight(raw, "+")
- normalized = strings.ReplaceAll(normalized, `\\`, "")
- candidates[normalized] = struct{}{}
- }
-
- for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
- add(match[1])
- }
- for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
- add(match[2])
- }
- for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
- add(match[1])
- }
-
- streams := make([]string, 0, len(candidates))
- for u := range candidates {
- if isStreamURL(u) {
- streams = append(streams, u)
- }
- }
- sort.Strings(streams)
- return streams
- }
-
- // ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json).
- func ExtractPlaylistLinks(data string) []string {
- candidates := make(map[string]struct{})
- add := func(raw string) {
- raw = strings.TrimSpace(raw)
- if raw == "" {
- return
- }
- if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
- return
- }
- if strings.HasPrefix(raw, "//") {
- raw = "https:" + raw
- }
- normalized := strings.TrimRight(raw, "+")
- normalized = strings.ReplaceAll(normalized, `\\`, "")
- if isPlaylistURL(normalized) {
- candidates[normalized] = struct{}{}
- }
- }
-
- for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
- add(match[1])
- }
- for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
- add(match[2])
- }
- for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
- add(match[1])
- }
-
- links := make([]string, 0, len(candidates))
- for u := range candidates {
- links = append(links, u)
- }
- sort.Strings(links)
- return links
- }
-
- // ParsePlaylist extracts stream URLs from playlist content.
- func ParsePlaylist(content string, contentType string) []string {
- candidates := make(map[string]struct{})
- add := func(raw string) {
- raw = strings.TrimSpace(raw)
- if raw == "" {
- return
- }
- if strings.HasPrefix(raw, "//") {
- raw = "https:" + raw
- }
- if isStreamURL(raw) {
- candidates[raw] = struct{}{}
- }
- }
-
- lowerType := strings.ToLower(contentType)
- lines := strings.Split(content, "\n")
-
- if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "<location>") {
- for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) {
- add(match[1])
- }
- }
-
- for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
- add(match[1])
- }
-
- for _, line := range lines {
- line = strings.TrimSpace(line)
- if line == "" || strings.HasPrefix(line, "#") {
- continue
- }
- if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") {
- parts := strings.SplitN(line, "=", 2)
- add(parts[1])
- continue
- }
- if strings.Contains(line, "http") {
- for _, match := range urlPattern.FindAllStringSubmatch(line, -1) {
- add(match[1])
- }
- }
- }
-
- if strings.Contains(lowerType, "json") {
- for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
- add(match[1])
- }
- }
-
- streams := make([]string, 0, len(candidates))
- for u := range candidates {
- streams = append(streams, u)
- }
- sort.Strings(streams)
- return streams
- }
-
// isStreamURL reports whether u contains one of the audio stream extensions
// (.mp3, .aac, .m3u8, .ogg, .opus) anywhere in the string, ignoring case.
func isStreamURL(u string) bool {
	folded := strings.ToLower(u)
	for _, ext := range []string{".mp3", ".aac", ".m3u8", ".ogg", ".opus"} {
		if strings.Contains(folded, ext) {
			return true
		}
	}
	return false
}
-
// isPlaylistURL reports whether u contains one of the playlist extensions
// (.m3u, .pls, .xspf, .json) anywhere in the string, ignoring case. Because
// this is a substring test, ".m3u8" URLs are accepted as well.
func isPlaylistURL(u string) bool {
	folded := strings.ToLower(u)
	for _, ext := range []string{".m3u", ".pls", ".xspf", ".json"} {
		if strings.Contains(folded, ext) {
			return true
		}
	}
	return false
}
|