Переглянути джерело

Improve stream extraction robustness

master
Alfred 4 дні тому
джерело
коміт
76a7819dad
6 змінених файлів з 336 додано та 74 видалено
  1. +3
    -0
      README.md
  2. +217
    -40
      cmd/radiostreamscan/main.go
  3. +3
    -1
      go.mod
  4. +2
    -0
      go.sum
  5. +102
    -30
      internal/extractor/extractor.go
  6. +9
    -3
      internal/extractor/extractor_test.go

+ 3
- 0
README.md Переглянути файл

@@ -43,6 +43,9 @@ go run ./cmd/radiostreamscan https://live24.gr/radio/generic.jsp?sid=2676
- `-history history.jsonl` (leer = aus) - `-history history.jsonl` (leer = aus)
- `-watch 30s` (CLI wiederholt den Scan) - `-watch 30s` (CLI wiederholt den Scan)
- `-concurrency 4` - `-concurrency 4`
- `-timeout 15s` (HTTP Timeout pro Request)
- `-probe-timeout 8s`
- `-allow-private` (lokale/private Ziele erlauben)


## Build / EXE ## Build / EXE




+ 217
- 40
cmd/radiostreamscan/main.go Переглянути файл

@@ -2,11 +2,13 @@ package main


import ( import (
"bufio" "bufio"
"context"
"encoding/csv" "encoding/csv"
"encoding/json" "encoding/json"
"flag" "flag"
"fmt" "fmt"
"io" "io"
"net"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@@ -34,13 +36,16 @@ type probeResult struct {
} }


type config struct { type config struct {
Format string
Probe bool
Headers headerList
Proxy string
HistoryPath string
Watch time.Duration
Concurrency int
Format string
Probe bool
Headers headerList
Proxy string
HistoryPath string
Watch time.Duration
Concurrency int
RequestTimeout time.Duration
ProbeTimeout time.Duration
AllowPrivate bool
} }


type headerList []string type headerList []string
@@ -63,6 +68,9 @@ func main() {
flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)") flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)")
flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)") flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)")
flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers") flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers")
flag.DurationVar(&cfg.RequestTimeout, "timeout", 15*time.Second, "timeout per HTTP request (e.g. 10s, 2m)")
flag.DurationVar(&cfg.ProbeTimeout, "probe-timeout", 8*time.Second, "timeout for probing stream URLs")
flag.BoolVar(&cfg.AllowPrivate, "allow-private", false, "allow requests to private/localhost addresses")


flag.Usage = func() { flag.Usage = func() {
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] <url> [url...]\n", os.Args[0]) fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] <url> [url...]\n", os.Args[0])
@@ -71,7 +79,7 @@ func main() {
flag.Parse() flag.Parse()


urls := flag.Args() urls := flag.Args()
client := newHTTPClient(cfg.Proxy)
client := newHTTPClient(cfg.Proxy, cfg.RequestTimeout)
history := newHistoryWriter(cfg.HistoryPath) history := newHistoryWriter(cfg.HistoryPath)


if *web || len(urls) == 0 { if *web || len(urls) == 0 {
@@ -88,7 +96,10 @@ func main() {
func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) { func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) {
for { for {
results := scanURLs(urls, client, cfg) results := scanURLs(urls, client, cfg)
outputResults(results, cfg.Format, os.Stdout)
if err := outputResults(results, cfg.Format, os.Stdout); err != nil {
fmt.Fprintf(os.Stderr, "output failed: %v\n", err)
return
}
history.Write(results) history.Write(results)
if cfg.Watch == 0 { if cfg.Watch == 0 {
return return
@@ -258,7 +269,9 @@ func makeScanHandler(client *http.Client, cfg *config, history *historyWriter) h


results := scanURLs(urls, client, &localCfg) results := scanURLs(urls, client, &localCfg)
history.Write(results) history.Write(results)
outputResults(results, localCfg.Format, w)
if err := outputResults(results, localCfg.Format, w); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
} }
} }


@@ -325,13 +338,19 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {


streams := extractor.ExtractStreams(html) streams := extractor.ExtractStreams(html)
playlists := extractor.ExtractPlaylistLinks(html) playlists := extractor.ExtractPlaylistLinks(html)
resolvedPlaylists := make([]string, 0, len(playlists))


for _, pl := range playlists { for _, pl := range playlists {
plContent, plType, err := fetchContent(client, cfg, pl)
plURL := resolveURL(raw, pl)
if plURL == "" {
continue
}
resolvedPlaylists = append(resolvedPlaylists, plURL)
plContent, plType, err := fetchContent(client, cfg, plURL)
if err != nil { if err != nil {
continue continue
} }
parsed := extractor.ParsePlaylist(plContent, plType)
parsed := extractor.ParsePlaylist(plContent, plType, plURL)
if len(parsed) > 0 { if len(parsed) > 0 {
streams = append(streams, parsed...) streams = append(streams, parsed...)
res.FromPlaylist = true res.FromPlaylist = true
@@ -360,11 +379,16 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
playlists = append(playlists, embedPlaylists...) playlists = append(playlists, embedPlaylists...)


for _, pl := range embedPlaylists { for _, pl := range embedPlaylists {
plContent, plType, err := fetchContent(client, cfg, pl)
plURL := resolveURL(embedURL, pl)
if plURL == "" {
continue
}
resolvedPlaylists = append(resolvedPlaylists, plURL)
plContent, plType, err := fetchContent(client, cfg, plURL)
if err != nil { if err != nil {
continue continue
} }
parsed := extractor.ParsePlaylist(plContent, plType)
parsed := extractor.ParsePlaylist(plContent, plType, plURL)
if len(parsed) > 0 { if len(parsed) > 0 {
streams = append(streams, parsed...) streams = append(streams, parsed...)
res.FromPlaylist = true res.FromPlaylist = true
@@ -403,11 +427,16 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
playlists = append(playlists, scriptPlaylists...) playlists = append(playlists, scriptPlaylists...)


for _, pl := range scriptPlaylists { for _, pl := range scriptPlaylists {
plContent, plType, err := fetchContent(client, cfg, pl)
plURL := resolveURL(scriptURL, pl)
if plURL == "" {
continue
}
resolvedPlaylists = append(resolvedPlaylists, plURL)
plContent, plType, err := fetchContent(client, cfg, plURL)
if err != nil { if err != nil {
continue continue
} }
parsed := extractor.ParsePlaylist(plContent, plType)
parsed := extractor.ParsePlaylist(plContent, plType, plURL)
if len(parsed) > 0 { if len(parsed) > 0 {
streams = append(streams, parsed...) streams = append(streams, parsed...)
res.FromPlaylist = true res.FromPlaylist = true
@@ -415,7 +444,7 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
} }
} }


res.Playlists = uniqueStrings(playlists)
res.Playlists = uniqueStrings(resolvedPlaylists)
res.Streams = uniqueStrings(streams) res.Streams = uniqueStrings(streams)


if cfg.Probe { if cfg.Probe {
@@ -427,7 +456,20 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
} }


func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) { func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) {
req, err := http.NewRequest(http.MethodGet, raw, nil)
if !cfg.AllowPrivate {
if blocked, reason := isPrivateURL(raw); blocked {
return "", "", fmt.Errorf("blocked private address (%s)", reason)
}
}

timeout := cfg.RequestTimeout
if timeout <= 0 {
timeout = 15 * time.Second
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

req, err := http.NewRequestWithContext(ctx, http.MethodGet, raw, nil)
if err != nil { if err != nil {
return "", "", err return "", "", err
} }
@@ -445,8 +487,9 @@ func fetchContent(client *http.Client, cfg *config, raw string) (string, string,
} }
defer resp.Body.Close() defer resp.Body.Close()


if resp.StatusCode != http.StatusOK {
return "", "", fmt.Errorf("unexpected status %s", resp.Status)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
return "", "", fmt.Errorf("unexpected status %s: %s", resp.Status, strings.TrimSpace(string(snippet)))
} }


body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20)) body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
@@ -460,8 +503,25 @@ func fetchContent(client *http.Client, cfg *config, raw string) (string, string,
func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult { func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult {
var results []probeResult var results []probeResult
for _, s := range streams { for _, s := range streams {
req, err := http.NewRequest(http.MethodHead, s, nil)
if !cfg.AllowPrivate {
if blocked, reason := isPrivateURL(s); blocked {
results = append(results, probeResult{URL: s, Status: "blocked: " + reason})
continue
}
}

timeout := cfg.ProbeTimeout
if timeout <= 0 {
timeout = cfg.RequestTimeout
}
if timeout <= 0 {
timeout = 8 * time.Second
}

ctx, cancel := context.WithTimeout(context.Background(), timeout)
req, err := http.NewRequestWithContext(ctx, http.MethodHead, s, nil)
if err != nil { if err != nil {
cancel()
continue continue
} }
for _, h := range cfg.Headers { for _, h := range cfg.Headers {
@@ -472,16 +532,49 @@ func probeStreams(client *http.Client, cfg *config, streams []string) []probeRes
} }
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
results = append(results, probeResult{URL: s, Status: err.Error()})
cancel()
// Fallback to GET with range.
results = append(results, probeWithGet(client, cfg, s, timeout))
continue continue
} }
resp.Body.Close() resp.Body.Close()
cancel()

if resp.StatusCode == http.StatusMethodNotAllowed || resp.StatusCode == http.StatusNotImplemented {
results = append(results, probeWithGet(client, cfg, s, timeout))
continue
}
results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")}) results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")})
} }
return results return results
} }


func outputResults(results []scanResult, format string, w io.Writer) {
// probeWithGet checks a stream URL with a ranged GET request; used as a
// fallback when a HEAD probe fails or is rejected by the server.
func probeWithGet(client *http.Client, cfg *config, urlStr string, timeout time.Duration) probeResult {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if err != nil {
		return probeResult{URL: urlStr, Status: err.Error()}
	}
	// Request only the first KiB so we never pull down a full (endless) stream.
	req.Header.Set("Range", "bytes=0-1023")
	for _, h := range cfg.Headers {
		if name, value, ok := strings.Cut(h, ":"); ok {
			req.Header.Set(strings.TrimSpace(name), strings.TrimSpace(value))
		}
	}

	resp, err := client.Do(req)
	if err != nil {
		return probeResult{URL: urlStr, Status: err.Error()}
	}
	defer resp.Body.Close()
	// Drain a bounded amount so the transport can reuse the connection.
	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
	return probeResult{URL: urlStr, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")}
}

func outputResults(results []scanResult, format string, w io.Writer) error {
if rw, ok := w.(http.ResponseWriter); ok { if rw, ok := w.(http.ResponseWriter); ok {
if strings.ToLower(format) == "json" { if strings.ToLower(format) == "json" {
rw.Header().Set("Content-Type", "application/json") rw.Header().Set("Content-Type", "application/json")
@@ -492,61 +585,97 @@ func outputResults(results []scanResult, format string, w io.Writer) {


switch strings.ToLower(format) { switch strings.ToLower(format) {
case "json": case "json":
json.NewEncoder(w).Encode(results)
if err := json.NewEncoder(w).Encode(results); err != nil {
return err
}
case "csv": case "csv":
cw := csv.NewWriter(w) cw := csv.NewWriter(w)
cw.Write([]string{"input_url", "stream_url"})
if err := cw.Write([]string{"input_url", "stream_url"}); err != nil {
return err
}
for _, res := range results { for _, res := range results {
for _, s := range res.Streams { for _, s := range res.Streams {
cw.Write([]string{res.URL, s})
if err := cw.Write([]string{res.URL, s}); err != nil {
return err
}
} }
} }
cw.Flush() cw.Flush()
if err := cw.Error(); err != nil {
return err
}
case "pls": case "pls":
fmt.Fprintln(w, "[playlist]")
if _, err := fmt.Fprintln(w, "[playlist]"); err != nil {
return err
}
i := 1 i := 1
for _, res := range results { for _, res := range results {
for _, s := range res.Streams { for _, s := range res.Streams {
fmt.Fprintf(w, "File%d=%s\n", i, s)
if _, err := fmt.Fprintf(w, "File%d=%s\n", i, s); err != nil {
return err
}
i++ i++
} }
} }
fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1)
if _, err := fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1); err != nil {
return err
}
default: default:
for _, res := range results { for _, res := range results {
fmt.Fprintf(w, "URL: %s\n", res.URL)
if _, err := fmt.Fprintf(w, "URL: %s\n", res.URL); err != nil {
return err
}
if res.Error != "" { if res.Error != "" {
fmt.Fprintf(w, " error: %s\n", res.Error)
if _, err := fmt.Fprintf(w, " error: %s\n", res.Error); err != nil {
return err
}
continue continue
} }
if len(res.Streams) == 0 { if len(res.Streams) == 0 {
fmt.Fprintln(w, " (no candidate streams found)")
if _, err := fmt.Fprintln(w, " (no candidate streams found)"); err != nil {
return err
}
continue continue
} }
for _, s := range res.Streams { for _, s := range res.Streams {
fmt.Fprintf(w, " - %s\n", s)
if _, err := fmt.Fprintf(w, " - %s\n", s); err != nil {
return err
}
} }
} }
} }
return nil
} }


func newHTTPClient(proxyURL string) *http.Client {
transport := &http.Transport{}
func newHTTPClient(proxyURL string, timeout time.Duration) *http.Client {
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
ResponseHeaderTimeout: 8 * time.Second,
TLSHandshakeTimeout: 6 * time.Second,
IdleConnTimeout: 30 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
}
if proxyURL != "" { if proxyURL != "" {
if parsed, err := url.Parse(proxyURL); err == nil { if parsed, err := url.Parse(proxyURL); err == nil {
transport.Proxy = http.ProxyURL(parsed) transport.Proxy = http.ProxyURL(parsed)
} }
} }
return &http.Client{Timeout: 15 * time.Second, Transport: transport}
if timeout <= 0 {
timeout = 15 * time.Second
}
return &http.Client{Timeout: timeout, Transport: transport}
} }


func uniqueStrings(values []string) []string { func uniqueStrings(values []string) []string {
set := make(map[string]struct{})
set := make(map[string]struct{}, len(values))
out := make([]string, 0, len(values))
for _, v := range values { for _, v := range values {
if _, ok := set[v]; ok {
continue
}
set[v] = struct{}{} set[v] = struct{}{}
}
out := make([]string, 0, len(set))
for v := range set {
out = append(out, v) out = append(out, v)
} }
return out return out
@@ -574,6 +703,54 @@ func resolveURL(base, href string) string {
return baseURL.ResolveReference(parsed).String() return baseURL.ResolveReference(parsed).String()
} }


// isPrivateURL reports whether raw points at a private, local, or otherwise
// non-public network target, along with a short reason string. It inspects
// only the literal host in the URL; hostnames that merely *resolve* to
// private addresses are not detected here.
func isPrivateURL(raw string) (bool, string) {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false, ""
	}
	host := parsed.Hostname()
	if host == "" {
		return false, ""
	}
	lower := strings.ToLower(host)
	if lower == "localhost" || strings.HasSuffix(lower, ".local") || strings.HasSuffix(lower, ".internal") {
		return true, "hostname"
	}
	ip := net.ParseIP(host)
	if ip == nil {
		return false, ""
	}
	// 0.0.0.0 and :: are routed to the local host by most network stacks,
	// so they must be blocked like loopback addresses.
	if ip.IsUnspecified() {
		return true, "unspecified address"
	}
	if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
		return true, "loopback/link-local"
	}
	// Covers 10/8, 172.16/12, 192.168/16, and IPv6 unique-local fc00::/7.
	if ip.IsPrivate() {
		return true, "private range"
	}
	return false, ""
}

func isPrivateIP(ip net.IP) bool {
if ip4 := ip.To4(); ip4 != nil {
switch {
case ip4[0] == 10:
return true
case ip4[0] == 172 && ip4[1] >= 16 && ip4[1] <= 31:
return true
case ip4[0] == 192 && ip4[1] == 168:
return true
case ip4[0] == 169 && ip4[1] == 254:
return true
case ip4[0] == 127:
return true
}
}
// IPv6 unique local fc00::/7
if ip.To16() != nil {
return ip[0]&0xfe == 0xfc
}
return false
}

type historyWriter struct { type historyWriter struct {
path string path string
mu sync.Mutex mu sync.Mutex


+ 3
- 1
go.mod Переглянути файл

@@ -1,3 +1,5 @@
module radio-stream-extractor module radio-stream-extractor


go 1.21
go 1.25.0

require golang.org/x/net v0.52.0

+ 2
- 0
go.sum Переглянути файл

@@ -0,0 +1,2 @@
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=

+ 102
- 30
internal/extractor/extractor.go Переглянути файл

@@ -1,18 +1,17 @@
package extractor package extractor


import ( import (
"io"
"net/url"
"regexp" "regexp"
"sort" "sort"
"strings" "strings"

"golang.org/x/net/html"
) )


var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)
var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json)(?:\?[^\s"'<>]*)?(?:#[^\s"'<>]*)?)`)
var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`) var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)
var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)
var iframePattern = regexp.MustCompile(`(?i)<iframe[^>]+src\s*=\s*['"]([^'"]+)['"]`)
var scriptPattern = regexp.MustCompile(`(?i)<script[^>]+src\s*=\s*['"]([^'"]+)['"]`)
var audioPattern = regexp.MustCompile(`(?i)<audio[^>]+src\s*=\s*['"]([^'"]+)['"]`)
var sourcePattern = regexp.MustCompile(`(?i)<source[^>]+src\s*=\s*['"]([^'"]+)['"]`)
var xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`) var xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)


// ExtractStreams returns the unique streaming URLs found in the provided HTML/text. // ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
@@ -37,14 +36,14 @@ func ExtractStreams(data string) []string {
for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
add(match[2]) add(match[2])
} }
for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
add(match[1])
for _, u := range extractTagAttrs(data, "audio", "src", "data-src") {
addSpecial(u)
} }
for _, match := range audioPattern.FindAllStringSubmatch(data, -1) {
addSpecial(match[1])
for _, u := range extractTagAttrs(data, "source", "src", "data-src") {
addSpecial(u)
} }
for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) {
addSpecial(match[1])
for _, u := range extractTagAttrs(data, "a", "href") {
add(u)
} }


streams := make([]string, 0, len(candidates)) streams := make([]string, 0, len(candidates))
@@ -76,8 +75,11 @@ func ExtractPlaylistLinks(data string) []string {
for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
add(match[2]) add(match[2])
} }
for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
add(match[1])
for _, u := range extractTagAttrs(data, "a", "href") {
add(u)
}
for _, u := range extractTagAttrs(data, "source", "src", "data-src") {
add(u)
} }


links := make([]string, 0, len(candidates)) links := make([]string, 0, len(candidates))
@@ -90,16 +92,16 @@ func ExtractPlaylistLinks(data string) []string {


// ExtractEmbedURLs returns URLs found in iframe embeds. // ExtractEmbedURLs returns URLs found in iframe embeds.
func ExtractEmbedURLs(data string) []string { func ExtractEmbedURLs(data string) []string {
return extractURLs(iframePattern, data)
return extractTagAttrs(data, "iframe", "src")
} }


// ExtractScriptURLs returns URLs referenced by script tags. // ExtractScriptURLs returns URLs referenced by script tags.
func ExtractScriptURLs(data string) []string { func ExtractScriptURLs(data string) []string {
return extractURLs(scriptPattern, data)
return extractTagAttrs(data, "script", "src")
} }


// ParsePlaylist extracts stream URLs from playlist content. // ParsePlaylist extracts stream URLs from playlist content.
func ParsePlaylist(content string, contentType string) []string {
func ParsePlaylist(content string, contentType string, baseURL string) []string {
candidates := make(map[string]struct{}) candidates := make(map[string]struct{})
add := func(raw string) { add := func(raw string) {
raw = strings.TrimSpace(raw) raw = strings.TrimSpace(raw)
@@ -110,7 +112,9 @@ func ParsePlaylist(content string, contentType string) []string {
raw = "https:" + raw raw = "https:" + raw
} }
if isStreamURL(raw) { if isStreamURL(raw) {
candidates[raw] = struct{}{}
if resolved := resolveRelative(raw, baseURL); resolved != "" {
candidates[resolved] = struct{}{}
}
} }
} }
addForce := func(raw string) { addForce := func(raw string) {
@@ -121,7 +125,9 @@ func ParsePlaylist(content string, contentType string) []string {
if strings.HasPrefix(raw, "//") { if strings.HasPrefix(raw, "//") {
raw = "https:" + raw raw = "https:" + raw
} }
candidates[raw] = struct{}{}
if resolved := resolveRelative(raw, baseURL); resolved != "" {
candidates[resolved] = struct{}{}
}
} }


lowerType := strings.ToLower(contentType) lowerType := strings.ToLower(contentType)
@@ -156,6 +162,10 @@ func ParsePlaylist(content string, contentType string) []string {
if !matched { if !matched {
addForce(line) addForce(line)
} }
continue
}
if baseURL != "" && (strings.Contains(strings.ToLower(line), ".mp3") || strings.Contains(strings.ToLower(line), ".aac") || strings.Contains(strings.ToLower(line), ".m3u8") || strings.Contains(strings.ToLower(line), ".ogg") || strings.Contains(strings.ToLower(line), ".opus")) {
addForce(line)
} }
} }


@@ -172,20 +182,45 @@ func ParsePlaylist(content string, contentType string) []string {
sort.Strings(streams) sort.Strings(streams)
return streams return streams
} }
func extractURLs(pattern *regexp.Regexp, data string) []string {
candidates := make(map[string]struct{})
for _, match := range pattern.FindAllStringSubmatch(data, -1) {
if normalized, ok := normalizeCandidate(match[1]); ok {
candidates[normalized] = struct{}{}
}
func extractTagAttrs(data string, tag string, attrs ...string) []string {
attrSet := make(map[string]struct{}, len(attrs))
for _, a := range attrs {
attrSet[strings.ToLower(a)] = struct{}{}
} }


urls := make([]string, 0, len(candidates))
for u := range candidates {
urls = append(urls, u)
candidates := make(map[string]struct{})
z := html.NewTokenizer(strings.NewReader(data))
for {
tt := z.Next()
switch tt {
case html.ErrorToken:
if z.Err() == io.EOF {
urls := make([]string, 0, len(candidates))
for u := range candidates {
urls = append(urls, u)
}
sort.Strings(urls)
return urls
}
return nil
case html.StartTagToken, html.SelfClosingTagToken:
name, hasAttr := z.TagName()
if !strings.EqualFold(string(name), tag) || !hasAttr {
continue
}
for {
key, val, more := z.TagAttr()
if _, ok := attrSet[strings.ToLower(string(key))]; ok {
if normalized, ok := normalizeCandidate(string(val)); ok {
candidates[normalized] = struct{}{}
}
}
if !more {
break
}
}
}
} }
sort.Strings(urls)
return urls
} }


func normalizeCandidate(raw string) (string, bool) { func normalizeCandidate(raw string) (string, bool) {
@@ -207,6 +242,43 @@ func normalizeCandidate(raw string) (string, bool) {
return normalized, true return normalized, true
} }


// resolveRelative turns a playlist entry into an absolute URL, resolving it
// against base when it is not already absolute. Protocol-relative entries
// ("//host/...") are assumed to be HTTPS.
func resolveRelative(raw string, base string) string {
	entry := strings.TrimSpace(raw)
	switch {
	case entry == "":
		return ""
	case base == "":
		return entry
	case strings.HasPrefix(entry, "http://"), strings.HasPrefix(entry, "https://"):
		return entry
	case strings.HasPrefix(entry, "//"):
		return "https:" + entry
	default:
		return ResolveURL(base, entry)
	}
}

// ResolveURL resolves a possibly relative URL against a base. It returns ""
// when href is empty or unparsable; if the base itself does not parse, the
// (possibly relative) href is returned unchanged.
func ResolveURL(base string, href string) string {
	ref := strings.TrimSpace(href)
	if ref == "" {
		return ""
	}
	refURL, err := url.Parse(ref)
	if err != nil {
		return ""
	}
	if refURL.IsAbs() {
		return refURL.String()
	}
	baseURL, err := url.Parse(base)
	if err != nil {
		// No usable base; fall back to the reference as given.
		return refURL.String()
	}
	return baseURL.ResolveReference(refURL).String()
}

func isStreamURL(u string) bool { func isStreamURL(u string) bool {
lower := strings.ToLower(u) lower := strings.ToLower(u)
return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") || return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") ||


+ 9
- 3
internal/extractor/extractor_test.go Переглянути файл

@@ -71,13 +71,19 @@ func TestParsePlaylist(t *testing.T) {
pls := "[playlist]\nFile1=https://example.com/stream.aac\n" pls := "[playlist]\nFile1=https://example.com/stream.aac\n"
xspf := "<playlist><location>https://example.com/hls.m3u8</location></playlist>" xspf := "<playlist><location>https://example.com/hls.m3u8</location></playlist>"


if len(ParsePlaylist(m3u, "audio/x-mpegurl")) != 1 {
if len(ParsePlaylist(m3u, "audio/x-mpegurl", "https://example.com/playlist.m3u")) != 1 {
t.Fatal("expected m3u playlist to yield 1 stream") t.Fatal("expected m3u playlist to yield 1 stream")
} }
if len(ParsePlaylist(pls, "audio/x-scpls")) != 1 {
if len(ParsePlaylist(pls, "audio/x-scpls", "https://example.com/playlist.pls")) != 1 {
t.Fatal("expected pls playlist to yield 1 stream") t.Fatal("expected pls playlist to yield 1 stream")
} }
if len(ParsePlaylist(xspf, "application/xspf+xml")) != 1 {
if len(ParsePlaylist(xspf, "application/xspf+xml", "https://example.com/playlist.xspf")) != 1 {
t.Fatal("expected xspf playlist to yield 1 stream") t.Fatal("expected xspf playlist to yield 1 stream")
} }

relative := "stream/live.mp3\n"
resolved := ParsePlaylist(relative, "audio/x-mpegurl", "https://example.com/radio/list.m3u")
if len(resolved) != 1 || resolved[0] != "https://example.com/radio/stream/live.mp3" {
t.Fatalf("expected relative URL to resolve against base: %v", resolved)
}
} }

Завантаження…
Відмінити
Зберегти