diff --git a/README.md b/README.md index c854d78..9743255 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ go run ./cmd/radiostreamscan https://live24.gr/radio/generic.jsp?sid=2676 - `-history history.jsonl` (leer = aus) - `-watch 30s` (CLI wiederholt den Scan) - `-concurrency 4` +- `-timeout 15s` (HTTP Timeout pro Request) +- `-probe-timeout 8s` +- `-allow-private` (lokale/private Ziele erlauben) ## Build / EXE diff --git a/cmd/radiostreamscan/main.go b/cmd/radiostreamscan/main.go index 0367649..d24204f 100644 --- a/cmd/radiostreamscan/main.go +++ b/cmd/radiostreamscan/main.go @@ -2,11 +2,13 @@ package main import ( "bufio" + "context" "encoding/csv" "encoding/json" "flag" "fmt" "io" + "net" "net/http" "net/url" "os" @@ -34,13 +36,16 @@ type probeResult struct { } type config struct { - Format string - Probe bool - Headers headerList - Proxy string - HistoryPath string - Watch time.Duration - Concurrency int + Format string + Probe bool + Headers headerList + Proxy string + HistoryPath string + Watch time.Duration + Concurrency int + RequestTimeout time.Duration + ProbeTimeout time.Duration + AllowPrivate bool } type headerList []string @@ -63,6 +68,9 @@ func main() { flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)") flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)") flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers") + flag.DurationVar(&cfg.RequestTimeout, "timeout", 15*time.Second, "timeout per HTTP request (e.g. 10s, 2m)") + flag.DurationVar(&cfg.ProbeTimeout, "probe-timeout", 8*time.Second, "timeout for probing stream URLs") + flag.BoolVar(&cfg.AllowPrivate, "allow-private", false, "allow requests to private/localhost addresses") flag.Usage = func() { fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] [url...]\n", os.Args[0]) @@ -71,7 +79,7 @@ func main() { flag.Parse() urls := flag.Args() - client := newHTTPClient(cfg.Proxy) + client := newHTTPClient(cfg.Proxy, cfg.RequestTimeout) history := newHistoryWriter(cfg.HistoryPath) if *web || len(urls) == 0 { @@ -88,7 +96,10 @@ func main() { func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) { for { results := scanURLs(urls, client, cfg) - outputResults(results, cfg.Format, os.Stdout) + if err := outputResults(results, cfg.Format, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "output failed: %v\n", err) + return + } history.Write(results) if cfg.Watch == 0 { return @@ -258,7 +269,9 @@ func makeScanHandler(client *http.Client, cfg *config, history *historyWriter) h results := scanURLs(urls, client, &localCfg) history.Write(results) - outputResults(results, localCfg.Format, w) + if err := outputResults(results, localCfg.Format, w); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } } } @@ -325,13 +338,19 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult { streams := extractor.ExtractStreams(html) playlists := extractor.ExtractPlaylistLinks(html) + resolvedPlaylists := make([]string, 0, len(playlists)) for _, pl := range playlists { - plContent, plType, err := fetchContent(client, cfg, pl) + plURL := resolveURL(raw, pl) + if plURL == "" { + continue + } + resolvedPlaylists = append(resolvedPlaylists, plURL) + plContent, plType, err := fetchContent(client, cfg, plURL) if err != nil { continue } - parsed := extractor.ParsePlaylist(plContent, plType) + parsed := extractor.ParsePlaylist(plContent, plType, plURL) if len(parsed) > 0 { streams = append(streams, parsed...) res.FromPlaylist = true @@ -360,11 +379,16 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult { playlists = append(playlists, embedPlaylists...) for _, pl := range embedPlaylists { - plContent, plType, err := fetchContent(client, cfg, pl) + plURL := resolveURL(embedURL, pl) + if plURL == "" { + continue + } + resolvedPlaylists = append(resolvedPlaylists, plURL) + plContent, plType, err := fetchContent(client, cfg, plURL) if err != nil { continue } - parsed := extractor.ParsePlaylist(plContent, plType) + parsed := extractor.ParsePlaylist(plContent, plType, plURL) if len(parsed) > 0 { streams = append(streams, parsed...) res.FromPlaylist = true @@ -403,11 +427,16 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult { playlists = append(playlists, scriptPlaylists...) for _, pl := range scriptPlaylists { - plContent, plType, err := fetchContent(client, cfg, pl) + plURL := resolveURL(scriptURL, pl) + if plURL == "" { + continue + } + resolvedPlaylists = append(resolvedPlaylists, plURL) + plContent, plType, err := fetchContent(client, cfg, plURL) if err != nil { continue } - parsed := extractor.ParsePlaylist(plContent, plType) + parsed := extractor.ParsePlaylist(plContent, plType, plURL) if len(parsed) > 0 { streams = append(streams, parsed...) res.FromPlaylist = true @@ -415,7 +444,7 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult { } } - res.Playlists = uniqueStrings(playlists) + res.Playlists = uniqueStrings(resolvedPlaylists) res.Streams = uniqueStrings(streams) if cfg.Probe { @@ -427,7 +456,20 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult { } func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) { - req, err := http.NewRequest(http.MethodGet, raw, nil) + if !cfg.AllowPrivate { + if blocked, reason := isPrivateURL(raw); blocked { + return "", "", fmt.Errorf("blocked private address (%s)", reason) + } + } + + timeout := cfg.RequestTimeout + if timeout <= 0 { + timeout = 15 * time.Second + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, raw, nil) if err != nil { return "", "", err } @@ -445,8 +487,9 @@ func fetchContent(client *http.Client, cfg *config, raw string) (string, string, } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return "", "", fmt.Errorf("unexpected status %s", resp.Status) + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return "", "", fmt.Errorf("unexpected status %s: %s", resp.Status, strings.TrimSpace(string(snippet))) } body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20)) @@ -460,8 +503,25 @@ func fetchContent(client *http.Client, cfg *config, raw string) (string, string, func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult { var results []probeResult for _, s := range streams { - req, err := http.NewRequest(http.MethodHead, s, nil) + if !cfg.AllowPrivate { + if blocked, reason := isPrivateURL(s); blocked { + results = append(results, probeResult{URL: s, Status: "blocked: " + reason}) + continue + } + } + + timeout := cfg.ProbeTimeout + if timeout <= 0 { + timeout = cfg.RequestTimeout + } + if timeout <= 0 { + timeout = 8 * time.Second + } + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, s, nil) if err != nil { + cancel() continue } for _, h := range cfg.Headers { @@ -472,16 +532,49 @@ func probeStreams(client *http.Client, cfg *config, streams []string) []probeRes } resp, err := client.Do(req) if err != nil { - results = append(results, probeResult{URL: s, Status: err.Error()}) + cancel() + // Fallback to GET with range. + results = append(results, probeWithGet(client, cfg, s, timeout)) continue } resp.Body.Close() + cancel() + + if resp.StatusCode == http.StatusMethodNotAllowed || resp.StatusCode == http.StatusNotImplemented { + results = append(results, probeWithGet(client, cfg, s, timeout)) + continue + } results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")}) } return results } -func outputResults(results []scanResult, format string, w io.Writer) { +func probeWithGet(client *http.Client, cfg *config, urlStr string, timeout time.Duration) probeResult { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) + if err != nil { + return probeResult{URL: urlStr, Status: err.Error()} + } + req.Header.Set("Range", "bytes=0-1023") + for _, h := range cfg.Headers { + parts := strings.SplitN(h, ":", 2) + if len(parts) == 2 { + req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])) + } + } + + resp, err := client.Do(req) + if err != nil { + return probeResult{URL: urlStr, Status: err.Error()} + } + defer resp.Body.Close() + _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024)) + return probeResult{URL: urlStr, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")} +} + +func outputResults(results []scanResult, format string, w io.Writer) error { if rw, ok := w.(http.ResponseWriter); ok { if strings.ToLower(format) == "json" { rw.Header().Set("Content-Type", "application/json") @@ -492,61 +585,97 @@ func outputResults(results []scanResult, format string, w io.Writer) { switch strings.ToLower(format) { case "json": - json.NewEncoder(w).Encode(results) + if err := json.NewEncoder(w).Encode(results); err != nil { + return err + } case "csv": cw := csv.NewWriter(w) - cw.Write([]string{"input_url", "stream_url"}) + if err := cw.Write([]string{"input_url", "stream_url"}); err != nil { + return err + } for _, res := range results { for _, s := range res.Streams { - cw.Write([]string{res.URL, s}) + if err := cw.Write([]string{res.URL, s}); err != nil { + return err + } } } cw.Flush() + if err := cw.Error(); err != nil { + return err + } case "pls": - fmt.Fprintln(w, "[playlist]") + if _, err := fmt.Fprintln(w, "[playlist]"); err != nil { + return err + } i := 1 for _, res := range results { for _, s := range res.Streams { - fmt.Fprintf(w, "File%d=%s\n", i, s) + if _, err := fmt.Fprintf(w, "File%d=%s\n", i, s); err != nil { + return err + } i++ } } - fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1) + if _, err := fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1); err != nil { + return err + } default: for _, res := range results { - fmt.Fprintf(w, "URL: %s\n", res.URL) + if _, err := fmt.Fprintf(w, "URL: %s\n", res.URL); err != nil { + return err + } if res.Error != "" { - fmt.Fprintf(w, " error: %s\n", res.Error) + if _, err := fmt.Fprintf(w, " error: %s\n", res.Error); err != nil { + return err + } continue } if len(res.Streams) == 0 { - fmt.Fprintln(w, " (no candidate streams found)") + if _, err := fmt.Fprintln(w, " (no candidate streams found)"); err != nil { + return err + } continue } for _, s := range res.Streams { - fmt.Fprintf(w, " - %s\n", s) + if _, err := fmt.Fprintf(w, " - %s\n", s); err != nil { + return err + } } } } + return nil } -func newHTTPClient(proxyURL string) *http.Client { - transport := &http.Transport{} +func newHTTPClient(proxyURL string, timeout time.Duration) *http.Client { + transport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + ResponseHeaderTimeout: 8 * time.Second, + TLSHandshakeTimeout: 6 * time.Second, + IdleConnTimeout: 30 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + } if proxyURL != "" { if parsed, err := url.Parse(proxyURL); err == nil { transport.Proxy = http.ProxyURL(parsed) } } - return &http.Client{Timeout: 15 * time.Second, Transport: transport} + if timeout <= 0 { + timeout = 15 * time.Second + } + return &http.Client{Timeout: timeout, Transport: transport} } func uniqueStrings(values []string) []string { - set := make(map[string]struct{}) + set := make(map[string]struct{}, len(values)) + out := make([]string, 0, len(values)) for _, v := range values { + if _, ok := set[v]; ok { + continue + } set[v] = struct{}{} - } - out := make([]string, 0, len(set)) - for v := range set { out = append(out, v) } return out @@ -574,6 +703,54 @@ func resolveURL(base, href string) string { return baseURL.ResolveReference(parsed).String() } +func isPrivateURL(raw string) (bool, string) { + parsed, err := url.Parse(raw) + if err != nil { + return false, "" + } + host := parsed.Hostname() + if host == "" { + return false, "" + } + lower := strings.ToLower(host) + if lower == "localhost" || strings.HasSuffix(lower, ".local") || strings.HasSuffix(lower, ".internal") { + return true, "hostname" + } + ip := net.ParseIP(host) + if ip == nil { + return false, "" + } + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true, "loopback/link-local" + } + if isPrivateIP(ip) { + return true, "private range" + } + return false, "" +} + +func isPrivateIP(ip net.IP) bool { + if ip4 := ip.To4(); ip4 != nil { + switch { + case ip4[0] == 10: + return true + case ip4[0] == 172 && ip4[1] >= 16 && ip4[1] <= 31: + return true + case ip4[0] == 192 && ip4[1] == 168: + return true + case ip4[0] == 169 && ip4[1] == 254: + return true + case ip4[0] == 127: + return true + } + } + // IPv6 unique local fc00::/7 + if ip.To16() != nil { + return ip[0]&0xfe == 0xfc + } + return false +} + type historyWriter struct { path string mu sync.Mutex diff --git a/go.mod b/go.mod index 1713d8c..2b9064e 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module radio-stream-extractor -go 1.21 +go 1.25.0 + +require golang.org/x/net v0.52.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e3b24b9 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= diff --git a/internal/extractor/extractor.go b/internal/extractor/extractor.go index 223413e..71a0e51 100644 --- a/internal/extractor/extractor.go +++ b/internal/extractor/extractor.go @@ -1,18 +1,17 @@ package extractor import ( + "io" + "net/url" "regexp" "sort" "strings" + + "golang.org/x/net/html" ) -var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`) +var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json)(?:\?[^\s"'<>]*)?(?:#[^\s"'<>]*)?)`) var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`) -var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`) -var iframePattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) -var scriptPattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) -var audioPattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) -var sourcePattern = regexp.MustCompile(`(?i)]+src\s*=\s*['"]([^'"]+)['"]`) var xspfPattern = regexp.MustCompile(`(?i)([^<]+)`) // ExtractStreams returns the unique streaming URLs found in the provided HTML/text. @@ -37,14 +36,14 @@ func ExtractStreams(data string) []string { for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } - for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { - add(match[1]) + for _, u := range extractTagAttrs(data, "audio", "src", "data-src") { + addSpecial(u) } - for _, match := range audioPattern.FindAllStringSubmatch(data, -1) { - addSpecial(match[1]) + for _, u := range extractTagAttrs(data, "source", "src", "data-src") { + addSpecial(u) } - for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) { - addSpecial(match[1]) + for _, u := range extractTagAttrs(data, "a", "href") { + add(u) } streams := make([]string, 0, len(candidates)) @@ -76,8 +75,11 @@ func ExtractPlaylistLinks(data string) []string { for _, match := range attrPattern.FindAllStringSubmatch(data, -1) { add(match[2]) } - for _, match := range srcPattern.FindAllStringSubmatch(data, -1) { - add(match[1]) + for _, u := range extractTagAttrs(data, "a", "href") { + add(u) + } + for _, u := range extractTagAttrs(data, "source", "src", "data-src") { + add(u) } links := make([]string, 0, len(candidates)) @@ -90,16 +92,16 @@ func ExtractPlaylistLinks(data string) []string { // ExtractEmbedURLs returns URLs found in iframe embeds. func ExtractEmbedURLs(data string) []string { - return extractURLs(iframePattern, data) + return extractTagAttrs(data, "iframe", "src") } // ExtractScriptURLs returns URLs referenced by script tags. func ExtractScriptURLs(data string) []string { - return extractURLs(scriptPattern, data) + return extractTagAttrs(data, "script", "src") } // ParsePlaylist extracts stream URLs from playlist content. -func ParsePlaylist(content string, contentType string) []string { +func ParsePlaylist(content string, contentType string, baseURL string) []string { candidates := make(map[string]struct{}) add := func(raw string) { raw = strings.TrimSpace(raw) @@ -110,7 +112,9 @@ func ParsePlaylist(content string, contentType string) []string { raw = "https:" + raw } if isStreamURL(raw) { - candidates[raw] = struct{}{} + if resolved := resolveRelative(raw, baseURL); resolved != "" { + candidates[resolved] = struct{}{} + } } } addForce := func(raw string) { @@ -121,7 +125,9 @@ func ParsePlaylist(content string, contentType string) []string { if strings.HasPrefix(raw, "//") { raw = "https:" + raw } - candidates[raw] = struct{}{} + if resolved := resolveRelative(raw, baseURL); resolved != "" { + candidates[resolved] = struct{}{} + } } lowerType := strings.ToLower(contentType) @@ -156,6 +162,10 @@ func ParsePlaylist(content string, contentType string) []string { if !matched { addForce(line) } + continue + } + if baseURL != "" && (strings.Contains(strings.ToLower(line), ".mp3") || strings.Contains(strings.ToLower(line), ".aac") || strings.Contains(strings.ToLower(line), ".m3u8") || strings.Contains(strings.ToLower(line), ".ogg") || strings.Contains(strings.ToLower(line), ".opus")) { + addForce(line) } } @@ -172,20 +182,45 @@ func ParsePlaylist(content string, contentType string) []string { sort.Strings(streams) return streams } -func extractURLs(pattern *regexp.Regexp, data string) []string { - candidates := make(map[string]struct{}) - for _, match := range pattern.FindAllStringSubmatch(data, -1) { - if normalized, ok := normalizeCandidate(match[1]); ok { - candidates[normalized] = struct{}{} - } +func extractTagAttrs(data string, tag string, attrs ...string) []string { + attrSet := make(map[string]struct{}, len(attrs)) + for _, a := range attrs { + attrSet[strings.ToLower(a)] = struct{}{} } - urls := make([]string, 0, len(candidates)) - for u := range candidates { - urls = append(urls, u) + candidates := make(map[string]struct{}) + z := html.NewTokenizer(strings.NewReader(data)) + for { + tt := z.Next() + switch tt { + case html.ErrorToken: + if z.Err() == io.EOF { + urls := make([]string, 0, len(candidates)) + for u := range candidates { + urls = append(urls, u) + } + sort.Strings(urls) + return urls + } + return nil + case html.StartTagToken, html.SelfClosingTagToken: + name, hasAttr := z.TagName() + if !strings.EqualFold(string(name), tag) || !hasAttr { + continue + } + for { + key, val, more := z.TagAttr() + if _, ok := attrSet[strings.ToLower(string(key))]; ok { + if normalized, ok := normalizeCandidate(string(val)); ok { + candidates[normalized] = struct{}{} + } + } + if !more { + break + } + } + } } - sort.Strings(urls) - return urls } func normalizeCandidate(raw string) (string, bool) { @@ -207,6 +242,43 @@ func normalizeCandidate(raw string) (string, bool) { return normalized, true } +func resolveRelative(raw string, base string) string { + raw = strings.TrimSpace(raw) + if raw == "" { + return "" + } + if base == "" { + return raw + } + if strings.HasPrefix(raw, "http://") || strings.HasPrefix(raw, "https://") { + return raw + } + if strings.HasPrefix(raw, "//") { + return "https:" + raw + } + return ResolveURL(base, raw) +} + +// ResolveURL resolves a possibly relative URL against a base. +func ResolveURL(base string, href string) string { + href = strings.TrimSpace(href) + if href == "" { + return "" + } + parsed, err := url.Parse(href) + if err != nil { + return "" + } + if parsed.IsAbs() { + return parsed.String() + } + baseURL, err := url.Parse(base) + if err != nil { + return parsed.String() + } + return baseURL.ResolveReference(parsed).String() +} + func isStreamURL(u string) bool { lower := strings.ToLower(u) return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") || diff --git a/internal/extractor/extractor_test.go b/internal/extractor/extractor_test.go index 9f4c53a..57eb0f7 100644 --- a/internal/extractor/extractor_test.go +++ b/internal/extractor/extractor_test.go @@ -71,13 +71,19 @@ func TestParsePlaylist(t *testing.T) { pls := "[playlist]\nFile1=https://example.com/stream.aac\n" xspf := "https://example.com/hls.m3u8" - if len(ParsePlaylist(m3u, "audio/x-mpegurl")) != 1 { + if len(ParsePlaylist(m3u, "audio/x-mpegurl", "https://example.com/playlist.m3u")) != 1 { t.Fatal("expected m3u playlist to yield 1 stream") } - if len(ParsePlaylist(pls, "audio/x-scpls")) != 1 { + if len(ParsePlaylist(pls, "audio/x-scpls", "https://example.com/playlist.pls")) != 1 { t.Fatal("expected pls playlist to yield 1 stream") } - if len(ParsePlaylist(xspf, "application/xspf+xml")) != 1 { + if len(ParsePlaylist(xspf, "application/xspf+xml", "https://example.com/playlist.xspf")) != 1 { t.Fatal("expected xspf playlist to yield 1 stream") } + + relative := "stream/live.mp3\n" + resolved := ParsePlaylist(relative, "audio/x-mpegurl", "https://example.com/radio/list.m3u") + if len(resolved) != 1 || resolved[0] != "https://example.com/radio/stream/live.mp3" { + t.Fatalf("expected relative URL to resolve against base: %v", resolved) + } }