|
package main

import (
	"bufio"
	"context"
	"encoding/csv"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"strings"
	"sync"
	"time"

	"radio-stream-extractor/internal/extractor"
)
-
// scanResult is the outcome of scanning a single input URL. It is encoded
// as-is for the JSON output format and for the JSONL history log.
type scanResult struct {
	URL          string        `json:"url"`                 // the input URL that was scanned
	Streams      []string      `json:"streams"`             // deduplicated candidate stream URLs
	Playlists    []string      `json:"playlists,omitempty"` // resolved playlist URLs discovered during the scan
	Probes       []probeResult `json:"probes,omitempty"`    // per-stream probe outcomes (only when probing is enabled)
	Error        string        `json:"error,omitempty"`     // error from fetching the input URL; empty on success
	FetchedAt    time.Time     `json:"fetchedAt"`           // timestamp taken when the scan of this URL started
	FromPlaylist bool          `json:"fromPlaylist"`        // true if at least one stream was parsed out of a playlist
}
-
// probeResult records the outcome of probing one stream URL.
type probeResult struct {
	URL         string `json:"url"`                   // the probed stream URL
	Status      string `json:"status"`                // HTTP status line, an error message, or "blocked: <reason>"
	ContentType string `json:"contentType,omitempty"` // Content-Type reported by the server, if any
}
-
// config carries the runtime options shared by CLI mode and web mode.
// Fields are populated from command-line flags in main.
type config struct {
	Format         string        // output format: text|json|csv|pls
	Probe          bool          // probe discovered stream URLs with HTTP HEAD
	Headers        headerList    // extra "Name: value" request headers (repeatable flag)
	Proxy          string        // optional HTTP proxy URL
	HistoryPath    string        // JSONL history log path; empty disables logging
	Watch          time.Duration // CLI repeat interval; 0 means scan once
	Concurrency    int           // number of concurrent fetch workers
	RequestTimeout time.Duration // timeout per HTTP request
	ProbeTimeout   time.Duration // timeout for probing stream URLs
	AllowPrivate   bool          // allow requests to private/localhost addresses
}
-
// headerList collects repeatable -header flag values ("Name: value" strings).
// It implements flag.Value.
type headerList []string

// String renders the collected headers as a comma-separated list (flag.Value).
func (h *headerList) String() string {
	return strings.Join(*h, ", ")
}

// Set appends one raw header entry; it never fails (flag.Value).
func (h *headerList) Set(raw string) error {
	*h = append(*h, raw)
	return nil
}
-
- func main() {
- port := flag.String("port", ":8080", "listen address for the web server (default :8080)")
- web := flag.Bool("web", false, "force web-server mode even when URLs are provided")
-
- cfg := config{}
- flag.StringVar(&cfg.Format, "format", "text", "output format: text|json|csv|pls")
- flag.BoolVar(&cfg.Probe, "probe", true, "probe discovered stream URLs with HTTP HEAD")
- flag.Var(&cfg.Headers, "header", "custom HTTP header (repeatable), e.g. -header 'Referer: https://example.com'")
- flag.StringVar(&cfg.Proxy, "proxy", "", "HTTP proxy URL (optional)")
- flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)")
- flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)")
- flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers")
- flag.DurationVar(&cfg.RequestTimeout, "timeout", 15*time.Second, "timeout per HTTP request (e.g. 10s, 2m)")
- flag.DurationVar(&cfg.ProbeTimeout, "probe-timeout", 8*time.Second, "timeout for probing stream URLs")
- flag.BoolVar(&cfg.AllowPrivate, "allow-private", false, "allow requests to private/localhost addresses")
-
- flag.Usage = func() {
- fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] <url> [url...]\n", os.Args[0])
- flag.PrintDefaults()
- }
- flag.Parse()
-
- urls := flag.Args()
- client := newHTTPClient(cfg.Proxy, cfg.RequestTimeout)
- history := newHistoryWriter(cfg.HistoryPath)
-
- if *web || len(urls) == 0 {
- if err := runWebMode(*port, client, &cfg, history); err != nil {
- fmt.Fprintf(os.Stderr, "web mode failed: %v\n", err)
- os.Exit(1)
- }
- return
- }
-
- runCLIMode(urls, client, &cfg, history)
- }
-
- func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) {
- for {
- results := scanURLs(urls, client, cfg)
- if err := outputResults(results, cfg.Format, os.Stdout); err != nil {
- fmt.Fprintf(os.Stderr, "output failed: %v\n", err)
- return
- }
- history.Write(results)
- if cfg.Watch == 0 {
- return
- }
- time.Sleep(cfg.Watch)
- }
- }
-
- func runWebMode(addr string, client *http.Client, cfg *config, history *historyWriter) error {
- mux := http.NewServeMux()
- mux.HandleFunc("/", indexHandler)
- mux.HandleFunc("/scan", makeScanHandler(client, cfg, history))
- mux.HandleFunc("/watch", watchHandler)
-
- fmt.Printf("radiostreamscan listening on %s (GET /scan?url=... or POST url=...)\n", addr)
- return http.ListenAndServe(addr, mux)
- }
-
- func indexHandler(w http.ResponseWriter, r *http.Request) {
- fmt.Fprintf(w, `<!doctype html>
- <html>
- <head><meta charset="utf-8"><title>radiostreamscan</title></head>
- <body>
- <h1>radiostreamscan</h1>
- <form method="get" action="/watch">
- <label>Stream-URLs (eine pro Zeile)</label><br/>
- <textarea name="url" rows="6" cols="80" required></textarea><br/>
- <label>Format
- <select name="format">
- <option value="json">json</option>
- <option value="text">text</option>
- <option value="csv">csv</option>
- <option value="pls">pls</option>
- </select>
- </label>
- <label>Auto-Refresh (Sekunden)
- <input type="number" name="interval" value="0" min="0" />
- </label>
- <label><input type="checkbox" name="probe" value="1" checked> Probing</label>
- <button type="submit">Scan</button>
- </form>
- <p>Mehrere URLs: /scan?url=a&url=b&url=c</p>
- </body>
- </html>`)
- }
-
- func watchHandler(w http.ResponseWriter, r *http.Request) {
- urls := normalizeURLInputs(r.URL.Query()["url"])
- interval := r.URL.Query().Get("interval")
- probe := r.URL.Query().Get("probe")
- fmt.Fprintf(w, `<!doctype html>
- <html>
- <head><meta charset="utf-8"><title>radiostreamscan results</title>
- <style>
- body { font-family: Arial, sans-serif; }
- .url-block { margin: 10px 0; padding: 10px; border: 1px solid #ccc; }
- .error { color: #b00020; }
- button { margin: 8px 0; }
- </style>
- </head>
- <body>
- <h1>radiostreamscan results</h1>
- <button id="copy">Alle Streams kopieren</button>
- <div id="output">Loading...</div>
- <textarea id="clipboard" style="position:absolute; left:-9999px; top:-9999px;"></textarea>
- <script>
- const urls = %q.split("\n").filter(Boolean);
- const interval = %q;
- const probe = %q;
-
- async function fetchData() {
- const params = new URLSearchParams();
- urls.forEach(u => params.append("url", u));
- params.set("format", "json");
- if (probe) params.set("probe", "1");
-
- const res = await fetch("/scan?" + params.toString());
- const data = await res.json();
-
- const container = document.getElementById("output");
- container.innerHTML = "";
-
- const allStreams = [];
-
- data.forEach(item => {
- const block = document.createElement("div");
- block.className = "url-block";
- const title = document.createElement("h3");
- title.textContent = item.url;
- block.appendChild(title);
-
- if (item.error) {
- const err = document.createElement("div");
- err.className = "error";
- err.textContent = item.error;
- block.appendChild(err);
- container.appendChild(block);
- return;
- }
-
- const list = document.createElement("ul");
- (item.streams || []).forEach(s => {
- const li = document.createElement("li");
- li.textContent = s;
- list.appendChild(li);
- allStreams.push(s);
- });
- block.appendChild(list);
- container.appendChild(block);
- });
-
- document.getElementById("clipboard").value = allStreams.join("\n");
- }
-
- document.getElementById("copy").addEventListener("click", () => {
- const text = document.getElementById("clipboard").value;
- if (navigator.clipboard && navigator.clipboard.writeText) {
- navigator.clipboard.writeText(text);
- } else {
- const el = document.getElementById("clipboard");
- el.select();
- document.execCommand("copy");
- }
- });
-
- fetchData();
- if (interval && Number(interval) > 0) {
- setInterval(fetchData, Number(interval) * 1000);
- }
- </script>
- </body>
- </html>`, strings.Join(urls, "\n"), interval, probe)
- }
-
- func makeScanHandler(client *http.Client, cfg *config, history *historyWriter) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- var urls []string
- switch r.Method {
- case http.MethodGet:
- urls = r.URL.Query()["url"]
- case http.MethodPost:
- if err := r.ParseForm(); err != nil {
- http.Error(w, err.Error(), http.StatusBadRequest)
- return
- }
- urls = r.Form["url"]
- default:
- http.Error(w, "only GET and POST supported", http.StatusMethodNotAllowed)
- return
- }
-
- urls = normalizeURLInputs(urls)
- if len(urls) == 0 {
- http.Error(w, "provide at least one url parameter", http.StatusBadRequest)
- return
- }
-
- localCfg := *cfg
- if r.URL.Query().Get("probe") == "1" {
- localCfg.Probe = true
- } else if r.URL.Query().Get("probe") == "0" {
- localCfg.Probe = false
- }
- if f := r.URL.Query().Get("format"); f != "" {
- localCfg.Format = f
- }
-
- results := scanURLs(urls, client, &localCfg)
- history.Write(results)
- if err := outputResults(results, localCfg.Format, w); err != nil {
- http.Error(w, err.Error(), http.StatusInternalServerError)
- }
- }
- }
-
// normalizeURLInputs flattens raw url parameters (which may each contain
// several newline-separated entries, e.g. from a textarea) into a flat list
// of trimmed, non-empty URL strings. Order is preserved; returns nil when
// nothing usable was supplied.
func normalizeURLInputs(inputs []string) []string {
	var out []string
	for _, raw := range inputs {
		for _, candidate := range strings.Split(raw, "\n") {
			// TrimSpace also strips the '\r' left over from CRLF submissions.
			if trimmed := strings.TrimSpace(candidate); trimmed != "" {
				out = append(out, trimmed)
			}
		}
	}
	return out
}
-
- func scanURLs(urls []string, client *http.Client, cfg *config) []scanResult {
- results := make([]scanResult, len(urls))
- type job struct {
- index int
- url string
- }
- jobs := make(chan job)
- var wg sync.WaitGroup
-
- workers := cfg.Concurrency
- if workers < 1 {
- workers = 1
- }
-
- for i := 0; i < workers; i++ {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for j := range jobs {
- res := scanOneURL(client, cfg, j.url)
- results[j.index] = res
- }
- }()
- }
-
- for i, u := range urls {
- jobs <- job{index: i, url: u}
- }
- close(jobs)
- wg.Wait()
- return results
- }
-
- func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
- res := scanResult{URL: raw, FetchedAt: time.Now()}
- html, contentType, err := fetchContent(client, cfg, raw)
- if err != nil {
- res.Error = err.Error()
- return res
- }
-
- parsedBase, _ := url.Parse(raw)
- baseHost := ""
- if parsedBase != nil {
- baseHost = parsedBase.Hostname()
- }
-
- streams := extractor.ExtractStreams(html)
- playlists := extractor.ExtractPlaylistLinks(html)
- resolvedPlaylists := make([]string, 0, len(playlists))
-
- for _, pl := range playlists {
- plURL := resolveURL(raw, pl)
- if plURL == "" {
- continue
- }
- resolvedPlaylists = append(resolvedPlaylists, plURL)
- plContent, plType, err := fetchContent(client, cfg, plURL)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType, plURL)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
-
- embedURLs := extractor.ExtractEmbedURLs(html)
- seenEmbeds := make(map[string]struct{})
- for _, embed := range embedURLs {
- embedURL := resolveURL(raw, embed)
- if embedURL == "" || embedURL == raw {
- continue
- }
- if _, ok := seenEmbeds[embedURL]; ok {
- continue
- }
- seenEmbeds[embedURL] = struct{}{}
-
- embedHTML, _, err := fetchContent(client, cfg, embedURL)
- if err != nil {
- continue
- }
-
- streams = append(streams, extractor.ExtractStreams(embedHTML)...)
- embedPlaylists := extractor.ExtractPlaylistLinks(embedHTML)
- playlists = append(playlists, embedPlaylists...)
-
- for _, pl := range embedPlaylists {
- plURL := resolveURL(embedURL, pl)
- if plURL == "" {
- continue
- }
- resolvedPlaylists = append(resolvedPlaylists, plURL)
- plContent, plType, err := fetchContent(client, cfg, plURL)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType, plURL)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
- }
-
- scriptURLs := extractor.ExtractScriptURLs(html)
- seenScripts := make(map[string]struct{})
- for _, script := range scriptURLs {
- scriptURL := resolveURL(raw, script)
- if scriptURL == "" || scriptURL == raw {
- continue
- }
- if baseHost != "" {
- parsedScript, err := url.Parse(scriptURL)
- if err != nil {
- continue
- }
- if parsedScript.Hostname() != "" && parsedScript.Hostname() != baseHost {
- continue
- }
- }
- if _, ok := seenScripts[scriptURL]; ok {
- continue
- }
- seenScripts[scriptURL] = struct{}{}
-
- scriptHTML, _, err := fetchContent(client, cfg, scriptURL)
- if err != nil {
- continue
- }
-
- streams = append(streams, extractor.ExtractStreams(scriptHTML)...)
- scriptPlaylists := extractor.ExtractPlaylistLinks(scriptHTML)
- playlists = append(playlists, scriptPlaylists...)
-
- for _, pl := range scriptPlaylists {
- plURL := resolveURL(scriptURL, pl)
- if plURL == "" {
- continue
- }
- resolvedPlaylists = append(resolvedPlaylists, plURL)
- plContent, plType, err := fetchContent(client, cfg, plURL)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType, plURL)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
- }
-
- res.Playlists = uniqueStrings(resolvedPlaylists)
- res.Streams = uniqueStrings(streams)
-
- if cfg.Probe {
- res.Probes = probeStreams(client, cfg, res.Streams)
- }
-
- _ = contentType
- return res
- }
-
- func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) {
- if !cfg.AllowPrivate {
- if blocked, reason := isPrivateURL(raw); blocked {
- return "", "", fmt.Errorf("blocked private address (%s)", reason)
- }
- }
-
- timeout := cfg.RequestTimeout
- if timeout <= 0 {
- timeout = 15 * time.Second
- }
- ctx, cancel := context.WithTimeout(context.Background(), timeout)
- defer cancel()
-
- req, err := http.NewRequestWithContext(ctx, http.MethodGet, raw, nil)
- if err != nil {
- return "", "", err
- }
- req.Header.Set("User-Agent", "radiostreamscan/0.2")
- for _, h := range cfg.Headers {
- parts := strings.SplitN(h, ":", 2)
- if len(parts) == 2 {
- req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
- }
- }
-
- resp, err := client.Do(req)
- if err != nil {
- return "", "", err
- }
- defer resp.Body.Close()
-
- if resp.StatusCode < 200 || resp.StatusCode >= 300 {
- snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
- return "", "", fmt.Errorf("unexpected status %s: %s", resp.Status, strings.TrimSpace(string(snippet)))
- }
-
- body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
- if err != nil {
- return "", "", err
- }
-
- return string(body), resp.Header.Get("Content-Type"), nil
- }
-
- func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult {
- var results []probeResult
- for _, s := range streams {
- if !cfg.AllowPrivate {
- if blocked, reason := isPrivateURL(s); blocked {
- results = append(results, probeResult{URL: s, Status: "blocked: " + reason})
- continue
- }
- }
-
- timeout := cfg.ProbeTimeout
- if timeout <= 0 {
- timeout = cfg.RequestTimeout
- }
- if timeout <= 0 {
- timeout = 8 * time.Second
- }
-
- ctx, cancel := context.WithTimeout(context.Background(), timeout)
- req, err := http.NewRequestWithContext(ctx, http.MethodHead, s, nil)
- if err != nil {
- cancel()
- continue
- }
- for _, h := range cfg.Headers {
- parts := strings.SplitN(h, ":", 2)
- if len(parts) == 2 {
- req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
- }
- }
- resp, err := client.Do(req)
- if err != nil {
- cancel()
- // Fallback to GET with range.
- results = append(results, probeWithGet(client, cfg, s, timeout))
- continue
- }
- resp.Body.Close()
- cancel()
-
- if resp.StatusCode == http.StatusMethodNotAllowed || resp.StatusCode == http.StatusNotImplemented {
- results = append(results, probeWithGet(client, cfg, s, timeout))
- continue
- }
- results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")})
- }
- return results
- }
-
- func probeWithGet(client *http.Client, cfg *config, urlStr string, timeout time.Duration) probeResult {
- ctx, cancel := context.WithTimeout(context.Background(), timeout)
- defer cancel()
-
- req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
- if err != nil {
- return probeResult{URL: urlStr, Status: err.Error()}
- }
- req.Header.Set("Range", "bytes=0-1023")
- for _, h := range cfg.Headers {
- parts := strings.SplitN(h, ":", 2)
- if len(parts) == 2 {
- req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
- }
- }
-
- resp, err := client.Do(req)
- if err != nil {
- return probeResult{URL: urlStr, Status: err.Error()}
- }
- defer resp.Body.Close()
- _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1024))
- return probeResult{URL: urlStr, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")}
- }
-
- func outputResults(results []scanResult, format string, w io.Writer) error {
- if rw, ok := w.(http.ResponseWriter); ok {
- if strings.ToLower(format) == "json" {
- rw.Header().Set("Content-Type", "application/json")
- } else if strings.ToLower(format) == "csv" {
- rw.Header().Set("Content-Type", "text/csv")
- }
- }
-
- switch strings.ToLower(format) {
- case "json":
- if err := json.NewEncoder(w).Encode(results); err != nil {
- return err
- }
- case "csv":
- cw := csv.NewWriter(w)
- if err := cw.Write([]string{"input_url", "stream_url"}); err != nil {
- return err
- }
- for _, res := range results {
- for _, s := range res.Streams {
- if err := cw.Write([]string{res.URL, s}); err != nil {
- return err
- }
- }
- }
- cw.Flush()
- if err := cw.Error(); err != nil {
- return err
- }
- case "pls":
- if _, err := fmt.Fprintln(w, "[playlist]"); err != nil {
- return err
- }
- i := 1
- for _, res := range results {
- for _, s := range res.Streams {
- if _, err := fmt.Fprintf(w, "File%d=%s\n", i, s); err != nil {
- return err
- }
- i++
- }
- }
- if _, err := fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1); err != nil {
- return err
- }
- default:
- for _, res := range results {
- if _, err := fmt.Fprintf(w, "URL: %s\n", res.URL); err != nil {
- return err
- }
- if res.Error != "" {
- if _, err := fmt.Fprintf(w, " error: %s\n", res.Error); err != nil {
- return err
- }
- continue
- }
- if len(res.Streams) == 0 {
- if _, err := fmt.Fprintln(w, " (no candidate streams found)"); err != nil {
- return err
- }
- continue
- }
- for _, s := range res.Streams {
- if _, err := fmt.Fprintf(w, " - %s\n", s); err != nil {
- return err
- }
- }
- }
- }
- return nil
- }
-
// newHTTPClient builds the shared HTTP client. A non-empty proxyURL that
// parses overrides the environment proxy; a non-positive timeout falls back
// to 15 seconds.
func newHTTPClient(proxyURL string, timeout time.Duration) *http.Client {
	tr := &http.Transport{
		Proxy:                 http.ProxyFromEnvironment,
		ResponseHeaderTimeout: 8 * time.Second,
		TLSHandshakeTimeout:   6 * time.Second,
		IdleConnTimeout:       30 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		MaxIdleConns:          100,
		MaxIdleConnsPerHost:   10,
	}
	if proxyURL != "" {
		parsed, err := url.Parse(proxyURL)
		if err == nil {
			tr.Proxy = http.ProxyURL(parsed)
		}
		// An unparsable proxy URL keeps the environment-based proxy.
	}
	if timeout <= 0 {
		timeout = 15 * time.Second
	}
	return &http.Client{Timeout: timeout, Transport: tr}
}
-
// uniqueStrings returns values with duplicates removed, preserving the order
// of first appearance.
func uniqueStrings(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	deduped := make([]string, 0, len(values))
	for _, item := range values {
		if _, dup := seen[item]; !dup {
			seen[item] = struct{}{}
			deduped = append(deduped, item)
		}
	}
	return deduped
}
-
// resolveURL resolves href against base and returns the absolute URL, or ""
// when href is blank or unparsable.
//
// Fix: scheme-relative references ("//host/path") were always forced to
// https, even when the page they came from was served over http. They are now
// resolved against the base URL's scheme via ResolveReference, and default to
// https only when the base itself provides no scheme.
func resolveURL(base, href string) string {
	href = strings.TrimSpace(href)
	if href == "" {
		return ""
	}
	ref, err := url.Parse(href)
	if err != nil {
		// Keep the legacy behavior for scheme-relative refs that fail to parse.
		if strings.HasPrefix(href, "//") {
			return "https:" + href
		}
		return ""
	}
	if ref.IsAbs() {
		return ref.String()
	}
	baseURL, err := url.Parse(base)
	if err != nil {
		return ref.String()
	}
	resolved := baseURL.ResolveReference(ref)
	// Scheme-relative href against a scheme-less base: default to https.
	if resolved.Scheme == "" && strings.HasPrefix(href, "//") {
		resolved.Scheme = "https"
	}
	return resolved.String()
}
-
- func isPrivateURL(raw string) (bool, string) {
- parsed, err := url.Parse(raw)
- if err != nil {
- return false, ""
- }
- host := parsed.Hostname()
- if host == "" {
- return false, ""
- }
- lower := strings.ToLower(host)
- if lower == "localhost" || strings.HasSuffix(lower, ".local") || strings.HasSuffix(lower, ".internal") {
- return true, "hostname"
- }
- ip := net.ParseIP(host)
- if ip == nil {
- return false, ""
- }
- if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
- return true, "loopback/link-local"
- }
- if isPrivateIP(ip) {
- return true, "private range"
- }
- return false, ""
- }
-
- func isPrivateIP(ip net.IP) bool {
- if ip4 := ip.To4(); ip4 != nil {
- switch {
- case ip4[0] == 10:
- return true
- case ip4[0] == 172 && ip4[1] >= 16 && ip4[1] <= 31:
- return true
- case ip4[0] == 192 && ip4[1] == 168:
- return true
- case ip4[0] == 169 && ip4[1] == 254:
- return true
- case ip4[0] == 127:
- return true
- }
- }
- // IPv6 unique local fc00::/7
- if ip.To16() != nil {
- return ip[0]&0xfe == 0xfc
- }
- return false
- }
-
// historyWriter appends scan results to a JSONL file. An empty path disables
// logging entirely. A struct containing a Mutex must not be copied, so it is
// always used through the *historyWriter returned by newHistoryWriter.
type historyWriter struct {
	path string     // destination file; empty means logging is disabled
	mu   sync.Mutex // serializes appends from concurrent scans/handlers
}

// newHistoryWriter returns a writer that appends to path (empty path = no-op).
func newHistoryWriter(path string) *historyWriter {
	return &historyWriter{path: path}
}
-
- func (h *historyWriter) Write(results []scanResult) {
- if h == nil || h.path == "" {
- return
- }
- h.mu.Lock()
- defer h.mu.Unlock()
-
- f, err := os.OpenFile(h.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
- if err != nil {
- return
- }
- defer f.Close()
-
- writer := bufio.NewWriter(f)
- for _, res := range results {
- data, err := json.Marshal(res)
- if err != nil {
- continue
- }
- writer.Write(data)
- writer.WriteString("\n")
- }
- writer.Flush()
- }
|