|
- package main
-
- import (
- "bufio"
- "encoding/csv"
- "encoding/json"
- "flag"
- "fmt"
- "io"
- "net/http"
- "net/url"
- "os"
- "strings"
- "sync"
- "time"
-
- "radio-stream-extractor/internal/extractor"
- )
-
- type scanResult struct {
- URL string `json:"url"`
- Streams []string `json:"streams"`
- Playlists []string `json:"playlists,omitempty"`
- Probes []probeResult `json:"probes,omitempty"`
- Error string `json:"error,omitempty"`
- FetchedAt time.Time `json:"fetchedAt"`
- FromPlaylist bool `json:"fromPlaylist"`
- }
-
// probeResult records what a HEAD probe of one stream URL returned.
type probeResult struct {
	// URL is the stream URL that was probed.
	URL string `json:"url"`
	// Status is the HTTP status line of the response, or the error text
	// when the request failed.
	Status string `json:"status"`
	// ContentType is the Content-Type response header, when present.
	ContentType string `json:"contentType,omitempty"`
}
-
- type config struct {
- Format string
- Probe bool
- Headers headerList
- Proxy string
- HistoryPath string
- Watch time.Duration
- Concurrency int
- }
-
// headerList collects the values of a repeatable string flag. Its String and
// Set methods let it be registered with flag.Var.
type headerList []string

// String renders the collected values as a single ", "-separated string.
func (h *headerList) String() string {
	values := []string(*h)
	return strings.Join(values, ", ")
}

// Set appends v to the list. It always returns nil.
func (h *headerList) Set(v string) error {
	extended := append(*h, v)
	*h = extended
	return nil
}
-
- func main() {
- port := flag.String("port", ":8080", "listen address for the web server (default :8080)")
- web := flag.Bool("web", false, "force web-server mode even when URLs are provided")
-
- cfg := config{}
- flag.StringVar(&cfg.Format, "format", "text", "output format: text|json|csv|pls")
- flag.BoolVar(&cfg.Probe, "probe", true, "probe discovered stream URLs with HTTP HEAD")
- flag.Var(&cfg.Headers, "header", "custom HTTP header (repeatable), e.g. -header 'Referer: https://example.com'")
- flag.StringVar(&cfg.Proxy, "proxy", "", "HTTP proxy URL (optional)")
- flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)")
- flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)")
- flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers")
-
- flag.Usage = func() {
- fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] <url> [url...]\n", os.Args[0])
- flag.PrintDefaults()
- }
- flag.Parse()
-
- urls := flag.Args()
- client := newHTTPClient(cfg.Proxy)
- history := newHistoryWriter(cfg.HistoryPath)
-
- if *web || len(urls) == 0 {
- if err := runWebMode(*port, client, &cfg, history); err != nil {
- fmt.Fprintf(os.Stderr, "web mode failed: %v\n", err)
- os.Exit(1)
- }
- return
- }
-
- runCLIMode(urls, client, &cfg, history)
- }
-
- func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) {
- for {
- results := scanURLs(urls, client, cfg)
- outputResults(results, cfg.Format, os.Stdout)
- history.Write(results)
- if cfg.Watch == 0 {
- return
- }
- time.Sleep(cfg.Watch)
- }
- }
-
- func runWebMode(addr string, client *http.Client, cfg *config, history *historyWriter) error {
- mux := http.NewServeMux()
- mux.HandleFunc("/", indexHandler)
- mux.HandleFunc("/scan", makeScanHandler(client, cfg, history))
- mux.HandleFunc("/watch", watchHandler)
-
- fmt.Printf("radiostreamscan listening on %s (GET /scan?url=... or POST url=...)\n", addr)
- return http.ListenAndServe(addr, mux)
- }
-
// indexHandler serves the static start page: a form that submits the entered
// URLs, output format, refresh interval and probing choice to /watch.
func indexHandler(w http.ResponseWriter, r *http.Request) {
	// The page contains no format verbs, so it is written out verbatim.
	const page = `<!doctype html>
<html>
<head><meta charset="utf-8"><title>radiostreamscan</title></head>
<body>
<h1>radiostreamscan</h1>
<form method="get" action="/watch">
<label>Stream-URLs (eine pro Zeile)</label><br/>
<textarea name="url" rows="6" cols="80" required></textarea><br/>
<label>Format
<select name="format">
<option value="json">json</option>
<option value="text">text</option>
<option value="csv">csv</option>
<option value="pls">pls</option>
</select>
</label>
<label>Auto-Refresh (Sekunden)
<input type="number" name="interval" value="0" min="0" />
</label>
<label><input type="checkbox" name="probe" value="1" checked> Probing</label>
<button type="submit">Scan</button>
</form>
<p>Mehrere URLs: /scan?url=a&url=b&url=c</p>
</body>
</html>`
	fmt.Fprint(w, page)
}
-
- func watchHandler(w http.ResponseWriter, r *http.Request) {
- urls := normalizeURLInputs(r.URL.Query()["url"])
- interval := r.URL.Query().Get("interval")
- probe := r.URL.Query().Get("probe")
- fmt.Fprintf(w, `<!doctype html>
- <html>
- <head><meta charset="utf-8"><title>radiostreamscan results</title>
- <style>
- body { font-family: Arial, sans-serif; }
- .url-block { margin: 10px 0; padding: 10px; border: 1px solid #ccc; }
- .error { color: #b00020; }
- button { margin: 8px 0; }
- </style>
- </head>
- <body>
- <h1>radiostreamscan results</h1>
- <button id="copy">Alle Streams kopieren</button>
- <div id="output">Loading...</div>
- <textarea id="clipboard" style="position:absolute; left:-9999px; top:-9999px;"></textarea>
- <script>
- const urls = %q.split("\n").filter(Boolean);
- const interval = %q;
- const probe = %q;
-
- async function fetchData() {
- const params = new URLSearchParams();
- urls.forEach(u => params.append("url", u));
- params.set("format", "json");
- if (probe) params.set("probe", "1");
-
- const res = await fetch("/scan?" + params.toString());
- const data = await res.json();
-
- const container = document.getElementById("output");
- container.innerHTML = "";
-
- const allStreams = [];
-
- data.forEach(item => {
- const block = document.createElement("div");
- block.className = "url-block";
- const title = document.createElement("h3");
- title.textContent = item.url;
- block.appendChild(title);
-
- if (item.error) {
- const err = document.createElement("div");
- err.className = "error";
- err.textContent = item.error;
- block.appendChild(err);
- container.appendChild(block);
- return;
- }
-
- const list = document.createElement("ul");
- (item.streams || []).forEach(s => {
- const li = document.createElement("li");
- li.textContent = s;
- list.appendChild(li);
- allStreams.push(s);
- });
- block.appendChild(list);
- container.appendChild(block);
- });
-
- document.getElementById("clipboard").value = allStreams.join("\n");
- }
-
- document.getElementById("copy").addEventListener("click", () => {
- const text = document.getElementById("clipboard").value;
- if (navigator.clipboard && navigator.clipboard.writeText) {
- navigator.clipboard.writeText(text);
- } else {
- const el = document.getElementById("clipboard");
- el.select();
- document.execCommand("copy");
- }
- });
-
- fetchData();
- if (interval && Number(interval) > 0) {
- setInterval(fetchData, Number(interval) * 1000);
- }
- </script>
- </body>
- </html>`, strings.Join(urls, "\n"), interval, probe)
- }
-
- func makeScanHandler(client *http.Client, cfg *config, history *historyWriter) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- var urls []string
- switch r.Method {
- case http.MethodGet:
- urls = r.URL.Query()["url"]
- case http.MethodPost:
- if err := r.ParseForm(); err != nil {
- http.Error(w, err.Error(), http.StatusBadRequest)
- return
- }
- urls = r.Form["url"]
- default:
- http.Error(w, "only GET and POST supported", http.StatusMethodNotAllowed)
- return
- }
-
- urls = normalizeURLInputs(urls)
- if len(urls) == 0 {
- http.Error(w, "provide at least one url parameter", http.StatusBadRequest)
- return
- }
-
- localCfg := *cfg
- if r.URL.Query().Get("probe") == "1" {
- localCfg.Probe = true
- } else if r.URL.Query().Get("probe") == "0" {
- localCfg.Probe = false
- }
- if f := r.URL.Query().Get("format"); f != "" {
- localCfg.Format = f
- }
-
- results := scanURLs(urls, client, &localCfg)
- history.Write(results)
- outputResults(results, localCfg.Format, w)
- }
- }
-
// normalizeURLInputs flattens inputs into a list of URLs: every input is
// split on newlines, each line is trimmed of surrounding whitespace and empty
// lines are dropped. It returns nil when nothing remains.
func normalizeURLInputs(inputs []string) []string {
	var cleaned []string
	for i := range inputs {
		lines := strings.Split(inputs[i], "\n")
		for j := range lines {
			if candidate := strings.TrimSpace(lines[j]); candidate != "" {
				cleaned = append(cleaned, candidate)
			}
		}
	}
	return cleaned
}
-
- func scanURLs(urls []string, client *http.Client, cfg *config) []scanResult {
- results := make([]scanResult, len(urls))
- type job struct {
- index int
- url string
- }
- jobs := make(chan job)
- var wg sync.WaitGroup
-
- workers := cfg.Concurrency
- if workers < 1 {
- workers = 1
- }
-
- for i := 0; i < workers; i++ {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for j := range jobs {
- res := scanOneURL(client, cfg, j.url)
- results[j.index] = res
- }
- }()
- }
-
- for i, u := range urls {
- jobs <- job{index: i, url: u}
- }
- close(jobs)
- wg.Wait()
- return results
- }
-
- func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
- res := scanResult{URL: raw, FetchedAt: time.Now()}
- html, contentType, err := fetchContent(client, cfg, raw)
- if err != nil {
- res.Error = err.Error()
- return res
- }
-
- parsedBase, _ := url.Parse(raw)
- baseHost := ""
- if parsedBase != nil {
- baseHost = parsedBase.Hostname()
- }
-
- streams := extractor.ExtractStreams(html)
- playlists := extractor.ExtractPlaylistLinks(html)
-
- for _, pl := range playlists {
- plContent, plType, err := fetchContent(client, cfg, pl)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
-
- embedURLs := extractor.ExtractEmbedURLs(html)
- seenEmbeds := make(map[string]struct{})
- for _, embed := range embedURLs {
- embedURL := resolveURL(raw, embed)
- if embedURL == "" || embedURL == raw {
- continue
- }
- if _, ok := seenEmbeds[embedURL]; ok {
- continue
- }
- seenEmbeds[embedURL] = struct{}{}
-
- embedHTML, _, err := fetchContent(client, cfg, embedURL)
- if err != nil {
- continue
- }
-
- streams = append(streams, extractor.ExtractStreams(embedHTML)...)
- embedPlaylists := extractor.ExtractPlaylistLinks(embedHTML)
- playlists = append(playlists, embedPlaylists...)
-
- for _, pl := range embedPlaylists {
- plContent, plType, err := fetchContent(client, cfg, pl)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
- }
-
- scriptURLs := extractor.ExtractScriptURLs(html)
- seenScripts := make(map[string]struct{})
- for _, script := range scriptURLs {
- scriptURL := resolveURL(raw, script)
- if scriptURL == "" || scriptURL == raw {
- continue
- }
- if baseHost != "" {
- parsedScript, err := url.Parse(scriptURL)
- if err != nil {
- continue
- }
- if parsedScript.Hostname() != "" && parsedScript.Hostname() != baseHost {
- continue
- }
- }
- if _, ok := seenScripts[scriptURL]; ok {
- continue
- }
- seenScripts[scriptURL] = struct{}{}
-
- scriptHTML, _, err := fetchContent(client, cfg, scriptURL)
- if err != nil {
- continue
- }
-
- streams = append(streams, extractor.ExtractStreams(scriptHTML)...)
- scriptPlaylists := extractor.ExtractPlaylistLinks(scriptHTML)
- playlists = append(playlists, scriptPlaylists...)
-
- for _, pl := range scriptPlaylists {
- plContent, plType, err := fetchContent(client, cfg, pl)
- if err != nil {
- continue
- }
- parsed := extractor.ParsePlaylist(plContent, plType)
- if len(parsed) > 0 {
- streams = append(streams, parsed...)
- res.FromPlaylist = true
- }
- }
- }
-
- res.Playlists = uniqueStrings(playlists)
- res.Streams = uniqueStrings(streams)
-
- if cfg.Probe {
- res.Probes = probeStreams(client, cfg, res.Streams)
- }
-
- _ = contentType
- return res
- }
-
- func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) {
- req, err := http.NewRequest(http.MethodGet, raw, nil)
- if err != nil {
- return "", "", err
- }
- req.Header.Set("User-Agent", "radiostreamscan/0.2")
- for _, h := range cfg.Headers {
- parts := strings.SplitN(h, ":", 2)
- if len(parts) == 2 {
- req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
- }
- }
-
- resp, err := client.Do(req)
- if err != nil {
- return "", "", err
- }
- defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- return "", "", fmt.Errorf("unexpected status %s", resp.Status)
- }
-
- body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
- if err != nil {
- return "", "", err
- }
-
- return string(body), resp.Header.Get("Content-Type"), nil
- }
-
- func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult {
- var results []probeResult
- for _, s := range streams {
- req, err := http.NewRequest(http.MethodHead, s, nil)
- if err != nil {
- continue
- }
- for _, h := range cfg.Headers {
- parts := strings.SplitN(h, ":", 2)
- if len(parts) == 2 {
- req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
- }
- }
- resp, err := client.Do(req)
- if err != nil {
- results = append(results, probeResult{URL: s, Status: err.Error()})
- continue
- }
- resp.Body.Close()
- results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")})
- }
- return results
- }
-
- func outputResults(results []scanResult, format string, w io.Writer) {
- if rw, ok := w.(http.ResponseWriter); ok {
- if strings.ToLower(format) == "json" {
- rw.Header().Set("Content-Type", "application/json")
- } else if strings.ToLower(format) == "csv" {
- rw.Header().Set("Content-Type", "text/csv")
- }
- }
-
- switch strings.ToLower(format) {
- case "json":
- json.NewEncoder(w).Encode(results)
- case "csv":
- cw := csv.NewWriter(w)
- cw.Write([]string{"input_url", "stream_url"})
- for _, res := range results {
- for _, s := range res.Streams {
- cw.Write([]string{res.URL, s})
- }
- }
- cw.Flush()
- case "pls":
- fmt.Fprintln(w, "[playlist]")
- i := 1
- for _, res := range results {
- for _, s := range res.Streams {
- fmt.Fprintf(w, "File%d=%s\n", i, s)
- i++
- }
- }
- fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1)
- default:
- for _, res := range results {
- fmt.Fprintf(w, "URL: %s\n", res.URL)
- if res.Error != "" {
- fmt.Fprintf(w, " error: %s\n", res.Error)
- continue
- }
- if len(res.Streams) == 0 {
- fmt.Fprintln(w, " (no candidate streams found)")
- continue
- }
- for _, s := range res.Streams {
- fmt.Fprintf(w, " - %s\n", s)
- }
- }
- }
- }
-
// newHTTPClient returns an HTTP client with a 15-second overall timeout. When
// proxyURL is non-empty and parses as a URL, all requests are sent through
// that proxy. An unparsable proxyURL is reported on stderr and the client
// connects directly.
func newHTTPClient(proxyURL string) *http.Client {
	transport := &http.Transport{}
	if proxyURL != "" {
		parsed, err := url.Parse(proxyURL)
		if err != nil {
			// Previously this was silently ignored, so a value such as
			// "host:8080" (no scheme) quietly disabled the proxy.
			fmt.Fprintf(os.Stderr, "ignoring invalid proxy URL %q: %v\n", proxyURL, err)
		} else {
			transport.Proxy = http.ProxyURL(parsed)
		}
	}
	return &http.Client{Timeout: 15 * time.Second, Transport: transport}
}
-
// uniqueStrings returns values with duplicates removed, keeping the first
// occurrence of each string in its original position. The result is never
// nil, so it encodes as an empty JSON array rather than null.
func uniqueStrings(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	out := make([]string, 0, len(values))
	for _, v := range values {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		// Appending in input order keeps the output deterministic; ranging
		// over the map instead would shuffle it on every call.
		out = append(out, v)
	}
	return out
}
-
// resolveURL turns href into an absolute URL using base as the reference
// point. Blank or unparsable hrefs yield "". Protocol-relative hrefs ("//…")
// are always given the https scheme. Absolute hrefs are returned as parsed;
// relative ones are resolved against base, or returned unchanged when base
// itself cannot be parsed.
func resolveURL(base, href string) string {
	ref := strings.TrimSpace(href)
	switch {
	case ref == "":
		return ""
	case strings.HasPrefix(ref, "//"):
		return "https:" + ref
	}

	refURL, err := url.Parse(ref)
	if err != nil {
		return ""
	}
	if refURL.IsAbs() {
		return refURL.String()
	}

	if baseURL, baseErr := url.Parse(base); baseErr == nil {
		return baseURL.ResolveReference(refURL).String()
	}
	return refURL.String()
}
-
// historyWriter appends scan results to a JSONL file.
type historyWriter struct {
	// path is the log file location; an empty path disables writing.
	path string
	// mu serializes writes to the file.
	mu sync.Mutex
}
-
- func newHistoryWriter(path string) *historyWriter {
- return &historyWriter{path: path}
- }
-
- func (h *historyWriter) Write(results []scanResult) {
- if h == nil || h.path == "" {
- return
- }
- h.mu.Lock()
- defer h.mu.Unlock()
-
- f, err := os.OpenFile(h.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
- if err != nil {
- return
- }
- defer f.Close()
-
- writer := bufio.NewWriter(f)
- for _, res := range results {
- data, err := json.Marshal(res)
- if err != nil {
- continue
- }
- writer.Write(data)
- writer.WriteString("\n")
- }
- writer.Flush()
- }
|