Bläddra i källkod

feat: follow script assets

master
Alfred 5 dagar sedan
förälder
incheckning
3058ffc2ad
3 ändrade filer med 662 tillägg och 580 borttagningar
  1. +430
    -381
      cmd/radiostreamscan/main.go
  2. +183
    -159
      internal/extractor/extractor.go
  3. +49
    -40
      internal/extractor/extractor_test.go

+ 430
- 381
cmd/radiostreamscan/main.go Visa fil

@@ -1,114 +1,114 @@
package main

import (
"bufio"
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"radio-stream-extractor/internal/extractor"
"bufio"
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"radio-stream-extractor/internal/extractor"
)

// scanResult is the outcome of scanning one input URL.
type scanResult struct {
	URL          string        `json:"url"`
	Streams      []string      `json:"streams"`
	Playlists    []string      `json:"playlists,omitempty"`
	Probes       []probeResult `json:"probes,omitempty"`
	Error        string        `json:"error,omitempty"` // set only when the page itself could not be fetched
	FetchedAt    time.Time     `json:"fetchedAt"`
	FromPlaylist bool          `json:"fromPlaylist"` // true when any stream came from a parsed playlist
}

// probeResult records the outcome of a HEAD probe against one stream URL.
type probeResult struct {
	URL         string `json:"url"`
	Status      string `json:"status"` // HTTP status line, or the request error text
	ContentType string `json:"contentType,omitempty"`
}

// config holds scan options shared by CLI and web modes.
type config struct {
	Format      string        // output format: text|json|csv|pls
	Probe       bool          // HEAD-probe discovered streams
	Headers     headerList    // extra request headers ("Name: value")
	Proxy       string        // optional HTTP proxy URL
	HistoryPath string        // JSONL history file; empty disables logging
	Watch       time.Duration // CLI repeat interval; 0 scans once
	Concurrency int           // number of concurrent fetch workers
}

// headerList collects repeatable -header flag values ("Name: value" strings).
// It implements flag.Value.
type headerList []string

// String renders the collected headers for flag help output.
func (h *headerList) String() string { return strings.Join(*h, ", ") }

// Set appends one raw header string; it never fails.
func (h *headerList) Set(v string) error {
	*h = append(*h, v)
	return nil
}

// main parses flags and dispatches to web-server mode (default when no URLs
// are given, or when -web is set) or one-shot/watch CLI mode.
func main() {
	port := flag.String("port", ":8080", "listen address for the web server (default :8080)")
	web := flag.Bool("web", false, "force web-server mode even when URLs are provided")
	cfg := config{}
	flag.StringVar(&cfg.Format, "format", "text", "output format: text|json|csv|pls")
	flag.BoolVar(&cfg.Probe, "probe", true, "probe discovered stream URLs with HTTP HEAD")
	flag.Var(&cfg.Headers, "header", "custom HTTP header (repeatable), e.g. -header 'Referer: https://example.com'")
	flag.StringVar(&cfg.Proxy, "proxy", "", "HTTP proxy URL (optional)")
	flag.StringVar(&cfg.HistoryPath, "history", "history.jsonl", "path to JSONL history log (empty to disable)")
	flag.DurationVar(&cfg.Watch, "watch", 0, "repeat scan in CLI mode at interval (e.g. 30s, 2m)")
	flag.IntVar(&cfg.Concurrency, "concurrency", 4, "number of concurrent fetch workers")
	flag.Usage = func() {
		fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] <url> [url...]\n", os.Args[0])
		flag.PrintDefaults()
	}
	flag.Parse()

	urls := flag.Args()
	client := newHTTPClient(cfg.Proxy)
	history := newHistoryWriter(cfg.HistoryPath)

	// No URLs on the command line (or explicit -web) means web-server mode.
	if *web || len(urls) == 0 {
		if err := runWebMode(*port, client, &cfg, history); err != nil {
			fmt.Fprintf(os.Stderr, "web mode failed: %v\n", err)
			os.Exit(1)
		}
		return
	}
	runCLIMode(urls, client, &cfg, history)
}

// runCLIMode scans the given URLs, writes the results to stdout in the
// configured format, and appends them to the history log. With cfg.Watch > 0
// it repeats the scan at that interval forever; otherwise it returns after
// one pass.
func runCLIMode(urls []string, client *http.Client, cfg *config, history *historyWriter) {
	for {
		results := scanURLs(urls, client, cfg)
		outputResults(results, cfg.Format, os.Stdout)
		history.Write(results)
		if cfg.Watch == 0 {
			return
		}
		time.Sleep(cfg.Watch)
	}
}

// runWebMode serves the scan UI and API on addr until the server fails.
func runWebMode(addr string, client *http.Client, cfg *config, history *historyWriter) error {
	mux := http.NewServeMux()
	mux.HandleFunc("/", indexHandler)
	mux.HandleFunc("/scan", makeScanHandler(client, cfg, history))
	mux.HandleFunc("/watch", watchHandler)

	fmt.Printf("radiostreamscan listening on %s (GET /scan?url=... or POST url=...)\n", addr)
	// ReadHeaderTimeout guards against clients that open a connection and
	// never finish sending headers (slowloris).
	srv := &http.Server{Addr: addr, Handler: mux, ReadHeaderTimeout: 10 * time.Second}
	return srv.ListenAndServe()
}

func indexHandler(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, `<!doctype html>
fmt.Fprintf(w, `<!doctype html>
<html>
<head><meta charset="utf-8"><title>radiostreamscan</title></head>
<body>
@@ -136,10 +136,10 @@ func indexHandler(w http.ResponseWriter, r *http.Request) {
}

func watchHandler(w http.ResponseWriter, r *http.Request) {
urls := normalizeURLInputs(r.URL.Query()["url"])
interval := r.URL.Query().Get("interval")
probe := r.URL.Query().Get("probe")
fmt.Fprintf(w, `<!doctype html>
urls := normalizeURLInputs(r.URL.Query()["url"])
interval := r.URL.Query().Get("interval")
probe := r.URL.Query().Get("probe")
fmt.Fprintf(w, `<!doctype html>
<html>
<head><meta charset="utf-8"><title>radiostreamscan results</title>
<style>
@@ -224,337 +224,386 @@ func watchHandler(w http.ResponseWriter, r *http.Request) {
}

// makeScanHandler returns an HTTP handler that scans URLs supplied via GET
// query parameters or POST form fields. The per-request query parameters
// "probe" (0/1) and "format" override the server defaults without mutating
// the shared cfg.
func makeScanHandler(client *http.Client, cfg *config, history *historyWriter) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var urls []string
		switch r.Method {
		case http.MethodGet:
			urls = r.URL.Query()["url"]
		case http.MethodPost:
			if err := r.ParseForm(); err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			urls = r.Form["url"]
		default:
			http.Error(w, "only GET and POST supported", http.StatusMethodNotAllowed)
			return
		}
		urls = normalizeURLInputs(urls)
		if len(urls) == 0 {
			http.Error(w, "provide at least one url parameter", http.StatusBadRequest)
			return
		}
		// Copy cfg so per-request overrides do not leak between requests.
		localCfg := *cfg
		switch r.URL.Query().Get("probe") {
		case "1":
			localCfg.Probe = true
		case "0":
			localCfg.Probe = false
		}
		if f := r.URL.Query().Get("format"); f != "" {
			localCfg.Format = f
		}
		results := scanURLs(urls, client, &localCfg)
		history.Write(results)
		outputResults(results, localCfg.Format, w)
	}
}

// normalizeURLInputs splits each input on newlines and trims whitespace,
// dropping empty lines. This lets a single textarea submission carry many
// URLs, one per line.
func normalizeURLInputs(inputs []string) []string {
	var urls []string
	for _, item := range inputs {
		for _, line := range strings.Split(item, "\n") {
			if line = strings.TrimSpace(line); line != "" {
				urls = append(urls, line)
			}
		}
	}
	return urls
}

// scanURLs scans each URL concurrently using cfg.Concurrency workers
// (minimum 1) and returns the results in input order.
func scanURLs(urls []string, client *http.Client, cfg *config) []scanResult {
	results := make([]scanResult, len(urls))
	type job struct {
		index int
		url   string
	}
	jobs := make(chan job)
	var wg sync.WaitGroup
	workers := cfg.Concurrency
	if workers < 1 {
		workers = 1
	}
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := range jobs {
				// Each worker writes to a distinct index, so no lock is needed.
				results[j.index] = scanOneURL(client, cfg, j.url)
			}
		}()
	}
	for i, u := range urls {
		jobs <- job{index: i, url: u}
	}
	close(jobs)
	wg.Wait()
	return results
}

// scanOneURL fetches a single page and collects candidate stream URLs from
// four sources: the page itself, playlists it links to, embedded iframe
// players, and script assets served from the same host as the page.
// Failures on sub-resources are silently skipped (best effort); only a
// failure to fetch the page itself is reported via res.Error.
func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
	res := scanResult{URL: raw, FetchedAt: time.Now()}
	html, _, err := fetchContent(client, cfg, raw)
	if err != nil {
		res.Error = err.Error()
		return res
	}

	// The base host restricts script-asset fetches to the same site, so we
	// don't download arbitrary third-party JS (analytics, CDNs, ads).
	parsedBase, _ := url.Parse(raw)
	baseHost := ""
	if parsedBase != nil {
		baseHost = parsedBase.Hostname()
	}

	streams := extractor.ExtractStreams(html)
	playlists := extractor.ExtractPlaylistLinks(html)

	// expandPlaylists fetches each playlist link and folds its parsed
	// entries into streams, marking the result as playlist-derived.
	expandPlaylists := func(links []string) {
		for _, pl := range links {
			plContent, plType, err := fetchContent(client, cfg, pl)
			if err != nil {
				continue
			}
			parsed := extractor.ParsePlaylist(plContent, plType)
			if len(parsed) > 0 {
				streams = append(streams, parsed...)
				res.FromPlaylist = true
			}
		}
	}
	expandPlaylists(playlists)

	// Follow iframe embeds (external players) once each.
	seenEmbeds := make(map[string]struct{})
	for _, embed := range extractor.ExtractEmbedURLs(html) {
		embedURL := resolveURL(raw, embed)
		if embedURL == "" || embedURL == raw {
			continue
		}
		if _, ok := seenEmbeds[embedURL]; ok {
			continue
		}
		seenEmbeds[embedURL] = struct{}{}

		embedHTML, _, err := fetchContent(client, cfg, embedURL)
		if err != nil {
			continue
		}
		streams = append(streams, extractor.ExtractStreams(embedHTML)...)
		embedPlaylists := extractor.ExtractPlaylistLinks(embedHTML)
		playlists = append(playlists, embedPlaylists...)
		expandPlaylists(embedPlaylists)
	}

	// Follow same-host script assets once each; player JS often carries the
	// stream URL that never appears in the HTML.
	seenScripts := make(map[string]struct{})
	for _, script := range extractor.ExtractScriptURLs(html) {
		scriptURL := resolveURL(raw, script)
		if scriptURL == "" || scriptURL == raw {
			continue
		}
		if baseHost != "" {
			parsedScript, err := url.Parse(scriptURL)
			if err != nil {
				continue
			}
			// Relative script URLs (empty hostname) are implicitly same-host.
			if parsedScript.Hostname() != "" && parsedScript.Hostname() != baseHost {
				continue
			}
		}
		if _, ok := seenScripts[scriptURL]; ok {
			continue
		}
		seenScripts[scriptURL] = struct{}{}

		scriptBody, _, err := fetchContent(client, cfg, scriptURL)
		if err != nil {
			continue
		}
		streams = append(streams, extractor.ExtractStreams(scriptBody)...)
		scriptPlaylists := extractor.ExtractPlaylistLinks(scriptBody)
		playlists = append(playlists, scriptPlaylists...)
		expandPlaylists(scriptPlaylists)
	}

	res.Playlists = uniqueStrings(playlists)
	res.Streams = uniqueStrings(streams)

	if cfg.Probe {
		res.Probes = probeStreams(client, cfg, res.Streams)
	}
	return res
}

// fetchContent GETs raw with the configured custom headers and returns up to
// 2 MiB of the response body plus the response Content-Type. Any non-200
// status is reported as an error.
func fetchContent(client *http.Client, cfg *config, raw string) (string, string, error) {
	req, err := http.NewRequest(http.MethodGet, raw, nil)
	if err != nil {
		return "", "", err
	}
	req.Header.Set("User-Agent", "radiostreamscan/0.2")
	for _, h := range cfg.Headers {
		// Each flag value is "Name: value"; malformed entries are ignored.
		parts := strings.SplitN(h, ":", 2)
		if len(parts) == 2 {
			req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
		}
	}
	resp, err := client.Do(req)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("unexpected status %s", resp.Status)
	}
	// Cap the read at 2 MiB so a huge page or an endless audio stream
	// cannot exhaust memory.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
	if err != nil {
		return "", "", err
	}
	return string(body), resp.Header.Get("Content-Type"), nil
}

// probeStreams issues a HEAD request to each stream URL, applying the
// configured custom headers, and records the status line and content type.
// Request-build failures are skipped; transport errors become the Status text.
func probeStreams(client *http.Client, cfg *config, streams []string) []probeResult {
	var results []probeResult
	for _, s := range streams {
		req, err := http.NewRequest(http.MethodHead, s, nil)
		if err != nil {
			continue
		}
		for _, h := range cfg.Headers {
			parts := strings.SplitN(h, ":", 2)
			if len(parts) == 2 {
				req.Header.Set(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]))
			}
		}
		resp, err := client.Do(req)
		if err != nil {
			results = append(results, probeResult{URL: s, Status: err.Error()})
			continue
		}
		resp.Body.Close()
		results = append(results, probeResult{URL: s, Status: resp.Status, ContentType: resp.Header.Get("Content-Type")})
	}
	return results
}

// outputResults renders results in the requested format (text, json, csv, or
// pls; anything else falls back to text). When w is an http.ResponseWriter
// an appropriate Content-Type header is set for json and csv.
func outputResults(results []scanResult, format string, w io.Writer) {
	format = strings.ToLower(format)
	if rw, ok := w.(http.ResponseWriter); ok {
		switch format {
		case "json":
			rw.Header().Set("Content-Type", "application/json")
		case "csv":
			rw.Header().Set("Content-Type", "text/csv")
		}
	}
	switch format {
	case "json":
		json.NewEncoder(w).Encode(results)
	case "csv":
		cw := csv.NewWriter(w)
		cw.Write([]string{"input_url", "stream_url"})
		for _, res := range results {
			for _, s := range res.Streams {
				cw.Write([]string{res.URL, s})
			}
		}
		cw.Flush()
	case "pls":
		fmt.Fprintln(w, "[playlist]")
		i := 1
		for _, res := range results {
			for _, s := range res.Streams {
				fmt.Fprintf(w, "File%d=%s\n", i, s)
				i++
			}
		}
		fmt.Fprintf(w, "NumberOfEntries=%d\nVersion=2\n", i-1)
	default:
		// Plain-text report, one section per input URL.
		for _, res := range results {
			fmt.Fprintf(w, "URL: %s\n", res.URL)
			if res.Error != "" {
				fmt.Fprintf(w, " error: %s\n", res.Error)
				continue
			}
			if len(res.Streams) == 0 {
				fmt.Fprintln(w, " (no candidate streams found)")
				continue
			}
			for _, s := range res.Streams {
				fmt.Fprintf(w, " - %s\n", s)
			}
		}
	}
}

// newHTTPClient builds the shared HTTP client with a 15-second overall
// timeout. A non-empty, parseable proxyURL is installed as the transport
// proxy; an unparseable one is silently ignored and direct connections are
// used instead.
func newHTTPClient(proxyURL string) *http.Client {
	transport := &http.Transport{}
	if proxyURL != "" {
		if parsed, err := url.Parse(proxyURL); err == nil {
			transport.Proxy = http.ProxyURL(parsed)
		}
	}
	return &http.Client{Timeout: 15 * time.Second, Transport: transport}
}

// uniqueStrings returns the distinct values. Order follows Go map iteration
// and is therefore unspecified; callers needing stable output must sort.
func uniqueStrings(values []string) []string {
	set := make(map[string]struct{}, len(values))
	for _, v := range values {
		set[v] = struct{}{}
	}
	out := make([]string, 0, len(set))
	for v := range set {
		out = append(out, v)
	}
	return out
}

// resolveURL turns href into an absolute URL. Protocol-relative hrefs
// ("//host/...") are upgraded to https; relative hrefs are resolved against
// base. Empty or unparseable hrefs yield ""; an unparseable base returns
// href as-is.
func resolveURL(base, href string) string {
	href = strings.TrimSpace(href)
	if href == "" {
		return ""
	}
	if strings.HasPrefix(href, "//") {
		return "https:" + href
	}
	parsed, err := url.Parse(href)
	if err != nil {
		return ""
	}
	if parsed.IsAbs() {
		return parsed.String()
	}
	baseURL, err := url.Parse(base)
	if err != nil {
		return parsed.String()
	}
	return baseURL.ResolveReference(parsed).String()
}

// historyWriter appends scan results to a JSONL file, serializing concurrent
// writers with a mutex. The zero value with an empty path is a no-op writer.
type historyWriter struct {
	path string
	mu   sync.Mutex
}

// newHistoryWriter creates a history logger for path; an empty path disables
// logging entirely.
func newHistoryWriter(path string) *historyWriter {
	return &historyWriter{path: path}
}

// Write appends each result as one JSON object per line to the history file.
// A nil receiver or empty path disables logging. File-open and marshal errors
// are deliberately swallowed so history logging can never break a scan.
func (h *historyWriter) Write(results []scanResult) {
	if h == nil || h.path == "" {
		return
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	f, err := os.OpenFile(h.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return
	}
	defer f.Close()
	writer := bufio.NewWriter(f)
	for _, res := range results {
		data, err := json.Marshal(res)
		if err != nil {
			continue
		}
		writer.Write(data)
		writer.WriteString("\n")
	}
	writer.Flush()
}

+ 183
- 159
internal/extractor/extractor.go Visa fil

@@ -1,196 +1,220 @@
package extractor

import (
"regexp"
"sort"
"strings"
"regexp"
"sort"
"strings"
)

// urlPattern matches absolute or protocol-relative URLs ending in a known
// audio/playlist extension; capture group 1 is the full URL.
var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)
// attrPattern matches JS/HTML key:value assignments whose key suggests a
// stream source; capture group 2 is the quoted value.
var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)
// srcPattern matches any quoted src=... attribute value.
var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)
// iframePattern captures the src of <iframe> embeds.
var iframePattern = regexp.MustCompile(`(?i)<iframe[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// scriptPattern captures the src of <script> tags.
var scriptPattern = regexp.MustCompile(`(?i)<script[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// audioPattern captures the src of <audio> elements.
var audioPattern = regexp.MustCompile(`(?i)<audio[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// sourcePattern captures the src of <source> elements.
var sourcePattern = regexp.MustCompile(`(?i)<source[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// xspfPattern captures <location> entries in XSPF playlists.
var xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)

// ExtractStreams returns the unique streaming URLs found in the provided
// HTML/text, sorted for deterministic output. Candidates are gathered from
// several regexes (bare URLs, JS-style key/value assignments, generic src
// attributes); URLs from <audio> and <source> tags are kept even without a
// recognized stream extension, since those elements strongly imply playable
// media.
func ExtractStreams(data string) []string {
	candidates := make(map[string]struct{})
	special := make(map[string]struct{}) // kept regardless of extension
	add := func(raw string) {
		if normalized, ok := normalizeCandidate(raw); ok {
			candidates[normalized] = struct{}{}
		}
	}
	addSpecial := func(raw string) {
		if normalized, ok := normalizeCandidate(raw); ok {
			candidates[normalized] = struct{}{}
			special[normalized] = struct{}{}
		}
	}
	for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
		add(match[1])
	}
	for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
		add(match[2])
	}
	for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
		add(match[1])
	}
	for _, match := range audioPattern.FindAllStringSubmatch(data, -1) {
		addSpecial(match[1])
	}
	for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) {
		addSpecial(match[1])
	}
	streams := make([]string, 0, len(candidates))
	for u := range candidates {
		if isStreamURL(u) {
			streams = append(streams, u)
			continue
		}
		if _, ok := special[u]; ok {
			streams = append(streams, u)
		}
	}
	sort.Strings(streams)
	return streams
}

// ExtractPlaylistLinks returns unique URLs likely pointing to playlists
// (m3u/pls/xspf/json), sorted for deterministic output.
func ExtractPlaylistLinks(data string) []string {
	candidates := make(map[string]struct{})
	add := func(raw string) {
		if normalized, ok := normalizeCandidate(raw); ok && isPlaylistURL(normalized) {
			candidates[normalized] = struct{}{}
		}
	}
	for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
		add(match[1])
	}
	for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
		add(match[2])
	}
	for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
		add(match[1])
	}
	links := make([]string, 0, len(candidates))
	for u := range candidates {
		links = append(links, u)
	}
	sort.Strings(links)
	return links
}

// ExtractEmbedURLs returns URLs found in iframe embeds.
func ExtractEmbedURLs(data string) []string {
candidates := make(map[string]struct{})
for _, match := range iframePattern.FindAllStringSubmatch(data, -1) {
if normalized, ok := normalizeCandidate(match[1]); ok {
candidates[normalized] = struct{}{}
}
}

urls := make([]string, 0, len(candidates))
for u := range candidates {
urls = append(urls, u)
}
sort.Strings(urls)
return urls
return extractURLs(iframePattern, data)
}

// ExtractScriptURLs returns URLs referenced by script tags. Root-relative
// paths (e.g. "/js/app.js") are kept so callers can resolve them against the
// page URL before fetching.
func ExtractScriptURLs(data string) []string {
// Delegates to the shared regex-extraction helper with the <script src=…> pattern.
return extractURLs(scriptPattern, data)
}

// ParsePlaylist extracts stream URLs from playlist content. contentType
// hints at the format (m3u/pls/xspf/json) but the content itself is also
// sniffed. Results are sorted for deterministic output.
func ParsePlaylist(content string, contentType string) []string {
	candidates := make(map[string]struct{})
	// normalize trims the value and upgrades protocol-relative URLs to https.
	normalize := func(raw string) string {
		raw = strings.TrimSpace(raw)
		if strings.HasPrefix(raw, "//") {
			raw = "https:" + raw
		}
		return raw
	}
	// add keeps only URLs with a recognized stream extension.
	add := func(raw string) {
		if raw = normalize(raw); raw != "" && isStreamURL(raw) {
			candidates[raw] = struct{}{}
		}
	}
	// addForce keeps the value even without a recognized extension — used
	// for playlist lines that carry a URL the extension regex cannot match.
	addForce := func(raw string) {
		if raw = normalize(raw); raw != "" {
			candidates[raw] = struct{}{}
		}
	}

	lowerType := strings.ToLower(contentType)

	// XSPF: <location> entries.
	if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "<location>") {
		for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) {
			add(match[1])
		}
	}

	// Global scan for extension-bearing URLs anywhere in the content.
	for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
		add(match[1])
	}

	// Line-oriented formats: M3U entries and PLS "FileN=" keys.
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") {
			parts := strings.SplitN(line, "=", 2)
			add(parts[1])
			continue
		}
		if strings.Contains(line, "http") {
			matched := false
			for _, match := range urlPattern.FindAllStringSubmatch(line, -1) {
				add(match[1])
				matched = true
			}
			// Extensionless URL line (e.g. a bare mountpoint): keep it anyway.
			if !matched {
				addForce(line)
			}
		}
	}

	// JSON payloads have no line structure; rescan the whole body.
	if strings.Contains(lowerType, "json") {
		for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
			add(match[1])
		}
	}

	streams := make([]string, 0, len(candidates))
	for u := range candidates {
		streams = append(streams, u)
	}
	sort.Strings(streams)
	return streams
}
// extractURLs applies pattern to data, normalizes every first capture group,
// and returns the unique surviving URLs in sorted order.
func extractURLs(pattern *regexp.Regexp, data string) []string {
	seen := make(map[string]struct{})
	for _, m := range pattern.FindAllStringSubmatch(data, -1) {
		u, ok := normalizeCandidate(m[1])
		if !ok {
			continue
		}
		seen[u] = struct{}{}
	}
	out := make([]string, 0, len(seen))
	for u := range seen {
		out = append(out, u)
	}
	sort.Strings(out)
	return out
}

// normalizeCandidate cleans a raw URL candidate. It accepts values that
// contain "http", start with "//" (upgraded to https), or start with "/"
// (root-relative paths, so script assets can be resolved later). Trailing
// '+' characters (JS string-concatenation residue) and escaped backslashes
// are stripped. The boolean reports whether the candidate is usable.
func normalizeCandidate(raw string) (string, bool) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return "", false
	}
	if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//") || strings.HasPrefix(raw, "/")) {
		return "", false
	}
	if strings.HasPrefix(raw, "//") {
		raw = "https:" + raw
	}
	normalized := strings.TrimRight(raw, "+")
	normalized = strings.ReplaceAll(normalized, `\\`, "")
	if normalized == "" {
		return "", false
	}
	return normalized, true
}

// isStreamURL reports whether u looks like a direct audio/stream URL,
// judged case-insensitively by the presence of a known stream extension
// anywhere in the URL (so query strings after the extension still match).
func isStreamURL(u string) bool {
	lower := strings.ToLower(u)
	return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") ||
		strings.Contains(lower, ".ogg") || strings.Contains(lower, ".opus")
}

// isPlaylistURL reports whether u looks like a playlist URL, judged
// case-insensitively by extension substring. Note ".m3u" also matches
// ".m3u8" URLs — a URL can be both a stream and a playlist candidate.
func isPlaylistURL(u string) bool {
	lower := strings.ToLower(u)
	return strings.Contains(lower, ".m3u") || strings.Contains(lower, ".pls") ||
		strings.Contains(lower, ".xspf") || strings.Contains(lower, ".json")
}

+ 49
- 40
internal/extractor/extractor_test.go Visa fil

@@ -1,12 +1,12 @@
package extractor

import (
"reflect"
"testing"
"reflect"
"testing"
)

func TestExtractStreams(t *testing.T) {
html := `
html := `
<script>
var streamsrc = 'https://example.com/live/stream.mp3';
var streamhash="https://cdn.example.net/relay.m3u8";
@@ -19,56 +19,65 @@ func TestExtractStreams(t *testing.T) {
<div data-value="https://example.com/secret.pls"></div>
`

streams := ExtractStreams(html)
if len(streams) != 7 {
t.Fatalf("wanted 7 streams, got %d: %v", len(streams), streams)
}
found := false
for _, s := range streams {
if s == "https://stream.example.com/live" {
found = true
break
}
}
if !found {
t.Fatalf("expected audio tag stream to be present: %v", streams)
}
streams := ExtractStreams(html)
if len(streams) != 7 {
t.Fatalf("wanted 7 streams, got %d: %v", len(streams), streams)
}
found := false
for _, s := range streams {
if s == "https://stream.example.com/live" {
found = true
break
}
}
if !found {
t.Fatalf("expected audio tag stream to be present: %v", streams)
}
}

func TestExtractPlaylistLinks(t *testing.T) {
html := `
html := `
<a href="https://example.com/stream.m3u">m3u</a>
<a href="https://example.com/playlist.pls">pls</a>
<a href="https://example.com/radio.xspf">xspf</a>
<a href="https://example.com/data.json">json</a>
`
links := ExtractPlaylistLinks(html)
if len(links) != 4 {
t.Fatalf("wanted 4 playlist links, got %d: %v", len(links), links)
}
links := ExtractPlaylistLinks(html)
if len(links) != 4 {
t.Fatalf("wanted 4 playlist links, got %d: %v", len(links), links)
}
}

// TestExtractEmbedURLs verifies protocol-relative srcs are upgraded to https
// and that results come back in sorted order.
func TestExtractEmbedURLs(t *testing.T) {
	html := `<iframe src="//example.com/embed"></iframe><iframe src="https://example.org/player"></iframe>`
	urls := ExtractEmbedURLs(html)
	want := []string{"https://example.com/embed", "https://example.org/player"}
	if !reflect.DeepEqual(urls, want) {
		t.Fatalf("wanted iframe URLs %v, got %v", want, urls)
	}
}

// TestExtractScriptURLs checks that both root-relative and absolute script
// srcs are extracted, returned in sorted order.
func TestExtractScriptURLs(t *testing.T) {
html := `<script src="/js/app.js"></script><script src="https://example.org/player.js"></script>`
urls := ExtractScriptURLs(html)
want := []string{"/js/app.js", "https://example.org/player.js"}
if !reflect.DeepEqual(urls, want) {
t.Fatalf("wanted script URLs %v, got %v", want, urls)
}
}

func TestParsePlaylist(t *testing.T) {
m3u := "#EXTM3U\nhttps://example.com/live.mp3\n"
pls := "[playlist]\nFile1=https://example.com/stream.aac\n"
xspf := "<playlist><location>https://example.com/hls.m3u8</location></playlist>"
m3u := "#EXTM3U\nhttps://example.com/live.mp3\n"
pls := "[playlist]\nFile1=https://example.com/stream.aac\n"
xspf := "<playlist><location>https://example.com/hls.m3u8</location></playlist>"

if len(ParsePlaylist(m3u, "audio/x-mpegurl")) != 1 {
t.Fatal("expected m3u playlist to yield 1 stream")
}
if len(ParsePlaylist(pls, "audio/x-scpls")) != 1 {
t.Fatal("expected pls playlist to yield 1 stream")
}
if len(ParsePlaylist(xspf, "application/xspf+xml")) != 1 {
t.Fatal("expected xspf playlist to yield 1 stream")
}
if len(ParsePlaylist(m3u, "audio/x-mpegurl")) != 1 {
t.Fatal("expected m3u playlist to yield 1 stream")
}
if len(ParsePlaylist(pls, "audio/x-scpls")) != 1 {
t.Fatal("expected pls playlist to yield 1 stream")
}
if len(ParsePlaylist(xspf, "application/xspf+xml")) != 1 {
t.Fatal("expected xspf playlist to yield 1 stream")
}
}

Laddar…
Avbryt
Spara