Переглянути джерело

feat: follow embedded players

master
Alfred 5 дні тому
джерело
коміт
c1068500bd
3 змінені файли: 141 додано та 28 видалено
  1. +57
    -1
      cmd/radiostreamscan/main.go
  2. +58
    -24
      internal/extractor/extractor.go
  3. +26
    -3
      internal/extractor/extractor_test.go

+ 57
- 1
cmd/radiostreamscan/main.go Переглянути файл

@@ -319,7 +319,6 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {

streams := extractor.ExtractStreams(html)
playlists := extractor.ExtractPlaylistLinks(html)
res.Playlists = playlists

for _, pl := range playlists {
plContent, plType, err := fetchContent(client, cfg, pl)
@@ -333,6 +332,41 @@ func scanOneURL(client *http.Client, cfg *config, raw string) scanResult {
}
}

embedURLs := extractor.ExtractEmbedURLs(html)
seenEmbeds := make(map[string]struct{})
for _, embed := range embedURLs {
embedURL := resolveURL(raw, embed)
if embedURL == "" || embedURL == raw {
continue
}
if _, ok := seenEmbeds[embedURL]; ok {
continue
}
seenEmbeds[embedURL] = struct{}{}

embedHTML, _, err := fetchContent(client, cfg, embedURL)
if err != nil {
continue
}

streams = append(streams, extractor.ExtractStreams(embedHTML)...)
embedPlaylists := extractor.ExtractPlaylistLinks(embedHTML)
playlists = append(playlists, embedPlaylists...)

for _, pl := range embedPlaylists {
plContent, plType, err := fetchContent(client, cfg, pl)
if err != nil {
continue
}
parsed := extractor.ParsePlaylist(plContent, plType)
if len(parsed) > 0 {
streams = append(streams, parsed...)
res.FromPlaylist = true
}
}
}

res.Playlists = uniqueStrings(playlists)
res.Streams = uniqueStrings(streams)

if cfg.Probe {
@@ -469,6 +503,28 @@ func uniqueStrings(values []string) []string {
return out
}

// resolveURL turns href, as found on the page at base, into an absolute URL.
//
// It returns the empty string whenever no absolute URL can be produced:
// href is blank, href does not parse, or href is relative and base is
// unparseable or not itself absolute. Callers treat "" as "skip this link",
// so a relative, unfetchable reference is never handed back.
//
// Protocol-relative references ("//host/path") are upgraded to https,
// independent of the scheme used by base.
func resolveURL(base, href string) string {
	href = strings.TrimSpace(href)
	if href == "" {
		return ""
	}
	if strings.HasPrefix(href, "//") {
		return "https:" + href
	}
	ref, err := url.Parse(href)
	if err != nil {
		return ""
	}
	if ref.IsAbs() {
		return ref.String()
	}
	// A relative reference can only become absolute against an absolute base;
	// anything else would yield a path-only result that cannot be fetched.
	baseURL, err := url.Parse(base)
	if err != nil || !baseURL.IsAbs() {
		return ""
	}
	return baseURL.ResolveReference(ref).String()
}

type historyWriter struct {
path string
mu sync.Mutex


+ 58
- 24
internal/extractor/extractor.go Переглянути файл

@@ -9,25 +9,25 @@ import (
// urlPattern matches, case-insensitively, an http(s) or protocol-relative URL
// that contains one of the listed media/playlist extensions. The match ends at
// the extension, so anything following it (such as a query string) is not part
// of the match. Group 1 is the URL, group 2 the extension.
var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)
// attrPattern matches one of the listed key names followed by ':' or '=' and a
// quoted value. Group 1 is the key name, group 2 the quoted value.
var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)
// srcPattern matches any quoted src=... assignment; group 1 is the value.
var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)
// iframePattern matches the quoted src value inside an <iframe ...> tag
// (group 1). Because the attribute name is only required to end in "src",
// attributes such as data-src match as well.
var iframePattern = regexp.MustCompile(`(?i)<iframe[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// audioPattern is the <audio ...> counterpart of iframePattern; group 1 is the
// quoted src value.
var audioPattern = regexp.MustCompile(`(?i)<audio[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// sourcePattern is the <source ...> counterpart of iframePattern; group 1 is
// the quoted src value.
var sourcePattern = regexp.MustCompile(`(?i)<source[^>]+src\s*=\s*['"]([^'"]+)['"]`)
// xspfPattern captures the text content of a <location> element (group 1).
var xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)

// ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
func ExtractStreams(data string) []string {
candidates := make(map[string]struct{})
special := make(map[string]struct{})
add := func(raw string) {
raw = strings.TrimSpace(raw)
if raw == "" {
return
}
if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
return
if normalized, ok := normalizeCandidate(raw); ok {
candidates[normalized] = struct{}{}
}
if strings.HasPrefix(raw, "//") {
raw = "https:" + raw
}
addSpecial := func(raw string) {
if normalized, ok := normalizeCandidate(raw); ok {
candidates[normalized] = struct{}{}
special[normalized] = struct{}{}
}
normalized := strings.TrimRight(raw, "+")
normalized = strings.ReplaceAll(normalized, `\\`, "")
candidates[normalized] = struct{}{}
}

for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
@@ -39,11 +39,21 @@ func ExtractStreams(data string) []string {
for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
add(match[1])
}
for _, match := range audioPattern.FindAllStringSubmatch(data, -1) {
addSpecial(match[1])
}
for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) {
addSpecial(match[1])
}

streams := make([]string, 0, len(candidates))
for u := range candidates {
if isStreamURL(u) {
streams = append(streams, u)
continue
}
if _, ok := special[u]; ok {
streams = append(streams, u)
}
}
sort.Strings(streams)
@@ -54,19 +64,7 @@ func ExtractStreams(data string) []string {
func ExtractPlaylistLinks(data string) []string {
candidates := make(map[string]struct{})
add := func(raw string) {
raw = strings.TrimSpace(raw)
if raw == "" {
return
}
if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
return
}
if strings.HasPrefix(raw, "//") {
raw = "https:" + raw
}
normalized := strings.TrimRight(raw, "+")
normalized = strings.ReplaceAll(normalized, `\\`, "")
if isPlaylistURL(normalized) {
if normalized, ok := normalizeCandidate(raw); ok && isPlaylistURL(normalized) {
candidates[normalized] = struct{}{}
}
}
@@ -89,6 +87,23 @@ func ExtractPlaylistLinks(data string) []string {
return links
}

// ExtractEmbedURLs returns the distinct, normalized src targets of the
// <iframe> tags found in data, sorted lexicographically.
//
// Each src value is passed through normalizeCandidate; values it rejects are
// left out of the result. The returned slice is never nil.
func ExtractEmbedURLs(data string) []string {
	matches := iframePattern.FindAllStringSubmatch(data, -1)

	seen := make(map[string]struct{}, len(matches))
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		normalized, ok := normalizeCandidate(m[1])
		if !ok {
			continue
		}
		// Keep only the first occurrence of each normalized URL.
		if _, dup := seen[normalized]; dup {
			continue
		}
		seen[normalized] = struct{}{}
		urls = append(urls, normalized)
	}

	sort.Strings(urls)
	return urls
}

// ParsePlaylist extracts stream URLs from playlist content.
func ParsePlaylist(content string, contentType string) []string {
candidates := make(map[string]struct{})
@@ -149,6 +164,25 @@ func ParsePlaylist(content string, contentType string) []string {
return streams
}

// normalizeCandidate cleans up a raw URL candidate scraped from markup.
//
// The candidate is accepted only if, after trimming surrounding whitespace,
// it either contains the substring "http" or starts with "//". Accepted
// candidates are normalized as follows:
//   - a leading "//" is prefixed with "https:";
//   - trailing '+' characters are removed;
//   - every occurrence of two consecutive backslashes is removed (a single
//     backslash is left untouched).
//
// The second result reports whether a usable, non-empty URL was produced.
func normalizeCandidate(raw string) (string, bool) {
	candidate := strings.TrimSpace(raw)
	protocolRelative := strings.HasPrefix(candidate, "//")

	switch {
	case candidate == "":
		return "", false
	case !protocolRelative && !strings.Contains(candidate, "http"):
		return "", false
	case protocolRelative:
		candidate = "https:" + candidate
	}

	candidate = strings.ReplaceAll(strings.TrimRight(candidate, "+"), `\\`, "")
	if candidate == "" {
		return "", false
	}
	return candidate, true
}

func isStreamURL(u string) bool {
lower := strings.ToLower(u)
return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") ||


+ 26
- 3
internal/extractor/extractor_test.go Переглянути файл

@@ -1,6 +1,9 @@
package extractor

import "testing"
import (
"reflect"
"testing"
)

func TestExtractStreams(t *testing.T) {
html := `
@@ -11,13 +14,24 @@ func TestExtractStreams(t *testing.T) {
</script>
<a href="https://streams.example.org/radio.aac?user=test">listen</a>
<source src="//players.example.eu/ambient.ogg" type="audio/ogg" />
<audio src="https://stream.example.com/live"></audio>
<audio data-src="https://pod.example.com/episode.opus"></audio>
<div data-value="https://example.com/secret.pls"></div>
`

streams := ExtractStreams(html)
if len(streams) != 6 {
t.Fatalf("wanted 6 streams, got %d: %v", len(streams), streams)
if len(streams) != 7 {
t.Fatalf("wanted 7 streams, got %d: %v", len(streams), streams)
}
found := false
for _, s := range streams {
if s == "https://stream.example.com/live" {
found = true
break
}
}
if !found {
t.Fatalf("expected audio tag stream to be present: %v", streams)
}
}

@@ -34,6 +48,15 @@ func TestExtractPlaylistLinks(t *testing.T) {
}
}

// TestExtractEmbedURLs checks that iframe src values are extracted, that a
// protocol-relative src is upgraded to https, and that the result is sorted.
func TestExtractEmbedURLs(t *testing.T) {
	const page = `<iframe src="//example.com/embed"></iframe><iframe src="https://example.org/player"></iframe>`
	expected := []string{"https://example.com/embed", "https://example.org/player"}

	got := ExtractEmbedURLs(page)
	if reflect.DeepEqual(got, expected) {
		return
	}
	t.Fatalf("wanted iframe URLs %v, got %v", expected, got)
}

func TestParsePlaylist(t *testing.T) {
m3u := "#EXTM3U\nhttps://example.com/live.mp3\n"
pls := "[playlist]\nFile1=https://example.com/stream.aac\n"


Завантаження…
Відмінити
Зберегти