Nelze vybrat více než 25 témat. Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

197 řádky
6.1KB

  1. package extractor
  2. import (
  3. "regexp"
  4. "sort"
  5. "strings"
  6. )
  7. var urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)
  8. var attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)
  9. var srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)
  10. var iframePattern = regexp.MustCompile(`(?i)<iframe[^>]+src\s*=\s*['"]([^'"]+)['"]`)
  11. var audioPattern = regexp.MustCompile(`(?i)<audio[^>]+src\s*=\s*['"]([^'"]+)['"]`)
  12. var sourcePattern = regexp.MustCompile(`(?i)<source[^>]+src\s*=\s*['"]([^'"]+)['"]`)
  13. var xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)
  14. // ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
  15. func ExtractStreams(data string) []string {
  16. candidates := make(map[string]struct{})
  17. special := make(map[string]struct{})
  18. add := func(raw string) {
  19. if normalized, ok := normalizeCandidate(raw); ok {
  20. candidates[normalized] = struct{}{}
  21. }
  22. }
  23. addSpecial := func(raw string) {
  24. if normalized, ok := normalizeCandidate(raw); ok {
  25. candidates[normalized] = struct{}{}
  26. special[normalized] = struct{}{}
  27. }
  28. }
  29. for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
  30. add(match[1])
  31. }
  32. for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
  33. add(match[2])
  34. }
  35. for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
  36. add(match[1])
  37. }
  38. for _, match := range audioPattern.FindAllStringSubmatch(data, -1) {
  39. addSpecial(match[1])
  40. }
  41. for _, match := range sourcePattern.FindAllStringSubmatch(data, -1) {
  42. addSpecial(match[1])
  43. }
  44. streams := make([]string, 0, len(candidates))
  45. for u := range candidates {
  46. if isStreamURL(u) {
  47. streams = append(streams, u)
  48. continue
  49. }
  50. if _, ok := special[u]; ok {
  51. streams = append(streams, u)
  52. }
  53. }
  54. sort.Strings(streams)
  55. return streams
  56. }
  57. // ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json).
  58. func ExtractPlaylistLinks(data string) []string {
  59. candidates := make(map[string]struct{})
  60. add := func(raw string) {
  61. if normalized, ok := normalizeCandidate(raw); ok && isPlaylistURL(normalized) {
  62. candidates[normalized] = struct{}{}
  63. }
  64. }
  65. for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
  66. add(match[1])
  67. }
  68. for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
  69. add(match[2])
  70. }
  71. for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
  72. add(match[1])
  73. }
  74. links := make([]string, 0, len(candidates))
  75. for u := range candidates {
  76. links = append(links, u)
  77. }
  78. sort.Strings(links)
  79. return links
  80. }
  81. // ExtractEmbedURLs returns URLs found in iframe embeds.
  82. func ExtractEmbedURLs(data string) []string {
  83. candidates := make(map[string]struct{})
  84. for _, match := range iframePattern.FindAllStringSubmatch(data, -1) {
  85. if normalized, ok := normalizeCandidate(match[1]); ok {
  86. candidates[normalized] = struct{}{}
  87. }
  88. }
  89. urls := make([]string, 0, len(candidates))
  90. for u := range candidates {
  91. urls = append(urls, u)
  92. }
  93. sort.Strings(urls)
  94. return urls
  95. }
  96. // ParsePlaylist extracts stream URLs from playlist content.
  97. func ParsePlaylist(content string, contentType string) []string {
  98. candidates := make(map[string]struct{})
  99. add := func(raw string) {
  100. raw = strings.TrimSpace(raw)
  101. if raw == "" {
  102. return
  103. }
  104. if strings.HasPrefix(raw, "//") {
  105. raw = "https:" + raw
  106. }
  107. if isStreamURL(raw) {
  108. candidates[raw] = struct{}{}
  109. }
  110. }
  111. lowerType := strings.ToLower(contentType)
  112. lines := strings.Split(content, "\n")
  113. if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "<location>") {
  114. for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) {
  115. add(match[1])
  116. }
  117. }
  118. for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
  119. add(match[1])
  120. }
  121. for _, line := range lines {
  122. line = strings.TrimSpace(line)
  123. if line == "" || strings.HasPrefix(line, "#") {
  124. continue
  125. }
  126. if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") {
  127. parts := strings.SplitN(line, "=", 2)
  128. add(parts[1])
  129. continue
  130. }
  131. if strings.Contains(line, "http") {
  132. for _, match := range urlPattern.FindAllStringSubmatch(line, -1) {
  133. add(match[1])
  134. }
  135. }
  136. }
  137. if strings.Contains(lowerType, "json") {
  138. for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
  139. add(match[1])
  140. }
  141. }
  142. streams := make([]string, 0, len(candidates))
  143. for u := range candidates {
  144. streams = append(streams, u)
  145. }
  146. sort.Strings(streams)
  147. return streams
  148. }
  149. func normalizeCandidate(raw string) (string, bool) {
  150. raw = strings.TrimSpace(raw)
  151. if raw == "" {
  152. return "", false
  153. }
  154. if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
  155. return "", false
  156. }
  157. if strings.HasPrefix(raw, "//") {
  158. raw = "https:" + raw
  159. }
  160. normalized := strings.TrimRight(raw, "+")
  161. normalized = strings.ReplaceAll(normalized, `\\`, "")
  162. if normalized == "" {
  163. return "", false
  164. }
  165. return normalized, true
  166. }
  167. func isStreamURL(u string) bool {
  168. lower := strings.ToLower(u)
  169. return strings.Contains(lower, ".mp3") || strings.Contains(lower, ".aac") || strings.Contains(lower, ".m3u8") ||
  170. strings.Contains(lower, ".ogg") || strings.Contains(lower, ".opus")
  171. }
  172. func isPlaylistURL(u string) bool {
  173. lower := strings.ToLower(u)
  174. return strings.Contains(lower, ".m3u") || strings.Contains(lower, ".pls") ||
  175. strings.Contains(lower, ".xspf") || strings.Contains(lower, ".json")
  176. }