選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

163 行
4.9KB

  1. package extractor
  2. import (
  3. "regexp"
  4. "sort"
  5. "strings"
  6. )
  // Package-level patterns, compiled once at start-up and shared by the extractors.
  var (
  	// urlPattern matches an absolute (http/https) or protocol-relative ("//")
  	// URL up to and including a known media or playlist extension. Group 1 is
  	// the URL, group 2 the extension without the dot. Anything following the
  	// extension (for example a query string) is not part of the match.
  	urlPattern = regexp.MustCompile(`(?i)((?:https?:)?\/\/[^\s"'<>]+\.(mp3|aac|m3u8|ogg|opus|pls|m3u|xspf|json))`)

  	// attrPattern matches a stream-related key or attribute name followed by
  	// ':' or '=' and a quoted value. Group 1 is the name, group 2 the value.
  	attrPattern = regexp.MustCompile(`(?i)(streamsrc|streamhash|stream|audioUrl|mp3Url|hls|playlist|source)\s*[:=]\s*['"]([^'"]+)['"]`)

  	// srcPattern matches src="..." / src='...' attributes. Group 1 is the value.
  	srcPattern = regexp.MustCompile(`(?i)src\s*=\s*['"]([^'"]+)['"]`)

  	// xspfPattern matches <location>...</location> elements. Group 1 is the
  	// element text.
  	xspfPattern = regexp.MustCompile(`(?i)<location>([^<]+)</location>`)
  )
  11. // ExtractStreams returns the unique streaming URLs found in the provided HTML/text.
  12. func ExtractStreams(data string) []string {
  13. candidates := make(map[string]struct{})
  14. add := func(raw string) {
  15. raw = strings.TrimSpace(raw)
  16. if raw == "" {
  17. return
  18. }
  19. if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
  20. return
  21. }
  22. if strings.HasPrefix(raw, "//") {
  23. raw = "https:" + raw
  24. }
  25. normalized := strings.TrimRight(raw, "+")
  26. normalized = strings.ReplaceAll(normalized, `\\`, "")
  27. candidates[normalized] = struct{}{}
  28. }
  29. for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
  30. add(match[1])
  31. }
  32. for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
  33. add(match[2])
  34. }
  35. for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
  36. add(match[1])
  37. }
  38. streams := make([]string, 0, len(candidates))
  39. for u := range candidates {
  40. if isStreamURL(u) {
  41. streams = append(streams, u)
  42. }
  43. }
  44. sort.Strings(streams)
  45. return streams
  46. }
  47. // ExtractPlaylistLinks returns URLs likely pointing to playlists (m3u/pls/xspf/json).
  48. func ExtractPlaylistLinks(data string) []string {
  49. candidates := make(map[string]struct{})
  50. add := func(raw string) {
  51. raw = strings.TrimSpace(raw)
  52. if raw == "" {
  53. return
  54. }
  55. if !(strings.Contains(raw, "http") || strings.HasPrefix(raw, "//")) {
  56. return
  57. }
  58. if strings.HasPrefix(raw, "//") {
  59. raw = "https:" + raw
  60. }
  61. normalized := strings.TrimRight(raw, "+")
  62. normalized = strings.ReplaceAll(normalized, `\\`, "")
  63. if isPlaylistURL(normalized) {
  64. candidates[normalized] = struct{}{}
  65. }
  66. }
  67. for _, match := range urlPattern.FindAllStringSubmatch(data, -1) {
  68. add(match[1])
  69. }
  70. for _, match := range attrPattern.FindAllStringSubmatch(data, -1) {
  71. add(match[2])
  72. }
  73. for _, match := range srcPattern.FindAllStringSubmatch(data, -1) {
  74. add(match[1])
  75. }
  76. links := make([]string, 0, len(candidates))
  77. for u := range candidates {
  78. links = append(links, u)
  79. }
  80. sort.Strings(links)
  81. return links
  82. }
  83. // ParsePlaylist extracts stream URLs from playlist content.
  84. func ParsePlaylist(content string, contentType string) []string {
  85. candidates := make(map[string]struct{})
  86. add := func(raw string) {
  87. raw = strings.TrimSpace(raw)
  88. if raw == "" {
  89. return
  90. }
  91. if strings.HasPrefix(raw, "//") {
  92. raw = "https:" + raw
  93. }
  94. if isStreamURL(raw) {
  95. candidates[raw] = struct{}{}
  96. }
  97. }
  98. lowerType := strings.ToLower(contentType)
  99. lines := strings.Split(content, "\n")
  100. if strings.Contains(lowerType, "xspf") || strings.Contains(strings.ToLower(content), "<location>") {
  101. for _, match := range xspfPattern.FindAllStringSubmatch(content, -1) {
  102. add(match[1])
  103. }
  104. }
  105. for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
  106. add(match[1])
  107. }
  108. for _, line := range lines {
  109. line = strings.TrimSpace(line)
  110. if line == "" || strings.HasPrefix(line, "#") {
  111. continue
  112. }
  113. if strings.HasPrefix(strings.ToLower(line), "file") && strings.Contains(line, "=") {
  114. parts := strings.SplitN(line, "=", 2)
  115. add(parts[1])
  116. continue
  117. }
  118. if strings.Contains(line, "http") {
  119. for _, match := range urlPattern.FindAllStringSubmatch(line, -1) {
  120. add(match[1])
  121. }
  122. }
  123. }
  124. if strings.Contains(lowerType, "json") {
  125. for _, match := range urlPattern.FindAllStringSubmatch(content, -1) {
  126. add(match[1])
  127. }
  128. }
  129. streams := make([]string, 0, len(candidates))
  130. for u := range candidates {
  131. streams = append(streams, u)
  132. }
  133. sort.Strings(streams)
  134. return streams
  135. }
  // isStreamURL reports whether u contains, anywhere and in any letter case,
  // one of the stream extensions ".mp3", ".aac", ".m3u8", ".ogg" or ".opus".
  func isStreamURL(u string) bool {
  	folded := strings.ToLower(u)
  	for _, ext := range []string{".mp3", ".aac", ".m3u8", ".ogg", ".opus"} {
  		if strings.Contains(folded, ext) {
  			return true
  		}
  	}
  	return false
  }
  // isPlaylistURL reports whether u contains, anywhere and in any letter case,
  // one of the playlist extensions ".m3u", ".pls", ".xspf" or ".json". Because
  // this is a substring test, ".m3u8" URLs are accepted via ".m3u" as well.
  func isPlaylistURL(u string) bool {
  	folded := strings.ToLower(u)
  	for _, ext := range []string{".m3u", ".pls", ".xspf", ".json"} {
  		if strings.Contains(folded, ext) {
  			return true
  		}
  	}
  	return false
  }