-
Notifications
You must be signed in to change notification settings - Fork 8
/
1337x.go
336 lines (298 loc) · 11.6 KB
/
1337x.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
package imdb2torrent
import (
"context"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
// LeetxClientOptions configures a 1337x client created via NewLeetxClient.
type LeetxClientOptions struct {
// Typically "https://1337x.to"
BaseURL string
// Timeout is applied to each HTTP request the client makes.
Timeout time.Duration
// CacheAge is how long cached torrent results are considered fresh.
CacheAge time.Duration
}
// NewLeetxClientOpts bundles the given settings into a LeetxClientOptions value.
func NewLeetxClientOpts(baseURL string, timeout, cacheAge time.Duration) LeetxClientOptions {
	var opts LeetxClientOptions
	opts.BaseURL = baseURL
	opts.Timeout = timeout
	opts.CacheAge = cacheAge
	return opts
}
// DefaultLeetxClientOpts are sensible defaults: the official 1337x domain,
// a 5 second request timeout and a 24 hour result cache.
var DefaultLeetxClientOpts = LeetxClientOptions{
BaseURL: "https://1337x.to",
Timeout: 5 * time.Second,
CacheAge: 24 * time.Hour,
}
// Interface guard: the compiler verifies that *leetxClient implements MagnetSearcher.
var _ MagnetSearcher = (*leetxClient)(nil)

// leetxClient scrapes the 1337x torrent site for magnet links,
// caching results and resolving IMDb IDs to titles via a MetaGetter.
type leetxClient struct {
baseURL string
httpClient *http.Client
// cache stores previously scraped results, keyed by "<id>-1337x".
cache Cache
// metaGetter resolves IMDb IDs to movie / TV show titles (e.g. via Cinemeta).
metaGetter MetaGetter
cacheAge time.Duration
logger *zap.Logger
// logFoundTorrents enables a debug log line per found torrent.
logFoundTorrents bool
}
// NewLeetxClient creates a 1337x scraper client from the given options and dependencies.
// The HTTP client it builds uses opts.Timeout for every request.
func NewLeetxClient(opts LeetxClientOptions, cache Cache, metaGetter MetaGetter, logger *zap.Logger, logFoundTorrents bool) *leetxClient {
	httpClient := &http.Client{Timeout: opts.Timeout}
	client := &leetxClient{
		baseURL:          opts.BaseURL,
		httpClient:       httpClient,
		cache:            cache,
		metaGetter:       metaGetter,
		cacheAge:         opts.CacheAge,
		logger:           logger,
		logFoundTorrents: logFoundTorrents,
	}
	return client
}
// FindMovie scrapes 1337x to find torrents for the given IMDb ID.
// It uses the Stremio Cinemeta remote addon to get a movie name for a given IMDb ID, so it can search 1337x with the name.
// If no error occured, but there are just no torrents for the movie yet, an empty result and *no* error are returned.
func (c *leetxClient) FindMovie(ctx context.Context, imdbID string) ([]Result, error) {
	// Resolve the IMDb ID to a title (and, if known, a release year).
	meta, err := c.metaGetter.GetMovieSimple(ctx, imdbID)
	if err != nil {
		return nil, fmt.Errorf("Couldn't get movie name via Cinemeta for IMDb ID %v: %v", imdbID, err)
	}
	searchTerms := meta.Title
	if meta.Year != 0 {
		searchTerms = searchTerms + " " + strconv.Itoa(meta.Year)
	}
	// The search terms become a URL path segment, so path-escape them.
	escaped := url.PathEscape(searchTerms)
	return c.find(ctx, imdbID, "category-search/"+escaped+"/Movies/1/", meta.Title, false)
}
// FindTVShow scrapes 1337x to find torrents for the given IMDb ID + season + episode.
// It uses the Stremio Cinemeta remote addon to get a TV show name for a given IMDb ID, so it can search 1337x with the name.
// If no error occured, but there are just no torrents for the TV show yet, an empty result and *no* error are returned.
func (c *leetxClient) FindTVShow(ctx context.Context, imdbID string, season, episode int) ([]Result, error) {
	// The combined ID is used for logging and as the cache key component.
	id := fmt.Sprintf("%s:%d:%d", imdbID, season, episode)
	meta, err := c.metaGetter.GetTVShowSimple(ctx, imdbID, season, episode)
	if err != nil {
		return nil, fmt.Errorf("Couldn't get TV show title via Cinemeta for ID %v: %v", id, err)
	}
	search, err := createTVShowSearch(ctx, c.metaGetter, imdbID, season, episode)
	if err != nil {
		return nil, err
	}
	// The search terms become a URL path segment, so path-escape them.
	return c.find(ctx, id, "category-search/"+url.PathEscape(search)+"/TV/1/", meta.Title, true)
}
// find scrapes 1337x for torrents matching the search at urlPath.
// id is the cache key component (an IMDb ID, or "imdbID:season:episode" for TV shows),
// title is attached to each Result, and isTVShow controls whether the scraper
// tries to navigate from the first search hit to the general movie page.
// Only 720p/1080p/2160p releases are considered. If parsing finds no matching
// torrents, an empty result and *no* error are returned.
func (c *leetxClient) find(ctx context.Context, id, urlPath, title string, isTVShow bool) ([]Result, error) {
	zapFieldID := zap.String("id", id)
	zapFieldTorrentSite := zap.String("torrentSite", "1337x")
	// Check cache first
	cacheKey := id + "-1337x"
	torrentList, created, found, err := c.cache.Get(cacheKey)
	if err != nil {
		c.logger.Error("Couldn't get torrent results from cache", zap.Error(err), zapFieldID, zapFieldTorrentSite)
	} else if !found {
		c.logger.Debug("Torrent results not found in cache", zapFieldID, zapFieldTorrentSite)
	} else if time.Since(created) > (c.cacheAge) {
		expiredSince := time.Since(created.Add(c.cacheAge))
		c.logger.Debug("Hit cache for torrents, but item is expired", zap.Duration("expiredSince", expiredSince), zapFieldID, zapFieldTorrentSite)
	} else {
		c.logger.Debug("Hit cache for torrents, returning results", zap.Int("torrentCount", len(torrentList)), zapFieldID, zapFieldTorrentSite)
		return torrentList, nil
	}
	// Search on 1337x
	reqUrl := c.baseURL + "/" + urlPath
	origDoc, err := c.getDoc(ctx, reqUrl)
	if err != nil {
		return nil, err
	}
	// Pick the first element, it's the most likely one to belong to the correct movie / TV show
	torrentPath, ok := origDoc.Find(".table-list tbody td a").Next().Attr("href")
	if !ok {
		return nil, fmt.Errorf("Couldn't find search result")
	}
	// Try to go via the first search result to the general movie page. This guarantees that all torrents found on that page are definitive matches for the movie.
	// But this only works for movies, not for TV shows.
	// For movies, if we don't find the general movie page, we can always go back to the original search result page as well.
	// TODO: For TV shows we could try to go via the episode page.
	var docToSearch *goquery.Document
	if !isTVShow {
		// BUGFIX: this branch previously ran for TV shows ("if isTVShow"), contradicting
		// the comment above - the general movie page navigation only applies to movies.
		reqUrl = c.baseURL + torrentPath
		firstTorrentDoc, err := c.getDoc(ctx, reqUrl)
		if err != nil {
			c.logger.Warn("Couldn't get HTML doc for first torrent result", zap.Error(err), zapFieldID, zapFieldTorrentSite)
			docToSearch = origDoc
		} else {
			// Find the general movie page URL
			movieInfoURL, ok := firstTorrentDoc.Find(".content-row h3 a").Attr("href")
			// Only if this was found, we try to go through the torrent pages for the movie page
			if ok && movieInfoURL != "" {
				reqUrl = c.baseURL + movieInfoURL
				docToSearch, err = c.getDoc(ctx, reqUrl)
				if err != nil {
					// Only log, but continue - we can always use the results from the original search result page
					c.logger.Warn("Couldn't get HTML doc for general movie page", zap.Error(err), zapFieldID, zapFieldTorrentSite)
					docToSearch = origDoc
				}
			} else {
				docToSearch = origDoc
			}
		}
	} else {
		docToSearch = origDoc
	}
	// Go through elements
	var torrentPageURLs []string
	docToSearch.Find(".table-list tbody tr").Each(func(i int, s *goquery.Selection) {
		linkText := s.Find("a").Next().Text()
		if strings.Contains(linkText, "720p") || strings.Contains(linkText, "1080p") || strings.Contains(linkText, "2160p") {
			torrentLink, ok := s.Find("a").Next().Attr("href")
			if !ok || torrentLink == "" {
				c.logger.Warn("Couldn't find link to the torrent page, did the HTML change?", zapFieldID, zapFieldTorrentSite)
				return
			}
			torrentPageURLs = append(torrentPageURLs, c.baseURL+torrentLink)
		}
	})
	// TODO: We should differentiate between "parsing went wrong" and "just no search results".
	if len(torrentPageURLs) == 0 {
		return nil, nil
	}
	// Visit each torrent page *in parallel* and get the magnet URL
	resultChan := make(chan Result, len(torrentPageURLs))
	// BUGFIX: count the goroutines we actually start. The receive loop below previously
	// read len(torrentPageURLs) results, which deadlocked whenever replaceURL failed
	// for a URL and its goroutine was never started.
	startedGoroutines := 0
	for _, torrentPageURL := range torrentPageURLs {
		// Use configured base URL, which could be a proxy that we want to go through
		torrentPageURL, err = replaceURL(torrentPageURL, c.baseURL)
		if err != nil {
			c.logger.Warn("Couldn't replace URL which was retrieved from an HTML link", zap.Error(err), zapFieldID, zapFieldTorrentSite)
			continue
		}
		startedGoroutines++
		go func(goTorrentPageURL string) {
			doc, err := c.getDoc(ctx, goTorrentPageURL)
			if err != nil {
				resultChan <- Result{}
				return
			}
			magnet, ok := doc.Find(".box-info ul li").First().Find("a").Attr("href")
			if !ok || magnet == "" {
				resultChan <- Result{}
				return
			}
			// The magnet URL contains the release name, so the quality can be derived from it.
			quality := ""
			if strings.Contains(magnet, "720p") {
				quality = "720p"
			} else if strings.Contains(magnet, "1080p") {
				quality = "1080p"
			} else if strings.Contains(magnet, "2160p") {
				quality = "2160p"
			} else {
				// This should never be the case, because it was previously checked during scraping
				resultChan <- Result{}
				return
			}
			if strings.Contains(magnet, "10bit") {
				quality += " 10bit"
			}
			// https://en.wikipedia.org/wiki/Pirated_movie_release_types
			if strings.Contains(magnet, "HDCam") {
				quality += (" (⚠️cam)")
			}
			// look for "btih:dd8255ecdc7ca55fb0bbf81323d87062db1f6d1c&" via regex and then cut out the hash
			match := magnet2InfoHashRegex.Find([]byte(magnet))
			infoHash := strings.TrimPrefix(string(match), "btih:")
			infoHash = strings.TrimSuffix(infoHash, "&")
			infoHash = strings.ToLower(infoHash)
			if infoHash == "" {
				c.logger.Warn("Couldn't extract info_hash. Did the HTML change?", zap.String("magnet", magnet), zapFieldID, zapFieldTorrentSite)
				resultChan <- Result{}
				return
			} else if len(infoHash) != 40 {
				c.logger.Warn("InfoHash isn't 40 characters long", zap.String("magnet", magnet), zapFieldID, zapFieldTorrentSite)
				resultChan <- Result{}
				return
			}
			// Size is best-effort: a missing or malformed size only logs a warning and leaves size at 0.
			var size int
			sizeString := doc.Find(".box-info ul").Eq(2).Find("li span").Eq(3).Text()
			if sizeString == "" {
				c.logger.Warn("Couldn't find torrent size", zapFieldID, zapFieldTorrentSite)
			} else {
				sizeSplit := strings.Split(sizeString, " ")
				if len(sizeSplit) != 2 {
					c.logger.Warn("Expected two parts after splitting size string", zap.String("sizeString", sizeString), zapFieldID, zapFieldTorrentSite)
				} else {
					sizeFloat, err := strconv.ParseFloat(sizeSplit[0], 64)
					if err != nil {
						c.logger.Warn("Couldn't convert torrent size to float", zap.Error(err), zap.String("sizeString", sizeString), zapFieldID, zapFieldTorrentSite)
					} else {
						// 1337x uses MiB and GiB, but calls them MB and GB, so we have to multiply with 1024 instead of 1000.
						switch sizeSplit[1] {
						case "MB":
							size = int(sizeFloat * 1024 * 1024)
						case "GB":
							size = int(sizeFloat * 1024 * 1024 * 1024)
						}
					}
				}
			}
			name := doc.Find(".box-info-heading h1").Text()
			name = strings.Trim(name, " ")
			if strings.HasSuffix(name, "...") {
				// The heading is truncated; fall back to the first entry of the file list.
				name = doc.Find(".torrent-tabs .tab-content .file-content span").First().Text()
			}
			seedersString := doc.Find(".box-info .list").Eq(1).Find(".seeds").Text()
			seeders, err := strconv.Atoi(seedersString)
			if err != nil {
				c.logger.Warn("Couldn't convert torrent seeders to int", zap.Error(err), zap.String("seedersString", seedersString), zapFieldID, zapFieldTorrentSite)
			}
			if c.logFoundTorrents {
				c.logger.Debug("Found torrent", zap.String("title", title), zap.String("quality", quality), zap.String("infoHash", infoHash), zap.String("magnet", magnet), zap.Int("size", size), zap.Int("seeders", seeders), zapFieldID, zapFieldTorrentSite)
			}
			result := Result{
				Name:      name,
				Title:     title,
				Quality:   quality,
				InfoHash:  infoHash,
				MagnetURL: magnet,
				Fuzzy:     true,
				Size:      size,
				Seeders:   seeders,
			}
			resultChan <- result
		}(torrentPageURL)
	}
	var results []Result
	// We don't use a timeout channel because the HTTP clients have a timeout so the goroutines are guaranteed to finish
	for i := 0; i < startedGoroutines; i++ {
		result := <-resultChan
		// An empty MagnetURL marks a goroutine that failed to extract a torrent.
		if result.MagnetURL != "" {
			results = append(results, result)
		}
	}
	// Fill cache, even if there are no results, because that's just the current state of the torrent site.
	// Any actual errors would have returned earlier.
	if err := c.cache.Set(cacheKey, results); err != nil {
		c.logger.Error("Couldn't cache torrents", zap.Error(err), zap.String("cache", "torrent"), zapFieldID, zapFieldTorrentSite)
	}
	return results, nil
}
// IsSlow implements MagnetSearcher. The 1337x client is never flagged as slow.
func (c *leetxClient) IsSlow() bool {
return false
}
// getDoc GETs the given URL and parses the response body into a goquery document.
// BUGFIX: the ctx parameter was previously accepted but ignored (httpClient.Get);
// the request is now built with the caller's context so cancellations and
// deadlines propagate to the HTTP round trip, in addition to the client's own timeout.
func (c *leetxClient) getDoc(ctx context.Context, url string) (*goquery.Document, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("Couldn't create GET request for %v: %v", url, err)
	}
	res, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("Couldn't GET %v: %v", url, err)
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Bad GET response: %v", res.StatusCode)
	}
	// Load the HTML document
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("Couldn't load the HTML in goquery: %v", err)
	}
	return doc, nil
}