Mirror of https://github.com/urbanguacamole/torrent-paradise.git, synced 2022-02-03 00:44:15 +03:00
Add crawl of TPB 48h top to crawl-rss
I still miss most of the trending torrents because I source new torrents mainly from the DHT.
@@ -1,5 +1,5 @@
 [Unit]
-Description=Automatic crawl of new torrent feeds
+Description=Automatic crawl of new torrent feeds. Also automatically refreshes the index every hour.
 Requires=postgresql
 
 [Service]
@@ -21,6 +21,7 @@ type Torrent struct {
 func main() {
 	db := initDb()
 	crawled := make(map[string]bool) // set to not needlessly send all torrents to db to check if we found them already
+	var i int
 	for {
 		torrents := CrawlYts()
 		for _, torrent := range torrents {
@@ -30,6 +31,16 @@ func main() {
 		for _, torrent := range torrents {
 			addTorrent(db, torrent, crawled)
 		}
+		if i%10 == 0 {
+			torrents = CrawlTPB48hTop()
+			for _, torrent := range torrents {
+				addTorrent(db, torrent, crawled)
+			}
+			if len(torrents) == 0 {
+				log.Println("weird, no torrents crawled from TPB")
+			}
+		}
+		i++
 		time.Sleep(time.Minute * 60)
 		go refresh(db)
 	}
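Note that because i starts at its zero value, i%10 == 0 holds on the very first pass, so the TPB top-100 is crawled immediately at startup and then roughly every 10 hours. For comparison, a standalone sketch of the same schedule expressed with tickers instead of a counter (an illustrative alternative, not code from this commit):

package main

import (
	"log"
	"time"
)

func main() {
	hourly := time.NewTicker(60 * time.Minute)
	tenHourly := time.NewTicker(600 * time.Minute)
	defer hourly.Stop()
	defer tenHourly.Stop()
	for {
		select {
		case <-hourly.C:
			log.Println("crawl RSS feeds") // stands in for CrawlYts / CrawlEztv
		case <-tenHourly.C:
			log.Println("crawl TPB 48h top") // stands in for CrawlTPB48hTop
		}
	}
}

The counter approach in the commit is simpler and keeps both crawls on one goroutine; the ticker version would decouple the two intervals.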
@@ -72,6 +83,9 @@ func CrawlYts() []Torrent {
 	return torrents
 }
 
+//TODO https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54
+// ^^ rarbg w/o porn
+
 func CrawlEztv() []Torrent { //maybe is there some kind of interface that this can share with CrawlYts? This function has the same signature and purpose.
 	fp := gofeed.NewParser()
 	feed, err := fp.ParseURL("https://eztv.io/ezrss.xml")
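A hypothetical sketch of the rarbg TODO above, using the same gofeed parser that CrawlEztv already uses. How rarbg's feed items would map onto the Torrent struct is not shown in this diff, so the sketch only fetches and lists the entries:

package main

import (
	"log"

	"github.com/mmcdole/gofeed"
)

func main() {
	fp := gofeed.NewParser()
	// Category list copied from the TODO comment (rarbg without porn).
	feed, err := fp.ParseURL("https://rarbg.to/rssdd.php?category=2;14;15;16;17;21;22;42;18;19;41;27;28;29;30;31;32;40;23;24;25;26;33;34;43;44;45;46;47;48;49;50;51;52;54")
	if err != nil {
		log.Fatal(err)
	}
	for _, item := range feed.Items {
		log.Println(item.Title, item.Link) // entries link to .torrent files, not magnets
	}
}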
@@ -1,8 +1,6 @@
 package main
 
-import (
-	"testing"
-)
+import "testing"
 
 func TestCrawlYts(t *testing.T) {
 	torrents := CrawlYts()
crawl-rss/tpb.go (new file, 41 lines)
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"log"
+	"net/http"
+)
+
+// CrawlTPB48hTop crawls https://apibay.org/precompiled/data_top100_48h.json
+func CrawlTPB48hTop() []Torrent {
+	return parseApibayJSON("https://apibay.org/precompiled/data_top100_48h.json")
+}
+
+func parseApibayJSON(url string) []Torrent {
+	httpresp, err := http.Get(url)
+	if err != nil {
+		log.Println(err)
+		return nil
+	}
+	defer httpresp.Body.Close()
+	body, err := ioutil.ReadAll(httpresp.Body)
+
+	var resp []ApibayTorrent
+	err = json.Unmarshal(body, &resp)
+
+	var torrents []Torrent
+	for _, apibayTorr := range resp {
+		torrents = append(torrents, Torrent{apibayTorr.Info_hash, apibayTorr.Name, apibayTorr.Size})
+	}
+	return torrents
+}
+
+// ApibayTorrent Structure returned from apibay. For unmarshaling from JSON. Not all fields that are returned from Apibay are in this struct; YAGNI
+type ApibayTorrent struct {
+	ID        int
+	Info_hash string
+	Name      string
+	Size      int
+	Added     int
+}
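Two things are worth noting about the new file. First, encoding/json matches keys case-insensitively, which is why the Info_hash field picks up apibay's info_hash key; a json:"info_hash" struct tag would make that mapping explicit. Second, parseApibayJSON ignores the errors returned by ioutil.ReadAll and json.Unmarshal. A defensive variant, as a sketch that would drop into the same file reusing its imports and types (not part of the commit):

func parseApibayJSON(url string) []Torrent {
	httpresp, err := http.Get(url)
	if err != nil {
		log.Println(err)
		return nil
	}
	defer httpresp.Body.Close()

	// Check the read error instead of unmarshaling a possibly truncated body.
	body, err := ioutil.ReadAll(httpresp.Body)
	if err != nil {
		log.Println(err)
		return nil
	}

	// Bail out on malformed JSON rather than silently returning nothing.
	var resp []ApibayTorrent
	if err := json.Unmarshal(body, &resp); err != nil {
		log.Println(err)
		return nil
	}

	torrents := make([]Torrent, 0, len(resp))
	for _, apibayTorr := range resp {
		torrents = append(torrents, Torrent{apibayTorr.Info_hash, apibayTorr.Name, apibayTorr.Size})
	}
	return torrents
}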
crawl-rss/tpb_test.go (new file, 21 lines)
@@ -0,0 +1,21 @@
+package main
+
+import (
+	"strconv"
+	"testing"
+)
+
+func TestCrawlTPB48hTop(t *testing.T) {
+	torrents := CrawlTPB48hTop()
+	if len(torrents) < 1 {
+		t.Error("no torrents crawled from tpb")
+	}
+	for i, torrent := range torrents {
+		if torrent.Length < 10 {
+			t.Error("bad length of torrent " + strconv.Itoa(i))
+		}
+		if len(torrent.Name) < 2 {
+			t.Error("weirdly short name of torrent " + strconv.Itoa(i))
+		}
+	}
+}
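This test depends on the live apibay endpoint, so it will fail offline or when the feed is empty. A network-free sketch that exercises parseApibayJSON directly via net/http/httptest (the JSON payload is a made-up fixture, not real apibay data):

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"
)

func TestParseApibayJSON(t *testing.T) {
	// Serve a canned apibay-style response instead of hitting the network.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, `[{"id":1,"info_hash":"0000000000000000000000000000000000000000","name":"example torrent","size":12345,"added":1600000000}]`)
	}))
	defer srv.Close()

	torrents := parseApibayJSON(srv.URL)
	if len(torrents) != 1 {
		t.Fatalf("expected 1 torrent, got %d", len(torrents))
	}
	if torrents[0].Name != "example torrent" {
		t.Errorf("unexpected name %q", torrents[0].Name)
	}
}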