From bd7c6d1ee3e5bc95f726f78b0aea2283fcaaf677 Mon Sep 17 00:00:00 2001
From: Jacob Casper
Date: Thu, 16 Apr 2020 18:27:47 -0500
Subject: [PATCH] Add a background scraper to pull in related artists

---
 main.go                                  |   3 +
 migrations/004-create_scraped_artist.sql |   5 ++
 scrape/graph/graph.go                    | 109 +++++++++++++++++++++++
 3 files changed, 117 insertions(+)
 create mode 100644 migrations/004-create_scraped_artist.sql
 create mode 100644 scrape/graph/graph.go

diff --git a/main.go b/main.go
index b59a627..702fd48 100644
--- a/main.go
+++ b/main.go
@@ -4,6 +4,7 @@ import (
 	"git.jacobcasper.com/brackets/env"
 	"git.jacobcasper.com/brackets/routes/artist"
 	"git.jacobcasper.com/brackets/routes/genre"
+	"git.jacobcasper.com/brackets/scrape/graph"
 	_ "github.com/mattn/go-sqlite3"
 	"log"
 	"net/http"
@@ -21,5 +22,7 @@ func main() {
 
 	http.HandleFunc("/genre", genre.Index(env))
 
+	go graph.Scrape(env)
+
 	log.Fatal(http.ListenAndServe(":8080", nil))
 }
diff --git a/migrations/004-create_scraped_artist.sql b/migrations/004-create_scraped_artist.sql
new file mode 100644
index 0000000..bc51487
--- /dev/null
+++ b/migrations/004-create_scraped_artist.sql
@@ -0,0 +1,5 @@
+CREATE TABLE SCRAPED_ARTIST (
+    ARTIST_ID TEXT UNIQUE NOT NULL,
+    SCRAPED BOOLEAN NOT NULL DEFAULT 0,
+    FOREIGN KEY(ARTIST_ID) REFERENCES ARTIST(ID)
+);
diff --git a/scrape/graph/graph.go b/scrape/graph/graph.go
new file mode 100644
index 0000000..7a31924
--- /dev/null
+++ b/scrape/graph/graph.go
@@ -0,0 +1,109 @@
+package graph
+
+import (
+	"git.jacobcasper.com/brackets/env"
+	"github.com/zmb3/spotify"
+	"log"
+	"net/http"
+	"net/url"
+	"time"
+)
+
+// Scrape runs forever: every five seconds it looks up the related artists of
+// each artist that is not yet marked as scraped and posts them to the local
+// /artist/add endpoint. It never returns, so callers should start it in its
+// own goroutine.
+func Scrape(env *env.Env) {
+	for {
+		time.Sleep(time.Second * 5)
+		if err := scrapePending(env); err != nil {
+			log.Print(err)
+		}
+	}
+}
+
+// scrapePending performs a single scraping pass. It stops at the first
+// database read or Spotify error so the caller can retry after its delay.
+func scrapePending(env *env.Env) error {
+	artistIDs, err := unscrapedArtistIDs(env)
+	if err != nil {
+		return err
+	}
+
+	for _, artistID := range artistIDs {
+		artists, err := env.C.GetRelatedArtists(spotify.ID(artistID))
+		if err != nil {
+			return err
+		}
+
+		if !postArtists(artists) {
+			// Leave the artist unmarked so a later pass retries it.
+			continue
+		}
+
+		if err := markScraped(env, artistID); err != nil {
+			log.Print(err)
+		}
+	}
+	return nil
+}
+
+// unscrapedArtistIDs returns the ID of every artist that is not marked as
+// scraped. The result set is read in full and closed before returning, so no
+// rows are left open while the caller makes network requests.
+func unscrapedArtistIDs(env *env.Env) ([]string, error) {
+	rows, err := env.Db.Db.Query(`
+SELECT ID
+FROM ARTIST
+WHERE ID NOT IN (
+    SELECT ARTIST_ID
+    FROM SCRAPED_ARTIST
+    WHERE SCRAPED == 1
+)`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var artistIDs []string
+	for rows.Next() {
+		var artistID string
+		if err := rows.Scan(&artistID); err != nil {
+			return nil, err
+		}
+		artistIDs = append(artistIDs, artistID)
+	}
+	return artistIDs, rows.Err()
+}
+
+// postArtists submits each artist to the local /artist/add endpoint and
+// reports whether every request was answered with 201 Created.
+func postArtists(artists []spotify.FullArtist) bool {
+	success := true
+	for _, artist := range artists {
+		resp, err := http.PostForm("http://localhost:8080/artist/add", url.Values{"id": {string(artist.ID)}})
+		if err != nil {
+			log.Print(err)
+			success = false
+			continue
+		}
+		// The body is unused, but it must be closed to release the connection.
+		resp.Body.Close()
+		if resp.StatusCode != http.StatusCreated {
+			success = false
+		}
+	}
+	return success
+}
+
+// markScraped records artistID as scraped while holding the database mutex.
+func markScraped(env *env.Env, artistID string) error {
+	env.Db.Mu.Lock()
+	defer env.Db.Mu.Unlock()
+
+	_, err := env.Db.Db.Exec(`
+REPLACE INTO SCRAPED_ARTIST (ARTIST_ID, SCRAPED)
+VALUES (?, 1)`,
+		artistID)
+	return err
+}
-- 
2.20.1