Add a background scraper to pull in related artists
author Jacob Casper <dev@jacobcasper.com>
Thu, 16 Apr 2020 23:27:47 +0000 (18:27 -0500)
committer Jacob Casper <dev@jacobcasper.com>
Fri, 17 Apr 2020 00:20:10 +0000 (19:20 -0500)
main.go
migrations/004-create_scraped_artist.sql [new file with mode: 0644]
scrape/graph/graph.go [new file with mode: 0644]

diff --git a/main.go b/main.go
index b59a627..702fd48 100644 (file)
--- a/main.go
+++ b/main.go
@@ -4,6 +4,7 @@ import (
        "git.jacobcasper.com/brackets/env"
        "git.jacobcasper.com/brackets/routes/artist"
        "git.jacobcasper.com/brackets/routes/genre"
+       "git.jacobcasper.com/brackets/scrape/graph"
        _ "github.com/mattn/go-sqlite3"
        "log"
        "net/http"
@@ -21,5 +22,7 @@ func main() {
 
        http.HandleFunc("/genre", genre.Index(env))
 
+       go graph.Scrape(env)
+
        log.Fatal(http.ListenAndServe(":8080", nil))
 }
diff --git a/migrations/004-create_scraped_artist.sql b/migrations/004-create_scraped_artist.sql
new file mode 100644 (file)
index 0000000..bc51487
--- /dev/null
@@ -0,0 +1,5 @@
+-- Tracks, per artist, whether the background scraper has finished with it.
+CREATE TABLE SCRAPED_ARTIST (
+  -- At most one row per artist; the UNIQUE constraint is what allows
+  -- REPLACE INTO to overwrite an existing row instead of adding a second one.
+  ARTIST_ID TEXT NOT NULL UNIQUE REFERENCES ARTIST(ID),
+  -- 0 = not scraped yet, 1 = scraped.
+  SCRAPED BOOLEAN NOT NULL DEFAULT 0
+);
diff --git a/scrape/graph/graph.go b/scrape/graph/graph.go
new file mode 100644 (file)
index 0000000..7a31924
--- /dev/null
@@ -0,0 +1,67 @@
+package graph
+
+import (
+       "git.jacobcasper.com/brackets/env"
+       "github.com/zmb3/spotify"
+       "log"
+       "net/http"
+       "net/url"
+       "time"
+)
+
+// Scrape is a background worker that never returns. Every five seconds it
+// looks up the artists that have not been marked as scraped, asks Spotify for
+// each one's related artists, and submits those to the local /artist/add
+// endpoint. An artist is recorded as scraped only when every one of its
+// related artists was answered with 201 Created; otherwise it stays unmarked
+// and is retried on a later pass.
+//
+// It blocks forever, so callers are expected to run it in its own goroutine.
+func Scrape(env *env.Env) {
+       for {
+               time.Sleep(time.Second * 5)
+
+               artistIDs, err := unscrapedArtistIDs(env)
+               if err != nil {
+                       log.Print(err)
+                       continue
+               }
+
+               for _, artistID := range artistIDs {
+                       complete, err := scrapeRelated(env, artistID)
+                       if err != nil {
+                               // A failed Spotify lookup abandons the rest of this pass;
+                               // the remaining artists are picked up after the next sleep.
+                               log.Print(err)
+                               break
+                       }
+                       if complete {
+                               markScraped(env, artistID)
+                       }
+               }
+       }
+}
+
+// unscrapedArtistIDs returns the ID of every ARTIST row that has no
+// SCRAPED_ARTIST entry with SCRAPED set to 1. The result set is read in full
+// and closed before returning, so no cursor stays open during the slow
+// network calls that follow.
+func unscrapedArtistIDs(env *env.Env) ([]string, error) {
+       rows, err := env.Db.Db.Query(`
+SELECT ID
+FROM ARTIST
+WHERE ID NOT IN (
+  SELECT ARTIST_ID
+       FROM SCRAPED_ARTIST
+       WHERE SCRAPED == 1
+)`)
+       if err != nil {
+               return nil, err
+       }
+       // Safe to defer here: this helper returns once per pass, unlike the
+       // endless loop in Scrape where a deferred Close would never run.
+       defer rows.Close()
+
+       var artistIDs []string
+       for rows.Next() {
+               var artistID string
+               if err := rows.Scan(&artistID); err != nil {
+                       return nil, err
+               }
+               artistIDs = append(artistIDs, artistID)
+       }
+       // Next also returns false when iteration fails part-way through, so the
+       // error has to be checked explicitly.
+       if err := rows.Err(); err != nil {
+               return nil, err
+       }
+       return artistIDs, nil
+}
+
+// scrapeRelated posts every artist that Spotify lists as related to artistID
+// to the local add endpoint. It reports true only if all of them were
+// answered with 201 Created. A non-nil error means the Spotify lookup itself
+// failed and nothing was posted.
+func scrapeRelated(env *env.Env, artistID string) (bool, error) {
+       related, err := env.C.GetRelatedArtists(spotify.ID(artistID))
+       if err != nil {
+               return false, err
+       }
+
+       complete := true
+       for _, artist := range related {
+               // Keep posting after a failure so the other related artists are
+               // still submitted; only the "scraped" mark is withheld.
+               if !addArtist(string(artist.ID)) {
+                       complete = false
+               }
+       }
+       return complete, nil
+}
+
+// addArtist submits one artist ID to the local /artist/add endpoint and
+// reports whether the server answered 201 Created. Transport errors are
+// logged and reported as false.
+func addArtist(id string) bool {
+       resp, err := http.PostForm("http://localhost:8080/artist/add", url.Values{"id": {id}})
+       if err != nil {
+               log.Print(err)
+               return false
+       }
+       // The body is not read, but it still has to be closed or the underlying
+       // connection is leaked.
+       defer resp.Body.Close()
+
+       return resp.StatusCode == http.StatusCreated
+}
+
+// markScraped records artistID as fully scraped. The write is performed while
+// holding env.Db.Mu. A failed write is logged; the artist then simply remains
+// unmarked and is retried on a later pass.
+func markScraped(env *env.Env, artistID string) {
+       env.Db.Mu.Lock()
+       defer env.Db.Mu.Unlock()
+
+       if _, err := env.Db.Db.Exec(`
+REPLACE INTO SCRAPED_ARTIST (ARTIST_ID, SCRAPED)
+VALUES (?, 1)`,
+               artistID); err != nil {
+               log.Print(err)
+       }
+}