Commit | Line | Data |
---|---|---|
<?php

/*
 * Scrapes the Marketplace "latest music" listing pages and stores, for every
 * episode newer than the newest row already in the `songs` table, one row per
 * track as (air date, "spotify:track:<id>").
 *
 * Pages are walked from page 1 onwards until the oldest episode collected so
 * far is no longer newer than the newest stored date, or until a page yields
 * no new episodes. Nothing is written until scraping has finished, so a failed
 * download cannot leave a gap in the stored history.
 *
 * Relies on findChildWithClass() and DATE_FORM, neither of which is defined in
 * this file (presumably both come from mpfuncs.php).
 */

require 'mpfuncs.php';

// Scraping may span many pages; do not let the execution time limit abort it.
set_time_limit(0);

const SQLITE_DATE_FORM = 'Y-m-d H:i:s';

// Returns the last element of an array, or null for an empty/non-array value.
// array_pop()/end() take their argument by reference, so feeding them a
// function result directly raises "Only variables should be passed by
// reference"; going through a local copy avoids that.
$lastOf = static function ($items) {
    if (!is_array($items) || $items === []) {
        return null;
    }

    return end($items);
};

$pdo = new PDO("sqlite:mktplc.sqlite3");

// Earliest date worth scraping; used as the lower bound when `songs` is empty.
$genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017');

$page = 1;

$query = $pdo->query('SELECT date FROM songs order by date desc limit 1');
if ($query === false) {
    throw new RuntimeException('Could not read the newest song date from the database.');
}
$resultset = $query->fetch();

$lastEpDT = false;
if (is_array($resultset) && isset($resultset['date'])) {
    $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, (string) $resultset['date']);
}
if ($lastEpDT === false) {
    // Empty table or unparsable date. Comparing a DateTime against false is
    // always true, which would make the scraping loop below run forever.
    $lastEpDT = $genDate;
}
if (!$lastEpDT instanceof DateTime) {
    throw new RuntimeException('Could not determine the date of the newest stored episode.');
}

$startDate = new DateTime;
// NOTE(review): the next two arrays are never used in this script; kept in
// case mpfuncs.php reads them as globals - confirm and remove.
$episodeDatePages = [];
$episodeTrackPages = [];

// Air-date text (as printed on the page) => list of track IDs, newest first.
$episodes = [];

// Real-world HTML makes libxml emit a warning per unknown tag; collect them
// internally instead and discard them after each parse.
$previousLibxmlSetting = libxml_use_internal_errors(true);

while ($startDate > $lastEpDT) {
    $url = 'https://www.marketplace.org/latest-music/marketplace/page/' . $page;
    $raw = file_get_contents($url);
    if ($raw === false || $raw === '') {
        // Abort before anything is inserted so the next run can retry cleanly.
        throw new RuntimeException("Could not download {$url}; nothing was written to the database.");
    }

    // DOM garbles UTF-8 chars, so every non-ASCII character is turned into a
    // numeric entity first. (mb_convert_encoding(..., 'HTML-ENTITIES') did the
    // same job but is deprecated as of PHP 8.2.)
    $html = mb_encode_numericentity($raw, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $DOM = new DOMDocument;
    $DOM->loadHTML($html);
    libxml_clear_errors();

    $xpath = new DOMXPath($DOM);
    $episodeData = $xpath->evaluate("//div[contains(@class, 'mp-music-card')]");

    $episodesBefore = count($episodes);

    foreach ($episodeData as $episode) {
        $children = iterator_to_array($episode->childNodes);

        // Walk card -> episode header -> meta -> publication date; a card
        // missing any of these is skipped.
        $episodeHeadCard = $lastOf(findChildWithClass($children, 'mp-music-card-episode'));
        if ($episodeHeadCard === null) {
            continue;
        }
        $episodeMeta = $lastOf(findChildWithClass($episodeHeadCard->childNodes, 'mp-music-card-meta'));
        if ($episodeMeta === null) {
            continue;
        }
        $pubDateNode = $lastOf(findChildWithClass($episodeMeta->childNodes, 'mp-music-card-meta_pubdate'));
        if ($pubDateNode === null) {
            continue;
        }
        $episodeDate = $pubDateNode->textContent;

        $trackIDs = [];
        $trackDiv = $lastOf(findChildWithClass($children, 'mp-music-card-tracks'));
        if ($trackDiv !== null) {
            $trackItems = findChildWithClass($trackDiv->childNodes, 'flex w-full flex-wrap item');
            foreach ($trackItems as $trackItem) {
                $divs = findChildWithClass($trackItem->childNodes, 'w-full min-tablet:w-1/2');
                foreach ($divs as $div) {
                    $link = $lastOf(findChildWithClass($div->childNodes, 'song-title'));
                    $href = ($link !== null && $link->attributes !== null)
                        ? $link->attributes->getNamedItem('href')
                        : null;
                    if ($href === null) {
                        continue;
                    }
                    // The track ID is the last path segment of the link target.
                    $segments = explode('/', $href->value);
                    $trackIDs[] = end($segments);
                }
            }
        }

        $episodes[$episodeDate] = $trackIDs;
    }

    if (count($episodes) === $episodesBefore) {
        // The page added no new dates (empty or repeated page). $startDate
        // could never advance from here, so stop instead of looping forever.
        break;
    }

    // Pages run newest to oldest, so the last key is the oldest episode so far.
    $seenDates = array_keys($episodes);
    $startDate = new DateTime((string) end($seenDates));
    $page++;
}

libxml_use_internal_errors($previousLibxmlSetting);

$stmt = $pdo->prepare("INSERT INTO songs (date, uri) VALUES (:date, :uri)");
if ($stmt === false) {
    throw new RuntimeException('Could not prepare the INSERT statement for the songs table.');
}
// Bound by reference: execute() picks up whatever $date / $uri hold at call time.
$stmt->bindParam(':date', $date);
$stmt->bindParam(':uri', $uri);

// Insert oldest first. Keys are preserved so a numeric-looking date key is not renumbered.
foreach (array_reverse($episodes, true) as $airDate => $trackIDs) {
    $airDT = new DateTime((string) $airDate);
    if ($airDT <= $lastEpDT) {
        // The scraped pages also contain episodes at or before the newest
        // stored date; inserting them again would duplicate existing rows.
        continue;
    }

    $date = $airDT->format(SQLITE_DATE_FORM);
    foreach ($trackIDs as $trackID) {
        $uri = "spotify:track:{$trackID}";
        $stmt->execute();
    }
}