Update README because I forgot how to develop this app.
[Marketplaylister.git] / marketscraper.php
CommitLineData
16889626
JC
<?php

/**
 * Scrapes the "latest music" listing pages on marketplace.org and records
 * the tracks of each episode in the `songs` table of mktplc.sqlite3.
 *
 * Flow:
 *   1. Read the newest episode date already stored in `songs`.
 *   2. Walk the listing pages (page 1, 2, ...) collecting, per episode, the
 *      last path segment of every 'song-title' link href, until the oldest
 *      scraped episode is no newer than the stored date or a page yields
 *      no episodes.
 *   3. Insert one row per track, oldest episode first, with the URI written
 *      as "spotify:track:<id>". Episodes at or before the stored date are
 *      skipped so that re-running the script does not insert duplicates.
 *
 * Relies on findChildWithClass() from mpfuncs.php.
 */

require 'mpfuncs.php';

// A run may fetch many pages; lift PHP's execution time limit.
set_time_limit(0);

const SQLITE_DATE_FORM = 'Y-m-d H:i:s';

$pdo = new PDO("sqlite:mktplc.sqlite3");

// NOTE(review): DATE_FORM is not defined in this file (presumably it comes
// from mpfuncs.php) and $genDate is not read again in this script; kept
// as-is in case included code depends on it.
$genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017');

$page = 1;

$query = $pdo->query('SELECT date FROM songs order by date desc limit 1');
$resultset = ($query === false) ? false : $query->fetch();

// With an empty table (or an unparsable stored date) there is no lower
// bound: scrape until the site runs out of pages.
$lastEpDT = false;
if (is_array($resultset) && isset($resultset['date'])) {
    $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, $resultset['date']);
}
$hasLowerBound = $lastEpDT instanceof DateTime;

$startDate = new DateTime;
// NOTE(review): the next two arrays are not used anywhere in this script.
$episodeDatePages = [];
$episodeTrackPages = [];

// Returns the last element of an array, or null when there is none.
// array_pop()/end() take their argument by reference, so they must not be
// called directly on a function's return value.
$lastOf = function ($items) {
    if (!is_array($items) || count($items) === 0) {
        return null;
    }
    return end($items);
};

$episodes = [];
while (!$hasLowerBound || $startDate > $lastEpDT) {
    $raw = file_get_contents('https://www.marketplace.org/latest-music/marketplace/page/' . $page);
    if ($raw === false || $raw === '') {
        // Download failed or we ran past the last page; nothing more to scrape.
        break;
    }

    // DOM garbles UTF-8 chars, so non-ASCII characters are turned into
    // numeric entities first. (The 'HTML-ENTITIES' pseudo-encoding of
    // mb_convert_encoding() is deprecated since PHP 8.2.)
    $html = mb_encode_numericentity($raw, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $DOM = new DOMDocument;
    $DOM->loadHTML($html);
    $xpath = new DOMXPath($DOM);
    // contains() also matches the inner 'mp-music-card-*' divs; those have
    // no episode header of their own and are skipped by the guards below.
    $episodeData = $xpath->evaluate("//div[contains(@class, 'mp-music-card')]");

    $foundOnPage = 0;
    foreach ($episodeData as $episode) {
        $children = iterator_to_array($episode->childNodes);

        $episodeHeadCard = $lastOf(findChildWithClass($children, 'mp-music-card-episode'));
        if ($episodeHeadCard === null) {
            continue;
        }
        $episodeMeta = $lastOf(findChildWithClass($episodeHeadCard->childNodes, 'mp-music-card-meta'));
        if ($episodeMeta === null) {
            continue;
        }
        $pubDateNode = $lastOf(findChildWithClass($episodeMeta->childNodes, 'mp-music-card-meta_pubdate'));
        if ($pubDateNode === null) {
            continue;
        }
        $episodeDate = $pubDateNode->textContent;
        if (trim($episodeDate) === '') {
            continue;
        }

        // An episode without a track list is still recorded (with no
        // tracks) so that its date takes part in the stop condition.
        $trackIDs = [];
        $trackDiv = $lastOf(findChildWithClass($children, 'mp-music-card-tracks'));
        if ($trackDiv !== null) {
            $trackItems = findChildWithClass($trackDiv->childNodes, 'flex w-full flex-wrap item');
            foreach ($trackItems as $trackItem) {
                $divs = findChildWithClass($trackItem->childNodes, 'w-full min-tablet:w-1/2');
                foreach ($divs as $div) {
                    $titleLink = $lastOf(findChildWithClass($div->childNodes, 'song-title'));
                    if ($titleLink === null || $titleLink->attributes === null) {
                        continue;
                    }
                    $href = $titleLink->attributes->getNamedItem('href');
                    if ($href === null) {
                        continue;
                    }
                    // The track ID is the last path segment of the link.
                    $segments = explode('/', $href->value);
                    $trackID = end($segments);
                    if ($trackID !== '') {
                        $trackIDs[] = $trackID;
                    }
                }
            }
        }

        $episodes[$episodeDate] = $trackIDs;
        $foundOnPage++;
    }

    if ($foundOnPage === 0) {
        // A page without any dated episode means the listing is exhausted;
        // without this the loop would never terminate.
        break;
    }

    // Pages list newest first, so the last key is the oldest episode so far.
    $dates = array_keys($episodes);
    $startDate = new DateTime(end($dates));
    $page++;
}

$stmt = $pdo->prepare("INSERT INTO songs (date, uri) VALUES (:date, :uri)");
// Bound by reference: each execute() picks up the current $date / $uri.
$stmt->bindParam(':date', $date);
$stmt->bindParam(':uri', $uri);
// Insert oldest episode first so rows end up in chronological order.
foreach (array_reverse($episodes, true) as $airDate => $trackIDs) {
    $airDT = new DateTime($airDate);
    if ($hasLowerBound && $airDT <= $lastEpDT) {
        // The last fetched page usually reaches back past the newest stored
        // episode; those episodes are already in the table.
        continue;
    }
    $date = $airDT->format(SQLITE_DATE_FORM);
    foreach ($trackIDs as $trackID) {
        $uri = "spotify:track:{$trackID}";
        $stmt->execute();
    }
}