7 const SQLITE_DATE_FORM
= 'Y-m-d H:i:s';
9 $pdo = new PDO("sqlite:mktplc.sqlite3");
11 $genDate = DateTime
::createFromFormat(DATE_FORM
, '1/1/2017');
15 $query = $pdo->query('SELECT date FROM songs order by date desc limit 1');
16 $resultset = $query->fetch();
18 $lastEpDT = DateTime
::createFromFormat(SQLITE_DATE_FORM
, $resultset['date']);
19 $startDate = new DateTime
;
20 $episodeDatePages = [];
21 $episodeTrackPages = [];
24 while ($startDate > $lastEpDT) {
25 // DOM garbles UTF-8 chars, so loading them to HTML-ENTITIES data fixes this
26 $html = mb_convert_encoding(file_get_contents('https://www.marketplace.org/latest-music/marketplace/page/' . $page), 'HTML-ENTITIES', "UTF-8");
27 $DOM = new DOMDocument
;
28 $DOM->loadHTML($html);
29 $xpath = new DOMXPath($DOM);
30 $episodeData = $xpath->evaluate("//div[contains(@class, 'mp-music-card')]");
31 foreach($episodeData as $episode) {
32 $children = iterator_to_array($episode->childNodes
);
33 $episodeHeadCard = array_pop(findChildWithClass($children, 'mp-music-card-episode'));
34 $episodeMeta = array_pop(findChildWithClass($episodeHeadCard->childNodes
, 'mp-music-card-meta'));
35 $episodeDate = array_pop(findChildWithClass($episodeMeta->childNodes
, 'mp-music-card-meta_pubdate'))->textContent
;
36 if (!isset($episodeDate)) { continue; }
37 $trackDiv = array_pop(findChildWithClass($children, 'mp-music-card-tracks'));
38 $trackItems = findChildWithClass($trackDiv->childNodes
, 'flex w-full flex-wrap item');
40 foreach($trackItems as $trackItem) {
41 $divs = findChildWithClass($trackItem->childNodes
, 'w-full min-tablet:w-1/2');
42 foreach ($divs as $div) {
43 $trackIDs[] = array_pop(explode('/', array_pop(findChildWithClass($div->childNodes
, 'song-title'))->attributes
->getNamedItem('href')->value
));
46 $episodes[$episodeDate] = $trackIDs;
48 $startDate = new DateTime(end(array_keys($episodes)));
52 $stmt = $pdo->prepare("INSERT INTO songs (date, uri) VALUES (:date, :uri)");
53 $stmt->bindParam(':date', $date);
54 $stmt->bindParam(':uri', $uri);
55 foreach(array_reverse($episodes) as $airDate => $trackIDs) {
56 $date = (new DateTime($airDate))->format(SQLITE_DATE_FORM
);
57 foreach ($trackIDs as $trackID) {
58 $uri = "spotify:track:{$trackID}";