<?php
/*
 * Scrapes the Marketplace "latest music" listing pages and stores one row per
 * track (episode air date + Spotify track URI) in the `songs` table of the
 * local SQLite database.
 *
 * Pages are fetched newest-first until a page's last dated episode is no newer
 * than the most recent date already stored, or until a page can no longer be
 * fetched / contains no dated episode cards.
 */
require 'mpfuncs.php';

// The crawl may span many remote pages, so lift PHP's execution time limit.
set_time_limit(0);

const SQLITE_DATE_FORM = 'Y-m-d H:i:s';

$pdo = new PDO("sqlite:mktplc.sqlite3");

// NOTE(review): DATE_FORM is not defined in this file (presumably it comes from
// mpfuncs.php) and $genDate is never read below — confirm whether it is still needed.
$genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017');

// Newest air date already stored; stays false when the table is empty or the
// stored value does not match SQLITE_DATE_FORM.
$query = $pdo->query('SELECT date FROM songs order by date desc limit 1');
$resultset = $query->fetch();

$lastEpDT = false;
if (is_array($resultset) && isset($resultset['date'])) {
    $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, $resultset['date']);
}

// Returns the last element of a findChildWithClass() result, or null when there
// is none. array_pop() takes its argument by reference, so calling it directly
// on a function result raises "Only variables should be passed by reference".
$lastMatch = static function ($matches) {
    return is_array($matches) ? array_pop($matches) : null;
};

$page = 1;
$startDate = new DateTime;
$episodes = [];

// Without a usable stored date there is no lower bound: keep going until the
// listing runs out (see the break conditions inside the loop).
while (!($lastEpDT instanceof DateTime) || $startDate > $lastEpDT) {
    $rawHtml = file_get_contents('https://www.marketplace.org/latest-music/marketplace/page/' . $page);
    if ($rawHtml === false || $rawHtml === '') {
        // Fetch failed or returned nothing: stop instead of requesting pages forever.
        break;
    }

    // DOM garbles UTF-8 chars, so loading them to HTML-ENTITIES data fixes this
    $html = mb_convert_encoding($rawHtml, 'HTML-ENTITIES', "UTF-8");
    $DOM = new DOMDocument;
    $DOM->loadHTML($html);
    $xpath = new DOMXPath($DOM);
    $episodeData = $xpath->evaluate("//div[contains(@class, 'mp-music-card')]");

    // Date of the last dated episode card encountered on this page.
    $lastDateOnPage = null;

    foreach ($episodeData as $episode) {
        $children = iterator_to_array($episode->childNodes);

        // The XPath also matches nested divs whose class merely contains
        // 'mp-music-card' (e.g. 'mp-music-card-tracks'); those lack the
        // episode/meta/pubdate chain and are skipped by the guards below.
        $episodeHeadCard = $lastMatch(findChildWithClass($children, 'mp-music-card-episode'));
        if ($episodeHeadCard === null) {
            continue;
        }
        $episodeMeta = $lastMatch(findChildWithClass($episodeHeadCard->childNodes, 'mp-music-card-meta'));
        if ($episodeMeta === null) {
            continue;
        }
        $pubDateNode = $lastMatch(findChildWithClass($episodeMeta->childNodes, 'mp-music-card-meta_pubdate'));
        if ($pubDateNode === null) {
            continue;
        }

        $episodeDate = trim($pubDateNode->textContent);
        if ($episodeDate === '') {
            // An empty string would be parsed by DateTime as "now".
            continue;
        }

        $episodeDT = new DateTime($episodeDate);
        $lastDateOnPage = $episodeDT;

        // Already stored on a previous run: collecting it again would insert
        // duplicate rows.
        if ($lastEpDT instanceof DateTime && $episodeDT <= $lastEpDT) {
            continue;
        }

        $trackIDs = [];
        $trackDiv = $lastMatch(findChildWithClass($children, 'mp-music-card-tracks'));
        if ($trackDiv !== null) {
            $trackItems = findChildWithClass($trackDiv->childNodes, 'flex w-full flex-wrap item');
            foreach ($trackItems as $trackItem) {
                $divs = findChildWithClass($trackItem->childNodes, 'w-full min-tablet:w-1/2');
                foreach ($divs as $div) {
                    $songLink = $lastMatch(findChildWithClass($div->childNodes, 'song-title'));
                    if ($songLink === null || $songLink->attributes === null) {
                        continue;
                    }
                    $hrefAttr = $songLink->attributes->getNamedItem('href');
                    if ($hrefAttr === null) {
                        continue;
                    }
                    // The track ID is the last path segment of the link target.
                    $segments = explode('/', $hrefAttr->value);
                    $trackIDs[] = end($segments);
                }
            }
        }

        $episodes[$episodeDate] = $trackIDs;
    }

    if ($lastDateOnPage === null) {
        // No dated episode cards on this page: nothing further to crawl.
        break;
    }

    $startDate = $lastDateOnPage;
    $page++;
}

$stmt = $pdo->prepare("INSERT INTO songs (date, uri) VALUES (:date, :uri)");
$stmt->bindParam(':date', $date);
$stmt->bindParam(':uri', $uri);

// Episodes were collected in page order; insert them in the reverse order.
// array_reverse() keeps the string (date) keys.
foreach (array_reverse($episodes) as $airDate => $trackIDs) {
    $date = (new DateTime($airDate))->format(SQLITE_DATE_FORM);
    foreach ($trackIDs as $trackID) {
        $uri = "spotify:track:{$trackID}";
        $stmt->execute();
    }
}