| 1 | <?php |
| 2 | |
| 3 | require 'mpfuncs.php'; |
| 4 | |
| 5 | set_time_limit(0); |
| 6 | |
| 7 | const SQLITE_DATE_FORM = 'Y-m-d H:i:s'; |
| 8 | |
| 9 | $pdo = new PDO("sqlite:mktplc.sqlite3"); |
| 10 | |
| 11 | $genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017'); |
| 12 | |
| 13 | $page = 1; |
| 14 | |
| 15 | $query = $pdo->query('SELECT date FROM songs order by date desc limit 1'); |
| 16 | $resultset = $query->fetch(); |
| 17 | |
| 18 | $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, $resultset['date']); |
| 19 | $startDate = new DateTime; |
| 20 | $episodeDatePages = []; |
| 21 | $episodeTrackPages = []; |
| 22 | |
| 23 | $episodes = []; |
| 24 | while ($startDate > $lastEpDT) { |
| 25 | // DOM garbles UTF-8 chars, so loading them to HTML-ENTITIES data fixes this |
| 26 | $html = mb_convert_encoding(file_get_contents('https://www.marketplace.org/latest-music/marketplace/page/' . $page), 'HTML-ENTITIES', "UTF-8"); |
| 27 | $DOM = new DOMDocument; |
| 28 | $DOM->loadHTML($html); |
| 29 | $xpath = new DOMXPath($DOM); |
| 30 | $episodeData = $xpath->evaluate("//div[contains(@class, 'mp-music-card')]"); |
| 31 | foreach($episodeData as $episode) { |
| 32 | $children = iterator_to_array($episode->childNodes); |
| 33 | $episodeHeadCard = array_pop(findChildWithClass($children, 'mp-music-card-episode')); |
| 34 | $episodeMeta = array_pop(findChildWithClass($episodeHeadCard->childNodes, 'mp-music-card-meta')); |
| 35 | $episodeDate = array_pop(findChildWithClass($episodeMeta->childNodes, 'mp-music-card-meta_pubdate'))->textContent; |
| 36 | if (!isset($episodeDate)) { continue; } |
| 37 | $trackDiv = array_pop(findChildWithClass($children, 'mp-music-card-tracks')); |
| 38 | $trackItems = findChildWithClass($trackDiv->childNodes, 'flex w-full flex-wrap item'); |
| 39 | $trackIDs = []; |
| 40 | foreach($trackItems as $trackItem) { |
| 41 | $divs = findChildWithClass($trackItem->childNodes, 'w-full min-tablet:w-1/2'); |
| 42 | foreach ($divs as $div) { |
| 43 | $trackIDs[] = array_pop(explode('/', array_pop(findChildWithClass($div->childNodes, 'song-title'))->attributes->getNamedItem('href')->value)); |
| 44 | } |
| 45 | } |
| 46 | $episodes[$episodeDate] = $trackIDs; |
| 47 | } |
| 48 | $startDate = new DateTime(end(array_keys($episodes))); |
| 49 | $page++; |
| 50 | } |
| 51 | |
| 52 | $stmt = $pdo->prepare("INSERT INTO songs (date, uri) VALUES (:date, :uri)"); |
| 53 | $stmt->bindParam(':date', $date); |
| 54 | $stmt->bindParam(':uri', $uri); |
| 55 | foreach(array_reverse($episodes) as $airDate => $trackIDs) { |
| 56 | $date = (new DateTime($airDate))->format(SQLITE_DATE_FORM); |
| 57 | foreach ($trackIDs as $trackID) { |
| 58 | $uri = "spotify:track:{$trackID}"; |
| 59 | $stmt->execute(); |
| 60 | } |
| 61 | } |