<?php

/**
 * Scrapes the Marketplace "latest music" listing page by page and appends the
 * collected episodes' tracks to the `songs` table of the local SQLite database.
 *
 * Paging continues while the last episode date returned for a page is still
 * newer than the most recent `date` stored in `songs`.
 *
 * Relies on mpfuncs.php for parseEpisodeDate(), parseEpisodePage() and,
 * presumably, the DATE_FORM constant (none of them are defined in this file).
 */

require 'mpfuncs.php';

const SQLITE_DATE_FORM = 'Y-m-d H:i:s';

$pdo = new PDO("sqlite:mktplc.sqlite3");
// Make every PDO failure throw instead of returning false (the default before PHP 8).
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Lower bound used when the songs table holds no rows yet.
$genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017');

// Find the newest episode date already stored.
$latestRow = $pdo->query('SELECT date FROM songs order by date desc limit 1')->fetch();
if ($latestRow === false) {
    // Empty table: fetch() yields false, so there is no stored date to resume from.
    $lastEpDT = $genDate;
} else {
    $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, (string) $latestRow['date']);
}
if (!$lastEpDT instanceof DateTime) {
    // Continuing with `false` as the bound would make the date comparisons below meaningless.
    throw new RuntimeException('Unable to determine the date of the last stored episode');
}

$page = 1;
$startDate = new DateTime;
$episodeDates = [];
$episodeTrackLists = [];

// Collect libxml parse complaints internally instead of emitting a PHP warning
// for every piece of markup the parser does not recognise.
$previousLibxmlMode = libxml_use_internal_errors(true);

while ($startDate > $lastEpDT) {
    $url = 'https://www.marketplace.org/latest-music?page=' . $page;
    $rawHtml = file_get_contents($url);
    if ($rawHtml === false || $rawHtml === '') {
        // Abort before anything is written: inserting a partial batch would move
        // the "latest stored date" forward and the missed episodes would never
        // be requested again.
        throw new RuntimeException('Failed to download ' . $url);
    }

    // DOM garbles UTF-8 chars, so non-ASCII characters are turned into numeric
    // entities first (replacement for the 'HTML-ENTITIES' pseudo-encoding of
    // mb_convert_encoding(), which is deprecated as of PHP 8.2).
    $html = mb_encode_numericentity($rawHtml, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $DOM = new DOMDocument;
    $DOM->loadHTML($html);
    libxml_clear_errors();

    $headers = $DOM->getElementsByTagName('h2');
    $divs = $DOM->getElementsByTagName('div');

    // Append this page's results straight onto the flat lists.
    $pageDates = parseEpisodeDate($headers, $lastEpDT);
    foreach ($pageDates as $pageDate) {
        $episodeDates[] = $pageDate;
    }
    foreach (parseEpisodePage($divs) as $pageTrackList) {
        $episodeTrackLists[] = $pageTrackList;
    }

    if (empty($pageDates)) {
        // No dates on this page, so there is nothing to compare against: stop paging.
        break;
    }

    // The last date of the page decides whether another page is needed.
    // NOTE(review): assumes the listing is ordered newest first - confirm.
    $startDate = end($pageDates);
    $page++;
}

libxml_use_internal_errors($previousLibxmlMode);

// Pair each date with the track list at the same position; surplus entries of
// the longer list are dropped.
$pairCount = min(count($episodeDates), count($episodeTrackLists));
$episodes = [];
for ($i = 0; $i < $pairCount; $i++) {
    $episodes[] = [$episodeDates[$i], $episodeTrackLists[$i]];
}
print_r($episodes);

$stmt = $pdo->prepare("INSERT INTO songs (track, artist, date) VALUES (:track, :artist, :date)");

// One transaction for the whole batch: either every track is stored or none is,
// and SQLite does not have to commit after each individual row.
$pdo->beginTransaction();
try {
    // Insert in the reverse of the order in which the episodes were collected.
    foreach (array_reverse($episodes) as $episode) {
        $date = $episode[0]->format(SQLITE_DATE_FORM);
        foreach ($episode[1] as $track) {
            $stmt->execute([
                ':track' => $track['title'],
                ':artist' => $track['artist'],
                ':date' => $date,
            ]);
        }
    }
    $pdo->commit();
} catch (Exception $e) {
    $pdo->rollBack();
    throw $e;
}