From: Jacob Casper Date: Wed, 4 Apr 2018 02:20:53 +0000 (-0400) Subject: v0.3: X-Git-Url: https://git.jacobcasper.com/?p=Marketplaylister.git;a=commitdiff_plain;h=168896268ee9a19bce7125b029a37c34437aeb52 v0.3: commit ca40ac9fa60bda69f4b7a3415ce4d97bd06257fc Author: Jacob Casper Date: Tue Apr 3 22:18:45 2018 -0400 Refactored scraping and Spotify search tools, added DB --- diff --git a/callback.php b/callback.php index 7488530..2b92014 100644 --- a/callback.php +++ b/callback.php @@ -25,22 +25,15 @@ $code = $_GET['code']; - if (!$code) { - exit(1); - } - - $today = new DateTime; - - #print_r($today); - - $prevDTTxt = file_get_contents(DATE_FILE); + // Get month and year user is requesting + $state = explode(':', $_GET['state']); - $prevDT = $prevDTTxt ? DateTime::createFromFormat(DATE_FORM, $prevDTTxt) : DateTime::createFromFormat(DATE_FORM, $today->format('m/') . '01' . $today->format('/Y')); - if (strcmp($prevDT->format('m'), $today->format('m')) < 0) { - $prevDT = DateTime::createFromFormat(DATE_FORM, $today->format('m/') . '01' . $today->format('/Y')); + if (!$code) { + exit(1); } - + + #print_r($today); #Handle Spotify Token Authorization @@ -66,11 +59,12 @@ $spot_req = file_get_contents(AUTH_URL . 'api/token', false, $token_context); - #echo $spot_req; + echo $spot_req; $spot_json = json_decode($spot_req, true); $spot_token = $spot_json['access_token']; + $me_opts = [ 'http' => [ 'method' => 'GET', @@ -84,55 +78,9 @@ $me_json = json_decode($me_resp, true); $me_id = $me_json['id']; - echo '
'; - #print_r($me_resp); - - $page = 1; - $html = file_get_contents('https://www.marketplace.org/latest-music'); - $DOM = new DOMDocument; - $DOM->loadHTML($html); - $headers = $DOM->getElementsByTagName('h2'); - $divs = $DOM->getElementsByTagName('div'); - - $recentEpDT; - $episodePages = []; - - foreach ($headers as $header) { - if ($header->hasAttribute('class') && $header->getAttribute('class') === 'river--hed') { - $recentEpDT = DateTime::createFromFormat(DATE_FORM, explode(':', $header->nodeValue)[0]); - break; - } - } - - $prevDate = (int) $prevDT->format('d'); - $recentEpDate = (int) $recentEpDT->format('d'); - $daysToGet = ($prevDate === 1) ? $recentEpDate : $recentEpDate - $prevDate; - $daysToGet = $daysToGet - 2 * (int) ($daysToGet / 7); - - if ($daysToGet === 0) { - echo 'No new episodes since last check.'; - exit(0); - } - - - do { - $episodePages[] = parseEpisodePage($divs, $daysToGet); - } while ($daysToGet > 0 && ($DOM->loadHTML(file_get_contents('https://www.marketplace.org/latest-music?page=' . ++$page))) && ($divs = $DOM->getElementsByTagName('div')) ); - - /* - echo '
'; - - print_r($date_headers); - - echo '
'; - print_r($episodes); - */ - - print_r($episodePages); - # Check if this month's playlist exists - $playlistName = MONTHS[$today->format('m')] . ' Marketplace Tracks'; + $playlistName = MONTHS[$state[0]] . ' ' . $state[1] . ' Marketplace Tracks'; $checkPlaylistOpts = [ 'http' => [ @@ -163,6 +111,7 @@ $playlist_data = [ 'name' => $playlistName, + 'description' => 'A playlist of Marketplace tracks by Marketplaylister.', ]; $playlist_opts = [ @@ -185,45 +134,14 @@ $uris = []; - foreach ( array_reverse($episodePages) as $episodes) { - foreach ( array_reverse($episodes) as $episode) { - - $track_opts = [ - 'http' => [ - 'method' => 'GET', - 'header' => 'Authorization: Bearer ' . $spot_token . "\r\n" - ] - ]; - - $track_context = stream_context_create($track_opts); - - foreach ($episode as $song_info) { - - $track_search_url = BASE_URL . 'search?q=track:' . urlencode($song_info['title']) - . '+artist:' . urlencode($song_info['artist']) . '&type=track'; - - #echo '
' . $track_search_url; - #echo '
'; - - $trackReq = file_get_contents($track_search_url, false, $track_context); - if ($trackReq) { - $trackJSON = json_decode($trackReq, true); - $trackJSON = $trackJSON['tracks']; - - print_r($trackJSON); - - if ($trackJSON['total'] === 0) { - continue; - } - - $uris[] = $trackJSON['items'][0]['uri']; - - #rate limit - sleep(1); - - } - } - } + $pdo = new PDO("sqlite:mktplc.sqlite3"); + $stmt = $pdo->prepare("SELECT uri FROM songs s WHERE (SELECT strftime('%m', s.date) == :month and strftime('%Y', s.date) == :year;"); + $stmt->bindParam(':month', $state[0]); + $stmt->bindParam(':year', $state[1]); + if ($stmt->execute()) { + while ($row = $stmt->fetch()) { + $uris[] = $row['uri']; + } } $update_data = [ diff --git a/marketplay.php b/marketplay.php index 361e2ae..b456ec8 100644 --- a/marketplay.php +++ b/marketplay.php @@ -1,6 +1,6 @@ query('SELECT date FROM songs order by date desc limit 1'); + $resultset = $query->fetch(); + + $lastEpDT = new DateTime::createFromFormat(SQLITE_DATE_FORM, $resultset['date']; + $startDate = new DateTime; + $episodeDatePages = []; + $episodeTrackPages = []; + + while ($startDate > $lastEpDT) { + // DOM garbles UTF-8 chars, so loading them to HTML-ENTITIES data fixes this + $html = mb_convert_encoding(file_get_contents('https://www.marketplace.org/latest-music?page=' . $page), 'HTML-ENTITIES', "UTF-8"); + $DOM = new DOMDocument; + $DOM->loadHTML($html); + $headers = $DOM->getElementsByTagName('h2'); + $divs = $DOM->getElementsByTagName('div'); + + $episodeDatePages[] = parseEpisodeDate($headers, $lastEpDT); + $episodeTrackPages[] = parseEpisodePage($divs); + $startDate = end($episodeDatePages[$page - 1]); + $page++; + } + + //print_r($episodeDatePages); + + + //Unroll episodeDatePages + $episodeDates = []; + foreach ($episodeDatePages as $episodeDatePage) { + foreach ($episodeDatePage as $episodeDate) { + $episodeDates[] = $episodeDate; + } + } + + + // Unroll episodeTrackPages + $episodeTrackLists = []; + foreach ($episodeTrackPages as $epTrackPage) { + foreach ($epTrackPage as $epTrackList) { + $episodeTrackLists[] = $epTrackList; + } + } + + $episodes = array_slice( + array_map( + null, $episodeDates, $episodeTrackLists), 0, min( + count($episodeDates), count($episodeTrackLists) + ) + ); + print_r($episodes); + + $stmt = $pdo->prepare("INSERT INTO songs (track, artist, date) VALUES (:track, :artist, :date)"); + $stmt->bindParam(':track', $trackName); + $stmt->bindParam(':artist', $artist); + $stmt->bindParam(':date', $date); + foreach(array_reverse($episodes) as $episode) { + $date = $episode[0]->format(SQLITE_DATE_FORM); + foreach ($episode[1] as $track) { + $trackName = $track['title']; + $artist = $track['artist']; + $stmt->execute(); + } + } + + \ No newline at end of file diff --git a/mktplc.sqlite3 b/mktplc.sqlite3 new file mode 100644 index 0000000..5daf12d Binary files /dev/null and b/mktplc.sqlite3 differ diff --git a/mpfuncs.php b/mpfuncs.php index a35c89f..051619a 100644 --- a/mpfuncs.php +++ b/mpfuncs.php @@ -1,12 +1,12 @@ hasAttribute('class') && $div->getAttribute('class') === 'episode-music') { - if (!$numDays) { - break; - } $songs = []; foreach ($div->childNodes as $row) { $children = $row->childNodes[0]->childNodes; @@ -15,7 +15,6 @@ 'artist' => $children[1]->nodeValue ]; } - $numDays--; $episodePage[] = $songs; } @@ -23,4 +22,29 @@ return $episodePage; + } + + /** + * Go through the DOM elements provided and pull out the Dates of all marketplace + * pod episodes in the provided list. + * + * @param DomNodeList $headers The elements with a header tag from the DOM + * @param DateTime $lastDate The date of the most recent episode from the DB + */ + function parseEpisodeDate(DomNodeList $headers, DateTime $lastDate): array { + $episodeDates = []; + foreach ($headers as $header) { + if ($header->hasAttribute('class') && $header->getAttribute('class') === 'river--hed') { + $dateStringParts = explode('/', explode(':', $header->nodeValue)[0]); + if ( strlen($dateStringParts[2]) === 2 ) { + $dateStringParts[2] = '20' . $dateStringParts[2]; + } + $episodeDate = DateTime::createFromFormat(DATE_FORM, implode("/", $dateStringParts)); + if ($episodeDate < $lastDate) { + break; + } + $episodeDates[] = $episodeDate; + } + } + return $episodeDates; } \ No newline at end of file diff --git a/searchify.php b/searchify.php new file mode 100644 index 0000000..740f79f --- /dev/null +++ b/searchify.php @@ -0,0 +1,49 @@ +prepare("SELECT * FROM SONGS WHERE uri IS NULL"); + $upstmt = $pdo->prepare("UPDATE songs SET (uri) = :uri WHERE id = :id"); + $upstmt->bindParam(':uri', $uri); + $upstmt->bindParam(':id', $id); + + if ($stmt->execute()) { + + print_r($stmt->fetchAll()); + exit(0); + while ($row = $stmt->fetch()) { + + $track_opts = [ + 'http' => [ + 'method' => 'GET', + 'header' => 'Authorization: Bearer ' . SPOT_TOKEN . "\r\n" + ] + ]; + + $track_context = stream_context_create($track_opts); + + $track_search_url = BASE_URL . 'search?q=track:' . urlencode($row['track']) + . '+artist:' . urlencode($row['artist']) . '&type=track'; + + $trackReq = file_get_contents($track_search_url, false, $track_context); + if ($trackReq) { + $trackJSON = json_decode($trackReq, true); + $trackJSON = $trackJSON['tracks']; + if ($trackJSON['total'] === 0) { + continue; + } + + $uri = $trackJSON['items'][0]['uri']; + $id = $row['id']; + $upstmt->execute(); + + #rate limit + sleep(1); + + } + } + } \ No newline at end of file