v0.3.2:
[Marketplaylister.git] / marketscraper.php
CommitLineData
16889626
JC
<?php

// Scrapes the Marketplace "latest music" listing, page by page, for episodes
// dated after the most recent song row stored in the local SQLite database,
// then inserts every track of those episodes into the `songs` table.

require 'mpfuncs.php';

const SQLITE_DATE_FORM = 'Y-m-d H:i:s';

$pdo = new PDO("sqlite:mktplc.sqlite3");
// Surface database failures as exceptions instead of silent `false` returns.
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// Baseline date used when the database does not contain any song yet.
// NOTE(review): DATE_FORM is not defined in this file; presumably it is
// provided by mpfuncs.php -- confirm.
$genDate = DateTime::createFromFormat(DATE_FORM, '1/1/2017');

$page = 1;

// Date of the newest song already stored (if any).
$query = $pdo->query('SELECT date FROM songs order by date desc limit 1');
$resultset = $query->fetch();

// fetch() returns false on an empty table; fall back to the baseline date
// rather than indexing into `false`.
$lastEpDT = $genDate;
if ($resultset !== false && $resultset['date'] !== null) {
    $lastEpDT = DateTime::createFromFormat(SQLITE_DATE_FORM, $resultset['date']);
}
if (!$lastEpDT instanceof DateTime) {
    // Without a valid cut-off date the paging loop below has no sane stop
    // condition, so refuse to continue.
    throw new RuntimeException('Could not determine the date of the last stored episode.');
}

$startDate = new DateTime;
$episodeDates = [];
$episodeTrackLists = [];

// The scraped pages are HTML5; collect libxml's parse complaints internally
// instead of emitting a PHP warning for each unknown tag.
libxml_use_internal_errors(true);

// Walk the listing until the last date seen on a page is no longer newer
// than what is already stored.
while ($startDate > $lastEpDT) {
    $rawHtml = file_get_contents('https://www.marketplace.org/latest-music?page=' . $page);
    if ($rawHtml === false) {
        // Abort rather than insert a partial scrape: a later run would only
        // look for episodes newer than the newest inserted row and would
        // never revisit the pages that were missed.
        throw new RuntimeException('Failed to download listing page ' . $page . '.');
    }

    // DOMDocument garbles raw UTF-8, so every non-ASCII character is turned
    // into a numeric entity first. (mb_convert_encoding() to 'HTML-ENTITIES'
    // is deprecated as of PHP 8.2.)
    $html = mb_encode_numericentity($rawHtml, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $DOM = new DOMDocument;
    $DOM->loadHTML($html);
    libxml_clear_errors();

    $headers = $DOM->getElementsByTagName('h2');
    $divs = $DOM->getElementsByTagName('div');

    $pageDates = parseEpisodeDate($headers, $lastEpDT);
    $pageTrackLists = parseEpisodePage($divs);

    // Flatten the per-page results into the running lists.
    foreach ($pageDates as $episodeDate) {
        $episodeDates[] = $episodeDate;
    }
    foreach ($pageTrackLists as $epTrackList) {
        $episodeTrackLists[] = $epTrackList;
    }

    $page++;

    // A page that yields no dates means there is nothing further to read.
    if (count($pageDates) === 0) {
        break;
    }
    $startDate = end($pageDates);
}

// Pair each episode date with the track list at the same position. Only as
// many pairs as both lists can supply are kept.
$episodeCount = min(count($episodeDates), count($episodeTrackLists));
$episodes = [];
for ($i = 0; $i < $episodeCount; $i++) {
    $episodes[] = [$episodeDates[$i], $episodeTrackLists[$i]];
}
print_r($episodes);

$stmt = $pdo->prepare("INSERT INTO songs (track, artist, date) VALUES (:track, :artist, :date)");

// Insert everything in one transaction: either all scraped tracks are stored
// or none are, and SQLite does not have to commit once per row.
$pdo->beginTransaction();
try {
    // Episodes are written in the reverse of the order they were scraped.
    foreach (array_reverse($episodes) as $episode) {
        $date = $episode[0]->format(SQLITE_DATE_FORM);
        foreach ($episode[1] as $track) {
            $stmt->execute([
                ':track' => $track['title'],
                ':artist' => $track['artist'],
                ':date' => $date,
            ]);
        }
    }
    $pdo->commit();
} catch (Exception $e) {
    $pdo->rollBack();
    throw $e;
}