Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
// Rankings page fetched by this script (query: event=60, agegroup=U20, sex=M, year=2015).
$url = 'http://www.thepowerof10.info/rankings/rankinglist.aspx?event=60&agegroup=U20&sex=M&year=2015';
- /**$event ;
- $agegroup;
- $sex;
- $year;
- $newUrl = "http://www.thepowerof10.info/rankings/rankinglist.aspx?event=$event&agegroup=$agegroup&sex=$sex&year=$year"
- $urlInput = array
- (
- array("ALL","M",60),
- array("ALL","M",100),
- array("ALL","M",200),
- array("ALL","M",400),
- array("ALL","M",800),
- array("ALL","M",1500),
- array("ALL","M",3000),
- array("ALL","M",5000),
- array("ALL","M",10000),
- array("ALL","M","10K"),
- array("ALL","M","HM"),
- array("ALL","M","Mar"),
- array("ALL","M","60H"),
- array("ALL","M","110H"),
- array("ALL","M","400H"),
- array("ALL","M","HJ"),
- array("ALL","M","PV"),
- array("ALL","M","LJ"),
- array("ALL","M","TJ"),
- array("ALL","M","SP7.26K"),
- array("ALL","M","DT2K"),
- array("ALL","M","HT7.26K"),
- array("ALL","M","JT800"),
- array("ALL","M","HepI"),
- array("ALL","M","Dec"),
- array("ALL","M","20KW"),
- array("ALL","M","50KW"),
- array("ALL","M","4x100"),
- array("ALL","M","4x400"),
- array("U20","M",60),
- array("U20","M",100),
- array("U20","M",200),
- array("U20","M",400),
- array("U20","M",800),
- array("U20","M",1500),
- array("U20","M",3000),
- array("U20","M",5000),
- array("U20","M","2000SC"),
- array("U20","M","60HU20M"),
- array("U20","M","110HU20M"),
- array("U20","M","400H"),
- array("U20","M","HJ"),
- array("U20","M", "PV"),
- array("U20","M","LJ"),
- array("U20","M","TJ"),
- array("U20","M","SP6K"),
- array("U20","M","DT1.75K"),
- array("U20","M","HT6K"),
- array("U20","M","JT800"),
- array("U20","M","HepIU20M"),
- array("U20","M","DecU20M"),
- array("U20","M","4x100"),
- array("U20","M","4x400"),
- array("U17","M",60),
- array("U17","M",100),
- array("U17","M",200),
- array("U17","M",400),
- array("U17","M",800),
- array("U17","M",1500),
- array("U17","M",3000),
- array("U17","M","1500SC"),
- array("U17","M","60HU17M"),
- array("U17","M","100HU17M"),
- array("U17","M","400HU17M"),
- array("U17","M","HJ"),
- array("U17","M", "PV"),
- array("U17","M","LJ"),
- array("U17","M","TJ"),
- array("U17","M","SP5K"),
- array("U17","M","DT1.5K"),
- array("U17","M","HT5K"),
- array("U17","M","JT700"),
- array("U17","M","HepIU17M"),
- array("U17","M","OctU17M"),
- array("U17","M","DecU17M"),
- array("U17","M","4x100"),
- array("U17","M","4x400"),
- array("U15","M",60),
- array("U15","M",100),
- array("U15","M",200),
- array("U15","M",300),
- array("U15","M",800),
- array("U15","M",1500),
- array("U15","M",3000),
- array("U15","M","60HU15M"),
- array("U15","M","80HU15M"),
- array("U15","M","HJ"),
- array("U15","M", "PV"),
- array("U15","M","LJ"),
- array("U15","M","TJ"),
- array("U15","M","SP4K"),
- array("U15","M","DT1.25K"),
- array("U15","M","HT4K"),
- array("U15","M","JT600"),
- array("U15","M","PenIU15M"),
- array("U15","M","PenU15M"),
- array("U15","M","4x100"),
- array("U15","M","4x300"),
- array("U13","M",60),
- array("U13","M",100),
- array("U13","M",200),
- array("U13","M",800),
- array("U13","M",1500),
- array("U13","M","60HU13M"),
- array("U13","M","75HU13M"),
- array("U13","M","HJ"),
- array("U13","M", "PV"),
- array("U13","M","LJ"),
- array("U13","M","TJ"),
- array("U13","M","SP3.25K"),
- array("U13","M","DT1K"),
- array("U13","M","HT4K"),
- array("U13","M","JT400"),
- array("U13","M","PenU13M"),
- array("U13","M","4x100"),
- array("DIS","M",60),
- array("DIS","M",100),
- array("DIS","M","100WC"),
- array("DIS","M",200),
- array("DIS","M","200WC"),
- array("DIS","M",400),
- array("DIS","M","400WC"),
- array("DIS","M",800),
- array("DIS","M","800WC"),
- array("DIS","M",1500),
- array("DIS","M","1500WC"),
- array("DIS","M",5000),
- array("DIS","M","5000WC"),
- array("DIS","M",10000),
- array("DIS","M","10000WC"),
- array("DIS","M","MarWC"),
- array("DIS","M","LJ"),
- array("DIS","M","TJ"),
- array("DIS","M","SP%"),
- array("DIS","M","DT%"),
- array("DIS","M","JT%"),
- array("DIS","M","CT"),
- );
- **/
// Report only fatal errors, warnings and parse errors (notices and the like stay hidden).
error_reporting(E_PARSE | E_WARNING | E_ERROR);
/**
 * Download the body of a URL with cURL.
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or false when the cURL handle could
 *                      not be created or the transfer failed. Failures are
 *                      written to the PHP error log with the cURL error text.
 */
function get_data($url) {
    // curl_init($url) already sets CURLOPT_URL, so it is not set a second time.
    $ch = curl_init($url);
    if ($ch === false) {
        error_log("get_data: could not initialise cURL for $url");
        return false;
    }

    $timeout = 5;
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // Bound the whole transfer too; without this a stalled server blocks forever.
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($ch);
    if ($data === false) {
        // error_log() is used because the script's error_reporting mask hides E_USER_* messages.
        error_log('get_data: ' . curl_error($ch) . " ($url)");
    }

    curl_close($ch);
    return $data;
}
/**
 * Collect every hyperlink found beneath a DOM node.
 *
 * Not called anywhere in this script at present.
 *
 * @param DOMDocument|DOMElement|null $table Node to search. When omitted, the
 *        global $dom is used if it holds a DOMDocument.
 * @return array List of array('url' => href attribute, 'text' => link text);
 *               empty when there is nothing to search.
 */
function get_links($table = null) {
    // Empty array to hold all links to return
    $links = array();

    if ($table === null && isset($GLOBALS['dom'])) {
        $table = $GLOBALS['dom'];
    }
    if (!($table instanceof DOMDocument) && !($table instanceof DOMElement)) {
        return $links;
    }

    // One pass over the <a> elements; each link is recorded exactly once.
    foreach ($table->getElementsByTagName('a') as $link) {
        $links[] = array('url' => $link->getAttribute('href'), 'text' => $link->nodeValue);
    }

    return $links;
}
/**
 * Download the rankings page, strip its colspan cells and prepare the globals
 * the rest of the script reads.
 *
 * Side effects:
 *  - global $dom is set to a DOMDocument (left empty when the download fails);
 *  - global $athletes is set to one single-element seed array per field;
 *  - the stripped HTML is written to "data.html" in the working directory
 *    (skipped when the download fails).
 *
 * @param string $url Page to download.
 * @return void
 */
function main($url) {
    global $dom, $athletes;

    // A new dom object
    $dom = new DOMDocument();

    // Each field starts as array(0), the same seed the script has always used.
    $fields = array(
        'rank', 'perf', 'pb', 'dump', 'name', 'dob', 'coach',
        'club', 'venue', 'dpb', 'athlink', 'coachlink', 'pbvenuelink',
    );
    $athletes = array_fill_keys($fields, array(0));

    // put html data in variable
    $html = get_data($url);
    if (!is_string($html) || trim($html) === '') {
        // loadHTML() rejects empty input (a ValueError on PHP 8), so stop here
        // and leave $dom empty rather than crash.
        error_log("main: no HTML received from $url");
        return;
    }

    // Collect libxml's complaints about real-world markup quietly instead of
    // hiding every diagnostic with the @ operator.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    /* Remove the banners */
    // Every <td> that carries a colspan attribute is treated as a banner cell.
    // Collect first, remove afterwards: the node list is live.
    $domElemsToRemove = array();
    foreach ($dom->getElementsByTagName('td') as $domElement) {
        if ($domElement->hasAttribute('colspan')) {
            $domElemsToRemove[] = $domElement;
        }
    }
    foreach ($domElemsToRemove as $domElement) {
        $domElement->parentNode->removeChild($domElement);
    }

    // Set after parsing, as before; kept so $dom carries the same property value.
    $dom->preserveWhiteSpace = false;

    // Save stripped html. The earlier save-and-reload round trip through
    // data.html is gone: this write replaces that file anyway.
    if ($dom->saveHTMLFile('data.html') === false) {
        error_log('main: could not write data.html');
    }
}
/**
 * Copy selected cells of the cached ranking table into the $athletes columns.
 *
 * Looks up the element with id "ctl00_cphBody_lblCachedRankingList", walks
 * every <tr> beneath it and stores the text of fixed <td> positions under
 * $athletes[field][rowIndex - 3]. Rows without enough cells leave the
 * corresponding fields untouched.
 *
 * Side effects: sets global $counter to the number of <tr> elements visited
 * and echoes one "\n" per row (kept from the original behaviour).
 *
 * @param DOMDocument $dom      Parsed rankings page.
 * @param array       $athletes Column arrays to fill (field => index => text).
 * @return array The updated $athletes; returned unchanged when the document
 *               or the ranking table is missing.
 */
function parse($dom, $athletes) {
    global $counter;
    $counter = 0;

    // <td> position within a row => field it is stored under.
    $columnMap = array(
        0  => 'rank',
        1  => 'perf',
        4  => 'pb',
        6  => 'name',
        8  => 'dob',
        9  => 'coach',
        10 => 'club',
        11 => 'venue',
        12 => 'dpb',
    );
    // Row index shift applied when storing; presumably the first three rows
    // are headings, so row 3 lands at index 0 -- TODO confirm against the page.
    $rowOffset = 3;

    if (!($dom instanceof DOMDocument)) {
        return $athletes;
    }
    $table = $dom->getElementById('ctl00_cphBody_lblCachedRankingList');
    if ($table === null) {
        // Page layout changed or the download failed: nothing to parse.
        return $athletes;
    }

    $rows = $table->getElementsByTagName('tr');
    for ($i = 0; $i < $rows->length; $i++) {
        $cols = $rows->item($i)->getElementsByTagName('td');
        $counter++;

        foreach ($columnMap as $position => $field) {
            $cell = $cols->item($position);
            if ($cell !== null) {
                $athletes[$field][$i - $rowOffset] = $cell->nodeValue;
            }
        }
        echo "\n";
    }

    return $athletes;
}
/**
 * Print one line per athlete: "rank. name,perf,pb,dob,coach,club,venue"
 * followed by "<br />" and a newline.
 *
 * Indexes -1 up to $counter - 1 are examined, as before. Every row whose name
 * is missing, blank or not a string is skipped; previously only a single blank
 * row was stepped over, so runs of blank rows and the unused tail of the index
 * range were printed as empty lines. Fields missing for a printed row are
 * shown as empty strings. The 'dpb' column is not printed.
 *
 * NOTE(review): values are echoed unescaped; if this output is served as HTML
 * the scraped text should be passed through htmlspecialchars() -- confirm the
 * intended output context.
 *
 * @param array $allVals Column arrays as returned by parse().
 * @param int   $counter Upper bound (exclusive) of the row indexes to examine.
 * @return void
 */
function display($allVals, $counter) {
    // Fields printed after the rank, in output order.
    $fields = array('name', 'perf', 'pb', 'dob', 'coach', 'club', 'venue');

    for ($i = -1; $i < $counter; $i++) {
        $name = isset($allVals['name'][$i]) ? $allVals['name'][$i] : null;
        if (!is_string($name) || trim($name) === '') {
            continue;
        }

        $values = array();
        foreach ($fields as $field) {
            $values[] = isset($allVals[$field][$i]) ? $allVals[$field][$i] : '';
        }
        $rank = isset($allVals['rank'][$i]) ? $allVals['rank'][$i] : '';

        echo $rank, ".", " ", implode(",", $values), nl2br("\n");
    }
}
/**
 * Store the parsed rankings in the MySQL table `rankData`.
 *
 * Connects with PDO, creates the table when it does not exist yet, then
 * inserts one row per athlete through a prepared statement inside a single
 * transaction. $allVals is column-oriented (field => index => value), so rows
 * are assembled by index; indexes 0 .. $counter - 1 are examined and rows
 * without a non-blank string name are skipped. The 'dpb' column is stored as
 * `pbdate`.
 *
 * Progress and error messages are echoed, as before. When the connection
 * fails the function returns without touching the database.
 *
 * @param array $allVals Column arrays as returned by parse().
 * @param int   $counter Upper bound (exclusive) of the row indexes to examine.
 * @return void
 */
function sqlDb($allVals, $counter) {
    // NOTE(review): credentials are hard-coded (and the account is root).
    // Move them to configuration/environment and use a restricted account.
    $servername = "localhost";
    $username = "root";
    $password = "bleach390";
    $dbname = "mydb";

    // Connecting to DB using PDO
    try {
        $dbh = new PDO("mysql:host=$servername;dbname=$dbname", $username, $password);
        // Make every later failure raise PDOException instead of passing silently.
        $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        echo "Connected to DB \n";
    } catch (PDOException $e) {
        echo $e->getMessage();
        return; // no connection: nothing below can work
    }

    try {
        // IF NOT EXISTS lets the script run repeatedly; `rank` is quoted because
        // RANK is a reserved word in MySQL 8. Free-text columns are wider than
        // before so that longer names are less likely to be rejected.
        $sql = "CREATE TABLE IF NOT EXISTS rankData (
            `rank` INT(4),
            name VARCHAR(100),
            perf VARCHAR(10),
            pb VARCHAR(10),
            dob VARCHAR(10),
            coach VARCHAR(100),
            club VARCHAR(100),
            venue VARCHAR(100),
            pbdate VARCHAR(30)
        )";
        $dbh->exec($sql);
        print("Created rankData Table. \n");
    } catch (PDOException $e) {
        // Best effort, as before: report and still attempt the insert.
        echo $e->getMessage();
    }

    /*** INSERT data ***/
    if (is_array($allVals) && isset($allVals['name']) && is_array($allVals['name'])) {
        // Source fields in the order of the placeholders after `rank`.
        $fields = array('name', 'perf', 'pb', 'dob', 'coach', 'club', 'venue', 'dpb');

        try {
            $stmt = $dbh->prepare(
                "INSERT INTO rankData (`rank`, name, perf, pb, dob, coach, club, venue, pbdate) "
                . "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
            );
            $dbh->beginTransaction();

            for ($i = 0; $i < $counter; $i++) {
                $name = isset($allVals['name'][$i]) ? $allVals['name'][$i] : null;
                if (!is_string($name) || trim($name) === '') {
                    continue; // separator row, unfilled seed value, or no such row
                }

                $params = array(isset($allVals['rank'][$i]) ? (int) $allVals['rank'][$i] : 0);
                foreach ($fields as $field) {
                    $params[] = isset($allVals[$field][$i]) ? $allVals[$field][$i] : '';
                }
                $stmt->execute($params);
            }

            $dbh->commit();
        } catch (PDOException $e) {
            if ($dbh->inTransaction()) {
                $dbh->rollBack();
            }
            echo $e->getMessage();
        }
    }

    /*** close the database connection ***/
    $dbh = null;
}
// Step 1: download and clean the page; main() fills the globals $dom and $athletes.
main($url);

// Step 2: pull the table cells into column arrays; parse() also sets the global $counter.
$allVals = parse($dom, $athletes);

// Step 3: print every athlete, then store the same data in the database.
display($allVals, $counter);
sqlDb($allVals, $counter);
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement