Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/**
 * One-shot scraper script.
 *
 * Part 1 connects to MySQL, creates the `events` database and the `eventlist`
 * table. Part 2 downloads the event listing page, cuts it into one chunk per
 * event, extracts the fields with regular expressions and stores them in the
 * table using a prepared statement.
 *
 * Progress and errors are written as plain text to the response.
 */
date_default_timezone_set('Europe/Belgrade');
header('Content-Type: text/plain; charset=utf-8');

/**
 * Return the first capture group of $pattern in $subject.
 *
 * @param string $pattern PCRE pattern containing at least one capture group
 * @param string $subject text to search
 * @return string the captured text, or '' when the pattern does not match
 */
function first_capture($pattern, $subject)
{
    if (preg_match($pattern, $subject, $match) === 1 && isset($match[1])) {
        return $match[1];
    }

    return '';
}

// [PART 1 - Making the database]
// a) Establishing a MySQL database connection; b) creating a database;
// c) selecting the database; and d) creating a table

// MySQL connection parameters
$hostname = "localhost";
$username = "blabla";
$password = "blabla";
$database = "mysql"; // establish connection to an already existing database

// This script checks return values after every mysqli call. Since PHP 8.1
// mysqli throws exceptions by default, which would bypass those checks, so
// switch reporting off explicitly to get the same behaviour on every version.
mysqli_report(MYSQLI_REPORT_OFF);

// Create a database handle/connection. NB: the mysqli API is used throughout;
// do not mix in calls to the old mysql_* API.
$dbh = mysqli_connect($hostname, $username, $password, $database);
if ($dbh === false) {
    die("Unable to connect to MySQL");
}
echo "Connected to MySQL\n";
// NOTE(review): the connection charset is left at the server default; confirm
// it matches the encoding of the scraped page.

// Create database and set encodings
$sql = "CREATE DATABASE events DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;";
if (mysqli_query($dbh, $sql)) {
    echo "Database events created successfully\n";
} else {
    // Not fatal: on a second run the database already exists.
    echo "Error creating database: " . mysqli_error($dbh) . "\n";
}

// Select the database just created
if (!mysqli_select_db($dbh, "events")) {
    echo "Error selecting database: " . mysqli_error($dbh) . "\n";
}

// Create table. Keep the VARCHAR columns generous: values that are too long
// for a column are rejected or cut off by MySQL. Start wide and shrink the
// columns later once the real data sizes are known.
$sql = "CREATE TABLE eventlist(PID INT NOT NULL AUTO_INCREMENT, PRIMARY KEY(PID), Name VARCHAR(200), Day INT, Month VARCHAR(20), Until VARCHAR(300), Category VARCHAR(30), Info VARCHAR(200))";
if (mysqli_query($dbh, $sql)) {
    echo "Table eventlist created successfully\n\n";
} else {
    // Not fatal: on a second run the table already exists.
    echo "Error creating table: " . mysqli_error($dbh) . "\n\n";
}

// [PART 2 - PHP Scraping]
// a) grabbing the source code of the target site; b) separating the values we
// need from the rest using regexes; c) storing the data in a two-dimensional
// array; and d) inserting it into the database

// Fetch the source code of the page to scrape
$contents = file_get_contents('http://www.tob.rs/en/events_list.php');

// Every event on the page starts with this marker, so it is used as the
// split point.
$regexp = '/<div class="list\\_articles/';

$records = array();
if ($contents === false) {
    echo "Error fetching the event list page\n";
} else {
    $pieces = preg_split($regexp, $contents);
    if ($pieces === false) {
        echo "Error splitting the event list page\n";
    } else {
        // Piece 0 is everything before the first event marker; drop it.
        $records = array_slice($pieces, 1);
    }
}

// Extract the fields of every event. A field whose pattern does not match is
// stored as an empty string. Use a regex tester to adjust the patterns when
// the page markup changes.
$all = array();
foreach ($records as $record) {
    $all[] = array(
        "Name"     => first_capture('/events\\.php\\?id=[0-9][0-9][0-9]">(.*?)</', $record),
        "Day"      => first_capture("/<p class='day'>(.*?)</", $record),
        "Month"    => first_capture("/<p class='mon'>(.*?)</", $record),
        "Until"    => first_capture("/>[\s]*(.*?)<a/", $record),
        "Category" => first_capture('/events_list\\.php\\?t=[0-9]">(.*?)</', $record),
        "Info"     => first_capture('/<p>(.*?)</', $record),
    );
}

// Debug output of the extracted rows; can be removed later
print_r($all);

// Insert the rows. The values come from a remote page, so they are passed as
// bound parameters instead of being concatenated into the SQL text.
if (count($all) === 0) {
    echo "No events were extracted; nothing to insert\n";
} else {
    $insert = mysqli_prepare(
        $dbh,
        'INSERT INTO `events`.`eventlist` (Name, Day, Month, Until, Category, Info) VALUES (?, ?, ?, ?, ?, ?)'
    );

    if ($insert === false) {
        echo "Error preparing insert: " . mysqli_error($dbh) . "\n";
    } else {
        // bind_param binds by reference: the variables are bound once and
        // re-assigned for every row before each execute.
        $rowName = '';
        $rowDay = null;
        $rowMonth = '';
        $rowUntil = '';
        $rowCategory = '';
        $rowInfo = '';
        mysqli_stmt_bind_param($insert, 'sissss', $rowName, $rowDay, $rowMonth, $rowUntil, $rowCategory, $rowInfo);

        // Run all inserts as one unit so a failure does not leave a partial
        // import behind. The table is created without an ENGINE clause, so
        // whether the rollback can undo rows depends on the server's default
        // storage engine.
        mysqli_autocommit($dbh, false);
        $failed = false;

        foreach ($all as $row) {
            $dayText = trim($row['Day']);

            $rowName = $row['Name'];
            // Day is an INT column: store NULL when the text is not a plain number.
            $rowDay = ctype_digit($dayText) ? (int) $dayText : null;
            $rowMonth = $row['Month'];
            $rowUntil = $row['Until'];
            $rowCategory = $row['Category'];
            $rowInfo = $row['Info'];

            if (!mysqli_stmt_execute($insert)) {
                echo "Error inserting event: " . mysqli_stmt_error($insert) . "\n";
                $failed = true;
                break;
            }
        }

        if ($failed) {
            mysqli_rollback($dbh);
        } elseif (mysqli_commit($dbh)) {
            echo count($all) . " events inserted\n";
        } else {
            echo "Error committing events: " . mysqli_error($dbh) . "\n";
        }

        mysqli_stmt_close($insert);
    }
}

// Done
mysqli_close($dbh);
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement