Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <html>
- <head><title>Site-Spider-WEBSPR</title></head>
- <style>
- body {font-family: courier; color: #00FF00; background-color: #000000;}
- </style>
- <body>
- <h2>Spidering the database: Results/Connections</h2>
<?php
// Variables
// Directory handed to spiderDir(); "." is the current working directory.
// The full "<?php" open tag is used because the short "<?" form is only
// parsed when the short_open_tag ini setting is enabled.
$dir = ".";
/**
 * Connects to MySQL on localhost as "root" (no password is passed) and
 * selects the index_dB database, creating the database and its words
 * table when they are missing.
 *
 * Terminates the script via die() when the connection or the database
 * selection fails. Echoes a confirmation paragraph on success.
 */
function dB_open() {
    mysql_connect("localhost", "root") or die("Error: Could not connect to MySQL.");

    // "create database" fails when index_dB already exists; that failure is
    // what distinguishes an existing database from a freshly created one.
    if (mysql_query("create database index_dB")) {
        mysql_select_db("index_dB") or die("Error: couldn't select a new database");
    } else {
        mysql_select_db("index_dB") or die("couldn't select a old database");
    }

    // Ensure the table exists on every run, not only right after the
    // database was created: an existing database without the table would
    // otherwise never get one and every later query would fail silently.
    // The result is deliberately not checked, matching the original call.
    mysql_query("create table if not exists words (word varchar(40),filename varchar(100))");

    echo "<p><b>**Database opened OK**</b><p>";
}
/**
 * Deletes every row stored for a file so that re-spidering it does not
 * leave duplicate or stale entries behind.
 *
 * @param string $file File name as stored in the words.filename column.
 */
function removeDuplicates($file) {
    // Escape before embedding in SQL: a file name containing a quote would
    // otherwise break the statement and the old rows would stay in place.
    $safeFile = mysql_real_escape_string($file);
    mysql_query("delete from words where filename='$safeFile'");
}
/**
 * Stores a (word, file) pair in the words table unless that exact pair is
 * already present.
 *
 * Nothing is inserted when the lookup query fails.
 *
 * @param string $word Word to index.
 * @param string $file Name of the file the word was found in.
 */
function write2Database($word, $file) {
    // Escape both values: words taken from file contents routinely contain
    // apostrophes, which would break the SQL and silently drop the word.
    $safeWord = mysql_real_escape_string($word);
    $safeFile = mysql_real_escape_string($file);

    $existing = mysql_query("select * from words where word='$safeWord' and filename='$safeFile'");
    if (!is_resource($existing)) {
        // The lookup failed, so there is no result set to inspect.
        return;
    }

    if (mysql_num_rows($existing) == 0) {
        mysql_query("insert into words values ('$safeWord','$safeFile')");
    }
}
/**
 * Closes the MySQL connection and echoes the outcome.
 *
 * The success message is printed only when mysql_close() reports success;
 * otherwise an error message is printed instead.
 */
function dB_close() {
    if (mysql_close()) {
        echo "<p><b>**Database closed OK**</b><p>";
    } else {
        echo "<p><b>Error: Could not close the MySQL connection.</b><p>";
    }
}
/**
 * Indexes one file: echoes each word found in it and stores the word
 * together with the file name via write2Database().
 *
 * Rows previously stored for the file are removed first through
 * removeDuplicates(), so re-spidering replaces the old index.
 *
 * @param string $file Path of the file to read; also used as the stored name.
 */
function dB_crawl($file) {
    echo "words in $file<br><br>";

    // Read before touching the database so an unreadable file does not
    // wipe the index that already exists for it.
    $contents = file_get_contents($file);
    if ($contents === false) {
        echo "Error: could not read $file<br>";
        return;
    }

    removeDuplicates($file);

    // Strip markup from the whole text, not per word: tags contain spaces,
    // so a tag split across several tokens could never be removed.
    $text = strip_tags($contents);

    // Split on any whitespace run (spaces, tabs, newlines) and drop empty
    // tokens; splitting on " " alone fused words across line breaks.
    $tokens = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        return;
    }

    foreach ($tokens as $word) {
        echo $word, " ";
        write2Database($word, $file);
    }
}
// Separator line in the report. As a top-level statement it is emitted
// when the script reaches this point, before the spider is started below.
echo '<br>-------------------------------------------<p>';
/**
 * Scans a directory and indexes every readable regular file in it through
 * dB_crawl(), skipping the spider's own support files.
 *
 * Subdirectories (including "." and "..") are skipped; the scan is not
 * recursive.
 *
 * @param string $dir        Directory to scan.
 * @param array  $extensions Optional list of lower-case extensions without
 *                           the dot (e.g. array("html", "txt")). When empty,
 *                           files of every extension are indexed.
 */
function spiderDir($dir, $extensions = array()) {
    // Files that belong to the spider/search tool itself.
    $igFiles = array("spider.man.php","search.php","stop_list.txt","search_form.html");

    $files = is_dir($dir) ? scandir($dir) : false;
    if ($files === false) {
        echo "Error: could not read directory $dir<br>";
        return;
    }

    foreach ($files as $file) {
        if (in_array($file, $igFiles, true)) {
            continue;
        }

        // scandir() returns bare entry names, so the directory has to be
        // prepended before touching the file system. For "." the bare name
        // is kept so the names stored in the index stay as they were.
        $path = ($dir === ".") ? $file : rtrim($dir, "/\\") . "/" . $file;

        if (!is_file($path) || !is_readable($path)) {
            continue;
        }

        if (!empty($extensions)) {
            $extension = strtolower(pathinfo($file, PATHINFO_EXTENSION));
            if (!in_array($extension, $extensions, true)) {
                continue;
            }
        }

        echo "Spidering file $path<br><br>";
        dB_crawl($path);
    }
}
// Entry point: open the database, spider $dir, and close the connection
// once spidering has completed.
dB_open();
spiderDir($dir);
dB_close();
- ?>
- </body>
- </html>
Add Comment
Please, Sign In to add comment