Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Fetch one page, walk every <a> element on it and register the links.
 *
 * For each anchor the href is normalised (root-relative links get $root
 * prepended, the #fragment is dropped, other relative links get the current
 * "category" prefix) and then handled according to its kind:
 *   - empty, "mailto:" and "#..." links are skipped;
 *   - links starting with "../" are additionally listed in dots_links.txt;
 *   - links whose last dot-separated part is in $pic_types are only probed
 *     with get_page() and reported when the probe fails;
 *   - "https://" links are likewise only probed and reported;
 *   - everything else is probed once and stored in $all_links (parallel
 *     arrays: [0] URL, [1] response code, [2] TRUE when the URL does not
 *     start with $root), and, when it is under $root and answered 200,
 *     appended to $root_links.
 *
 * Failed probes are appended to badlinks.txt (tab separated, one row per
 * link: link, href as found on the page, code, page it was found on).
 *
 * Relies on helpers defined elsewhere: get_page() (returns an array with
 * 'code' and 'page' keys), str_get_html() (presumably simple_html_dom --
 * verify) and return_correct_cat().
 *
 * @param string $url_page URL of the page to scan.
 * @return void
 */
function add_link_to_array($url_page)
{
    global $log;
    global $dots;
    global $dots_open;
    global $root;
    global $all_links;
    global $index;
    global $root_index;
    global $root_links;
    global $pic_types;
    global $old_cat;
    global $now_cat;
    global $links_find_log;
    // $begin was read below without being imported, so it was always
    // undefined inside this function.
    global $begin;

    fwrite($log, "\r\n".date("d.m.y H:i:s")." add_link_to_array from ".$url_page);

    $page = get_page($url_page);
    if ($page['code'] != 200)
        return;

    $html = str_get_html($page['page']);
    if (!$html)
        return;

    foreach ($html->find('a') as $a)
    {
        // NOTE(review): this logs the page URL, not the link itself -- kept
        // as in the original; confirm whether the href was intended.
        fwrite($links_find_log, "\r\n".date("d.m.y H:i:s")." find link: ".$url_page);

        $link = $a->href;
        // Untouched href, kept for the bad-links report.
        $if_err_back_up_link = $link;

        if ($link == NULL)
            continue;

        if ((strncasecmp($link, "mailto:", 7) == 0) || ($link[0] == '#'))
        {
            // The href comes from a crawled page: escape it before echoing
            // it into HTML output.
            echo 'link: '.htmlspecialchars($link, ENT_QUOTES).'<br />';
            continue;
        }

        // Parent-relative links are listed separately but still processed.
        if (strncasecmp($link, '../', 3) == 0)
        {
            if ($dots_open === FALSE)
            {
                $dots_handle = fopen("dots_links.txt", "w");
                if ($dots_handle !== FALSE)
                {
                    $dots = $dots_handle;
                    fwrite($dots, "what\twhere\r\n");
                    $dots_open = true;
                }
            }
            // Skip the row only when the file could not be opened just now.
            if ($dots_open !== FALSE)
                fwrite($dots, $link."\t".$url_page."\r\n");
        }

        if ($link[0] == '/')
            $link = $root.$link;

        // Drop the #fragment part.
        $url_parts = explode('#', $link);
        $link = $url_parts[0];

        $now_cat = return_correct_cat($url_page);
        if ((!strcmp($begin, $url_page)) || (!strcmp($root, $url_page)))
            $now_cat = $begin;

        // Pictures: probe only, never stored in $all_links.
        $name_parts = explode('.', $link);
        if (in_array(strtolower($name_parts[count($name_parts) - 1]), $pic_types))
        {
            fwrite($log, "\r\nPIC!");
            $code_rv = get_page($link);
            $code = $code_rv['code'];
            if ((($code >= 400) && ($code < 600)) || ($code == 0))
                add_link_report_bad($link, $if_err_back_up_link, $code, $url_page);
            continue;
        }

        // HTTPS links: probe only, never stored in $all_links.
        if (strncasecmp($link, "https://", 8) === 0)
        {
            $code_rv = get_page($link);
            $code = $code_rv['code'];
            if (($code >= 400) || ($code == 0))
                add_link_report_bad($link, $if_err_back_up_link, $code, $url_page);
            continue;
        }

        // Anything that is neither absolute http:// nor root-relative is
        // resolved against the current category.
        if ((strncasecmp($link, "http://", 7) != 0) && ($link[0] != '/'))
            $link = $now_cat.$link;

        if (in_array($link, $all_links[0]) !== FALSE)
            continue;

        $code_rv = get_page($link);
        $all_links[0][$index] = $link;
        $all_links[1][$index] = $code_rv['code'];
        // The last character of $root is left out of the prefix comparison
        // (presumably a trailing slash -- verify).
        $is_external = (strncasecmp($link, $root, strlen($root) - 1) != 0);
        $all_links[2][$index] = $is_external;

        if (!$is_external && ($all_links[1][$index] == 200))
        {
            $root_links[$root_index] = $link;
            $root_index++;
        }

        $code = $all_links[1][$index];
        if ((($code >= 400) && ($code < 600)) || ($code == 0))
            add_link_report_bad($link, $if_err_back_up_link, $code, $url_page);

        $index++;
    }

    $html->clear();
    unset($html);
}

/**
 * Append one row to badlinks.txt, creating the file (with its header row)
 * on first use. Private helper of add_link_to_array().
 *
 * Uses the globals $bad_links (file handle) and $bad_open (FALSE until the
 * file has been opened).
 *
 * @param string $link     Normalised link that was probed.
 * @param string $found_as The href exactly as it appeared on the page.
 * @param mixed  $code     Response code returned by get_page().
 * @param string $url_page Page on which the link was found.
 * @return void
 */
function add_link_report_bad($link, $found_as, $code, $url_page)
{
    global $bad_links;
    global $bad_open;

    if ($bad_open === FALSE)
    {
        $handle = fopen('badlinks.txt', "w");
        // If the report file cannot be created, leave the flag unset so a
        // later call can retry, and skip this row.
        if ($handle === FALSE)
            return;
        $bad_links = $handle;
        fwrite($bad_links, "link\tfinded link(on page, with out changing)\tcode\twhere find\r\n");
        $bad_open = true;
    }

    fwrite($bad_links, $link."\t".$found_as."\t".$code."\t".$url_page."\r\n");
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement