Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
// Script that collects every link reachable on a site and prints each one
// as an HTML anchor, one per line.
$site = 'http://konyakov.ru/';
foreach (get_urls($site) as $url) {
    // Crawled URLs come from remote pages and are therefore untrusted:
    // escape them before embedding them in HTML attributes or text.
    $safe = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '<a href="'.$safe.'">'.$safe.'</a><br>'."\n";
}
/**
 * Re-index an array so that its keys become 0..n-1.
 *
 * Despite the name, no sorting is performed: the values keep their
 * original order and only the keys are discarded.
 *
 * @param array $array Input array (any keys).
 * @return array The same values in the same order with sequential integer keys.
 */
function my_sort($array) {
    return array_values($array);
}
/**
 * Extract the directory part of a URL path, without a leading slash and
 * with a trailing slash after every directory.
 *
 * The last "/"-separated segment is always treated as the file name (or as
 * the empty string that follows a trailing slash) and is dropped. Empty
 * segments produced by leading or doubled slashes are skipped.
 *
 * Examples: "/a/b/page.html" => "a/b/", "/a/b/" => "a/b/", "/index.html" => "".
 *
 * @param string|null $url URL path; null (no path component) is treated as "".
 * @return string Directory prefix such as "a/b/", or "" for the site root.
 */
function DirnameNormal($url) {
    $segments = explode('/', (string) $url);
    // The final segment is the file name (or empty after a trailing slash).
    array_pop($segments);

    $fulldir = '';
    foreach ($segments as $dir) {
        // Compare against '' explicitly: a plain truthiness test would also
        // drop a directory that is literally named "0".
        if ($dir !== '') {
            $fulldir .= $dir.'/';
        }
    }
    return $fulldir;
}
/**
 * Resolve a link found on a page into an absolute URL on the same site.
 *
 * Handled forms:
 *  - "/path"            => site origin + path
 *  - "http(s)://host/…" => returned unchanged if the host matches the site
 *  - "//host/…"         => given the site's scheme, then treated as above
 *  - "relative/path"    => site origin + directory of $site + relative path
 *
 * Rejected (returns false): links to another host, any non-http(s) scheme
 * (mailto:, javascript:, ftp:, tel:, data:, …), fragment-only links
 * ("#top"), empty links, and any link when $site itself has no scheme/host.
 *
 * Note: "../" and "./" segments are not collapsed.
 *
 * @param string $url  Raw href value taken from the page.
 * @param string $site Absolute URL of the page the link was found on.
 * @return string|false Absolute same-site URL, or false if the link must be skipped.
 */
function JoinToSite($url, $site) {
    $parts = parse_url($site);
    if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
        // Without a scheme and host there is nothing to resolve against.
        return false;
    }

    $url = (string) $url;
    if ($url === '' || $url[0] === '#') {
        // Empty and fragment-only links point back to the current page.
        return false;
    }

    // Rebuild the origin, keeping a non-default port if the site uses one.
    $domain = $parts['scheme'].'://'.$parts['host'];
    if (isset($parts['port'])) {
        $domain .= ':'.$parts['port'];
    }

    // Protocol-relative link: inherit the scheme so the host can be checked.
    if (strncmp($url, '//', 2) === 0) {
        $url = $parts['scheme'].':'.$url;
    }

    if (preg_match('~^https?:~i', $url)) {
        $host = parse_url($url, PHP_URL_HOST);
        // Host names are case-insensitive.
        if (is_string($host) && strcasecmp($host, $parts['host']) === 0) {
            return $url;
        }
        return false;
    }

    // Any other explicit scheme cannot be crawled over HTTP.
    if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $url)) {
        return false;
    }

    if ($url[0] === '/') {
        return $domain.$url;
    }

    $path = isset($parts['path']) ? $parts['path'] : '';
    return $domain.'/'.DirnameNormal($path).$url;
}
/**
 * Download a page and return the same-site links on it that are not yet known.
 *
 * Every <a href> value is entity-decoded, resolved with JoinToSite() against
 * $url, and kept only if it is not already present in $all_links and has not
 * already been seen earlier on this same page.
 *
 * @param string $url       Absolute URL of the page to fetch.
 * @param array  $all_links URLs that are already known and must be skipped.
 * @return array|false List of new absolute URLs, or false when the page could
 *                     not be fetched or contains no new links.
 */
function GetAllUrlsFromUrl($url, $all_links) {
    $html = file_get_contents($url);
    if ($html === false) {
        // Unreachable page: report "no new links" instead of parsing false.
        return false;
    }

    $found = preg_match_all(
        '~<a[^>]+href[\x20]?=[\x20\x22\x27]?([^\x20\x22\x27\x3E]+)[\x20\x22\x27]?[^>]*>~i',
        $html,
        $matches
    );
    if (!$found) {
        // No anchors at all, or a PCRE error.
        return false;
    }

    $array_urls = array();
    foreach ($matches[1] as $href) {
        // Attribute values are HTML-encoded ("&amp;" etc.); decode them to
        // obtain the URL a browser would actually request.
        $link = JoinToSite(html_entity_decode($href, ENT_QUOTES, 'UTF-8'), $url);
        if ($link !== false
            && !in_array($link, $all_links, true)
            && !in_array($link, $array_urls, true)
        ) {
            $array_urls[] = $link;
        }
    }
    return ((count($array_urls) > 0) ? $array_urls : false);
}
/**
 * Crawl a site starting from $url and collect every same-site link reachable
 * from it.
 *
 * Pages are visited iteratively (queue-based) so that every discovered link
 * is followed, and deep sites cannot exhaust the call stack. Each URL is
 * fetched at most once.
 *
 * @param string $url       Absolute URL to start crawling from.
 * @param array  $all_links URLs that are already known; they are neither
 *                          fetched nor reported as new. Leave empty to start
 *                          a fresh crawl, in which case $url itself becomes
 *                          the first entry of the result.
 * @return array Re-indexed list of unique URLs ($all_links plus everything
 *               discovered). If nothing new is found, $all_links is returned
 *               unchanged.
 */
function get_urls($url, $all_links = array()) {
    // On a fresh crawl the start page counts as known, so a link back to it
    // does not trigger a second download of the same page.
    $known = ($all_links === array()) ? array($url) : $all_links;

    $queue = array($url);
    $found_any = false;
    while (count($queue) > 0) {
        $page = array_shift($queue);
        $links = GetAllUrlsFromUrl($page, $known);
        if (!$links) {
            continue;
        }
        $found_any = true;
        foreach (array_unique($links) as $link) {
            $known[] = $link;
            $queue[] = $link;
        }
    }

    if (!$found_any) {
        return $all_links;
    }
    return my_sort(array_unique($known));
}
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement