Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
// Bootstrap: declare the response encoding, open the database connection and
// lift the execution time limit (each run performs many HTTP requests).
//
// NOTE(review): the mysql_* extension used throughout this script was removed
// in PHP 7; the script as written needs PHP 5.x (or a port to mysqli/PDO).

// The full "<?php" tag is used above because the short "<?" form only works
// when the short_open_tag ini setting is enabled.
header('Content-type: text/html; charset=UTF-8');

// Persistent connection; every later mysql_* call uses it implicitly.
mysql_pconnect('host','user','pass') or die(mysql_errno().": ".mysql_error());
mysql_select_db('vuz_db') or die(mysql_errno().": ".mysql_error());

// mysql_set_charset() is the documented way to switch the connection charset:
// unlike a raw "SET NAMES" query it also informs the client library, which
// mysql_real_escape_string() relies on.
mysql_set_charset('utf8') or die(mysql_errno().": ".mysql_error());

// No time limit for this request.
set_time_limit(0);
/**
 * Re-encodes a KOI8-R byte string as UTF-8.
 *
 * Thin wrapper around iconv(); whatever iconv() returns is handed back
 * unchanged (including false if the conversion fails).
 *
 * @param string $s Text encoded in KOI8-R.
 *
 * @return string|false The same text encoded in UTF-8, or iconv()'s false on failure.
 */
function koi2utf($s)
{
    $converted = iconv('koi8-r', 'utf-8', $s);

    return $converted;
}
/**
 * Performs a GET request with cURL and returns the response body.
 *
 * Any fragment ("#...") is cut off the URL first. When $params is a non-empty
 * array, its name/value pairs are URL-encoded and appended to the query
 * string, joined with "&" if the URL already contains "?" and with "?"
 * otherwise.
 *
 * The request forwards the User-Agent of the current HTTP request when there
 * is one, and allows 30 seconds for the connection phase.
 *
 * @param string      $url    URL to fetch.
 * @param array|false $params Optional query parameters (name => scalar value).
 *
 * @return string|false Response body without headers, or false if the
 *                      handle could not be created or the transfer failed.
 */
function curl_get($url, $params = false){
    // strpos() returns 0 for a match at offset 0, so compare with false
    // explicitly instead of relying on truthiness.
    $hashPos = strpos($url, '#');
    if ($hashPos !== false) {
        $url = substr($url, 0, $hashPos);
    }

    if ($params) {
        // Build the pairs in a separate array: no by-reference iteration,
        // hence no in-place rewrite of $params and no dangling reference.
        $pairs = array();
        foreach ($params as $k => $v) {
            $pairs[] = urlencode($k).'='.urlencode($v);
        }
        $url .= (strpos($url, '?') !== false ? '&' : '?').join('&', $pairs);
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);

    // HTTP_USER_AGENT is absent under CLI/cron and for clients that send no
    // User-Agent header; only forward it when it is actually there.
    if (isset($_SERVER['HTTP_USER_AGENT']) && $_SERVER['HTTP_USER_AGENT'] !== '') {
        curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
    }

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
// ---------------------------------------------------------------------------
// Scrape one page of the abitur.nica.ru search results into the database.
//
// For every university ("vuz") linked from the page, its detail page (name,
// first contact paragraph) and its specializations page are downloaded. The
// rows are collected in memory and written with three "insert ignore"
// statements, after which the browser is redirected to the next page until a
// page yields no universities.
// ---------------------------------------------------------------------------

// The page number comes from the query string and is echoed into HTML and
// inline JavaScript below, so reduce it to an integer first.
$page = isset($_GET['page']) ? intval($_GET['page']) : 1;

$data = curl_get('http://abitur.nica.ru/new/www/search.php', array('page' => $page));
if ($data === false) {
    // Without this check a network failure would look like an empty page and
    // be reported as the end of the crawl.
    die("Не удалось загрузить страницу #$page.");
}
preg_match_all('/vuz_detail\.php\?[^"]*/i', $data, $match);

$specs = array();        // specialization code => name (UTF-8)
$vuzes = array();        // university id => SQL value tuple
$vuzes_specs = array();  // SQL value tuples "(university id, specialization code)"

foreach ($match[0] as $link) {
    // Skip links that carry no code parameter instead of fetching "code=0".
    if (!preg_match('/code=([0-9]*)/i', $link, $m)) {
        continue;
    }
    $vuz_id = intval($m[1]);

    // A university linked more than once on the page is processed only once.
    if (isset($vuzes[$vuz_id])) {
        continue;
    }

    $detail = curl_get("http://abitur.nica.ru/new/www/vuz_detail.php", array('code' => $vuz_id));
    $spec_page = curl_get("http://abitur.nica.ru/new/www/vuz_specs.php", array('code' => $vuz_id));
    if ($detail === false || $spec_page === false) {
        // Stop rather than store an empty row: "insert ignore" would keep the
        // empty row and reject the real data on a later run. Nothing from this
        // page has been written yet, so reloading retries the whole page.
        die("Не удалось загрузить данные вуза #$vuz_id.");
    }

    // Name: contents of the first <h1>; empty when the markup is missing.
    $vuz_name = preg_match('/<h1>(.*?)<\/h1>/i', $detail, $m) ? $m[1] : '';

    // Address: first <p> of the first "contact" div, whitespace collapsed.
    $vuz_addr = '';
    if (preg_match('/<div\s+class="contact"\s*>.*?<\/div>/is', $detail, $m)
        && preg_match('/<p>(.*?)<\/p>/is', $m[0], $p)) {
        $vuz_addr = trim(preg_replace('/\s+/', ' ', $p[1]));
    }

    // Specializations: each bold span is expected to read "<name> (<6-digit code>)".
    preg_match_all('/<span\s+class="bold"\s*>(.*?)<\/span>/is', $spec_page, $vuz_specs);
    foreach ($vuz_specs[1] as $s) {
        // Spans that do not match are skipped; otherwise a bogus spec row
        // (0,'') and a pair (id,0) would be generated.
        if (!preg_match('/\s*(.*)\s+\((\d{6})\)/is', $s, $s_inf)) {
            continue;
        }
        $specs[$s_inf[2]] = koi2utf($s_inf[1]);
        $vuzes_specs[] = '('.$vuz_id.','.intval($s_inf[2]).')';
    }

    // Scraped text is untrusted: escape it with the connection-aware escaper
    // (handles quotes and backslashes) before building the SQL literal.
    $vuzes[$vuz_id] = '('.$vuz_id
        .',\''.mysql_real_escape_string(koi2utf($vuz_name))
        .'\',\''.mysql_real_escape_string(koi2utf($vuz_addr)).'\')';
}

ksort($specs);
$spec_rows = array();
foreach ($specs as $id => $name) {
    $spec_rows[] = '('.intval($id).',\''.mysql_real_escape_string($name).'\')';
}

if(count($vuzes)>0) mysql_query('insert ignore into vuzes values'.join(',',$vuzes)) or die(mysql_errno().": ".mysql_error());
if(count($spec_rows)>0) mysql_query('insert ignore into specs values'.join(',',$spec_rows)) or die(mysql_errno().": ".mysql_error());
if(count($vuzes_specs)>0) mysql_query('insert ignore into vuzes_specs values'.join(',',$vuzes_specs)) or die(mysql_errno().": ".mysql_error());

echo "Со страницы #$page в базу добавлено ".count($vuzes)." вузов, ".count($specs)." специализаций и ".count($vuzes_specs)." пар \"вуз-специализация\".<br/>";

// An empty page marks the end of the listing; otherwise move on to the next page.
if(count($vuzes)==0) echo "Похоже, это всё. Ура :)";
else echo "Работаем дальше...<script>location.replace('?page=".(++$page)."');</script>";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement