Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/*
include '../Classes/PHPExcel.php';
$dir = "kek";
$mas = scandir($dir);
print_r($mas);
$tmp_arr = array();
for($i = 0; $i < count($mas); $i++){
if($i>1){
$objReader = PHPExcel_IOFactory::createReader('CSV');
// If the files uses a delimiter other than a comma (e.g. a tab), then tell the reader
// If the files uses an encoding other than UTF-8 or ASCII, then tell the reader
$objReader->setInputEncoding('cp1251');
$objPHPExcel = $objReader->load('kek/'.$mas[$i]);
$tmp_arr[$i-2] = $objPHPExcel->setActiveSheetIndex(0)->toArray();
}
}
$new_mas = array();
$cnt = 0;
foreach($tmp_arr as $key => $value){
foreach($value as $key1 => $value1){
if($value1!=$tmp_arr[0][0] && $value1[0]!=""){
$new_mas[$cnt] = $value1[0];
$cnt++;
}
}
}
echo count($new_mas).'<br>';
print_r($new_mas[0]);
$excel = new PHPExcel();
$excel->setActiveSheetIndex(0);
$active_excel = $excel->getActiveSheet();
$filename = 'kek.csv';
if (!file_exists($filename)) {
$fp = fopen($filename, "w");
fclose($fp);
}
$active_excel->setCellValueExplicit("A1","Title;Author;ID item;Publishing;Year;ISBN;Translator;Page quantity;Format;Cover type;Printing;Weight;Age restriction;Price",PHPExcel_Cell_DataType::TYPE_STRING);
for($i = 0; $i <count($new_mas);$i++){
$num = $i + 2;
$active_excel->setCellValueExplicit("A{$num}",mb_convert_encoding($new_mas[$i],'cp1251'),PHPExcel_Cell_DataType::TYPE_STRING);
}
$csv = PHPExcel_IOFactory::createWriter($excel,'CSV');
$csv->save($filename);
*/
// Raise the runtime limits: the scrape fetches and parses many pages in one request.
ini_set('memory_limit', '1024M');
ini_set('max_execution_time', '90000');

// simple_html_dom provides str_get_html(), which the script cannot run without,
// so a missing file must stop execution here instead of failing later with an
// "undefined function" error.
require_once "../app/simple_html_dom.php";

// PHPExcel is only referenced by the commented-out CSV export code, so a
// missing copy stays non-fatal; *_once guards against double inclusion.
include_once "../Classes/PHPExcel.php";
/**
 * Fetches a URL with cURL using browser-like request headers.
 *
 * Redirects are followed, the in-memory cookie engine is enabled, and the
 * requested URL is also sent as the Referer header.
 *
 * @param string $url Address to request.
 *
 * @return string The response body on success (possibly empty). When the
 *                transfer fails, the cURL error message is returned instead,
 *                so callers receive a string in both cases.
 */
function curl_safity_access($url)
{
    $curl = curl_init();
    if ($curl === false) {
        // No handle to query for an error message; keep the "string on failure" contract.
        return 'cURL could not be initialised';
    }

    /*$arr=array(
    'Cache-Control: max-age=0',
    'Connection: keep-alive',
    'Cookie: HISTORY_UNAUTH_SESSION=true; __cfduid=dd64575df284a5c28b449e1898835fadb1533682541; _ym_d=1537769163; _ym_isad=1; _ym_uid=1518874994368486896;
    _ym_visorc_160656=b; _ym_visorc_45411513=b; _ym_wasSynced=%7B%22time%22%3A1547484052496%2C%22params%22%3A%7B%22eu%22%3A0%7D%2C%22bkParams%22%3A%7B%7D%7D;
    cmp-merge=true; currentRegionId=47; currentRegionName=%D0%9D%D0%B8%D0%B6%D0%BD%D0%B8%D0%B9%20%D0%9D%D0%BE%D0%B2%D0%B3%D0%BE%D1%80%D0%BE%D0%B4;
    first_visit_time=2019-01-14T19%3A40%3A52%2B03%3A00; fonts-loaded=1; head-banner=%7B%22closingCounter%22%3A0%2C%22showingCounter%22%3A2%2C%22shownAfterClicked%22%3Afalse%2C%22isClicked%22%3Afalse%7D;
    i=+JFgJlMQL9qJyiX5ZS++CPh8f5b9gNJs/dQW45Fn+TlCA6NeBBHYsxf/P8VYoeM7Tty07iePJJwkXE3gxv+8eQhlIJs=; js=1; mda=0; my=YwA=; parent_reqid_seq=fa4a82de104f4e1467a04bdda234e7c6%2Cd954de8c59e5ae7dbb29316d8a7611d1;
    reviews-merge=true; uid=AABcdlw8u49+oQDML4GAAg==; visits=1547484046-1547484046-1547484046; yabs-frequency=/4/0000000000000000/-rImSBWo8EvKi72uCY43LR1mk3900bMmSBWoGG00/;
    yabs-sid=2512643851511561836; yandexmarket=48; yandexuid=2606689591511115108; yp=1826475108.yrts.1511115108#1545370339.ygu.1#1543987941.ysl.1#1558546343.szm.1:1920x1080:1920x969#1545370358.shlos.1#1545370358.los.1#1545370358.losc.0;
    ys=svt.1#wprid.1542778362735550-674598815814834297502859-sas1-5510;'
    ys_fp=form-requestid%3D1542778362735550-674598815814834297502859-sas1-5510;
    );*/
    $headers = array(
        'Cache-Control: max-age=0',
        'Connection: keep-alive',
        'Cookie: __cfduid=dd64575df284a5c28b449e1898835fadb1533682541; ',
    );

    curl_setopt_array($curl, array(
        CURLOPT_URL            => $url,
        CURLOPT_COOKIEFILE     => '',   // empty name: enable cookie handling without loading a file
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_HEADER         => 0,    // body only, no response headers in the result
        CURLOPT_FRESH_CONNECT  => 0,
        CURLOPT_HTTPHEADER     => $headers,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        CURLOPT_REFERER        => $url,
        CURLOPT_RETURNTRANSFER => true, // make curl_exec() return the body instead of printing it
    ));

    $access = curl_exec($curl);
    // Only a strict false means the transfer failed; falsy bodies such as "0"
    // or "" are valid responses and must be returned unchanged.
    if ($access === false) {
        $access = curl_error($curl);
    }
    curl_close($curl);

    return $access;
}
/**
 * Substitutes every match of the pattern '/ /' in the given text with ' '.
 *
 * NOTE(review): as written, both the pattern and the replacement appear to be
 * a plain space, which makes this a no-op; the pattern may originally have
 * held a different whitespace character (e.g. a no-break space) - confirm.
 *
 * @param string $str Text to process.
 *
 * @return string The text after the substitution.
 */
function space_sub($str)
{
    return preg_replace('/ /', ' ', $str);
}
/*
 * Main scraping script.
 *
 * Fetches the catalogue listing page(s), follows every product link whose
 * href contains "context=search", and collects one record per product in
 * $all_books with the keys: title, shortcuts, minprice, minprice_shop,
 * customers, colors, com_charact (each present only when the matching page
 * element was found). Progress is echoed as HTML while the script runs.
 */
$cnt = 0;               // index of the next record in $all_books
$all_books = array();   // collected product records
$max_charact = 0;       // largest number of characteristics seen on a single product
$charact_mas = array(); // characteristic list of the product holding that maximum

// Characters removed from price text before the digits are read: plain space,
// no-break space (U+00A0) and thin space (U+2009), all as UTF-8 bytes.
// NOTE(review): the original stripped a single space-like character; the two
// extra variants are included because they are common thousands separators - confirm.
$price_separators = array(' ', "\xC2\xA0", "\xE2\x80\x89");

/* Disabled CSV export set-up (PHPExcel), kept for reference.
$excel = new PHPExcel();
$excel->setActiveSheetIndex(0);
$active_excel = $excel->getActiveSheet();
$active_excel->setCellValueExplicit("A1","Title,Author,ID item,Publishing,Year,ISBN,Translator,Page quantity,Format,Cover type,Printing,Weight,Age restriction,Price",PHPExcel_Cell_DataType::TYPE_STRING);
$filename = 'kek8.csv';
if (!file_exists($filename)) {
$fp = fopen($filename, "w");
fclose($fp);
}*/

for ($r = 1; $r < 2; $r++) {
    $url_str = "https://market.yandex.ru/catalog--ledeme/56379/list?track=pieces&page={$r}&glfilter=7893318%3A6336712&onstock=1&local-offers-first=0";
    $main_page = curl_safity_access($url_str);
    $dom = str_get_html($main_page);
    //var_dump($main_page);
    if (is_bool($dom)) {
        // No DOM was produced for the listing page; calling find() on it would be fatal.
        echo "Listing page {$r} could not be parsed<br>";
        continue;
    }

    foreach ($dom->find('a.n-link_theme_blue') as $description_link) {
        $a = $description_link->href;
        if (preg_match('/context=search/', $a)) {
            echo $cnt . '<br>';
            echo $a . '<br>';
            // Throttle requests by half a second. sleep() takes whole seconds,
            // so sleep(0.5) would not pause at all.
            usleep(500000);

            $book = curl_safity_access("https://market.yandex.ru" . $a);
            $book_dom = str_get_html($book);
            //echo $book;

            $item = array();      // record for this product
            $spec_link = null;    // href of this product's specification page, if any

            if (is_bool($book_dom)) {
                // Product page could not be parsed: keep a placeholder record.
                $item['shortcuts'] = '';
            } else {
                // --- Title (the last matching heading wins) ---
                foreach ($book_dom->find('h1.title_size_28') as $heading) {
                    $item['title'] = trim($heading->plaintext);
                    echo $item['title'] . "<br>";
                }

                // --- Short spec list, comma separated ---
                $shortcut_texts = array();
                foreach ($book_dom->find('.n-product-spec-list__item') as $shortcut) {
                    $shortcut_texts[] = trim($shortcut->plaintext);
                }
                $item['shortcuts'] = implode(',', $shortcut_texts);
                echo $item['shortcuts'] . "<br>";

                // --- Price: cheapest top offer, or the single-price element as fallback ---
                $price_nodes = $book_dom->find('.n-product-top-offer__item_type_price');
                if (!empty($price_nodes)) {
                    $offer_prices = array();
                    foreach ($price_nodes as $price_node) {
                        $digits_text = str_replace($price_separators, '', $price_node->plaintext);
                        //echo $digits_text.'<br>';
                        if (preg_match('/\d+/', $digits_text, $matches)) {
                            $offer_prices[] = $matches[0];
                        }
                    }

                    if (empty($offer_prices)) {
                        // No offer carried a readable number: record "unknown"
                        // rather than a sentinel value posing as a price.
                        $item['minprice'] = '';
                    } else {
                        $min_idx = 0;
                        foreach ($offer_prices as $idx => $offer_price) {
                            // Strict "<" keeps the first offer when prices tie.
                            if ((int) $offer_price < (int) $offer_prices[$min_idx]) {
                                $min_idx = $idx;
                            }
                        }
                        $item['minprice'] = $offer_prices[$min_idx];

                        // The shop is looked up by position.
                        // NOTE(review): assumes shop-name elements appear in the
                        // same order as the parsed offer prices - confirm.
                        $shop_nodes = array_values($book_dom->find('.n-shop-name-with-logo__name'));
                        foreach ($shop_nodes as $shop_idx => $shop_node) {
                            echo '-----<br>';
                            if ($shop_idx !== $min_idx) {
                                continue;
                            }
                            $shop_name = $shop_node->plaintext;
                            if ((string) $shop_name !== '') {
                                $item['minprice_shop'] = $shop_name;
                                echo 'Shop: ' . $item['minprice_shop'] . '<br>';
                            } else {
                                // No text name: fall back to the alt text of the first logo image.
                                echo '++++<br>';
                                foreach ($shop_node->find('.n-shop-logo__img') as $logo) {
                                    $item['minprice_shop'] = $logo->alt;
                                    echo 'Shop: ' . $item['minprice_shop'] . '<br>';
                                    break;
                                }
                            }
                            break;
                        }
                    }
                    echo $item['minprice'] . "<br>";
                } else {
                    // Starts empty so that text left over from another section
                    // can never be stored as the price.
                    $fallback_price = '';
                    foreach ($book_dom->find('.n-product-price-cpa2') as $price_node) {
                        $price_text = $price_node->plaintext;
                        //echo $price_text.'<br>';
                        if (preg_match('/\d+/', str_replace($price_separators, '', $price_text), $matches)) {
                            $fallback_price = $matches[0];
                        } else {
                            $fallback_price = $price_text;
                        }
                    }
                    $item['minprice'] = $fallback_price;
                    echo $item['minprice'] . "<br>";
                }

                // --- Recent buyers counter ---
                $customer_nodes = $book_dom->find('.n-reasons-to-buy__label');
                if (!empty($customer_nodes)) {
                    foreach ($customer_nodes as $customer_node) {
                        // The first 7 bytes of the label are skipped before digits are collected.
                        // NOTE(review): presumably this skips a fixed label prefix - confirm.
                        $label = htmlspecialchars_decode((string) substr($customer_node->plaintext, 7));
                        $item['customers'] = preg_replace('/[^0-9]/', '', $label);
                    }
                    echo $item['customers'] . "<br>";
                } else {
                    $item['customers'] = 'Информации о последних покупках данного товара нет';
                    echo $item['customers'] . "<br>";
                }

                // --- Link to the specification page ---
                // Recorded only when an href actually contains "spec", so a link
                // from a previous product is never reused.
                foreach ($book_dom->find('a.n-smart-link') as $smart_link) {
                    $href = $smart_link->href;
                    echo $href . '<br>';
                    if (preg_match('/spec/', $href)) {
                        $spec_link = $href;
                        break;
                    }
                }

                // Release the parsed page; simple_html_dom trees hold circular references.
                $book_dom->clear();
            }
            unset($book_dom);

            // --- Specification page: colours and characteristics ---
            $ch_dom = false;
            if ($spec_link !== null) {
                $charact_page = curl_safity_access("https://market.yandex.ru" . $spec_link);
                $ch_dom = str_get_html($charact_page);
            }

            if (!is_bool($ch_dom)) {
                $color_nodes = $ch_dom->find('.product-color');
                if (!empty($color_nodes)) {
                    echo '-<br>';
                    $color_titles = array();
                    foreach ($color_nodes as $color_node) {
                        $color_titles[] = $color_node->title;
                    }
                    $item['colors'] = trim(implode(' ', $color_titles));
                } else {
                    $item['colors'] = 'Цвет материала';
                }
                echo $item['colors'] . "<br>";

                $name_nodes = $ch_dom->find('.n-product-spec__name-inner');
                $value_nodes = $ch_dom->find('.n-product-spec__value-inner');
                if (!empty($name_nodes) && !empty($value_nodes)) {
                    echo '+<br>';
                    $specs = array();
                    foreach ($name_nodes as $key => $name_node) {
                        $spec_name = $name_node->plaintext;
                        // Colour rows are skipped here (colours are collected above).
                        // The "u" modifier makes the case-insensitive match work on
                        // Cyrillic (assumes this file and the page are UTF-8).
                        if (preg_match('/Цвет/iu', $spec_name)) {
                            continue;
                        }
                        // Keep only the text before the first "?" in the name.
                        $before_mark = strstr($spec_name, '?', true);
                        if ($before_mark !== false) {
                            $spec_name = $before_mark;
                        }
                        // A name without a matching value element gets an empty value.
                        $spec_value = isset($value_nodes[$key]) ? $value_nodes[$key]->plaintext : '';
                        $spec_line = trim($spec_name) . ':' . trim($spec_value);
                        $specs[] = $spec_line;
                        echo $spec_line . "<br>";
                    }
                    $item['com_charact'] = $specs;
                    if ($max_charact < count($specs)) {
                        $max_charact = count($specs);
                        $charact_mas = $specs;
                    }
                } else {
                    $item['com_charact'] = 'Товар без характеристик';
                    echo $item['com_charact'] . "<br>";
                }

                $ch_dom->clear();
            }
            unset($ch_dom);

            $all_books[$cnt] = $item;
            $cnt++;
        }
        /*n-product-price-cpa2 n-reasons-to-buy__label
        $author = $book_dom->find('.product__author');
        foreach($author as $plane){
        $tmp = $plane->plaintext;
        $all_books[$cnt] .= trim($tmp).';';
        }
        $attr = $book_dom->find('.product-prop__value');
        $cnt_attr = 0;
        foreach($attr as $plane){
        $tmp = $plane->plaintext;
        $all_books[$cnt] .= trim($tmp).';';
        $cnt_attr++;
        }
        $price = $book_dom->find('.price');
        foreach($price as $plane){
        $tmp = $plane->plaintext;
        $tmp = str_replace(' ₽','',$tmp);
        $all_books[$cnt] .= trim($tmp);
        }
        $active_excel->setCellValueExplicit("A{$num}",mb_convert_encoding($all_books[$cnt],'cp1251'),PHPExcel_Cell_DataType::TYPE_STRING);
        $csv = PHPExcel_IOFactory::createWriter($excel,'CSV');
        $csv->save($filename);*/
    }

    // Release the listing page before moving on to the next one.
    $dom->clear();
    unset($dom);
}
echo 'Максимум характеристик: ' . $max_charact;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement