Advertisement
Guest User

Untitled

a guest
Jan 20th, 2019
202
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.13 KB | None | 0 0
  1. <?php
  2. /*
  3. include '../Classes/PHPExcel.php';
  4. $dir = "kek";
  5. $mas = scandir($dir);
  6. print_r($mas);
  7. $tmp_arr = array();
  8. for($i = 0; $i < count($mas); $i++){
  9. if($i>1){
  10. $objReader = PHPExcel_IOFactory::createReader('CSV');
  11. // If the files uses a delimiter other than a comma (e.g. a tab), then tell the reader
  12. // If the files uses an encoding other than UTF-8 or ASCII, then tell the reader
  13. $objReader->setInputEncoding('cp1251');
  14. $objPHPExcel = $objReader->load('kek/'.$mas[$i]);
  15. $tmp_arr[$i-2] = $objPHPExcel->setActiveSheetIndex(0)->toArray();
  16. }
  17. }
  18. $new_mas = array();
  19. $cnt = 0;
  20. foreach($tmp_arr as $key => $value){
  21. foreach($value as $key1 => $value1){
  22. if($value1!=$tmp_arr[0][0] && $value1[0]!=""){
  23. $new_mas[$cnt] = $value1[0];
  24. $cnt++;
  25. }
  26. }
  27. }
  28. echo count($new_mas).'<br>';
  29. print_r($new_mas[0]);
  30. $excel = new PHPExcel();
  31. $excel->setActiveSheetIndex(0);
  32. $active_excel = $excel->getActiveSheet();
  33. $filename = 'kek.csv';
  34. if (!file_exists($filename)) {
  35. $fp = fopen($filename, "w");
  36. fclose($fp);
  37. }
  38. $active_excel->setCellValueExplicit("A1","Title;Author;ID item;Publishing;Year;ISBN;Translator;Page quantity;Format;Cover type;Printing;Weight;Age restriction;Price",PHPExcel_Cell_DataType::TYPE_STRING);
  39. for($i = 0; $i <count($new_mas);$i++){
  40. $num = $i + 2;
  41. $active_excel->setCellValueExplicit("A{$num}",mb_convert_encoding($new_mas[$i],'cp1251'),PHPExcel_Cell_DataType::TYPE_STRING);
  42. }
  43. $csv = PHPExcel_IOFactory::createWriter($excel,'CSV');
  44. $csv->save($filename);
  45. */
  // Runtime limits for a long-running crawl: allow up to 1 GiB of memory and
  // 90000 seconds of execution time.
  ini_set('memory_limit', '1024M');

  // simple_html_dom supplies str_get_html(), which the crawl below cannot run
  // without, so a missing file must stop the script here instead of surfacing
  // later as an "undefined function" error.
  require_once "../app/simple_html_dom.php";

  // PHPExcel is only referenced by the commented-out export code in this file,
  // so it stays a soft include: a missing file warns but does not abort.
  include_once "../Classes/PHPExcel.php";

  ini_set('max_execution_time', '90000');
  /**
   * Download a URL with a browser-like set of request headers.
   *
   * Redirects are followed, cookie handling is switched on without loading a
   * cookie file, and the requested URL itself is sent as the Referer.
   *
   * @param string $url Absolute URL to fetch.
   *
   * @return string The response body on success. When the transfer fails the
   *                cURL error message is returned instead (legacy contract kept
   *                for existing callers), so the two cannot be told apart by
   *                type alone.
   */
  function curl_safity_access($url){
      $curl = curl_init();

      // Return the body from curl_exec() instead of printing it.
      curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($curl, CURLOPT_URL, $url);
      // An empty file name enables cookie handling without reading any file.
      curl_setopt($curl, CURLOPT_COOKIEFILE, '');
      curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($curl, CURLOPT_HEADER, false);
      curl_setopt($curl, CURLOPT_FRESH_CONNECT, false);

      $headers = array(
          'Cache-Control: max-age=0',
          'Connection: keep-alive',
          // NOTE(review): hard-coded session cookie; consider moving it to configuration.
          'Cookie: __cfduid=dd64575df284a5c28b449e1898835fadb1533682541; ',
      );
      curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
      curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");
      curl_setopt($curl, CURLOPT_REFERER, $url);

      $access = curl_exec($curl);

      // curl_exec() signals failure with boolean false only. A loose !$access
      // test would also discard legitimate bodies such as "0" or "".
      if ($access === false) {
          $access = curl_error($curl);
      }

      curl_close($curl);

      return $access;
  }
  84.  
  /**
   * Swap every literal "&nbsp;" entity in the given text for an ordinary space.
   *
   * @param string $str Text that may contain "&nbsp;" entities.
   *
   * @return string The text with each entity replaced by a single space.
   */
  function space_sub($str){
      return str_replace('&nbsp;', ' ', $str);
  }
  89.  
  // Scraped product records, indexed by the running counter $cnt.
  $all_books = array();
  // Characteristic list of the product that had the most characteristics so far.
  $charact_mas = array();
  // Index of the product record currently being filled.
  $cnt = 0;
  // Largest number of characteristics seen on a single product.
  $max_charact = 0;
  94. /*
  95. $excel = new PHPExcel();
  96. $excel->setActiveSheetIndex(0);
  97. $active_excel = $excel->getActiveSheet();
  98. $active_excel->setCellValueExplicit("A1","Title,Author,ID item,Publishing,Year,ISBN,Translator,Page quantity,Format,Cover type,Printing,Weight,Age restriction,Price",PHPExcel_Cell_DataType::TYPE_STRING);
  99. $filename = 'kek8.csv';
  100. if (!file_exists($filename)) {
  101. $fp = fopen($filename, "w");
  102. fclose($fp);
  103. }*/
  // Crawl the catalogue listing and, for every product link on it, collect
  // title, short specs, cheapest offer, buyer count, colours and the full
  // characteristic list into $all_books[$cnt].
  // The loop bound currently limits the crawl to listing page 1.
  for ($r = 1; $r < 2; $r++) {
      $url_str = "https://market.yandex.ru/catalog--ledeme/56379/list?track=pieces&page={$r}&glfilter=7893318%3A6336712&onstock=1&local-offers-first=0";
      $dom = str_get_html(curl_safity_access($url_str));
      if (is_bool($dom)) {
          // The listing page could not be parsed; there is nothing to walk.
          continue;
      }

      foreach ($dom->find('a.n-link_theme_blue') as $description_link) {
          $a = $description_link->href;
          if (!preg_match('/context=search/', $a)) {
              continue;
          }

          echo $cnt.'<br>';
          echo $a.'<br>';

          // Half-second pause between product requests. sleep() takes whole
          // seconds, so sleep(0.5) would not wait at all.
          usleep(500000);

          $book_dom = str_get_html(curl_safity_access("https://market.yandex.ru".$a));
          if (is_bool($book_dom)) {
              // Unparseable product page: keep an (almost empty) record so the
              // numbering stays in step, and do not reuse data from a previous
              // product.
              $all_books[$cnt]['shortcuts'] = '';
              $cnt++;
              continue;
          }

          // --- Title (the last matching heading wins) ---
          foreach ($book_dom->find('h1.title_size_28') as $heading) {
              $all_books[$cnt]['title'] = trim($heading->plaintext);
              echo $all_books[$cnt]['title']."<br>";
          }

          // --- Short spec list, comma separated ---
          $spec_items = array();
          foreach ($book_dom->find('.n-product-spec-list__item') as $shortcut) {
              $spec_items[] = trim($shortcut->plaintext);
          }
          $all_books[$cnt]['shortcuts'] = implode(',', $spec_items);
          echo $all_books[$cnt]['shortcuts']."<br>";

          // --- Cheapest offer and the shop selling it ---
          $offers = $book_dom->find('.n-product-top-offer__item_type_price');
          if (!empty($offers)) {
              $min_pr = null;
              $num_min = 0;
              foreach ($offers as $offer_idx => $offer) {
                  if (!preg_match('/\d+/', str_replace(' ', '', $offer->plaintext), $matches)) {
                      continue;
                  }
                  if ($min_pr === null || (int) $matches[0] < (int) $min_pr) {
                      $min_pr = $matches[0];
                      // Remember the offer's own position so it lines up with
                      // the shop list even if an earlier offer had no digits.
                      $num_min = $offer_idx;
                  }
              }
              // Legacy placeholder kept for the case where no offer contained a number.
              $all_books[$cnt]['minprice'] = ($min_pr === null) ? '9999999' : $min_pr;

              foreach ($book_dom->find('.n-shop-name-with-logo__name') as $shop_idx => $step) {
                  echo '-----<br>';
                  if ($shop_idx != $num_min) {
                      continue;
                  }
                  $shop_name = $step->plaintext;
                  if ((string) $shop_name !== '') {
                      $all_books[$cnt]['minprice_shop'] = $shop_name;
                      echo 'Shop: '.$all_books[$cnt]['minprice_shop'].'<br>';
                  } else {
                      // No text name: fall back to the alt text of the shop logo.
                      $alt_shops = $step->find('.n-shop-logo__img');
                      echo '++++<br>';
                      foreach ($alt_shops as $alt_shop) {
                          $all_books[$cnt]['minprice_shop'] = $alt_shop->alt;
                          echo 'Shop: '.$all_books[$cnt]['minprice_shop'].'<br>';
                          break;
                      }
                  }
                  break;
              }
              echo $all_books[$cnt]['minprice']."<br>";
          } else {
              // No offer list on the page: use the single price block instead.
              // Starts empty so a missing block never picks up unrelated text.
              $fallback_price = '';
              foreach ($book_dom->find('.n-product-price-cpa2') as $price_node) {
                  $fallback_price = $price_node->plaintext;
                  if (preg_match('/\d+/', str_replace(' ', '', $fallback_price), $matches)) {
                      $fallback_price = $matches[0];
                  }
              }
              $all_books[$cnt]['minprice'] = $fallback_price;
              echo $all_books[$cnt]['minprice']."<br>";
          }

          // --- Number of recent buyers ---
          $customers = $book_dom->find('.n-reasons-to-buy__label');
          if (!empty($customers)) {
              foreach ($customers as $customer) {
                  // Skips the first 7 bytes of the label, presumably a fixed
                  // prefix (TODO confirm; this is a byte offset, not a
                  // character offset), then keeps only the ASCII digits.
                  $label = htmlspecialchars_decode(substr($customer->plaintext, 7));
                  $all_books[$cnt]['customers'] = preg_replace('/\D/', '', $label);
              }
          } else {
              $all_books[$cnt]['customers'] = 'Информации о последних покупках данного товара нет';
          }
          echo $all_books[$cnt]['customers']."<br>";

          // --- Locate the link to the full characteristics ("spec") page ---
          $new_link = null;
          foreach ($book_dom->find('a.n-smart-link') as $cl) {
              $candidate = $cl->href;
              echo $candidate.'<br>';
              if (preg_match('/spec/', $candidate)) {
                  $new_link = $candidate;
                  break;
              }
          }

          // Only fetch the page when a spec link was actually found; otherwise
          // an unrelated link (or one left over from the previous product)
          // would be scraped as if it belonged to this product.
          $ch_dom = false;
          if ($new_link !== null) {
              $ch_dom = str_get_html(curl_safity_access("https://market.yandex.ru".$new_link));
          }

          if (!is_bool($ch_dom)) {
              // --- Colours ---
              $colors = $ch_dom->find('.product-color');
              if (!empty($colors)) {
                  echo '-<br>';
                  $color_titles = array();
                  foreach ($colors as $color) {
                      $color_titles[] = $color->title;
                  }
                  $all_books[$cnt]['colors'] = trim(implode(' ', $color_titles));
              } else {
                  $all_books[$cnt]['colors'] = 'Цвет материала';
              }
              echo $all_books[$cnt]['colors']."<br>";

              // --- Full characteristic list as "name:value" strings ---
              $charact_names = $ch_dom->find('.n-product-spec__name-inner');
              $charact_vals = $ch_dom->find('.n-product-spec__value-inner');
              if (!empty($charact_names) && !empty($charact_vals)) {
                  echo '+<br>';
                  $all_books[$cnt]['com_charact'] = array();
                  foreach ($charact_names as $key => $names) {
                      $spec_name = $names->plaintext;
                      // Colour rows are skipped; colours are collected above.
                      // NOTE(review): without the "u" modifier the "i" flag does
                      // not fold Cyrillic case, so only this exact spelling matches.
                      if (preg_match('/Цвет/i', $spec_name)) {
                          continue;
                      }
                      // Keep only the part of the name before the first "?".
                      $question_pos = strpos($spec_name, '?');
                      if ($question_pos !== false) {
                          $spec_name = substr($spec_name, 0, $question_pos);
                      }
                      // A name may have no value element at the same position.
                      $spec_value = isset($charact_vals[$key]) ? $charact_vals[$key]->plaintext : '';
                      $pair = trim($spec_name).':'.trim($spec_value);
                      $all_books[$cnt]['com_charact'][] = $pair;
                      echo $pair."<br>";
                  }

                  // Track the product with the most characteristics.
                  $cnt_charact = count($all_books[$cnt]['com_charact']);
                  if ($max_charact < $cnt_charact) {
                      $max_charact = $cnt_charact;
                      $charact_mas = $all_books[$cnt]['com_charact'];
                  }
              } else {
                  $all_books[$cnt]['com_charact'] = 'Товар без характеристик';
                  echo $all_books[$cnt]['com_charact']."<br>";
              }
          }

          $cnt++;
      }
  }

  echo 'Максимум характеристик: '.$max_charact;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement