Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Scrapes OLX apartment-rental search results and stores offers not seen before.
 *
 * For every result page from 1 to $pages (inclusive) the action:
 *  - downloads the listing page and collects the `a.detailsLink` offer links;
 *  - skips offers whose title hash is already known (Parser::findByHash);
 *  - downloads the offer page and keeps only offers whose description block
 *    contains the marker text 'Частного лица';
 *  - downloads the offer photos into @frontend/web/img/apartment/<hash>/ and
 *    saves one Parser record plus one ParserImages record per stored photo.
 *
 * Pages or offers that cannot be downloaded are skipped so a single network
 * failure does not abort the whole run.
 *
 * @return void
 */
public function actionOlx()
{
    $baseUrl = 'https://www.olx.ua/';
    $listingUrl = $baseUrl . 'nedvizhimost/kvartiry-komnaty/arenda-kvartir-komnat/kvartira/kiev/q-%D0%B1%D0%B5%D0%B7-%D0%BA%D0%BE%D0%BC%D0%B8%D1%81%D1%81%D0%B8%D0%B8/?search%5Bdescription%5D=1&page=';
    $pages = 10;
    // Only offers whose description block contains this text are stored.
    $privateOwnerMarker = 'Частного лица';

    // Loop-invariant paths, resolved once.
    $cookiePath = $_SERVER['DOCUMENT_ROOT'] . '/cookie.dat';
    $imageRoot = \Yii::getAlias('@frontend/web/img/apartment/');

    // Downloads $url and returns the response body, or null on any failure.
    // The handle is always closed, so nothing leaks across iterations.
    $fetch = static function ($url, array $extraOptions = []) {
        $handle = curl_init($url);
        if ($handle === false) {
            return null;
        }

        // Response headers are deliberately NOT included in the output
        // (CURLOPT_HEADER stays off): the body goes straight to the HTML parser.
        curl_setopt_array($handle, $extraOptions + [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
            // NOTE(review): TLS verification is disabled, exactly as before.
            // This allows man-in-the-middle tampering; enable both options
            // once a CA bundle is confirmed to be available on the server.
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_SSL_VERIFYPEER => false,
        ]);

        $body = curl_exec($handle);
        curl_close($handle);

        return (is_string($body) && $body !== '') ? $body : null;
    };

    // "<=" so that all $pages pages are visited (pages are numbered from 1).
    for ($page = 1; $page <= $pages; $page++) {
        $listingHtml = $fetch($listingUrl . $page);
        if ($listingHtml === null) {
            continue;
        }

        \phpQuery::newDocument($listingHtml);
        $offerLinks = pq('div.offer-wrapper')->find('a.detailsLink');

        foreach ($offerLinks as $offerNode) {
            $offerLink = pq($offerNode);
            $apartmentLink = $offerLink->attr('href');
            $apartmentTitle = $offerLink->find('img.fleft')->attr('alt');
            if (empty($apartmentLink)) {
                continue;
            }

            // Offers are de-duplicated by a hash of the listing title.
            $hash = Parser::makeHash($apartmentTitle);
            if (Parser::findByHash($hash, $baseUrl)) {
                continue;
            }

            // Offer pages are requested with a persistent cookie file.
            $apartmentContent = $fetch($apartmentLink, [
                CURLOPT_COOKIEFILE => $cookiePath,
                CURLOPT_COOKIEJAR => $cookiePath,
            ]);
            if ($apartmentContent === null) {
                continue;
            }

            // From here on pq('...') queries the offer page (the newest document).
            \phpQuery::newDocument($apartmentContent);
            $description = pq('div.offerdescription');
            $descriptionContent = $description->find('div.descriptioncontent');

            $apartmentDetails = $descriptionContent->text();
            if (strpos($apartmentDetails, $privateOwnerMarker) === false) {
                continue;
            }

            $apartmentImageLinks = $description->find('div.photo-glow > img');
            $apartmentH1 = $description->find('div.offer-titlebox > h1')->text();
            $apartmentText = $descriptionContent->find('div.large')->text();
            $apartmentExternalInfo = $descriptionContent->find('table.details')->text();
            $apartmentPrice = pq('div.price-label')->find('.xxxx-large')->text();
            $apartmentAreaLocation = pq('a.show-map-link > strong')->text();

            // Try to collect the images, then create the record in the apartments table.
            $fullPath = $imageRoot . $hash;
            $directoryReady = is_dir($fullPath);
            $imagesPathArray = [];
            $imageNumber = 0;

            foreach ($apartmentImageLinks as $imageNode) {
                $imageLink = pq($imageNode)->attr('src');
                if (empty($imageLink)) {
                    continue;
                }

                // The directory is created lazily, only when there is an image to store.
                if (!$directoryReady) {
                    $directoryReady = mkdir($fullPath, 0777, true) || is_dir($fullPath);
                    if (!$directoryReady) {
                        break;
                    }
                }

                $dataImage = $this->curl_get_contents($imageLink);
                if ($dataImage === false || $dataImage === null || $dataImage === '') {
                    continue;
                }

                // Sequential names cannot collide within one offer (random names could).
                $imageNumber++;
                $fileName = $imageNumber . '.jpg';
                if (file_put_contents($fullPath . '/' . $fileName, $dataImage) === false) {
                    continue;
                }

                // Only images that were actually written are recorded.
                $imagesPathArray[] = [
                    'path' => '/apartment/' . $hash . '/',
                    'name' => $fileName,
                ];
            }

            $parser = new Parser([
                'h1' => $apartmentH1 . ' / ' . $apartmentAreaLocation,
                'hash' => $hash,
                'name' => $apartmentTitle,
                'price' => $apartmentPrice,
                'external_info' => $apartmentExternalInfo,
                // Phone numbers cannot be obtained from OLX, so contacts stay empty.
                'contacts' => '',
                'description' => $apartmentText,
                'site' => $baseUrl,
                'link_to_object' => $apartmentLink,
                'status' => Parser::STATUS_ACTIVE,
            ]);

            if (!$parser->save()) {
                continue;
            }

            foreach ($imagesPathArray as $storedImage) {
                $parserImage = new ParserImages();
                $parserImage->parser_id = $parser->id;
                $parserImage->path = $storedImage['path'];
                $parserImage->name = $storedImage['name'];
                $parserImage->save();
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement