Advertisement
ShineKami

Класс парсера Kinopoisk.ru

Dec 18th, 2014
589
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.62 KB | None | 0 0
  1. <?php
  2. /**
  3. * KPFilms - Парсер фильмов с kinopoisk.ru.
  4. * =======================================================
  5. * Автор: ShineKami
  6. * URL: http://webgod-studio.ru/
  7. * email: sluciferk@gmail.ru
  8. * =======================================================
  9. * Файл: wgs_kpfilms.class.php
  10. * -------------------------------------------------------
  11. * Версия: 1.0.0 (26.09.2014)
  12. * -------------------------------------------------------
  13. * Назначение: Файл функционала модуля
  14. * =======================================================
  15. **/
  16.  
  17. require_once "htmldom.class.php";
  18.  
  /**
   * Scraper that reads film data from kinopoisk.ru pages.
   *
   * The object is created either from a film page URL or from a film title.
   * A title is looked up through the site search and the first result is used.
   * All HTTP traffic goes through the cURL handle supplied by the caller, and
   * HTML is parsed with the helpers loaded from htmldom.class.php
   * (str_get_html / simple_html_dom).
   */
  class kpfilm
  {
      /** @var resource cURL handle supplied by the caller; reused for every request. */
      private $ch;

      /** @var string|null URL-encoded film title (set only when constructed from a title). */
      private $fTitle;

      /** @var mixed DOM of the most recently loaded page. */
      private $html;

      /** @var string|null Search URL (set only when constructed from a title). */
      private $sUrl;

      /** @var string|null URL of the film page; null when it could not be resolved. */
      private $fUrl;

      /** @var string|null Numeric film identifier taken from the film URL. */
      private $FilmID;

      /**
       * Downloads a page and parses it into $this->html.
       *
       * @param string $url Page address.
       * @return mixed The parsed DOM (also stored in $this->html).
       * @throws RuntimeException When the transfer fails or the body cannot be parsed.
       */
      private function getPage($url)
      {
          curl_setopt($this->ch, CURLOPT_URL, $url);
          curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, true);
          curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);
          // Pages are plain documents: force GET. The handle is shared with
          // Login(), which leaves POST mode and the credential body configured;
          // without this reset every page request would re-send the credentials.
          curl_setopt($this->ch, CURLOPT_HTTPGET, true);
          curl_setopt($this->ch, CURLOPT_REFERER, "http://www.kinopoisk.ru/");
          curl_setopt($this->ch, CURLOPT_COOKIEJAR, dirname(__FILE__) . '/cookie.txt');
          curl_setopt($this->ch, CURLOPT_COOKIEFILE, dirname(__FILE__) . '/cookie.txt');
          curl_setopt(
              $this->ch,
              CURLOPT_USERAGENT,
              'Mozilla/4.0.(compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322)'
          );

          $body = curl_exec($this->ch);
          if ($body === false) {
              throw new RuntimeException('Failed to fetch ' . $url . ': ' . curl_error($this->ch));
          }

          $dom = str_get_html($body);
          if (!$dom) {
              // The parser gave nothing usable back (e.g. for an empty body).
              throw new RuntimeException('Failed to parse HTML from ' . $url);
          }

          $this->html = $dom;
          return $this->html;
      }

      /**
       * Extracts the film identifier (first run of 2-7 digits) from a URL.
       *
       * @param string $url Film page URL.
       * @return string|null The identifier, or null when the URL contains none.
       */
      private function extractFilmId($url)
      {
          return preg_match('#([0-9]{2,7})#', $url, $match) ? $match[0] : null;
      }

      /**
       * Runs the title search and stores the first result as the film URL.
       *
       * @return bool True when a result link was found, false otherwise.
       * @throws RuntimeException When the search page cannot be loaded.
       */
      private function searchFilm()
      {
          $this->getPage($this->sUrl);

          $results = $this->html->find('.search_results', 0);
          if (!$results) {
              return false;
          }

          $link = $results->find('.pic a', 0);
          if (!$link) {
              return false;
          }

          $this->fUrl = "http://www.kinopoisk.ru" . $link->href;
          $this->FilmID = $this->extractFilmId($this->fUrl);
          return true;
      }

      /**
       * Loads the film's video page and returns the trailer link.
       *
       * Replaces $this->html with the video page as a side effect.
       *
       * @return string The value of the "link" parameter of the trailer anchor,
       *                or '' when the film id or the anchor is missing.
       * @throws RuntimeException When the video page cannot be loaded.
       */
      private function getTrailerUrl()
      {
          if ($this->FilmID === null) {
              return '';
          }

          $this->getPage("http://www.kinopoisk.ru/film/" . $this->FilmID . "/video/");

          $anchor = $this->html->find('.news a.continue', 0);
          if (!$anchor) {
              return '';
          }

          parse_str($anchor->href, $params);
          return isset($params["link"]) ? $params["link"] : '';
      }

      /**
       * Collects the actor names from the currently loaded film page.
       *
       * @return string Names joined with ", " (no trailing separator),
       *                or '' when the actor list is absent.
       */
      private function getActors()
      {
          $list = $this->html->find('#actorList ul', 0);
          if (!$list) {
              return '';
          }

          $names = [];
          foreach ($list->find('li') as $item) {
              $names[] = $item->plaintext;
          }
          return implode(', ', $names);
      }

      /**
       * Returns the plain text of the $index-th element matching $selector.
       *
       * @param string $selector Selector passed to the DOM's find().
       * @param int    $index    Zero-based index among the matches.
       * @return string Element text, or '' when nothing matches.
       */
      private function nodeText($selector, $index = 0)
      {
          $node = $this->html->find($selector, $index);
          return $node ? $node->plaintext : '';
      }

      /**
       * Returns the text of the second cell of a row of #infoTable.
       *
       * @param int $rowIndex Zero-based row index.
       * @return string Cell text, or '' when the row or the cell is missing.
       */
      private function infoRowText($rowIndex)
      {
          $row = $this->html->find('#infoTable tr', $rowIndex);
          if (!$row) {
              return '';
          }

          $cell = $row->find('td', 1);
          return $cell ? $cell->plaintext : '';
      }

      /**
       * Normalises a scraped value to a UTF-8 string.
       *
       * @param mixed $value Scraped value (string, null or false).
       * @return string UTF-8 text.
       */
      private function toUtf8($value)
      {
          $value = (string) $value;
          if (mb_check_encoding($value, 'UTF-8')) {
              return $value;
          }

          // NOTE(review): Windows-1251 is assumed as the source encoding for
          // text that is not valid UTF-8 - confirm against the charset the
          // site actually serves.
          return mb_convert_encoding($value, 'UTF-8', 'Windows-1251');
      }

      /**
       * Creates a scraper for one film.
       *
       * When $_title looks like a URL it is used directly as the film page.
       * Otherwise it is treated as a title and the site search is queried
       * immediately. If the search yields nothing the object is still created
       * and getFilmInfo() reports the problem.
       *
       * @param resource $_ch    Initialised cURL handle.
       * @param string   $_title Film page URL or film title.
       * @throws RuntimeException When the search page cannot be loaded.
       */
      public function __construct($_ch, $_title)
      {
          $this->ch = $_ch;
          $this->html = new simple_html_dom();

          if (preg_match('/^(https?:\/\/)?([\w\.]+)\.([a-z]{2,6}\.?)(\/[\w\.]*)*\/?$/', $_title)) {
              $this->fUrl = $_title;
              $this->FilmID = $this->extractFilmId($this->fUrl);
              return;
          }

          $this->fTitle = urlencode($_title);
          $this->sUrl = "http://www.kinopoisk.ru/index.php?first=no&what=&kp_query=" . $this->fTitle;
          $this->searchFilm();
      }

      /**
       * Loads the film page and returns its data as an associative array.
       *
       * Fields that are not present on the page come back as empty strings.
       *
       * @return array Map of field name (Title, EngTitle, Poster, Year, ...,
       *               Actors, Trailer) to UTF-8 string.
       * @throws RuntimeException When no film page is known or a page cannot be loaded.
       */
      public function getFilmInfo()
      {
          if (empty($this->fUrl)) {
              throw new RuntimeException('Film page URL is not known: the film was not found.');
          }

          $this->getPage($this->fUrl);

          $poster = $this->html->find('[itemprop="image"]', 0);

          $info = [
              "Title" => $this->nodeText('[itemprop="name"]'),
              "EngTitle" => $this->nodeText('[itemprop="alternativeHeadline"]'),
              "Poster" => $poster ? $poster->src : '',
              "Year" => $this->infoRowText(0),
              "Country" => $this->infoRowText(1),
              "Slogan" => $this->infoRowText(2),
              "Director" => $this->nodeText('#infoTable [itemprop="director"]'),
              "Scenario" => $this->infoRowText(4),
              "Producer" => $this->nodeText('#infoTable [itemprop="producer"]'),
              "Operator" => $this->infoRowText(6),
              "MusicBy" => $this->nodeText('#infoTable [itemprop="musicBy"]'),
              "Artist" => $this->infoRowText(8),
              "Film_Editing" => $this->infoRowText(9),
              "Genre" => $this->nodeText('#infoTable [itemprop="genre"]'),
              "Premiera_WD" => $this->nodeText('#infoTable #div_world_prem_td2'),
              "Premiera_RU" => $this->nodeText('#infoTable #div_rus_prem_td2'),
              "Premiera_UA" => $this->nodeText('#infoTable #div_ua_prem_td2'),
              "Relis_BluRay" => $this->nodeText('#infoTable .bluray'),
              "Relis_DVD" => $this->nodeText('#infoTable .dvd'),
              "Rate" => $this->nodeText('#infoTable .ratePopup td', 1),
              "Time" => $this->nodeText('#infoTable #runtime'),
              "Rating" => $this->nodeText('#block_rating .rating_ball'),
              "Descript" => $this->nodeText('[itemprop="description"]'),
              "Actors" => $this->getActors(),
              // Must stay last: getTrailerUrl() replaces the loaded page.
              "Trailer" => $this->getTrailerUrl(),
          ];

          foreach ($info as $key => $value) {
              $info[$key] = $this->toUtf8($value);
          }
          return $info;
      }

      /**
       * Returns the DOM of the most recently loaded page.
       *
       * After getFilmInfo() this is the video page whenever a film id was
       * available, because the trailer lookup is the last request made.
       *
       * @return mixed
       */
      public function getHTML()
      {
          return $this->html;
      }
  }
  124.  
  /**
   * Submits the kinopoisk.ru login form through the given cURL handle.
   *
   * Session cookies are written to and read from cookie.txt next to this
   * file. The handle is left in POST mode with the form body attached.
   *
   * @param resource $ch    Initialised cURL handle.
   * @param string   $login Account login (raw, not URL-encoded).
   * @param string   $pass  Account password (raw, not URL-encoded).
   * @return string|bool Response body, or false when the transfer fails.
   */
  function Login($ch, $login, $pass)
  {
      // Encode the values so that characters such as "&", "=", "+" or "%"
      // inside the credentials cannot break the form-encoded body.
      $fields = "shop_user[login]=" . urlencode($login)
          . "&shop_user[pass]=" . urlencode($pass)
          . "&shop_user[mem]=on&auth=";

      curl_setopt($ch, CURLOPT_URL, "http://www.kinopoisk.ru/login/");
      curl_setopt($ch, CURLOPT_POST, 1);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
      // NOTE(review): certificate verification is switched off here; any
      // HTTPS hop made with this handle is therefore unauthenticated.
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
      curl_setopt($ch, CURLOPT_COOKIEJAR, dirname(__FILE__) . '/cookie.txt');
      curl_setopt($ch, CURLOPT_COOKIEFILE, dirname(__FILE__) . '/cookie.txt');

      return curl_exec($ch);
  }
  135. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement