Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Print a single log line: a newline, one space, then the message.
 *
 * @param mixed $str Message to print; it is converted to a string on output.
 */
function ol1($str)
{
    echo "\n " . $str;
}
// Announce the start of this crawl run, tagged with the script's file name.
ol1("--- Start crawle ".basename(__FILE__). " --- ");

// Listing-page URL prefix => category token handed to qqgetIdFromRand().
// A page number (1..9) is appended to each prefix when crawling.
$mLink = [
    'http://abc.com/pc-console/page/'    => 'zy370886',
    'http://abc.com/cong-dong-game/page/' => 'zd047253',
    'http://abc.com/game-online/page/'   => 'va207791',
    'http://abc.com/game-mobile/page/'   => 'ue985114',
    'http://abc.com/esports/page/'       => 'gh466291',
    // Blank placeholder entry; the crawl loop below skips empty URLs.
    '' => '',
];

// Number of passes started so far.
$ll = 0;

// Crawl forever: one pass over every category, then hand the collected
// articles to BaoGame::checkInsert().
for (;;) {
    // Each pass starts with an empty queue of newly discovered articles.
    BaoGame::$newListInsert = [];
    echo "<br/>\n LOOP = $ll";

    // Wait ten minutes between passes, but not before the very first one.
    if ($ll > 0) {
        echo "<br/>\n Sleep 10p";
        sleep(600);
    }
    $ll++;

    foreach ($mLink as $link => $pid) {
        if ($link) {
            // Walk listing pages 1 through 9 of this category.
            for ($i = 1; $i <= 9; $i++) {
                $link1 = $link . $i;
                echo "<br/>\n$link1";
                // Throttle: one second between listing requests.
                sleep(1);
                getListBaiBao($link1, qqgetIdFromRand($pid));
            }
        }
    }

    // NOTE(review): presumably persists the articles queued during this pass;
    // the implementation is not visible here.
    BaoGame::checkInsert(__FILE__);
}
/**
 * Download one article page and return its summary and cleaned body HTML.
 *
 * Steps:
 *  1. Fetch the page and locate its `div.entry-content` element.
 *  2. Replace the parent element of every <img> with a bare
 *     `<img src='...' class='glx_img'>` tag and drop all <script> elements.
 *  3. Strip every attribute except src, title, href and class.
 *  4. Use the text of the first <p> as the summary and remove that <p>
 *     from the body.
 *  5. Collapse runs of blank `<p> </p>` paragraphs into a single one.
 *
 * Side effects: sleeps one second and increments BaoGame::$totalKeoVe on
 * every call, including calls that fail.
 *
 * @param string $linkBai URL of the article page.
 *
 * @return array|null `[summary, content]`, or null when the page cannot be
 *                    fetched or parsed, or has no `div.entry-content`.
 */
function getContentBai($linkBai)
{
    // Throttle requests to the remote site.
    sleep(1);
    BaoGame::$totalKeoVe++;

    $html = ctool::postget1curl($linkBai);
    if (!$html) {
        return null;
    }

    $page = str_get_html($html);
    if (!$page) {
        return null;
    }

    $entry = $page->find("div.entry-content", 0);
    if (!$entry) {
        return null;
    }

    // Replace each image's wrapper (its parent element) with a plain <img>.
    foreach ($entry->find("img") as $img) {
        $src = $img->src;
        $img->parent()->outertext = "<img src='$src' class='glx_img'>";
    }

    // Remove inline scripts.
    foreach ($entry->find("script") as $script) {
        $script->outertext = '';
    }

    $cont = str_replace("text-align: justify;", '', $entry->innertext);

    // Re-parse the edited HTML so the attribute clean-up sees the new tree.
    // str_get_html() returns false for input it cannot parse (e.g. empty).
    $doc = str_get_html($cont);
    if (!$doc) {
        return null;
    }

    // Remove all attributes other than src, title, href and class.
    $keptAttributes = ['src', 'title', 'href', 'class'];
    foreach ($doc->find("*") as $elm) {
        foreach ($elm->getAllAttributes() as $attr => $val) {
            if (!in_array($attr, $keptAttributes, true)) {
                $elm->removeAttribute($attr);
            }
        }
    }

    // The first paragraph becomes the summary and is cut from the body.
    // An article without any <p> gets an empty summary instead of crashing.
    $sum = '';
    $firstParagraph = $doc->find("p", 0);
    if ($firstParagraph) {
        $sum = strip_tags($firstParagraph->innertext);
        $firstParagraph->outertext = '';
    }

    $cont = $doc->innertext;

    // Collapse adjacent blank paragraphs (optionally separated by a single
    // newline, carriage return or space) until nothing changes any more.
    $blankPairs = [
        "<p> </p>\n<p> </p>",
        "<p> </p>\r<p> </p>",
        "<p> </p><p> </p>",
        "<p> </p> <p> </p>",
    ];
    do {
        $before = $cont;
        $cont = str_replace($blankPairs, "<p> </p>", $cont);
    } while ($cont !== $before);

    return [$sum, $cont];
}
/**
 * Crawl one listing page and queue every new, recent article found on it.
 *
 * For each `div.blog-post.saxon-block.saxon-large-grid-post` card the
 * function extracts the link, image, title, excerpt and date, then:
 *  - stops processing the page at the first article older than 7 days;
 *  - skips cards whose markup or date cannot be read;
 *  - skips articles already queued in BaoGame::$newListInsert;
 *  - when running from the CLI, fetches the article body via
 *    getContentBai() and appends the populated model to
 *    BaoGame::$newListInsert unless a row with the same `refer` already
 *    exists in storage.
 *
 * Progress is echoed as it goes. Nothing is written to storage here.
 *
 * @param string $link URL of the listing page.
 * @param mixed  $pid  Parent/category id stored on each queued article.
 *
 * @return void
 */
function getListBaiBao($link, $pid)
{
    $cont = ctool::postget1curl($link);
    if (!$cont) {
        return;
    }

    // str_get_html() returns false when the document cannot be parsed.
    $xx = str_get_html($cont);
    if (!$xx) {
        return;
    }

    foreach ($xx->find("div.blog-post.saxon-block.saxon-large-grid-post") as $x) {
        echo "<br/>\n ++++++++++++++ " . BaoGame::$totalKeoVe . " +++++++++++++++ ";
        echo "<br/>\n Total New = ".count(BaoGame::$newListInsert);

        $x1 = str_get_html($x->innertext);
        if (!$x1) {
            continue;
        }

        // find(..., 0) returns null when nothing matches, so check every
        // node this card needs before dereferencing it.
        $anchor = $x1->find("a", 0);
        $heading = $x1->find("h3", 0);
        $titleNode = $heading ? $heading->children(0) : null;
        $dateNode = $x1->find("div.post-date", 0);
        if (!$anchor || !$titleNode || !$dateNode) {
            echo "<br/>\n Skip: unexpected card markup";
            continue;
        }

        $link1 = trim($anchor->href);
        echo "<br/>\n LINK = $link1";

        // The thumbnail comes from an inline CSS `url(...)` value.
        $img = \Base\ClassString::getStringBetween2StringType2($x1->innertext, 'url\(', '\);');
        $img = str_replace(['(', ')', ';','"','>'] ,'', $img);
        echo "<br/>\n IMG = $img";

        $title1 = html_entity_decode(strip_tags($titleNode->innertext));
        echo "<br/>\n Title = $title1";

        // The excerpt is only logged, so a missing node is not fatal.
        $excerptNode = $x1->find("div.post-excerpt", 0);
        $sum1 = $excerptNode ? strip_tags(html_entity_decode($excerptNode->innertext)) : '';
        echo "<br/>\n Sum = $sum1";

        $time = trim($dateNode->innertext);
        echo "<br/>\n Time =" . $time ;

        // The date is expected as dd/mm/yyyy in the first ten characters.
        $dateParts = explode("/", substr($time, 0, 10));
        if (count($dateParts) !== 3) {
            echo "<br/>\n Skip: unreadable date";
            continue;
        }
        list($d, $m, $y) = $dateParts;
        $dateOK = "$y-$m-$d";
        $timestamp = strtotime($dateOK);
        if ($timestamp === false) {
            echo "<br/>\n Skip: unreadable date";
            continue;
        }

        // NOTE(review): the first article older than 7 days ends the whole
        // page, which assumes the listing is newest-first. Confirm.
        if ($timestamp < time() - 7 * _NSECOND_DAY) {
            return;
        }
        echo "<br/>\n DateOK = $dateOK";

        // Is the article already stored?
        // NOTE(review): $link1 is scraped from a remote page and ends up
        // inside a SQL condition. addslashes() is a stop-gap; switch to the
        // model's parameter binding if it offers one.
        $daco = 0;
        $obj = new \Base\ModelNewsFile();
        if ($obj->getOneWhere_(" refer = '" . addslashes($link1) . "'")) {
            echo "<br/>\n Da co , ID = $obj->id ";
            $daco = 1;
        }

        // Skip articles already queued during this pass. The original
        // `continue` sat inside the inner loop and had no effect.
        $alreadyQueued = false;
        foreach (BaoGame::$newListInsert as $obj1) {
            if ($obj1->refer == $link1) {
                $alreadyQueued = true;
                break;
            }
        }
        if ($alreadyQueued) {
            continue;
        }

        if (!isCli()) {
            continue;
        }

        // Normalise curly quotes in the title to plain double quotes.
        $title1 = str_replace(["“", "”"], "\"", $title1);

        $obj->name = $title1;
        $obj->refer = $link1;
        $obj->image0 = $img;
        $obj->createdAt = $dateOK;
        $obj->parent = $pid;
        $obj->status = 1;
        $obj->siteid = ClassSetting::$siteId;

        // getContentBai() returns null on failure; keep null fields then.
        // NOTE(review): the body is fetched even for already-stored
        // articles although the result is then discarded. Confirm intent.
        $ttt = getContentBai($link1);
        $obj->summary = is_array($ttt) ? $ttt[0] : null;
        $obj->content = is_array($ttt) ? $ttt[1] : null;

        // Existing rows are left untouched; only unseen articles are queued.
        if (!$daco) {
            BaoGame::$newListInsert[] = $obj;
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement