Advertisement
am_dot_com

ACA 20201124

Nov 24th, 2020 (edited)
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 5.11 KB | None | 0 0
  1. <?php
  2.  
  3. /*
  4.  * testing with
  5.  * https://boards.4chan.org/wg/
  6.  * https://boards.4chan.org/wg/2
  7.  * ...
  8.  * https://boards.4chan.org/wg/10
  9.  * Note: .../wg/1 and any page number greater than 10 respond with HTTP 404.
  10.  */
  11.  
  12. require_once "AmUtil.php";
  13.  
  /**
   * Downloads the HTML of every index page of one 4chan board.
   *
   * On construction the object builds the list of page URLs for the board
   * and immediately fetches each of them through AmUtil::consumeUrl(), so
   * creating an instance performs one HTTP request per page.
   */
  class FourChanBot {
      const BASE_URL = "https://boards.4chan.org";
      const MIN_PAGE = 1;
      const MAX_PAGE = 10;

      // Board short name, e.g. "wg".
      private $mBoardName;

      // List of page URLs, e.g. ["https://.../wg/", ..., "https://.../wg/10"].
      private $mBoardValidUrls = [];

      // Map of page URL => fetched content, e.g. ["https://.../wg/" => "<html>...</html>", ...].
      // Initialised to an empty array so the getter never hands out null.
      private $mBoardHtmlForValidUrls = [];

      /**
       * @param string $pStrBoardName Board short name used as the URL path segment (e.g. "wg").
       */
      public function __construct(
          string $pStrBoardName
      )
      {
          $this->mBoardName = $pStrBoardName;

          // Pure string building, no I/O.
          $this->mBoardValidUrls = $this->buildAllValidBoardUrls();

          // Slow part: requests every page of the board. The method returns
          // nothing; it fills $mBoardHtmlForValidUrls as a side effect.
          $this->buildHtmlOfAllBoardPages();
      }//__construct

      /**
       * Builds the URL of every page from MIN_PAGE to MAX_PAGE.
       *
       * The first page has no page number in its URL ("<base>/<board>/"),
       * every other page ends in its number ("<base>/<board>/2", ...).
       *
       * @return array List of URL strings, in page order.
       */
      public function buildAllValidBoardUrls(): array
      {
          $aRet = [];

          for ($iPage = self::MIN_PAGE; $iPage <= self::MAX_PAGE; $iPage++) {
              $aRet[] = sprintf(
                  "%s/%s/%s",
                  self::BASE_URL,
                  $this->mBoardName,
                  // Compare against the constant rather than a literal 1 so the
                  // "first page has no suffix" rule follows MIN_PAGE.
                  $iPage === self::MIN_PAGE ? "" : $iPage
              );
          }//for

          return $aRet;
      }//buildAllValidBoardUrls

      /**
       * Fetches every URL in $mBoardValidUrls and stores the result keyed by URL.
       *
       * The stored value is whatever AmUtil::consumeUrl() returns for that URL,
       * so a failed request is recorded as-is rather than skipped.
       * Returns nothing.
       */
      public function buildHtmlOfAllBoardPages()
      {
          foreach ($this->mBoardValidUrls as $strOneValidUrl) {
              $this->mBoardHtmlForValidUrls[$strOneValidUrl] =
                  AmUtil::consumeUrl($strOneValidUrl);
          }//foreach
      }//buildHtmlOfAllBoardPages

      /**
       * @return string The board short name given to the constructor.
       */
      public function getMBoardName(): string
      {
          return $this->mBoardName;
      }//getMBoardName

      /**
       * @return array List of page URLs built for this board.
       */
      public function getMBoardValidUrls(): array
      {
          return $this->mBoardValidUrls;
      }//getMBoardValidUrls

      /**
       * @return array Map of page URL => fetched content; empty if nothing was fetched.
       */
      public function getMBoardHtmlForValidUrls(): array
      {
          return $this->mBoardHtmlForValidUrls;
      }//getMBoardHtmlForValidUrls
  }//FourChanBot
  90.  
  // Driver: build a bot for the "wg" board (this fetches every board page
  // during construction) and dump the URL => HTML map it collected.
  $bot = new FourChanBot("wg");

  $htmlByUrl = $bot->getMBoardHtmlForValidUrls();
  var_dump($htmlByUrl);
  98.  
  99. //**
  100.  
  101. <?php
  102.  
  /**
   * Small static helper collection: calendar arithmetic and URL fetching.
   */
  class AmUtil{
      // Returned by numberOfDaysInMonth() when the month is not 1..12.
      const IMPOSSIBLE_MONTH = -1;

      // Sent as the User-Agent header by consumeUrl().
      const BOT_SIGNATURE = "For educational tests only";

      /**
       * Tells whether a year is a leap year: divisible by 400, or divisible
       * by 4 but not by 100.
       *
       * @param int $pY Year number.
       * @return bool True when $pY is a leap year.
       */
      public static function leapYear(
          $pY
      ): bool {
          return ($pY%400 === 0) || ($pY%4===0 && ($pY%100!==0));
      }//leapYear

      /**
       * Returns the number of days in a month of a given year.
       *
       * @param int $pY Year number (only matters for February).
       * @param int $pM Month number, 1 (January) to 12 (December).
       * @return int 28..31, or self::IMPOSSIBLE_MONTH when $pM is not a valid month.
       */
      public static function numberOfDaysInMonth(
          $pY,
          $pM
      ): int {
          switch($pM){
              // Every label ends in a colon; the original "case 10;" used the
              // semicolon terminator, which was a typo.
              case 1: case 3: case 5: case 7: case 8: case 10: case 12: return 31;
              case 4: case 6: case 9: case 11: return 30;
              case 2: return (self::leapYear($pY) ? 29 : 28);
              default: return self::IMPOSSIBLE_MONTH;
          }//switch
      }//numberOfDaysInMonth

      /**
       * Performs an HTTP GET on a URL and returns the response body.
       *
       * @param string $pUrl Address to fetch; can be an HTML page, a JPG, ...
       * @return string|false The bytes returned by curl_exec(), or false when
       *                      the cURL handle could not be created or the
       *                      transfer failed.
       */
      public static function consumeUrl(
          $pUrl
      ){
          $ch = curl_init($pUrl);
          if (!$ch){
              return false;
          }//if

          curl_setopt_array($ch, [
              // Make it explicit that the request uses HTTP GET.
              CURLOPT_HTTPGET => true,

              // Keep verification of the peer's SSL certificate ON.
              // (Setting this to false would disable it, e.g. when no
              // cacert.pem is configured, at the cost of security.)
              CURLOPT_SSL_VERIFYPEER => true,

              // Identify this software with a user agent string.
              CURLOPT_USERAGENT => self::BOT_SIGNATURE,

              // Make curl_exec() return the fetched data instead of
              // just true/false.
              CURLOPT_RETURNTRANSFER => true,

              // Empty string: accept every encoding cURL supports and
              // decode the response automatically.
              CURLOPT_ENCODING => "",

              // CURLOPT_BINARYTRANSFER was dropped: it is deprecated and has
              // no effect; CURLOPT_RETURNTRANSFER already yields raw bytes.
          ]);

          return curl_exec($ch);
      }//consumeUrl
  }//AmUtil
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement