Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /*
- * testing with
- * https://boards.4chan.org/wg/
- * https://boards.4chan.org/wg/2
- * ...
- * https://boards.4chan.org/wg/10
- * 404 for .../1 and for URLs ending in numbers >10
- */
- require_once "AmUtil.php";
/**
 * Downloads every index page of a single 4chan board.
 *
 * Constructing an instance builds the list of page URLs for the board and
 * then immediately fetches each of them through AmUtil::consumeUrl(), so
 * construction performs one HTTP request per page.
 */
class FourChanBot {
    /** Root URL shared by every board. */
    const BASE_URL = "https://boards.4chan.org";

    /** Number of the first index page; it is served at the bare board URL. */
    const MIN_PAGE = 1;

    /** Number of the last index page requested by this bot. */
    const MAX_PAGE = 10;

    /** @var string Board short name, e.g. "wg". */
    private $mBoardName;

    /** @var string[] Page URLs, e.g. ["https://.../wg/", ..., "https://.../wg/10"]. */
    private $mBoardValidUrls = [];

    /**
     * Page URL => value returned by AmUtil::consumeUrl() for that URL,
     * e.g. ["https://.../wg/" => "<html>...</html>", ...].
     *
     * Starts as an empty array so the getter always returns an array,
     * even before any page has been fetched.
     *
     * @var array
     */
    private $mBoardHtmlForValidUrls = [];

    /**
     * @param string $pStrBoardName Board short name, e.g. "wg".
     */
    public function __construct(
        string $pStrBoardName
    )
    {
        $this->mBoardName = $pStrBoardName;

        // Cheap: pure string formatting, no network access.
        $this->mBoardValidUrls = $this->buildAllValidBoardUrls();

        // Potentially slow: fetches every page of the board. The method
        // returns nothing; it fills $mBoardHtmlForValidUrls as a side effect.
        $this->buildHtmlOfAllBoardPages();
    }//__construct

    /**
     * Builds the URL of every index page from MIN_PAGE to MAX_PAGE.
     *
     * The first page has no numeric suffix ("https://.../wg/"); every other
     * page ends in its page number ("https://.../wg/2" ... "https://.../wg/10").
     *
     * @return string[] URLs in ascending page order.
     */
    public function buildAllValidBoardUrls(): array
    {
        $aRet = [];
        for ($iPage = self::MIN_PAGE; $iPage <= self::MAX_PAGE; $iPage++) {
            // The first page lives at the bare board URL, so its suffix is empty.
            $strPageSuffix = ($iPage === self::MIN_PAGE) ? "" : $iPage;
            $aRet[] = sprintf(
                "%s/%s/%s",
                self::BASE_URL,
                $this->mBoardName,
                $strPageSuffix
            );
        }//for
        return $aRet;
    }//buildAllValidBoardUrls

    /**
     * Fetches every URL in $mBoardValidUrls and stores each result in
     * $mBoardHtmlForValidUrls, keyed by the URL it came from, e.g.
     * ["https://boards.4chan.org/wg/2" => "<html>...</html>"].
     *
     * The value returned by AmUtil::consumeUrl() is stored as-is, so a
     * failed request is recorded as false rather than skipped.
     *
     * @return void
     */
    public function buildHtmlOfAllBoardPages()
    {
        foreach ($this->mBoardValidUrls as $strOneValidUrl) {
            $this->mBoardHtmlForValidUrls[$strOneValidUrl] =
                AmUtil::consumeUrl($strOneValidUrl);
        }//foreach
    }//buildHtmlOfAllBoardPages

    /**
     * @return string Board short name given to the constructor.
     */
    public function getMBoardName(): string
    {
        return $this->mBoardName;
    }//getMBoardName

    /**
     * @return string[] Page URLs built for this board.
     */
    public function getMBoardValidUrls(): array
    {
        return $this->mBoardValidUrls;
    }//getMBoardValidUrls

    /**
     * @return array Page URL => fetched content (or false for a failed fetch).
     */
    public function getMBoardHtmlForValidUrls(): array
    {
        return $this->mBoardHtmlForValidUrls;
    }//getMBoardHtmlForValidUrls
}//FourChanBot
// Demo driver: constructing the bot fetches every index page of the "wg"
// board; the resulting URL => HTML map is then dumped to standard output.
$bot = new FourChanBot("wg");
$htmlByUrl = $bot->getMBoardHtmlForValidUrls();
var_dump($htmlByUrl);
- //**
- <?php
/**
 * Small collection of static helpers: calendar arithmetic and a cURL-based
 * URL downloader.
 */
class AmUtil{
    /** Sentinel returned by numberOfDaysInMonth() for a month outside 1..12. */
    const IMPOSSIBLE_MONTH = -1;

    /** User-Agent string sent with every request made by consumeUrl(). */
    const BOT_SIGNATURE = "For educational tests only";

    /**
     * Tells whether a year is a leap year: divisible by 400, or divisible
     * by 4 but not by 100.
     *
     * @param int $pY Year number.
     * @return bool True when $pY is a leap year.
     */
    public static function leapYear(
        $pY
    ): bool
    {
        $bDivisibleBy400 = ($pY % 400 === 0);
        $bDivisibleBy4Not100 = ($pY % 4 === 0) && ($pY % 100 !== 0);
        return $bDivisibleBy400 || $bDivisibleBy4Not100;
    }//leapYear

    /**
     * Returns the number of days in a given month of a given year.
     *
     * @param int $pY Year number; only consulted for February.
     * @param int $pM Month number, 1 (January) to 12 (December).
     * @return int 28, 29, 30 or 31; self::IMPOSSIBLE_MONTH when $pM is not
     *             a month number.
     */
    public static function numberOfDaysInMonth(
        $pY,
        $pM
    ): int
    {
        switch ($pM) {
            case 1: case 3: case 5: case 7: case 8: case 10: case 12:
                return 31;
            case 4: case 6: case 9: case 11:
                return 30;
            case 2:
                return self::leapYear($pY) ? 29 : 28;
            default:
                return self::IMPOSSIBLE_MONTH;
        }//switch
    }//numberOfDaysInMonth

    /**
     * Downloads whatever is served at a URL using an HTTP GET request.
     *
     * @param string $pUrl Address to fetch; can be an HTML page, a JPG, ...
     * @return string|false The bytes returned by curl_exec(), or false when
     *                      the cURL handle could not be created or the
     *                      transfer failed.
     */
    public static function consumeUrl(
        $pUrl
    ){
        $ch = curl_init($pUrl);
        if ($ch === false) {
            return false;
        }

        curl_setopt_array($ch, [
            // Makes it explicit that the request uses HTTP GET.
            CURLOPT_HTTPGET => true,
            // Keeps verification of the peer's SSL certificate enabled.
            CURLOPT_SSL_VERIFYPEER => true,
            // Identifies this software to the server.
            CURLOPT_USERAGENT => self::BOT_SIGNATURE,
            // Makes curl_exec() return the downloaded data instead of
            // just true/false.
            CURLOPT_RETURNTRANSFER => true,
            // An empty string lets cURL handle encoded responses
            // automatically.
            CURLOPT_ENCODING => "",
            // CURLOPT_BINARYTRANSFER is intentionally not set: it is a
            // deprecated no-op, and CURLOPT_RETURNTRANSFER already
            // returns the raw bytes.
        ]);

        return curl_exec($ch);
    }//consumeUrl
}//AmUtil
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement