Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /*
- include => warning
- include "fich";
- include_once "caminho/fich";
- require => error
- require "fich/sdsd/sdsD/f";
- */
- require_once "AmUtil.php";
- //https://codeshare.luismeneses.pt/
- //https://curl.haxx.se/docs/sslcerts.html
- //https://curl.se/docs/caextract.html
- //edit php.ini and add
- //curl.cainfo="C:/<path to my php installation folder>/cacert.pem"
- define ("TEST_URL", "https://apod.nasa.gov/apod/ap141231.html"); // sample APOD page URL (apYYMMDD.html pattern) for manual tests
- // an APOD page embeds its picture with a relative address, e.g. <IMG SRC="image/2011/M78..."
- // prefixed with the site base URL that becomes e.g. https://apod.nasa.gov/apod/image/2011/M78_LDN1622_BarnardsLoop_SEP27_28_Oct15_final1024.jpg
/**
 * Bot that downloads NASA "Astronomy Picture of the Day" (APOD) images.
 *
 * An APOD page lives at https://apod.nasa.gov/apod/apYYMMDD.html
 * (e.g. https://apod.nasa.gov/apod/ap141231.html) and embeds its picture with
 * a relative address such as <IMG SRC="image/2011/....jpg">.
 *
 * All network access goes through AmUtil::consumeUrl().
 */
class ApodBot
{
    const APOD_URL = "https://apod.nasa.gov/apod/";
    // number of digits used for each of the YY, MM and DD parts of a page name
    const DESIRED_NUMBER_OF_DIGITS = 2;
    // marker that precedes the image address in the page source (matched case-insensitively)
    const IMAGE_MARK = "<img src=\"";
    // keys of the array returned by downloadImgForDay()
    const KEY_SIZE = "KEY_SIZE";
    const KEY_FILE_NAME = "KEY_FILE_NAME";
    // date of the first published picture: June 16 1995
    const FIRST_YEAR = 1995;
    const FIRST_MONTH = 6;
    const FIRST_DAY = 16;

    private
        $mCurrentYear,
        $mCurrentMonth,
        $mCurrentDay;

    public function __construct()
    {
        $this->refreshCurrentDate();
    }//__construct

    /**
     * Stores today's date (year, month, day as ints) in the data members.
     *
     * Called again whenever a default date is needed, so that a long-running
     * process does not keep using the date on which the object was created.
     */
    private function refreshCurrentDate()
    {
        // "Y-m-d" -> e.g. "2020-11-05" -> ["2020", "11", "05"]
        $aDateParts = explode("-", date("Y-m-d"));
        $this->mCurrentYear = intval($aDateParts[0]);
        $this->mCurrentMonth = intval($aDateParts[1]);
        $this->mCurrentDay = intval($aDateParts[2]);
    }//refreshCurrentDate

    /**
     * Builds the URL of the APOD page for a date.
     *
     * Any argument left at 0 is replaced by the corresponding part of today's
     * date. Only the last two digits of the year are used, so years 100 years
     * apart map to the same URL (a limitation of the site's naming scheme).
     *
     * @param int $pYear  e.g. 2020 (0 = current year)
     * @param int $pMonth 1..12, 5 -> "05" (0 = current month)
     * @param int $pDay   1..31, 1 -> "01" (0 = current day)
     * @return string e.g. "https://apod.nasa.gov/apod/ap141231.html"
     */
    public function urlForDay(
        int $pYear = 0,
        int $pMonth = 0,
        int $pDay = 0
    ) {
        if (empty($pYear) || empty($pMonth) || empty($pDay)) {
            $this->refreshCurrentDate();
        }

        $iYear = empty($pYear) ? $this->mCurrentYear : $pYear;
        $iMonth = empty($pMonth) ? $this->mCurrentMonth : $pMonth;
        $iDay = empty($pDay) ? $this->mCurrentDay : $pDay;

        // keep only the rightmost DESIRED_NUMBER_OF_DIGITS digits of the year
        $iYearModulus = 10 ** self::DESIRED_NUMBER_OF_DIGITS;
        // zero-padded field, e.g. "%02d"
        $strPart = "%0" . self::DESIRED_NUMBER_OF_DIGITS . "d";

        return sprintf(
            "%sap" . $strPart . $strPart . $strPart . ".html",
            self::APOD_URL,
            abs($iYear) % $iYearModulus,
            $iMonth,
            $iDay
        );
    }//urlForDay

    /**
     * Finds the first <img src="..."> in an APOD page and returns the absolute
     * URL of that image.
     *
     * @param string $pSrcCode HTML source code of the page
     * @return string|false the image URL, or false when the source has no
     *                      image element, the attribute is not closed by a
     *                      double quote, or the address is empty
     */
    public function identifyImgInSourceCode(
        $pSrcCode
    ) {
        if (!is_string($pSrcCode) || $pSrcCode === "") {
            return false;
        }

        $iMarkStart = stripos($pSrcCode, self::IMAGE_MARK);
        if ($iMarkStart === false) {
            return false; // no image on the page
        }

        $iAddressStart = $iMarkStart + strlen(self::IMAGE_MARK);
        // the closing double-quote of the src attribute
        $iAddressEnd = strpos($pSrcCode, "\"", $iAddressStart);
        if ($iAddressEnd === false) {
            return false;
        }

        $strImgUrl = substr(
            $pSrcCode,
            $iAddressStart,
            $iAddressEnd - $iAddressStart
        );
        if ($strImgUrl === "") {
            return false;
        }

        // an address that is already absolute must not get the base URL prepended
        $bAbsolute =
            stripos($strImgUrl, "http://") === 0
            ||
            stripos($strImgUrl, "https://") === 0;

        return $bAbsolute ? $strImgUrl : self::APOD_URL . $strImgUrl;
    }//identifyImgInSourceCode

    /**
     * Extracts the file name (everything after the last forward slash) from
     * the URL of an image.
     *
     * e.g. "https://apod.nasa.gov/apod/image/2011/C2020M3Orion_CharlesBracken1024.jpg"
     *      --> "C2020M3Orion_CharlesBracken1024.jpg"
     *
     * @param string $pUrl
     * @return string|false the file name, or false when the URL is blank,
     *                      has no forward slash, or ends with one
     */
    public function extractFileNameForDownload(
        string $pUrl
    ) {
        // trim the string itself, then test what is left
        $strUrl = trim($pUrl);
        if ($strUrl === "") {
            return false;
        }

        $iLastSlash = strrpos($strUrl, "/");
        if ($iLastSlash === false) {
            return false;
        }

        $strFileName = substr($strUrl, $iLastSlash + 1);

        return (is_string($strFileName) && $strFileName !== "")
            ? $strFileName
            : false;
    }//extractFileNameForDownload

    /**
     * Downloads the picture published on a date and saves it, in the current
     * working directory, under its original file name.
     *
     * Arguments left empty default to today's date (see urlForDay()).
     *
     * @param int $pY year
     * @param int $pM month
     * @param int $pD day
     * @return array [
     *     self::KEY_SIZE      => int|false  bytes written, false on any failure
     *     self::KEY_FILE_NAME => string|false  file name used, false when none
     *                                          could be determined
     * ]
     */
    public function downloadImgForDay(
        $pY = 0,
        $pM = 0,
        $pD = 0
    ) {
        $aFailure = [
            self::KEY_SIZE => false,
            self::KEY_FILE_NAME => false
        ];

        // e.g. https://apod.nasa.gov/apod/ap201110.html
        $strPageUrl = $this->urlForDay(
            intval($pY),
            intval($pM),
            intval($pD)
        );

        // source code of the html page
        $strHtmlSourceCode = AmUtil::consumeUrl($strPageUrl);
        if (!is_string($strHtmlSourceCode) || $strHtmlSourceCode === "") {
            return $aFailure;
        }

        // direct URL of the image; false when the page has no image element
        $strUrlForTheImg = $this->identifyImgInSourceCode($strHtmlSourceCode);
        if (!is_string($strUrlForTheImg)) {
            return $aFailure;
        }

        $strOriginalFilename =
            $this->extractFileNameForDownload($strUrlForTheImg);
        if ($strOriginalFilename === false) {
            return $aFailure;
        }

        // this is the download: the bytes of the published image
        $bytesForTheImg = AmUtil::consumeUrl($strUrlForTheImg);
        if (!is_string($bytesForTheImg) || $bytesForTheImg === "") {
            return [
                self::KEY_SIZE => false,
                self::KEY_FILE_NAME => $strOriginalFilename
            ];
        }

        $iBytesWrittenOrFalse = file_put_contents(
            $strOriginalFilename,
            $bytesForTheImg
        );

        return [
            self::KEY_SIZE => $iBytesWrittenOrFalse,
            self::KEY_FILE_NAME => $strOriginalFilename
        ];
    }//downloadImgForDay

    /**
     * Downloads the pictures of every day of a month, from $pStartDay on.
     *
     * Nothing is downloaded when AmUtil::numberOfDaysInMonth() rejects the
     * month, because the day loop then has no iterations.
     *
     * @param int  $pY
     * @param int  $pM
     * @param int  $pStartDay  first day to download
     * @param bool $pbFeedback when true, echoes one message per day
     * @param int  $pEndDay    last day to download; 0 = last day of the month
     */
    public function downloadAllImagesForMonth(
        $pY,
        $pM,
        $pStartDay = 1,
        $pbFeedback = true,
        $pEndDay = 0
    ) {
        // computed once: it does not change while iterating
        $iLastDay = AmUtil::numberOfDaysInMonth($pY, $pM);
        if (!empty($pEndDay) && $pEndDay < $iLastDay) {
            $iLastDay = $pEndDay;
        }

        for (
            $iCurrentDay = $pStartDay;
            $iCurrentDay <= $iLastDay;
            $iCurrentDay++
        ) {
            $oRet = $this->downloadImgForDay($pY, $pM, $iCurrentDay);

            if ($pbFeedback) {
                echo sprintf(
                    "Current date=%d-%d-%d\nSave %d bytes in file: %s".PHP_EOL,
                    $pY, $pM, $iCurrentDay,
                    $oRet[self::KEY_SIZE],
                    $oRet[self::KEY_FILE_NAME]
                );
            }//if feedback
        }//for
    }//downloadAllImagesForMonth

    /**
     * Downloads the entire collection: every day from the first picture
     * (June 16 1995) up to and including today.
     */
    public function ripThemAll()
    {
        $this->refreshCurrentDate();
        $iLastYear = $this->mCurrentYear;
        $iLastMonth = $this->mCurrentMonth;
        $iLastDay = $this->mCurrentDay;

        for ($iYear = self::FIRST_YEAR; $iYear <= $iLastYear; $iYear++) {
            // the first and the current year are only partially covered
            $iFromMonth = ($iYear === self::FIRST_YEAR) ? self::FIRST_MONTH : 1;
            $iToMonth = ($iYear === $iLastYear) ? $iLastMonth : 12;

            for ($iMonth = $iFromMonth; $iMonth <= $iToMonth; $iMonth++) {
                $bFirstMonthEver =
                    $iYear === self::FIRST_YEAR && $iMonth === self::FIRST_MONTH;
                $bCurrentMonth =
                    $iYear === $iLastYear && $iMonth === $iLastMonth;

                $this->downloadAllImagesForMonth(
                    $iYear,
                    $iMonth,
                    $bFirstMonthEver ? self::FIRST_DAY : 1,
                    true,
                    $bCurrentMonth ? $iLastDay : 0 // never ask for future days
                );
            }//for month
        }//for year
    }//ripThemAll
}//ApodBot
// Usage examples, kept for reference:
//
//   $bot = new ApodBot();
//   echo $bot->urlForDay().PHP_EOL;               // page URL for today
//   echo $bot->urlForDay(2019, 12, 25).PHP_EOL;   // page URL for a given date
//
//   //$sourceCode = AmUtil::consumeUrl(TEST_URL);
//   $sourceCode = AmUtil::consumeUrl($bot->urlForDay());
//   echo $sourceCode;
//   echo $bot->identifyImgInSourceCode($sourceCode).PHP_EOL;
//
//   $bot->downloadImgForDay(2015, 12, 23);
//   $bot->downloadAllImagesForMonth(1999, 1);
//   $bot->ripThemAll();                           // single threaded
//
// Sketch of an API that was planned but is not implemented in ApodBot:
//
//   $bot->getImageOfTheDay();               // image of the day (today)
//   $bot->getImageOfTheDay(2019, 12, 25);   // image on that date
//   $bot->getEntireCollection();
//   $bot->getAllImagemFromYear(2000);
//   $bot->getAllImagemFromMonth(2000, 4);

// Daemon loop: download today's picture, then pause for one day, forever.
while (true) {
    // A new bot per iteration: ApodBot reads "today" in its constructor, so
    // reusing a single instance would request the same date after every pause.
    $bot = new ApodBot();
    $bot->downloadImgForDay();

    echo "Will continue in 24 hours time".PHP_EOL;
    sleep(60*60*24); //1 day pause
}//while
- **
- <?php
/**
 * Static helpers: calendar arithmetic and HTTP(S) download via cURL.
 */
class AmUtil
{
    // value returned by numberOfDaysInMonth() for a month outside 1..12
    const IMPOSSIBLE_MONTH = -1;
    // sent as the User-Agent header of every request
    const BOT_SIGNATURE = "For educational tests only";
    // upper bounds, in seconds, so that a stalled server cannot block forever
    const CONNECT_TIMEOUT_SECONDS = 30;
    const TRANSFER_TIMEOUT_SECONDS = 300;

    /**
     * Gregorian leap-year rule: divisible by 400, or by 4 but not by 100.
     *
     * @param int $pY year
     * @return bool
     */
    public static function leapYear(
        $pY
    ) {
        return ($pY % 400 === 0) || ($pY % 4 === 0 && ($pY % 100 !== 0));
    }//leapYear

    /**
     * Number of days of a month of a year.
     *
     * @param int $pY year (only relevant for February)
     * @param int $pM month, 1..12
     * @return int 28..31, or self::IMPOSSIBLE_MONTH for any other month value
     */
    public static function numberOfDaysInMonth(
        $pY,
        $pM
    ) {
        switch ($pM) {
            case 1:
            case 3:
            case 5:
            case 7:
            case 8:
            case 10:
            case 12:
                return 31;
            case 4:
            case 6:
            case 9:
            case 11:
                return 30;
            case 2:
                return self::leapYear($pY) ? 29 : 28;
            default:
                return self::IMPOSSIBLE_MONTH;
        }//switch
    }//numberOfDaysInMonth

    /**
     * Performs an HTTP GET and returns the response body.
     *
     * @param string $pUrl can be an HTML page, can be a JPG, ...
     * @return string|false the bytes of the response body; false when the URL
     *                      is not a non-blank string, cURL cannot be
     *                      initialised, the transfer fails, or the server
     *                      answers with an HTTP status >= 400
     */
    public static function consumeUrl(
        $pUrl
    ) {
        if (!is_string($pUrl) || trim($pUrl) === "") {
            return false;
        }

        $ch = curl_init($pUrl);
        if (!$ch) {
            return false;
        }

        curl_setopt_array($ch, [
            // makes it explicit that the request will happen using HTTP GET
            CURLOPT_HTTPGET => true,
            // verifies the server's SSL certificate; needs a CA bundle
            // (e.g. cacert.pem referenced by curl.cainfo in php.ini)
            CURLOPT_SSL_VERIFYPEER => true,
            // sets a user agent string for our software
            CURLOPT_USERAGENT => self::BOT_SIGNATURE,
            // curl_exec returns the data consumed at the URL
            // instead of just true/false
            CURLOPT_RETURNTRANSFER => true,
            // "" = accept every encoding cURL supports and decode automatically
            CURLOPT_ENCODING => "",
            // treat HTTP errors as failures, so that an error page is never
            // handed back as if it were the requested content
            CURLOPT_FAILONERROR => true,
            CURLOPT_CONNECTTIMEOUT => self::CONNECT_TIMEOUT_SECONDS,
            CURLOPT_TIMEOUT => self::TRANSFER_TIMEOUT_SECONDS
        ]);

        return curl_exec($ch);
    }//consumeUrl
}//AmUtil
Advertisement
Add Comment
Please, Sign In to add comment