Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Download a URL with cURL and hand back the response body.
 *
 * The request is sent over IPv4 only, does not follow redirects, accepts any
 * content encoding cURL supports, and identifies itself with a fixed Firefox
 * user-agent string. Response headers are not included in the result.
 *
 * @param string $url Address to fetch.
 * @return string|false Response body, or false when the transfer fails.
 */
function getHtml($url)
{
    $options = [
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => false,
        // An empty string asks cURL to advertise every encoding it can decode.
        CURLOPT_ENCODING       => '',
        CURLOPT_IPRESOLVE      => CURL_IPRESOLVE_V4,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1",
    ];

    $session = curl_init();
    curl_setopt_array($session, $options);
    $body = curl_exec($session);
    curl_close($session);

    return $body;
}
/**
 * Return the first node matched by an XPath expression.
 *
 * @param DOMXPath $finder     XPath evaluator bound to the parsed page.
 * @param string   $expression XPath expression to evaluate.
 * @return DOMNode|null The first match, or null when nothing matches or the
 *                      expression is rejected.
 */
function firstMatch(DOMXPath $finder, $expression)
{
    $nodes = $finder->query($expression);

    // query() yields false for a malformed expression; item() yields null past the end.
    return ($nodes === false) ? null : $nodes->item(0);
}

/**
 * Download one recipe page and scrape its fields into an array.
 *
 * Every value is returned as raw text exactly as found in the page; nothing is
 * SQL-escaped here, because saveRecipeSqlite() binds the values as statement
 * parameters. Elements that are missing from the page produce empty strings
 * (0 for the numeric fields, null for the image URL) instead of a
 * null-dereference, so a page that failed to download yields an "empty" recipe.
 *
 * @param string $url Absolute URL of the recipe page.
 * @return array Keys: name, recipeUrl, imageUrl (string|null), author,
 *               flavors ("name@maker@percentage;" per flavour row), notes,
 *               notes2, stepping (int), vg (int), pg (int).
 */
function getRecipeFromUrl($url)
{
    echo "$url<br>";
    $html = getHtml($url);

    $dom = new DOMDocument();
    $internalErrors = libxml_use_internal_errors(true);
    // loadHTML() rejects empty input, so a failed download is treated as an empty page.
    if (is_string($html) && $html !== '') {
        $dom->loadHTML($html);
    }
    // Drop the buffered parser warnings so they do not pile up across pages.
    libxml_clear_errors();
    libxml_use_internal_errors($internalErrors);
    $finder = new DOMXPath($dom);

    $recipe = [];

    // Recipe name.
    $titleNode = firstMatch($finder, "//*[@class='recipe-panel-title']");
    $recipe['name'] = ($titleNode !== null) ? $titleNode->textContent : '';

    // Page the recipe was scraped from.
    $recipe['recipeUrl'] = $url;

    // Recipe image URL (null when the page has no title image).
    $imageNode = firstMatch($finder, "//*[@class='pull-left recipe-title-image']");
    $recipe['imageUrl'] = ($imageNode instanceof DOMElement) ? $imageNode->getAttribute('src') : null;

    // Recipe author, without the copyright sign shown next to it.
    $authorNode = firstMatch($finder, "//*[@class='btn btn-success btn-sm']");
    $recipe['author'] = ($authorNode !== null) ? trim(str_replace('©', '', $authorNode->textContent)) : '';

    // Flavours: one row per element whose class is exactly "even" or "odd".
    $recipe['flavors'] = '';
    $rows = $finder->query("//*[@class='even' or @class='odd']");
    if ($rows !== false) {
        foreach ($rows as $row) {
            $links = $row->getElementsByTagName('a');
            $makerNode = $links->item(0);
            $nameNode = $links->item(1);
            $percentNode = $row->getElementsByTagName('div')->item(0);

            // Apostrophes are removed from the flavour name, as before, so the
            // stored flavour list keeps the same format.
            $flavourName = ($nameNode !== null) ? str_replace('\'', '', $nameNode->textContent) : '';
            $flavourMaker = ($makerNode !== null) ? $makerNode->textContent : '';
            $flavourPercentage = ($percentNode !== null) ? $percentNode->textContent : '';

            $recipe['flavors'] .= "$flavourName@$flavourMaker@$flavourPercentage;";
        }
    }

    // First notes block.
    $descriptionNode = firstMatch($finder, "//*[@class='description']");
    $recipe['notes'] = ($descriptionNode !== null) ? $descriptionNode->nodeValue : '';

    // Stepping, VG and the second notes block all live in colspan="5" cells.
    $wideCells = $finder->query("//*[@colspan='5']");
    $summaryNode = ($wideCells === false) ? null : $wideCells->item(0);
    $extraNode = ($wideCells === false) ? null : $wideCells->item(1);
    $recipe['notes2'] = ($extraNode !== null) ? $extraNode->nodeValue : '';

    // The second and third numbers in the first wide cell are stepping and VG.
    $numbers = [];
    if ($summaryNode !== null) {
        preg_match_all('!\d+(?:\.\d+)?!', $summaryNode->textContent, $matches);
        $numbers = $matches[0];
    }
    $recipe['stepping'] = isset($numbers[1]) ? (int) $numbers[1] : 0;
    $recipe['vg'] = isset($numbers[2]) ? (int) $numbers[2] : 0;
    $recipe['pg'] = abs($recipe['vg'] - 100);

    return $recipe;
}

/**
 * Insert one scraped recipe into the "recipes" table of the SQLite database.
 *
 * All scraped values are bound as statement parameters, so quotes in names,
 * authors or notes can neither break nor alter the INSERT. When the recipe has
 * an image URL the image is downloaded and stored as a blob; a failed download
 * stores NULL. Failures to prepare or execute the statement are reported as
 * warnings and the recipe is skipped.
 *
 * @param array  $recipe Recipe as returned by getRecipeFromUrl().
 * @param string $file   Path of the SQLite database file.
 * @return void
 */
function saveRecipeSqlite($recipe, $file = 'e_juicer.sqlite')
{
    $image = null;
    if (isset($recipe['imageUrl']) && $recipe['imageUrl'] !== '') {
        $downloaded = file_get_contents($recipe['imageUrl']);
        if ($downloaded !== false) {
            $image = $downloaded;
        }
    }

    $db = new SQLite3($file);
    $sql = 'INSERT INTO recipes (parentID, author, name, versionNumber, creationDateTime, revisionAuthor, '
        . 'revisionName, revisionVersion, flavors, PG, VG, nicotine, stepping, notes, notes2, recipeKind, '
        . 'recipeUrl, imageUrl, image, starRating, locked, checked, water, ethanol) '
        . 'VALUES (NULL, :author, :name, 1, NULL, NULL, NULL, NULL, :flavors, :pg, :vg, 0, :stepping, '
        . ':notes, :notes2, NULL, :recipeUrl, :imageUrl, :image, 0, 1, 0, 0, 0)';

    $query = $db->prepare($sql);
    if ($query === false) {
        trigger_error('saveRecipeSqlite: cannot prepare INSERT: ' . $db->lastErrorMsg(), E_USER_WARNING);
        $db->close();
        return;
    }

    $query->bindValue(':author', (string) $recipe['author'], SQLITE3_TEXT);
    $query->bindValue(':name', (string) $recipe['name'], SQLITE3_TEXT);
    $query->bindValue(':flavors', (string) $recipe['flavors'], SQLITE3_TEXT);
    $query->bindValue(':pg', (int) $recipe['pg'], SQLITE3_INTEGER);
    $query->bindValue(':vg', (int) $recipe['vg'], SQLITE3_INTEGER);
    $query->bindValue(':stepping', (int) $recipe['stepping'], SQLITE3_INTEGER);
    $query->bindValue(':notes', (string) $recipe['notes'], SQLITE3_TEXT);
    $query->bindValue(':notes2', (string) $recipe['notes2'], SQLITE3_TEXT);
    $query->bindValue(':recipeUrl', (string) $recipe['recipeUrl'], SQLITE3_TEXT);
    // A missing image URL is stored as an empty string, as it was before.
    $query->bindValue(':imageUrl', isset($recipe['imageUrl']) ? (string) $recipe['imageUrl'] : '', SQLITE3_TEXT);
    $query->bindValue(':image', $image, SQLITE3_BLOB);

    if ($query->execute() === false) {
        trigger_error('saveRecipeSqlite: INSERT failed: ' . $db->lastErrorMsg(), E_USER_WARNING);
    }

    $query->close();
    $db->close();
}
/**
 * Crawl the alltheflavors.com recipe listing and store every linked recipe.
 *
 * The page to resume from is read from page.txt and rewritten after each
 * listing page has been processed, so an interrupted run can be restarted.
 * A missing, empty or non-numeric page.txt starts the crawl at page 1;
 * surrounding whitespace in the file is ignored so it never reaches the
 * request URL. If a listing page cannot be downloaded the crawl stops with a
 * warning and leaves the checkpoint untouched, so that page is retried on the
 * next run instead of being skipped.
 *
 * @param int $lastPage Number of the last listing page to crawl.
 * @return void
 */
function parseRecipes($lastPage = 209)
{
    $stateFile = 'page.txt';

    $saved = is_readable($stateFile) ? file_get_contents($stateFile) : false;
    $npage = ($saved === false) ? 1 : max(1, (int) trim($saved));

    // Checked before each fetch, so a checkpoint already past the end fetches nothing.
    while ($npage <= $lastPage) {
        $html = getHtml("https://alltheflavors.com/recipes?page=" . $npage);
        if (!is_string($html) || $html === '') {
            trigger_error("parseRecipes: could not download listing page $npage", E_USER_WARNING);
            return;
        }

        $dom = new DOMDocument();
        $internalErrors = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        // Drop the buffered parser warnings so they do not pile up across pages.
        libxml_clear_errors();
        libxml_use_internal_errors($internalErrors);

        $finder = new DOMXPath($dom);
        $links = $finder->query("//*[@class='recipe-line-title']");
        if ($links !== false) {
            foreach ($links as $link) {
                // The href is appended to the site origin to form the recipe URL.
                saveRecipeSqlite(getRecipeFromUrl("https://alltheflavors.com" . $link->getAttribute('href')));
            }
        }

        $npage++;
        file_put_contents($stateFile, $npage);
    }
}
// Start the crawl when the script is executed.
parseRecipes();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement