Advertisement
Keltere

Untitled

May 21st, 2017
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.35 KB | None | 0 0
  1. <?php
  2.  
  /**
   * Download a URL with cURL and return the raw response body.
   *
   * The request does not follow redirects, accepts every content encoding
   * cURL supports (the body is decoded transparently), resolves host names
   * over IPv4 only and identifies itself with a fixed browser user agent.
   *
   * @param string $url Address to fetch.
   *
   * @return string|false The response body, or false when the transfer could
   *                      not be completed. On failure an E_USER_WARNING that
   *                      carries the cURL error message is raised.
   */
  function getHtml($url){
      $curl_handle = curl_init();
      if ($curl_handle === false) {
          trigger_error('getHtml: unable to initialise cURL', E_USER_WARNING);
          return false;
      }

      curl_setopt_array($curl_handle, [
          CURLOPT_URL            => $url,
          CURLOPT_HEADER         => false,
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_FOLLOWLOCATION => false,
          CURLOPT_ENCODING       => '',
          CURLOPT_IPRESOLVE      => CURL_IPRESOLVE_V4,
          CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1",
          // Without explicit limits a stalled server blocks the caller forever.
          CURLOPT_CONNECTTIMEOUT => 15,
          CURLOPT_TIMEOUT        => 60,
      ]);

      $html = curl_exec($curl_handle);
      if ($html === false) {
          // Surface the reason instead of handing back a bare false.
          trigger_error(
              sprintf('getHtml: request for %s failed: %s', $url, curl_error($curl_handle)),
              E_USER_WARNING
          );
      }

      curl_close($curl_handle);

      return $html;
  }
  16.  
  /**
   * Download one recipe page and extract its fields into an array.
   *
   * The URL is echoed (followed by "<br>") before the page is fetched with
   * getHtml(). Elements are located by exact class-attribute match. Any
   * element that is missing yields an empty value instead of a null
   * dereference, and an empty or failed download yields a recipe whose
   * scraped fields are all empty.
   *
   * Returned keys:
   *  - name, recipeUrl, notes, notes2: text already passed through
   *    SQLite3::escapeString();
   *  - imageUrl: "src" of the title image, or NULL when there is none (raw);
   *  - author: button text with "©" removed and trimmed (raw, NOT escaped);
   *  - flavors: "name@maker@percentage;" per even/odd row, with apostrophes
   *    stripped from the name only (NOT escaped);
   *  - stepping, vg: 2nd and 3rd number found in the first colspan=5 cell,
   *    cast to int (0 when absent);
   *  - pg: abs(vg - 100).
   *
   * @param string $url Absolute URL of the recipe page.
   *
   * @return array The recipe fields described above.
   */
  function getRecipeFromUrl($url){

      echo "$url<br>";

      $html = getHtml($url);

      $dom = new DomDocument();

      $internalErrors = libxml_use_internal_errors(true);
      // loadHTML() rejects an empty source (ValueError on PHP 8), so a failed
      // download is simply left as an empty document.
      if (is_string($html) && $html !== '') {
          $dom->loadHTML($html);
      }
      // Buffered parse errors are never read; drop them so they do not pile
      // up in memory while thousands of pages are processed.
      libxml_clear_errors();
      libxml_use_internal_errors($internalErrors);

      $finder = new DomXPath($dom);

      // First element whose class attribute equals $classname, or null.
      $firstByClass = function ($classname) use ($finder) {
          return $finder->query("//*[@class='" . $classname . "']")->item(0);
      };
      // Text of a node, or '' when the node does not exist.
      $textOf = function ($node) {
          return $node !== null ? $node->textContent : '';
      };

      $recipe = [];

      //recipe name
      $recipe['name'] = SQLite3::escapeString($textOf($firstByClass("recipe-panel-title")));

      //recipeUrl
      $recipe['recipeUrl'] = SQLite3::escapeString($url);

      //recipe image url
      $imageNode = $firstByClass("pull-left recipe-title-image");
      $recipe['imageUrl'] = $imageNode !== null ? $imageNode->getAttribute('src') : NULL;

      //recipe author
      $recipe['author'] = trim(str_replace('©', '', $textOf($firstByClass("btn btn-success btn-sm"))));

      //recipe flavours: one "name@maker@percentage;" entry per even/odd row
      $recipe['flavors'] = '';
      foreach ($finder->query("//*[@class='even' or @class='odd']") as $row) {
          $links = $row->getElementsByTagName('a');
          $blocks = $row->getElementsByTagName('div');

          // item() returns null past the end, which $textOf maps to ''.
          $flavour_name = str_replace('\'', '', $textOf($links->item(1)));
          $flavour_maker = $textOf($links->item(0));
          $flavour_percentage = $textOf($blocks->item(0));

          $recipe['flavors'] .= "$flavour_name@$flavour_maker@$flavour_percentage;";
      }

      //note1
      $recipe['notes'] = SQLite3::escapeString($textOf($firstByClass("description")));

      //stepping, vg and notes2
      $wideCells = $finder->query("//*[@colspan='5']");

      $secondCell = $wideCells->item(1);
      $recipe['notes2'] = $secondCell !== null ? SQLite3::escapeString($secondCell->nodeValue) : '';

      // Every integer or decimal in the first wide cell, in document order.
      preg_match_all('!\d+(?:\.\d+)?!', $textOf($wideCells->item(0)), $matches);

      $recipe['stepping'] = isset($matches[0][1]) ? (int)$matches[0][1] : 0;
      $recipe['vg'] = isset($matches[0][2]) ? (int)$matches[0][2] : 0;
      $recipe['pg'] = abs($recipe['vg'] - 100);

      return $recipe;
  }
  85.  
  86.  
  /**
   * Insert one recipe into the "recipes" table of e_juicer.sqlite.
   *
   * The image referenced by $recipe['imageUrl'] is downloaded and stored as
   * a BLOB; NULL is stored when there is no URL or the download fails.
   *
   * Every value is bound as a statement parameter, so scraped text can no
   * longer break or alter the SQL. The fields name, recipeUrl, notes and
   * notes2 arrive pre-escaped with SQLite3::escapeString(); their doubled
   * apostrophes are collapsed before binding, which stores exactly the text
   * the previously interpolated string literal would have stored.
   *
   * @param array $recipe Recipe fields: author, name, flavors, pg, vg,
   *                      stepping, notes, notes2, recipeUrl, imageUrl.
   *
   * @return void
   *
   * @throws RuntimeException When the INSERT statement cannot be prepared.
   */
  function saveRecipeSqlite($recipe){
      $file = 'e_juicer.sqlite';

      // Missing keys fall back to '' (text) or 0 (numbers).
      $text = function ($key) use ($recipe) {
          return isset($recipe[$key]) ? (string)$recipe[$key] : '';
      };
      $number = function ($key) use ($recipe) {
          return isset($recipe[$key]) ? (int)$recipe[$key] : 0;
      };
      // Reverse the quote doubling applied by SQLite3::escapeString().
      $unescaped = function ($key) use ($text) {
          return str_replace("''", "'", $text($key));
      };

      // An empty URL must not reach file_get_contents(); a failed download
      // leaves the image NULL rather than binding boolean false.
      $imageUrl = $text('imageUrl');
      $image = NULL;
      if ($imageUrl !== '') {
          $downloaded = file_get_contents($imageUrl);
          if ($downloaded !== false) {
              $image = $downloaded;
          }
      }

      $sql = "INSERT INTO recipes (parentID, author, name, versionNumber, creationDateTime, revisionAuthor, revisionName, revisionVersion, flavors, PG, VG, nicotine, stepping, notes, notes2, recipeKind, recipeUrl, imageUrl, image, starRating, locked, checked, water, ethanol)
  VALUES (NULL,:author,:name,1,NULL,NULL,NULL,NULL,:flavors,:pg,:vg,0,:stepping,:notes,:notes2,NULL,:recipeUrl,:imageUrl,:image,0,1,0,0,0)";

      $db = new SQLite3($file);
      try {
          $query = $db->prepare($sql);
          if ($query === false) {
              throw new RuntimeException('saveRecipeSqlite: cannot prepare insert: ' . $db->lastErrorMsg());
          }

          $query->bindValue(':author', $text('author'), SQLITE3_TEXT);
          $query->bindValue(':name', $unescaped('name'), SQLITE3_TEXT);
          $query->bindValue(':flavors', $text('flavors'), SQLITE3_TEXT);
          $query->bindValue(':pg', $number('pg'), SQLITE3_INTEGER);
          $query->bindValue(':vg', $number('vg'), SQLITE3_INTEGER);
          $query->bindValue(':stepping', $number('stepping'), SQLITE3_INTEGER);
          $query->bindValue(':notes', $unescaped('notes'), SQLITE3_TEXT);
          $query->bindValue(':notes2', $unescaped('notes2'), SQLITE3_TEXT);
          $query->bindValue(':recipeUrl', $unescaped('recipeUrl'), SQLITE3_TEXT);
          $query->bindValue(':imageUrl', $imageUrl, SQLITE3_TEXT);
          $query->bindValue(':image', $image, SQLITE3_BLOB);

          // A failed execute() is left as the driver's own warning, as before,
          // so one rejected row does not abort the caller.
          $query->execute();
          $query->close();
      } finally {
          // Release the database file even when preparing failed.
          $db->close();
      }
  }
  103.  
  /**
   * Walk the paginated recipe listing and store every recipe it links to.
   *
   * The page to resume from is read from page.txt in the working directory
   * (page 1 when the file is missing or holds no positive number). After
   * each listing page has been processed the next page number is written
   * back, so an interrupted run continues where it stopped.
   *
   * When a listing page cannot be downloaded an E_USER_WARNING is raised and
   * the function returns without advancing the checkpoint, so that page is
   * retried on the next run instead of being skipped.
   *
   * @param int $lastPage Last listing page to process (inclusive).
   *
   * @return void
   */
  function parseRecipes($lastPage = 209){

      // Normalise the checkpoint to an integer: a missing file gives false and
      // a trailing newline gives a non-numeric string, and neither of those
      // increments as a number.
      $stored = is_readable('page.txt') ? file_get_contents('page.txt') : false;
      $npage = ($stored === false) ? 1 : (int)trim($stored);
      if ($npage < 1) {
          $npage = 1;
      }

      // Pre-checked loop: nothing is fetched once the checkpoint is past the end.
      while ($npage <= $lastPage) {
          $html = getHtml("https://alltheflavors.com/recipes?page=".$npage);
          if (!is_string($html) || $html === '') {
              trigger_error(
                  "parseRecipes: could not download listing page $npage; stopping so it is retried",
                  E_USER_WARNING
              );
              return;
          }

          $dom = new DomDocument();

          $internalErrors = libxml_use_internal_errors(true);
          $dom->loadHTML($html);
          // The buffered parse errors are never inspected; free them.
          libxml_clear_errors();
          libxml_use_internal_errors($internalErrors);

          $finder = new DomXPath($dom);
          $classname = "recipe-line-title";
          $nodes = $finder->query("//*[@class='" . $classname . "']");

          foreach ($nodes as $node) {
              $href = $node->getAttribute('href');
              if ($href === '') {
                  // Without a link there is no recipe page to scrape.
                  continue;
              }

              saveRecipeSqlite(getRecipeFromUrl("https://alltheflavors.com".$href));
          }

          $npage++;
          file_put_contents('page.txt', $npage);
      }

  }
  130.  
  // Script entry point: the scrape starts as soon as this file is executed.
  131. parseRecipes();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement