Advertisement
thorbj

extracter

Mar 10th, 2015
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 4.39 KB | None | 0 0
  1. <?php  
  /**
   * Fetch the body of a URL through cURL.
   *
   * Response headers are left out of the returned data, the transfer is
   * returned instead of being printed, and redirects are followed.
   *
   * @param string $url Address to request.
   * @return string|bool Whatever curl_exec() yields for this handle: the
   *                     response body on success, false on failure.
   */
  function file_get_contents_curl1($url) {
      $handle = curl_init();

      // Same four options as before, applied in the same order in one call.
      curl_setopt_array($handle, array(
          CURLOPT_HEADER         => 0,
          CURLOPT_RETURNTRANSFER => 1,
          CURLOPT_URL            => $url,
          CURLOPT_FOLLOWLOCATION => 1,
      ));

      $response = curl_exec($handle);
      curl_close($handle);

      return $response;
  }
  15.    
  // Pull the listings page from the external site, cut the markup that
  // follows the '<div id="ide">' container into "</div>"-delimited chunks,
  // and re-emit up to six listings, each wrapped in its own styled <div>.
  //
  // NOTE(review): the remote markup is echoed as-is (only the link/span
  // rewrites below are applied), so this page trusts the external site's HTML.

  // The URL for the external content we want to pull.
  $html = file_get_contents_curl1("https://external.url.com/subdir/");

  // curl_exec() yields false on failure; treat that as "no content" instead of
  // feeding a boolean into the string functions below.
  $content = is_string($html) ? $html : '';

  // The div that includes the content: '<div id="ide">'.
  $first_step = explode('<div id="ide">', $content);

  // When the marker is missing there is nothing after it to split.
  $second_step = isset($first_step[1]) ? explode("</div>", $first_step[1]) : array();

  // Rewrites applied to the first chunk of every listing (the one holding the link):
  //  - turn the relative, session-bound "opening" link into an absolute one,
  //  - open the link in a new tab,
  //  - turn the span tags into h1 tags.
  $patterns = array(
      '#\./opening;jsessionid=.*\?#',
      '#<a href=#',
      '#span(.*?)>#'
  );

  $replaces = array(
      'https://external.url.com/subdir/opening?',
      '<a target="_blank" href=',
      'h1>'
  );

  // One wrapper class per listing, in page order.
  $wrapper_classes = array('one', 'two', 'three', 'four', 'five', 'six');

  // Every listing occupies eight consecutive chunks:
  //   +0 link/title (rewritten with $patterns/$replaces)
  //   +1 description -- NOTE: if this one is dropped, the H1 margin in the
  //      style declaration needs to change
  //   +2 from date
  //   +3 to date
  //   +4 company
  //   +5 employment condition (full-time/part-time)
  //   +6 department
  //   +7 deliberately not printed
  $chunks_per_listing = 8;
  $last_printed_offset = 6;

  // Merge the result into one variable.
  $final_output = '';

  foreach ($wrapper_classes as $position => $css_class) {
      $base = $position * $chunks_per_listing;

      // Fewer listings than wrappers: stop instead of reading undefined offsets.
      if (!isset($second_step[$base])) {
          break;
      }

      $final_output .= '<div class="' . $css_class . '">';
      $final_output .= preg_replace($patterns, $replaces, $second_step[$base]);

      for ($offset = 1; $offset <= $last_printed_offset; $offset++) {
          // A truncated final listing simply contributes the chunks it has.
          if (isset($second_step[$base + $offset])) {
              $final_output .= $second_step[$base + $offset];
          }
      }

      $final_output .= '</div>';
  }

  // Convert special chars.
  $converted = iconv("UTF-8", "UTF-8//TRANSLIT", $final_output);

  // iconv() returns false when it meets an illegal byte sequence; fall back to
  // the unconverted markup so one bad byte does not blank the whole output.
  if ($converted === false) {
      $converted = $final_output;
  }

  // Display the final result.
  echo $converted;
  126.     ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement