Advertisement
Guest User

Wyrażenia regularne vs DOM Document

a guest
Sep 12th, 2010
1,496
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.90 KB | None | 0 0
  1. <?php
  2.  
  // Report every error level except notices.
  error_reporting(E_ALL & ~E_NOTICE);
  // Declare the response as UTF-8 encoded HTML.
  header('Content-Type: text/html; charset=UTF-8');
  5. require 'benchmark.php';
  6.  
  7.  
  /**
   * Extract link targets from HTML with a regular expression.
   *
   * Only anchors whose first attribute is a quoted, non-empty href are
   * matched. The closing quote must be the same character as the opening
   * one, so a single-quoted value ends at the next apostrophe instead of
   * running on to a later double quote.
   *
   * @param string $text HTML markup to scan.
   * @return array List of href values in document order (empty if none).
   */
  function regex($text) {

      // Group 1 captures the opening quote; group 2 captures every character
      // up to the matching closing quote (\1). The "s" flag lets "." match
      // newlines, so a value may span several lines.
      $pattern = '/<a\s+href=(["\'])((?:(?!\1).)+)\1/is';

      if (!preg_match_all($pattern, $text, $matches)) {
          // Zero matches (0) or a PCRE failure (false): nothing to report.
          return array();
      }

      return $matches[2];

  }
  16.  
  17.  
  /**
   * Extract link targets from HTML by parsing it with DOMDocument.
   *
   * @param string $text HTML markup to parse.
   * @return array The href attribute of every <a> element in document order;
   *               an anchor without an href contributes an empty string.
   *               Empty or non-string input yields an empty array.
   */
  function domdocument($text) {

      // Collected link targets.
      $links = array();

      // loadHTML() refuses an empty source (a warning on older PHP, a
      // ValueError on PHP 8), so bail out early for empty or non-string input.
      if (!is_string($text) || $text === '') {
          return $links;
      }

      // Real-world markup is rarely valid; buffer libxml's parse errors
      // instead of silencing every kind of error with "@".
      $previous = libxml_use_internal_errors(true);

      $dom = new DOMDocument();
      $loaded = $dom->loadHTML($text);

      // Drop the buffered parse errors and restore the caller's setting.
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      if ($loaded === false) {
          return $links;
      }

      // Walk every <a> element and record its href attribute.
      foreach ($dom->getElementsByTagName('a') as $link) {
          $links[] = $link->getAttribute('href');
      }

      return $links;

  }
  37.  
  38.  
  /**
   * Download a URL with cURL and return the response body.
   *
   * Redirects are followed (up to a fixed limit), HTTP status codes of 400
   * and above are treated as failures, TLS certificates are verified, and
   * the transfer is bounded by connect and total timeouts.
   *
   * @param string $url Address to fetch.
   * @return string|false Response body, or false when the handle cannot be
   *                      created or the transfer fails.
   */
  function curl_open($url) {

      $ch = curl_init();
      if ($ch === false) {
          return false;
      }

      curl_setopt_array($ch, array(
          CURLOPT_URL            => $url,
          CURLOPT_FAILONERROR    => true,
          CURLOPT_FOLLOWLOCATION => true,
          // Cap the redirect chain so a redirect loop cannot hang the script.
          CURLOPT_MAXREDIRS      => 10,
          CURLOPT_RETURNTRANSFER => true,
          // Keep certificate and host name verification on; turning them off
          // lets anyone on the network path tamper with the response.
          CURLOPT_SSL_VERIFYHOST => 2,
          CURLOPT_SSL_VERIFYPEER => true,
          // Bound both the connection phase and the whole transfer (seconds).
          CURLOPT_CONNECTTIMEOUT => 10,
          CURLOPT_TIMEOUT        => 30,
          CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
      ));

      $output = curl_exec($ch);
      curl_close($ch);

      return $output;

  }
  55.  
  56.  
  57.  
  // Fetch the page once so every benchmark run works on the same markup.
  $subject = curl_open('http://pl.wikipedia.org/wiki/Polska');

  // curl_open() returns false when the transfer fails; timing the extractors
  // on a missing document would produce meaningless numbers, so stop here.
  if (!is_string($subject) || $subject === '') {
      echo 'Could not download the benchmark page.';
      exit(1);
  }

  $result = array();

  // Time both extractors for each value of $n.
  // NOTE(review): Benchmark() comes from benchmark.php; its second argument
  // is presumably the number of repetitions - confirm against that file.
  foreach (array(1, 10, 100, 1000) as $n) {
      $result[$n]['reg'] = Benchmark('regex', $n, $subject);
      $result[$n]['dom'] = Benchmark('domdocument', $n, $subject);
  }

  // Dump the collected results in a preformatted block.
  echo '<pre>';
  print_r($result);
  echo '</pre>';
  79.  
  80.  
  81.  
  82. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement