Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/**
 * Fetch the rendered HTML of the Wikipedia "Elephant" article and print it
 * with every element that is not on a small whitelist removed.
 *
 * Steps:
 *   1. Download the article body with cURL.
 *   2. Parse it into a DOMDocument (declared as UTF-8).
 *   3. Collect the distinct tag names that occur in the document and are
 *      neither <body> nor on the whitelist.
 *   4. Remove every element carrying one of those tag names.
 *   5. Echo the resulting document.
 *
 * NOTE(review): a removed element takes its whole subtree with it, so an
 * allowed element (e.g. <p>) nested inside a disallowed one (e.g. <div>) is
 * removed as well. This matches the previous behaviour; confirm it is the
 * intended one before relying on the output.
 *
 * Throws RuntimeException when the download fails or the document has no
 * <html> root after parsing.
 */

// HTTPS is used directly: the plain-HTTP address only answers with a
// redirect, which cURL does not follow unless explicitly told to.
$url = 'https://en.wikipedia.org/w/index.php?title=Elephant&action=render';

$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status codes >= 400 as a failed transfer instead of parsing an error page.
curl_setopt($curl, CURLOPT_FAILONERROR, true);
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');

$response = curl_exec($curl);
if ($response === false) {
    // Read the error text before the handle is released.
    $reason = curl_error($curl);
    curl_close($curl);
    throw new RuntimeException('Could not fetch ' . $url . ': ' . $reason);
}
curl_close($curl);

// The response is an HTML fragment, so wrap it in a full document. The <meta>
// charset declaration makes the parser decode the bytes as UTF-8; without it
// loadHTML() falls back to ISO-8859-1 and mangles non-ASCII characters.
$html = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
    . $response
    . '</body></html>';

// Real-world markup triggers libxml warnings; collect them internally rather
// than letting them leak into the output, then restore the previous setting.
$previous_libxml_setting = libxml_use_internal_errors(true);
$document = new DOMDocument('1.0');
$document->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

// These are the elements which are to be kept in the response; all others are removed.
$allowed_elements = array('a', 'b', 'i', 'p');

$parent = $document->getElementsByTagName('html')->item(0);
if ($parent === null) {
    throw new RuntimeException('Parsed document has no <html> element.');
}

// Collect each disallowed tag name once. Using the name as the array key
// de-duplicates without a separate array_unique() pass.
$disallowed = array();
foreach ($parent->getElementsByTagName('*') as $element) {
    $tag = (string) $element->nodeName;
    $tag_lower = strtolower($tag);
    // <body> is the container for the kept content and must survive.
    if ($tag_lower === 'body' || in_array($tag_lower, $allowed_elements, true)) {
        continue;
    }
    $disallowed[$tag] = true;
}

// Remove every element with a disallowed tag name.
foreach (array_keys($disallowed) as $tag) {
    $matches = $parent->getElementsByTagName((string) $tag);
    // The node list is live: after each removal the next match moves to
    // index 0, so keep taking the first item until none are left. This also
    // copes with matches that vanished because an ancestor was removed.
    while (($node = $matches->item(0)) !== null) {
        $node->parentNode->removeChild($node);
    }
}

echo $document->saveHTML();
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement