Advertisement
Guest User

hnreplies.php

a guest
Feb 4th, 2021
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 4.12 KB | None | 0 0
  1. <?php
  2. # Call this script with the query parameter of your ID.
  3. # So hnreplies.php?id=tn1
  4.  
  5. # Download from: https://sourceforge.net/projects/simplehtmldom/
  6. include_once 'simplehtmldom/simple_html_dom.php';
  7. include_once 'simplehtmldom/HtmlWeb.php';
  8.  
  9. $n = new simplehtmldom\HtmlWeb();
  10. $url = "https://news.ycombinator.com/threads?id=" . $_GET['id'];
  11. $doc = $n->load($url);
  12.  
  13. $items = [];
  14.  
  15. foreach ($doc->find(".athing") as $row) {
  16.     if (!$row->find(".storyon a", 0)) {
  17.         // find my comment
  18.         $mycomment = $row->previousSibling();
  19.         $depth = 1;
  20.         while (!$mycomment->find(".storyon a", 0)) {
  21.             $mycomment = $mycomment->previousSibling();
  22.             $depth++;
  23.         }
  24.        
  25.         $description = $row->find(".commtext", 0)->innertext;
  26.         $description .= '<hr /><a href="';
  27.         $description .= getAbsoluteUrl("https://news.ycombinator.com/", $mycomment->find(".age a", 0)->getAttribute("href"));
  28.         $description .= '">My comment</a>';
  29.        
  30.         $pubDateStr = $row->find(".age", 0)->text();
  31.         $pubDateTs = date_create($pubDateStr)->setTime(0, 0, 0, 0)->getTimestamp();
  32.        
  33.         $items[] = array(
  34.             "title" => "Re^$depth: " . $mycomment->find(".storyon a", 0)->innertext,
  35.             "description" => $description,
  36.             "author" => $row->find(".hnuser", 0)->innertext,
  37.             "pubDate" => gmdate(DATE_RFC2822, $pubDateTs),
  38.             "link" => getAbsoluteUrl("https://news.ycombinator.com/", $row->find(".age a", 0)->getAttribute("href")),
  39.             "guid" => $row->getAttribute("id"),
  40.             "_sort" => $pubDateTs
  41.         );
  42.     }
  43. }
  44.  
  45. // sort reverse chronologically
  46. usort($items, function ($a, $b) {
  47.     return $b['_sort'] - $a['_sort'];
  48. });
  49.  
  50. // setup feed output
  51. $rss = new SimpleXMLElement('<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/"></rss>');
  52. $rss->addAttribute('version', '2.0');
  53.  
  54. $channel = $rss->addChild('channel');
  55.  
  56. $channel->addChild('title', "HN Replies for " . $_GET['id']);
  57. $channel->addChild('link', $url);
  58.  
  59. foreach ($items as $i) {
  60.     unset($i['_sort']);
  61.    
  62.     $item = $channel->addChild("item");
  63.    
  64.     foreach ($i as $k => $v) {
  65.         $x = $item->addChild($k, htmlentities($v, ENT_XML1 | ENT_DISALLOWED, "UTF-8"));
  66.         if ($k == "guid") {
  67.             $x->addAttribute('isPermaLink', 'false');
  68.         }
  69.     }
  70. }
  71.  
  72.  
  73. header('Content-Type: application/rss+xml; charset=utf-8', true);
  74. echo $rss->asXML() . PHP_EOL;
  75.  
  76.  
  77. function getAbsoluteUrl($baseUrl, $relativeUrl){
  78.  
  79.     // if already absolute URL
  80.     if (parse_url($relativeUrl, PHP_URL_SCHEME) !== null){
  81.         return $relativeUrl;
  82.     }
  83.  
  84.     // queries and anchors
  85.     if ($relativeUrl[0] === '#' || $relativeUrl[0] === '?'){
  86.         return $baseUrl.$relativeUrl;
  87.     }
  88.  
  89.     // parse base URL and convert to: $scheme, $host, $path, $query, $port, $user, $pass
  90.     extract(parse_url($baseUrl));
  91.  
  92.     // if base URL contains a path remove non-directory elements from $path
  93.     if (isset($path) === true){
  94.         $path = preg_replace('#/[^/]*$#', '', $path);
  95.     }
  96.     else {
  97.         $path = '';
  98.     }
  99.  
  100.     // if realtive URL starts with //
  101.     if (substr($relativeUrl, 0, 2) === '//'){
  102.         return $scheme.':'.$relativeUrl;
  103.     }
  104.  
  105.     // if realtive URL starts with /
  106.     if ($relativeUrl[0] === '/'){
  107.         $path = null;
  108.     }
  109.  
  110.     $abs = null;
  111.  
  112.     // if realtive URL contains a user
  113.     if (isset($user) === true){
  114.         $abs .= $user;
  115.  
  116.         // if realtive URL contains a password
  117.         if (isset($pass) === true){
  118.             $abs .= ':'.$pass;
  119.         }
  120.  
  121.         $abs .= '@';
  122.     }
  123.  
  124.     $abs .= $host;
  125.  
  126.     // if realtive URL contains a port
  127.     if (isset($port) === true){
  128.         $abs .= ':'.$port;
  129.     }
  130.  
  131.     $abs .= $path.'/'.$relativeUrl.(isset($query) === true ? '?'.$query : null);
  132.  
  133.     // replace // or /./ or /foo/../ with /
  134.     $re = ['#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'];
  135.     for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) {
  136.     }
  137.  
  138.     // return absolute URL
  139.     return $scheme.'://'.$abs;
  140.  
  141. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement