Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- # Call this script with the query parameter of your ID.
- # So hnreplies.php?id=tn1
- # Download from: https://sourceforge.net/projects/simplehtmldom/
- include_once 'simplehtmldom/simple_html_dom.php';
- include_once 'simplehtmldom/HtmlWeb.php';
// The "id" query parameter selects whose threads page is fetched.
// Reject a missing, empty or non-string value (e.g. ?id[]=x) up front instead of
// building a bogus URL and failing later with notices.
$id = isset($_GET['id']) && is_string($_GET['id']) ? $_GET['id'] : '';
if ($id === '') {
    http_response_code(400);
    header('Content-Type: text/plain; charset=utf-8', true);
    exit('Missing "id" query parameter.' . PHP_EOL);
}

$n = new simplehtmldom\HtmlWeb();
// URL-encode the id so it cannot smuggle extra query parameters into the request.
$url = "https://news.ycombinator.com/threads?id=" . rawurlencode($id);
$doc = $n->load($url);
if (!$doc) {
    // Without a parsed document there is nothing to scrape.
    http_response_code(502);
    header('Content-Type: text/plain; charset=utf-8', true);
    exit('Could not load ' . $url . PHP_EOL);
}

// Each entry becomes one <item> of the feed; "_sort" is only used for ordering.
$items = [];
foreach ($doc->find(".athing") as $row) {
    // Rows that contain a ".storyon" link are treated as "my" comments;
    // every other row is treated as a reply to one of them.
    if ($row->find(".storyon a", 0)) {
        continue;
    }

    // find my comment: walk backwards through the siblings until a row with a
    // ".storyon" link turns up, counting the steps as the reply depth
    $mycomment = $row->previousSibling();
    $depth = 1;
    while ($mycomment && !$mycomment->find(".storyon a", 0)) {
        $mycomment = $mycomment->previousSibling();
        $depth++;
    }
    if (!$mycomment) {
        // Ran off the start of the list without finding a parent comment.
        continue;
    }

    // Look up every node this item needs; skip the row if the markup is incomplete
    // rather than dereferencing a missing node.
    $story = $mycomment->find(".storyon a", 0);
    $myLink = $mycomment->find(".age a", 0);
    $text = $row->find(".commtext", 0);
    $age = $row->find(".age", 0);
    $replyLink = $row->find(".age a", 0);
    $author = $row->find(".hnuser", 0);
    if (!$story || !$myLink || !$text || !$age || !$replyLink || !$author) {
        continue;
    }

    // date_create() returns false when it cannot parse the age text.
    $pubDate = date_create($age->text());
    if ($pubDate === false) {
        continue;
    }
    // The time of day is reset to midnight.
    // NOTE(review): presumably to keep pubDate stable between fetches — confirm.
    $pubDateTs = $pubDate->setTime(0, 0, 0, 0)->getTimestamp();

    $description = $text->innertext;
    $description .= '<hr /><a href="';
    $description .= getAbsoluteUrl("https://news.ycombinator.com/", $myLink->getAttribute("href"));
    $description .= '">My comment</a>';

    $items[] = [
        "title" => "Re^$depth: " . $story->innertext,
        "description" => $description,
        "author" => $author->innertext,
        "pubDate" => gmdate(DATE_RFC2822, $pubDateTs),
        "link" => getAbsoluteUrl("https://news.ycombinator.com/", $replyLink->getAttribute("href")),
        "guid" => $row->getAttribute("id"),
        "_sort" => $pubDateTs,
    ];
}
// Newest first: order the collected items by descending "_sort" timestamp.
usort($items, static function ($left, $right) {
    return $right['_sort'] <=> $left['_sort'];
});
// setup feed output

// SimpleXMLElement::addChild() does not escape "&" in the value it is given, so every
// piece of text is escaped before being added.
$xmlEscape = function ($text) {
    return htmlentities((string) $text, ENT_XML1 | ENT_DISALLOWED, "UTF-8");
};

// Tolerate a missing or non-string "id" parameter when labelling the feed.
$feedId = isset($_GET['id']) && is_string($_GET['id']) ? $_GET['id'] : '';

$rss = new SimpleXMLElement('<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/"></rss>');
$rss->addAttribute('version', '2.0');
$channel = $rss->addChild('channel');
$channel->addChild('title', $xmlEscape("HN Replies for " . $feedId));
$channel->addChild('link', $xmlEscape($url));
// RSS 2.0 requires a channel to carry title, link and description.
$channel->addChild('description', $xmlEscape("Replies to Hacker News comments by " . $feedId));

foreach ($items as $i) {
    // "_sort" is an internal ordering key, not a feed element.
    unset($i['_sort']);
    $item = $channel->addChild("item");
    foreach ($i as $k => $v) {
        $x = $item->addChild($k, $xmlEscape($v));
        if ($k === "guid") {
            // The guid is the row's id attribute, not a URL.
            $x->addAttribute('isPermaLink', 'false');
        }
    }
}

header('Content-Type: application/rss+xml; charset=utf-8', true);
echo $rss->asXML() . PHP_EOL;
/**
 * Resolve a URL reference against an absolute base URL.
 *
 * Handles absolute references (returned unchanged), protocol-relative ("//host/..."),
 * root-relative ("/path"), path-relative ("path", "./path", "../path"), query-only
 * ("?q") and fragment-only ("#f") references. Duplicate slashes, "/./" and
 * "/segment/../" sequences are collapsed in the resulting path only; the reference's
 * query string and fragment are carried over untouched.
 *
 * @param string $baseUrl     Absolute URL the reference is relative to.
 * @param string $relativeUrl URL reference to resolve; an empty value resolves to the
 *                            base URL without its fragment.
 *
 * @return string The resolved absolute URL.
 *
 * @throws \InvalidArgumentException If a relative reference has to be resolved against
 *                                   a base URL that cannot be parsed or has no scheme.
 */
function getAbsoluteUrl($baseUrl, $relativeUrl)
{
    $baseUrl = (string) $baseUrl;
    // A missing href attribute may arrive as false/null; treat it as an empty reference.
    $relativeUrl = (string) $relativeUrl;

    // an empty reference points at the base document itself
    if ($relativeUrl === '') {
        return explode('#', $baseUrl, 2)[0];
    }

    // if already absolute URL
    if (parse_url($relativeUrl, PHP_URL_SCHEME) !== null) {
        return $relativeUrl;
    }

    // fragment-only reference: replaces the fragment of the base URL
    if ($relativeUrl[0] === '#') {
        return explode('#', $baseUrl, 2)[0] . $relativeUrl;
    }

    // query-only reference: replaces the query (and fragment) of the base URL
    if ($relativeUrl[0] === '?') {
        return preg_split('/[?#]/', $baseUrl, 2)[0] . $relativeUrl;
    }

    // parse base URL into its components (scheme, host, port, user, pass, path, ...)
    $base = parse_url($baseUrl);
    if ($base === false || !isset($base['scheme'])) {
        throw new \InvalidArgumentException('Base URL must be absolute: ' . $baseUrl);
    }
    $scheme = $base['scheme'];

    // if relative URL starts with // it only inherits the scheme
    if (substr($relativeUrl, 0, 2) === '//') {
        return $scheme . ':' . $relativeUrl;
    }

    // Separate the reference's path from its "?query#fragment" tail so that the
    // normalisation below can never rewrite "//" or "/../" inside a query string.
    $tailPos = strcspn($relativeUrl, '?#');
    $relPath = substr($relativeUrl, 0, $tailPos);
    $tail = (string) substr($relativeUrl, $tailPos);

    if ($relPath[0] === '/') {
        // if relative URL starts with / it replaces the base path entirely
        $path = $relPath;
    } else {
        // otherwise it is relative to the directory of the base path
        // (the base path with its last non-directory element removed)
        $dir = isset($base['path']) ? preg_replace('#/[^/]*$#', '', $base['path']) : '';
        $path = $dir . '/' . $relPath;
    }

    // replace // or /./ or /foo/../ with / until nothing changes any more
    $re = ['#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'];
    do {
        $path = preg_replace($re, '/', $path, -1, $count);
    } while ($count > 0);

    // rebuild the authority part from the base URL
    $authority = '';
    // if base URL contains a user
    if (isset($base['user'])) {
        $authority .= $base['user'];
        // if base URL contains a password
        if (isset($base['pass'])) {
            $authority .= ':' . $base['pass'];
        }
        $authority .= '@';
    }
    $authority .= isset($base['host']) ? $base['host'] : '';
    // if base URL contains a port
    if (isset($base['port'])) {
        $authority .= ':' . $base['port'];
    }

    // The base URL's own query is intentionally not carried over: a non-empty
    // reference supplies its own query (or none).
    return $scheme . '://' . $authority . $path . $tail;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement