Advertisement
Guest User

Untitled

a guest
May 20th, 2017
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.99 KB | None | 0 0
  1.  
  2.  
  3.  
  4.  
  5.  
  6.  
  7.  
  8.  
  9. ENV["URLS"]="https://granary-demo.appspot.com/url?input=html&output=atom&url=https://aaronparecki.com/"
  10.  
  11.  
require "digest"
require "html"
require "http"
require "xml"

require "db"
require "pg"
  17.  
  18. DbUrl = "postgres://127.0.0.1/feeds"
  19.  
  20. Conn = begin
  21. DB.open DbUrl
  22. rescue
  23. raise "Need to create database named #{DbUrl}"
  24. end
  25.  
  26. # Conn = DB.open DbUrl
  27.  
  28. Urls = if urls = ENV["URLS"]?
  29. urls.split(",")
  30. else
  31. [] of String
  32. end
  33.  
  34.  
  35.  
  36. # FeedUrls = [
  37. # "https://www.wired.com/feed/",
  38. # "https://www.theatlantic.com/feed/all/",
  39. # "http://readwrite.com/feed/",
  40. # "https://lobste.rs/rss?token=r2nOOrbCZKC2dSbLAL0kLSYGJ4U5jE8Pe4PmHyLDhtuSyimsQWRCqyMYCeVM",
  41. # "https://news.ycombinator.com/rss",
  42. # "http://fetchrss.com/rss/58dd6e848a93f8b2758b4568953711301.atom",
  43. # "http://www.economist.com/sections/united-states/rss.xml",
  44. # "http://www.aljazeera.com/xml/rss/all.xml",
  45. # ]
  46.  
  47. begin
  48. Conn.exec "create table items (
  49. title text,
  50. link text,
  51. guid text,
  52. date timestamp,
  53. primary key (guid)
  54.  
  55. )"
  56. rescue
  57. #Table exists
  58. end
  59.  
  60. Urls.map do |feed_url|
  61. future { Feed.new(feed_url) }
  62. end.map do |f|
  63. f.get
  64. end
  65.  
  66. Conn.query "select link,title from items order by date desc limit 50" do |rs|
  67. rs.each do
  68. if ENV["FMT"]? == "text/html"
  69. puts "<div><a href=#{rs.read(String)}>#{rs.read(String)}</a></div>"
  70. else
  71. link = rs.read(String)
  72. title = rs.read(String)
  73. # puts "#{title} #{link}"
  74. end
  75. end
  76. end
  77.  
  78. class Feed
  79. record Item, title : String, link : String, date : Time, guid : String
  80.  
  81. def initialize(url)
  82. res = HTTP::Client.get(url).body
  83. p res
  84. document = XML.parse(res)
  85. parse_document(document)
  86. end
  87.  
  88. def parse_document(document)
  89. items = document.xpath("//channel//item").as(XML::NodeSet)
  90. items2 = items
  91. if !items.empty?
  92. items.map do |node|
  93. title = node.xpath_node("title").as(XML::Node).content
  94. link = node.xpath("link").as(XML::NodeSet).first.content
  95. date = Time.parse(node.xpath("pubDate").as(XML::NodeSet).first.content, "%a, %d %b %Y %T %z")
  96. content = (node.xpath_node("description").as(XML::Node)).content
  97. guid_node = node.xpath_node("guid")
  98. if guid_node.nil?
  99. guid = Digest::SHA1.hexdigest(content)
  100. else
  101. guid = guid_node.as(XML::Node).content
  102. end
  103. item = Item.new(title, link, date, guid)
  104. begin
  105. Conn.exec "insert into items values ($1, $2, $3, $4)", title, link, guid, date
  106. item
  107. rescue e : PQ::PQError
  108. # STDERR.puts e.message
  109. end
  110. end
  111. return
  112. end
  113. items = document.xpath("//atom:entry", {"atom" => "http://www.w3.org/2005/Atom"}).as(XML::NodeSet)
  114. if !items.empty?
  115. items.map do |node|
  116. guid = node.xpath_node("atom:id", {"atom" => "http://www.w3.org/2005/Atom"}).as(XML::Node).content
  117. title = node.xpath_node("atom:title", {"atom" => "http://www.w3.org/2005/Atom"}).as(XML::Node).content
  118. link = node.xpath_node("atom:link", {"atom" => "http://www.w3.org/2005/Atom"}).as(XML::Node)["href"]
  119. date_content = case x = node.xpath_node("atom:published", {"atom" => "http://www.w3.org/2005/Atom"})
  120. when XML::Node
  121. x.content
  122. when Nil
  123. # puts node.inspect
  124. nil
  125. end
  126. if date_content
  127. date = Time.parse(date_content, "%FT%X%z")
  128. else
  129. date = Time.now
  130. end
  131. categories = [] of String
  132. content = node.xpath_node("atom:content", {"atom" => "http://www.w3.org/2005/Atom"}).as(XML::Node).content
  133. item = Item.new(title, link, date, guid)
  134. begin
  135. Conn.exec "insert into items values ($1, $2, $3, $4)", title, link, guid, date
  136. item
  137. rescue e : PQ::PQError
  138. # STDERR.puts e.message
  139. end
  140. end
  141. end
  142. end
  143. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement