Advertisement
Guest User

rubyApplication

a guest
Jun 19th, 2013
210
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.71 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. #
  4. # library included
  5. #
  6. require 'cgi'
  7. require 'open-uri'
  #
  # Regular expressions used to scrape the everytrail.com search page
  #
  # Whole guides section; greedy, so it runs to the last "end guides" marker.
  commentRegexp1 = /<!-- start guides[\D\d]*end guides -->/
  # One <img> tag whose src is an absolute http URL (up to the first '>').
  commentRegexp2 = /<img src="http[\d\D]+?>/
  # Capture 1: the src value of an <img> tag.
  commentRegexp3 = /img src="([\d\D]+?)"/
  # Capture 1: the value of a double-quoted title attribute.
  commentRegexp4 = /title="([\d\D]+?)"/
  # Capture 1: text of the small-text div. The pattern requires exactly 8
  # whitespace characters before the text and 21 after it.
  commentRegexp5 = /<div class='small-text light-text'>[\s]{8}([\w \S]+)[\s]{21}<\/div>/
  # Either a star marker carrying an "N/5" title or the left-aligned div
  # (presumably the markup of an unrated guide -- confirm against the page).
  commentRegexp7 = /id="star1" title='\d\/5'|<div class="left" style="margin-left:0px">/
  # Capture 1: the rating digit out of an "N/5" title.
  commentRegexp8 = /title='(\d)\/5'/
  # Capture 1: the href of a single-quoted anchor.
  commentRegexp9 = /<a href='(.+?)'>/
  # From the first '/' up to the last letter, digit or hyphen.
  commentRegexp10 = /\/.+[0-9a-zA-Z-]/
  # Capture 1: the number in "Viewed N times".
  commentRegexp11 = /Viewed.(\d+?) times/
  #
  # variable & initialize
  #
  # Only the state that later steps read or write is declared here.
  a = 'china'                                      # search term; overwritten by the CGI 'trip' parameter
  q = ''                                           # URL-encoded search term
  url = 'http://www.everytrail.com/search.php?q='  # search URL; the term is appended in place
  url2 = 'http://www.everytrail.com'               # site root prepended to guide paths
  codeString = ''                                  # source of the search results page
  guideData = ''                                   # guides section cut out of codeString
  getImg_Title = ''                                # buffer of the matched <img> tags
  resultData = []                                  # one hash per guide
  i = 0                                            # index into resultData shared by the scan loops
  ################################################
  #                                              #
  #                 main program                 #
  #                                              #
  ################################################
  STDOUT.set_encoding("UTF-8")
  #
  # Read the request and send the HTTP header.
  # The charset is stated explicitly so the header agrees with the
  # encoding="utf-8" XML declaration that is printed in the body.
  #
  cgi = CGI.new(:accept_charset => "UTF-8")
  print cgi.header("type" => "text/xml", "charset" => "UTF-8")
  # Search term supplied by the caller in the 'trip' parameter.
  a = cgi['trip']
  #
  # Fetch the search results page for the requested trip.
  #
  # CGI.escape turns every space into '+' and percent-encodes characters such
  # as '&' and '#', so the whole term stays inside the q parameter.
  q = CGI.escape(a)
  url.concat(q)
  # URI.open is used because Kernel#open no longer opens URLs on Ruby 3; the
  # block form closes the handle as soon as the body has been read.
  codeString = URI.open(url) { |page| page.read }.encode!('UTF-8', 'UTF-8', :invalid => :replace)
  #
  # Stop early with a placeholder <trips total="0"> document when the page
  # reports "No result" or contains no odd/even result row at all.
  #
  no_result_notice = codeString =~ /No result/
  result_rows = codeString =~ /<div class="odd">|<div class="even">/
  if no_result_notice || !result_rows
    # puts prints each array element on its own line.
    puts [
      '<?xml version="1.0" encoding="utf-8"?>',
      '<trips total="0">',
      '<trip>',
      '<trip_name>null</trip_name>',
      '<location>null</location>',
      '<view_times>0</view_times>',
      '<rating_out_of_5>0</rating_out_of_5>',
      '<image_url>null</image_url>',
      '<guide_url>null</guide_url>',
      '</trip>',
      '</trips>'
    ]
    exit
  end
  #
  # Cut the guides section out of the page source.
  # When the pattern matches more than once the last match is kept; when it
  # does not match, guideData keeps its previous value.
  #
  guide_sections = codeString.scan(commentRegexp1)
  guideData = guide_sections.last unless guide_sections.empty?
  #
  # Create one record per guide image, with its image URL and title.
  #
  # Both attributes are read from the same <img> tag, so a tag without a
  # title cannot shift the titles of the records that follow it.
  # Values are stored as one-element arrays.
  #
  guideData.scan(commentRegexp2) do |img_tag|
    image_src = img_tag[commentRegexp3, 1]
    next unless image_src

    record = {}
    record.store("Image", [image_src])
    title = img_tag[commentRegexp4, 1]
    # An untitled image gets no "Name" key rather than a borrowed title.
    record.store("Name", [title]) if title
    resultData.push(record)
  end
  #
  # get Location Data
  #
  # Locations are attached to the records by position. A match with no
  # record at its position is ignored instead of raising NoMethodError on nil.
  # The shared counter i is not used here and keeps its value.
  #
  guideData.scan(commentRegexp5).each_with_index do |location, idx|
    record = resultData[idx]
    # location is the capture array produced by scan.
    record.store("Location", location) if record
  end
  #
  # get Rank Data
  #
  # Each marker matched by commentRegexp7 belongs to the record at the same
  # position. A marker carrying an "N/5" title stores the digit as a
  # one-element array; any other marker stores the string 'N'.
  # Markers with no record at their position are ignored instead of raising
  # NoMethodError on nil. The shared counter i is not used here.
  #
  guideData.scan(commentRegexp7).each_with_index do |marker, idx|
    record = resultData[idx]
    next unless record

    stars = marker[commentRegexp8, 1]
    record.store("Rating out of 5", stars ? [stars] : 'N')
  end
  #
  # get guide link and view times
  #
  # Every single-quoted anchor in the guides section belongs to the record at
  # the same position. The guide page is downloaded to read its
  # "Viewed N times" counter, one request per guide.
  # Anchors with no record at their position, or whose href holds no path,
  # are skipped. The shared counter i is not used here.
  #
  guideData.scan(commentRegexp9).each_with_index do |captures, idx|
    record = resultData[idx]
    next unless record

    path = captures.first[commentRegexp10]
    next unless path

    link = url2 + path
    record.store("Link", link)

    views = nil
    # URI.open is used because Kernel#open no longer opens URLs on Ruby 3; the
    # block form closes the handle when the block ends.
    URI.open(link) do |page|
      page.each_line do |line|
        views = line[commentRegexp11, 1]
        # Stop reading at the first counter found.
        break if views
      end
    end
    # Stored as a one-element array; '0' when the page shows no counter.
    record.store("Viewed Times", [views || '0'])
  end
  #
  # print xml
  #
  # cdata_text returns the text to place inside a CDATA section for one field.
  # Stored values are either one-element arrays or plain strings; the default
  # is used when the field was never stored for the record. Any "]]>" in the
  # text is split across two CDATA sections so it cannot end the section early.
  cdata_text = lambda do |record, key, default|
    text = Array(record.fetch(key, default)).first.to_s
    text.gsub(']]>', ']]]]><![CDATA[>')
  end

  puts "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
  puts "<trips total=\"#{resultData.size}\">"
  resultData.each do |record|
    puts "<trip>"
    puts "<trip_name><![CDATA[#{cdata_text.call(record, 'Name', 'null')}]]></trip_name>"
    puts "<location><![CDATA[#{cdata_text.call(record, 'Location', 'null')}]]></location>"
    puts "<view_times><![CDATA[#{cdata_text.call(record, 'Viewed Times', '0')}]]></view_times>"
    puts "<rating_out_of_5><![CDATA[#{cdata_text.call(record, 'Rating out of 5', 'N')}]]></rating_out_of_5>"
    puts "<image_url><![CDATA[#{cdata_text.call(record, 'Image', 'null')}]]></image_url>"
    puts "<guide_url><![CDATA[#{cdata_text.call(record, 'Link', 'null')}]]></guide_url>"
    puts "</trip>"
  end
  puts "</trips>"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement