Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
# encoding: UTF-8
#
# CGI script: looks up the `trip` request parameter on everytrail.com and
# prints the guides listed on the search result page as an XML document.
#
# Requires Ruby 2.5 or newer (URI.open, String#match?, squiggly heredocs).
#
require 'cgi'
require 'open-uri'

# Scrapes the everytrail.com search page and renders the guides it lists as XML.
module EveryTrailSearch
  SITE_URL   = 'http://www.everytrail.com'
  SEARCH_URL = "#{SITE_URL}/search.php?q="

  # Marker printed when a guide has no star rating.
  NO_RATING = 'N'

  #
  # rules for regular expressions (tied to the markup of the result page)
  #
  NO_RESULT_RE   = /No result/
  RESULT_ROW_RE  = /<div class="odd">|<div class="even">/
  GUIDES_RE      = /<!-- start guides[\D\d]*end guides -->/
  IMG_TAG_RE     = /<img src="http[\d\D]+?>/
  IMG_SRC_RE     = /img src="([\d\D]+?)"/
  IMG_TITLE_RE   = /title="([\d\D]+?)"/
  LOCATION_RE    = /<div class='small-text light-text'>[\s]{8}([\w \S]+)[\s]{21}<\/div>/
  # Matches either a rated guide or the placeholder div of an unrated one,
  # so every guide contributes exactly one slot.
  RATING_SLOT_RE = /id="star1" title='\d\/5'|<div class="left" style="margin-left:0px">/
  RATING_RE      = /title='(\d)\/5'/
  LINK_RE        = /<a href='(.+?)'>/
  LINK_PATH_RE   = /\/.+[0-9a-zA-Z-]/
  VIEW_COUNT_RE  = /Viewed.(\d+?) times/

  # Document emitted when the search yields nothing (or cannot be performed).
  EMPTY_RESULT_XML = <<~XML
    <?xml version="1.0" encoding="utf-8"?>
    <trips total="0">
    <trip>
    <trip_name>null</trip_name>
    <location>null</location>
    <view_times>0</view_times>
    <rating_out_of_5>0</rating_out_of_5>
    <image_url>null</image_url>
    <guide_url>null</guide_url>
    </trip>
    </trips>
  XML

  module_function

  # Handles one CGI request: prints the HTTP header followed by the XML body.
  #
  # @param cgi [CGI] request object; its `trip` parameter is the search term
  # @return [void]
  def run(cgi)
    print cgi.header('text/xml')

    # CGI.escape encodes every space (and any other reserved character), not
    # just the first space, so multi-word and non-ASCII queries stay valid.
    page = fetch_page(SEARCH_URL + CGI.escape(cgi['trip'].to_s))
    unless page && results?(page)
      puts EMPTY_RESULT_XML
      return
    end

    trips = parse_trips(page)
    # One extra request per guide: the view counter only exists on the guide page.
    trips.each { |trip| trip[:views] = view_count(trip[:link]) if trip[:link] }
    puts trips_xml(trips)
  end

  # Downloads +url+ and returns the body as UTF-8 text with invalid bytes replaced.
  #
  # @param url [String]
  # @return [String, nil] nil when the page cannot be retrieved; the reason is
  #   written to stderr so a failing page does not abort the whole response
  def fetch_page(url)
    # URI.open instead of Kernel#open: the latter no longer opens URLs on Ruby 3.
    URI.open(url) { |io| io.read.encode!('UTF-8', 'UTF-8', :invalid => :replace) }
  rescue StandardError => e
    warn "everytrail: cannot fetch #{url}: #{e.class}: #{e.message}"
    nil
  end

  # @param page [String] search result page
  # @return [Boolean] true when the page reports results and contains result rows
  def results?(page)
    !page.match?(NO_RESULT_RE) && page.match?(RESULT_ROW_RE)
  end

  # Extracts one hash per guide image found in the guides section of +page+.
  # Locations, ratings and links are matched to guides by their order of
  # appearance; a guide without a counterpart gets nil for that field and
  # surplus matches are ignored.
  #
  # @param page [String] search result page
  # @return [Array<Hash>] hashes with :image, :name, :location, :rating, :link
  def parse_trips(page)
    section = page[GUIDES_RE].to_s

    # Image URL and title are read from the same tag so they cannot drift apart.
    trips = section.scan(IMG_TAG_RE).map do |tag|
      { image: tag[IMG_SRC_RE, 1], name: tag[IMG_TITLE_RE, 1] }
    end

    locations = section.scan(LOCATION_RE).flatten
    ratings   = section.scan(RATING_SLOT_RE).map { |slot| slot[RATING_RE, 1] || NO_RATING }
    links     = section.scan(LINK_RE).map { |(href)| guide_url(href) }

    merge_column(trips, :location, locations)
    merge_column(trips, :rating, ratings)
    merge_column(trips, :link, links)
    trips
  end

  # Stores values[i] under +key+ in trips[i] for every trip.
  def merge_column(trips, key, values)
    trips.each_with_index { |trip, index| trip[key] = values[index] }
  end

  # Turns the href of a guide anchor into an absolute URL on the site.
  #
  # @param href [String]
  # @return [String, nil] nil when the href contains no usable path
  def guide_url(href)
    path = href[LINK_PATH_RE]
    path && SITE_URL + path
  end

  # Reads the "Viewed N times" counter from a guide page (first occurrence).
  #
  # @param url [String] absolute guide URL
  # @return [String, nil] the digits, or nil when unavailable
  def view_count(url)
    page = fetch_page(url)
    page && page[VIEW_COUNT_RE, 1]
  end

  # Wraps +text+ in a CDATA section; an embedded "]]>" is split across two
  # sections so it cannot terminate the section early.
  def cdata(text)
    "<![CDATA[#{text.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>"
  end

  # Renders the result document. Missing text fields become empty, a missing
  # view count becomes "0" and a missing rating becomes NO_RATING.
  #
  # @param trips [Array<Hash>]
  # @return [String] XML without a trailing newline
  def trips_xml(trips)
    entries = trips.map do |trip|
      <<~XML
        <trip>
        <trip_name>#{cdata(trip[:name])}</trip_name>
        <location>#{cdata(trip[:location])}</location>
        <view_times>#{cdata(trip[:views] || '0')}</view_times>
        <rating_out_of_5>#{cdata(trip[:rating] || NO_RATING)}</rating_out_of_5>
        <image_url>#{cdata(trip[:image])}</image_url>
        <guide_url>#{cdata(trip[:link])}</guide_url>
        </trip>
      XML
    end

    %(<?xml version="1.0" encoding="utf-8"?>\n<trips total="#{trips.size}">\n#{entries.join}</trips>)
  end
end

################################################
#                                              #
#                 main program                 #
#                                              #
################################################
STDOUT.set_encoding('UTF-8')
EveryTrailSearch.run(CGI.new(:accept_charset => 'UTF-8'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement