Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
#
# usage: tweets_to_csv.rb <search term> <output filename>
#
# Dumps Twitter search results for <search term> into a CSV file,
# paging through results until a page comes back empty.
#
# This is the 'dumb' approach
# needs reworking if you want something that handles duplicates and whatnot
require 'rubygems'
require 'csv'
require 'twitter_search'

query = ARGV[0]
output_filename = ARGV[1]

# Fail fast with a usage message instead of a nil error deep inside File.open.
abort("usage: #{$0} <search term> <output filename>") unless query && output_filename

# setup my twitter search client
client = TwitterSearch::Client.new('tweet_to_csv')

# Block form guarantees the file is flushed and closed even if a query
# raises mid-run (the original leaked the handle on any exception before
# reaching the final outfile.close).
File.open(output_filename, 'wb') do |outfile|
  # NOTE(review): CSV::Writer.generate is the Ruby 1.8 CSV API; on modern
  # Ruby this would be CSV(outfile) { |csv| ... } — confirm target version.
  CSV::Writer.generate(outfile) do |csv|
    # csv headers
    csv << ['id', 'created_at', 'from_user', 'to_user', 'body']
    # start at page 1
    page = 1
    tweets = client.query(:q => query, :rpp => '100', :page => page)
    # while there are tweets to be had
    while tweets.count > 0
      # increment page
      page += 1
      tweets.each do |tweet|
        # print one line of csv per tweet
        csv << [tweet.id, tweet.created_at, tweet.from_user, tweet.to_user, tweet.text]
      end
      # been getting strange timeout issues
      # i think twitter is rate limiting
      # sleeping for a couple seconds seems to make it not do that
      sleep(3)
      # fetch the next page
      tweets = client.query(:q => query, :rpp => '100', :page => page)
    end
  end
end
Add Comment
Please, Sign In to add comment