Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
#
# usage: tweets_to_csv.rb <search term> <output filename>
#
# Dumps Twitter search results for <search term> into a CSV file,
# paging through results until a page comes back empty.
#
# This is the 'dumb' approach
# needs reworking if you want something that handles duplicates and whatnot
require 'rubygems'
require 'csv'
require 'twitter_search'

query = ARGV[0]
output_filename = ARGV[1]

# Fail fast with a usage message instead of a nil error deep inside File.open.
abort("usage: #{$0} <search term> <output filename>") unless query && output_filename

# setup my twitter search client
client = TwitterSearch::Client.new('tweet_to_csv')

# Block form guarantees the file is flushed and closed even if a query
# raises mid-run (the original leaked the handle on any exception before
# reaching the final outfile.close).
File.open(output_filename, 'wb') do |outfile|
  # NOTE(review): CSV::Writer.generate is the Ruby 1.8 CSV API; on modern
  # Ruby this would be CSV(outfile) { |csv| ... } — confirm target version.
  CSV::Writer.generate(outfile) do |csv|
    # csv headers
    csv << ['id', 'created_at', 'from_user', 'to_user', 'body']
    # start at page 1
    page = 1
    tweets = client.query(:q => query, :rpp => '100', :page => page)
    # while there are tweets to be had
    while tweets.count > 0
      # increment page
      page += 1
      tweets.each do |tweet|
        # print one line of csv per tweet
        csv << [tweet.id, tweet.created_at, tweet.from_user, tweet.to_user, tweet.text]
      end
      # been getting strange timeout issues
      # i think twitter is rate limiting
      # sleeping for a couple seconds seems to make it not do that
      sleep(3)
      # fetch the next page
      tweets = client.query(:q => query, :rpp => '100', :page => page)
    end
  end
end
Add Comment
Please, Sign In to add comment