Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- commit 3460700d98c26b1e0e255a54e10be722489b9391
- Author: unknown <dwmcqueen@gmail.com>
- Date: Sun Apr 21 12:41:01 2013 -0500
- Updated Twitter to use OAuth and fixed UTF conversion
- diff --git a/R/2_run.R b/R/2_run.R
- index 164c0cf..0f7e3cc 100644
- --- a/R/2_run.R
- +++ b/R/2_run.R
- @@ -9,20 +9,18 @@ if (VERBOSE)
- # we do end up with lots of objects in memory to play with (it _is_
- # a tutorial, after all :)
- -american.text = laply(american.tweets, function(t) t$getText() )
- -delta.text = laply(delta.tweets, function(t) t$getText() )
- -jetblue.text = laply(jetblue.tweets, function(t) t$getText() )
- -southwest.text = laply(southwest.tweets, function(t) t$getText() )
- -united.text = laply(united.tweets, function(t) t$getText() )
- -us.text = laply(us.tweets, function(t) t$getText() )
- -
- +american.text = laply(american.tweets, function(t) iconv(t$getText(), to="UTF8"))
- +delta.text = laply(delta.tweets, function(t) iconv(t$getText(), to="UTF8") )
- +jetblue.text = laply(jetblue.tweets, function(t) iconv(t$getText(), to="UTF8") )
- +southwest.text = laply(southwest.tweets, function(t) iconv(t$getText(), to="UTF8") )
- +united.text = laply(united.tweets, function(t) iconv(t$getText(), to="UTF8") )
- +us.text = laply(us.tweets, function(t) iconv(t$getText(), to="UTF8") )
- american.scores = score.sentiment(american.text, pos.words, neg.words, .progress='text')
- delta.scores = score.sentiment(delta.text, pos.words, neg.words, .progress='text')
- jetblue.scores = score.sentiment(jetblue.text, pos.words, neg.words, .progress='text')
- southwest.scores = score.sentiment(southwest.text, pos.words, neg.words, .progress='text')
- united.scores = score.sentiment(united.text, pos.words, neg.words, .progress='text')
- us.scores = score.sentiment(us.text, pos.words, neg.words, .progress='text')
- -
- american.scores$airline = 'American'
- american.scores$code = 'AA'
- delta.scores$airline = 'Delta'
- diff --git a/R/scrape.R b/R/scrape.R
- index 56dcf2b..a138e92 100644
- --- a/R/scrape.R
- +++ b/R/scrape.R
- @@ -9,23 +9,51 @@ if (VERBOSE)
- print("Searching Twitter for airline tweets and saving to disk")
- require(twitteR)
- -
- -american.tweets = searchTwitter('@americanair', n=1500)
- +library(RCurl)
- +library(ROAuth)
- +
- +# Need to make sure we have a CA certificate bundle (cacert.pem)
- +
- +if (!file.exists("cacert.pem"))
- + download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
- +
- +if (file.exists("twitter authentication.Rdata")){
- + load("twitter authentication.Rdata")
- +} else
- +{
- + requestURL <- "https://api.twitter.com/oauth/request_token"
- + accessURL = "http://api.twitter.com/oauth/access_token"
- + authURL = "http://api.twitter.com/oauth/authorize"
- + consumerKey = "FILLINWITHCONSUMERKEY"
- + consumerSecret = "FILLINWITHCONSUMERSECRET"
- + Cred <- OAuthFactory$new(consumerKey=consumerKey,
- + consumerSecret=consumerSecret,
- + requestURL=requestURL,
- + accessURL=accessURL,
- + authURL=authURL)
- + # The next command provides a URL which you will need to copy and paste into your favourite browser.
- + # Assuming you are logged into Twitter, you will then be given a PIN to type into the R command line.
- + Cred$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl") )
- + # Save the credentials to disk so that later runs can load them instead of repeating the handshake
- + save(Cred, file="twitter authentication.Rdata")
- +}
- +registerTwitterOAuth(Cred)
- +american.tweets = searchTwitter('@americanair', n=1500, cainfo="cacert.pem")
- save(american.tweets, file=file.path(dataDir, 'american.tweets.RData' ), ascii=T)
- -delta.tweets = searchTwitter('@delta', n=1500)
- +delta.tweets = searchTwitter('@delta', n=1500, cainfo="cacert.pem")
- save(delta.tweets, file=file.path(dataDir, 'delta.tweets.RData' ), ascii=T)
- -jetblue.tweets = searchTwitter('@jetblue', n=1500)
- +jetblue.tweets = searchTwitter('@jetblue', n=1500, cainfo="cacert.pem")
- save(jetblue.tweets, file=file.path(dataDir, 'jetblue.tweets.RData' ), ascii=T)
- -southwest.tweets = searchTwitter('@southwestair', n=1500)
- +southwest.tweets = searchTwitter('@southwestair', n=1500, cainfo="cacert.pem")
- save(southwest.tweets, file=file.path(dataDir, 'southwest.tweets.RData' ), ascii=T)
- -united.tweets = searchTwitter('@united', n=1500)
- +united.tweets = searchTwitter('@united', n=1500, cainfo="cacert.pem")
- save(united.tweets, file=file.path(dataDir, 'united.tweets.RData' ), ascii=T)
- -us.tweets = searchTwitter('@usairways', n=1500)
- +us.tweets = searchTwitter('@usairways', n=1500, cainfo="cacert.pem")
- save(us.tweets, file=file.path(dataDir, 'us.tweets.RData' ), ascii=T)
- @@ -45,7 +73,7 @@ acsi.df = acsi.raw.df[,c(1,19)]
- colnames(acsi.df) = c('airline', 'score')
- # add codes for later matching, and make sure score is treated as a number (not a string)
- -acsi.df$code = c('WN', NA, NA, 'CO', 'AA', 'UA', 'US', 'DL', 'NW')
- +acsi.df$code = c('B6', 'WN', NA, NA, 'DL', 'US', 'AA', 'UA', NA, 'NW')
- acsi.df$score = as.numeric(acsi.df$score)
- save(acsi.raw.df, file=file.path(dataDir, 'acsi.raw.df.RData'), ascii=T)
- diff --git a/R/sentiment.R b/R/sentiment.R
- index 4b2be84..f389fff 100644
- --- a/R/sentiment.R
- +++ b/R/sentiment.R
- @@ -14,7 +14,7 @@ score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
- {
- require(plyr)
- require(stringr)
- -
- +
- # we got a vector of sentences. plyr will handle a list or a vector as an "l" for us
- # we want a simple array of scores back, so we use "l" + "a" + "ply" = laply:
- scores = laply(sentences, function(sentence, pos.words, neg.words) {
- diff --git a/data/acsi.df.RData b/data/acsi.df.RData
- index 97d6f46..8b7511a 100644
- --- a/data/acsi.df.RData
- +++ b/data/acsi.df.RData
- @@ -1,115 +1,121 @@
- RDA2
- A
- 2
- -134400
- +196608
- 131840
- 1026
- 1
- -9
- +262153
- 7
- acsi.df
- 787
- 3
- 16
- -9
- -9
- +10
- +262153
- +7
- +JetBlue
- +262153
- 9
- Southwest
- -9
- +262153
- 10
- All\040Others
- -9
- +262153
- 8
- Airlines
- -9
- -11
- -Continental
- -9
- +262153
- +5
- +Delta
- +262153
- +10
- +US\040Airways
- +262153
- 8
- American
- -9
- +262153
- 6
- United
- -9
- -10
- -US\040Airways
- -9
- -5
- -Delta
- -9
- +262153
- +11
- +Continental
- +262153
- 18
- Northwest\040Airlines
- 14
- -9
- +10
- +NA
- 81
- 76
- 65
- -64
- -63
- +56
- 61
- +63
- 61
- -56
- +64
- NA
- 16
- -9
- -9
- +10
- +262153
- +2
- +B6
- +262153
- 2
- WN
- 9
- -1
- 9
- -1
- -9
- +262153
- 2
- -CO
- -9
- +DL
- +262153
- +2
- +US
- +262153
- 2
- AA
- -9
- +262153
- 2
- UA
- 9
- -2
- -US
- -9
- -2
- -DL
- -9
- +-1
- +262153
- 2
- NW
- 1026
- 1
- -9
- +262153
- 5
- names
- 16
- 3
- -9
- +262153
- 7
- airline
- -9
- +262153
- 5
- score
- -9
- +262153
- 4
- code
- 1026
- 1
- -9
- +262153
- 9
- row.names
- 13
- 2
- NA
- --9
- +-10
- 1026
- 1
- -9
- +262153
- 5
- class
- 16
- 1
- -9
- +262153
- 10
- data.frame
- 254
- diff --git a/data/acsi.raw.df.RData b/data/acsi.raw.df.RData
- index 546910e..c9e98ff 100644
- --- a/data/acsi.raw.df.RData
- +++ b/data/acsi.raw.df.RData
- @@ -1,711 +1,844 @@
- RDA2
- A
- 2
- -134400
- +196608
- 131840
- 1026
- 1
- -9
- +262153
- 11
- acsi.raw.df
- 787
- -21
- +23
- 16
- -9
- -9
- +10
- +262153
- +7
- +JetBlue
- +262153
- 9
- Southwest
- -9
- +262153
- 10
- All\040Others
- -9
- +262153
- 8
- Airlines
- -9
- -11
- -Continental
- -9
- +262153
- +5
- +Delta
- +262153
- +10
- +US\040Airways
- +262153
- 8
- American
- -9
- +262153
- 6
- United
- -9
- -10
- -US\040Airways
- -9
- -5
- -Delta
- -9
- +262153
- +11
- +Continental
- +262153
- 18
- Northwest\040Airlines
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 78
- -9
- +262153
- 2
- NM
- -9
- +262153
- 2
- 72
- -9
- +262153
- 2
- -67
- -9
- +77
- +262153
- +2
- +72
- +262153
- 2
- 70
- -9
- +262153
- 2
- 71
- -9
- -2
- -72
- -9
- +262153
- 2
- -77
- -9
- +67
- +262153
- 2
- 69
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 76
- -9
- +262153
- 2
- 70
- -9
- +262153
- 2
- 69
- -9
- -2
- -64
- -9
- +262153
- 2
- -71
- -9
- +72
- +262153
- 2
- 67
- -9
- +262153
- +2
- +71
- +262153
- 2
- 67
- -9
- +262153
- 2
- -72
- -9
- +64
- +262153
- 2
- 71
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 76
- -9
- +262153
- 2
- 74
- -9
- +262153
- 2
- 69
- -9
- +262153
- +2
- +67
- +262153
- 2
- 66
- -9
- +262153
- 2
- 71
- -9
- +262153
- 2
- 70
- -9
- +262153
- 2
- 66
- -9
- -2
- -67
- -9
- +262153
- 2
- 67
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 76
- -9
- +262153
- 2
- 70
- -9
- +262153
- 2
- 67
- -9
- +262153
- 2
- -64
- -9
- -2
- -62
- -9
- +69
- +262153
- 2
- 68
- -9
- +262153
- +2
- +62
- +262153
- 2
- 68
- -9
- +262153
- 2
- -69
- -9
- +64
- +262153
- 2
- 64
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 74
- -9
- +262153
- 2
- 62
- -9
- +262153
- 2
- 65
- -9
- -2
- -66
- -9
- -2
- -67
- -9
- +262153
- 2
- 65
- -9
- +262153
- 2
- 65
- -9
- +262153
- +2
- +67
- +262153
- 2
- 65
- -9
- +262153
- +2
- +66
- +262153
- 2
- 63
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 72
- -9
- +262153
- 2
- 67
- -9
- +262153
- 2
- 63
- -9
- +262153
- 2
- -64
- -9
- +68
- +262153
- +2
- +61
- +262153
- 2
- 64
- -9
- +262153
- 2
- 62
- -9
- -2
- -61
- -9
- +262153
- 2
- -68
- -9
- +64
- +262153
- 2
- 53
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 70
- -9
- +262153
- 2
- 63
- -9
- +262153
- 2
- 63
- -9
- +262153
- +2
- +66
- +262153
- 2
- 62
- -9
- +262153
- 2
- 63
- -9
- +262153
- 2
- 62
- -9
- +262153
- 2
- 62
- -9
- -2
- -66
- -9
- +262153
- 2
- 62
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 70
- -9
- +262153
- 2
- 64
- -9
- +262153
- 2
- 61
- -9
- +262153
- 2
- -67
- -9
- +61
- +262153
- +2
- +60
- +262153
- 2
- 62
- -9
- +262153
- 2
- 59
- -9
- +262153
- 2
- -60
- -9
- -2
- -61
- -9
- +67
- +262153
- 2
- 56
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 74
- -9
- +262153
- 2
- 72
- -9
- +262153
- 2
- 66
- -9
- +262153
- 2
- -68
- -9
- +66
- +262153
- 2
- 63
- -9
- -2
- -64
- -9
- +262153
- 2
- 63
- -9
- +262153
- 2
- -66
- -9
- +64
- +262153
- +2
- +68
- +262153
- 2
- 65
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 75
- -9
- +262153
- 2
- 74
- -9
- +262153
- 2
- 67
- -9
- -2
- -68
- -9
- +262153
- 2
- 67
- -9
- -2
- -63
- -9
- +262153
- 2
- 64
- -9
- +262153
- 2
- 67
- -9
- +262153
- +2
- +63
- +262153
- +2
- +68
- +262153
- 2
- 64
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 73
- -9
- +262153
- 2
- 73
- -9
- +262153
- 2
- 66
- -9
- +262153
- 2
- 67
- -9
- +262153
- +2
- +62
- +262153
- 2
- 66
- -9
- +262153
- 2
- 64
- -9
- -2
- -62
- -9
- +262153
- 2
- 67
- -9
- +262153
- 2
- 64
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 74
- -9
- +262153
- 2
- 74
- -9
- +262153
- 2
- 66
- -9
- +262153
- 2
- -70
- -9
- +65
- +262153
- +2
- +57
- +262153
- 2
- 64
- -9
- +262153
- 2
- 61
- -9
- -2
- -57
- -9
- +262153
- 2
- -65
- -9
- +70
- +262153
- 2
- 64
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 74
- -9
- +262153
- 2
- 74
- -9
- +262153
- 2
- 65
- -9
- +262153
- 2
- -67
- -9
- +64
- +262153
- 2
- 62
- -9
- -2
- -63
- -9
- +262153
- 2
- 62
- -9
- +262153
- 2
- -64
- -9
- +63
- +262153
- +2
- +67
- +262153
- 2
- 61
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 76
- -9
- +262153
- 2
- 75
- -9
- +262153
- 2
- 63
- -9
- +262153
- 2
- -69
- -9
- +59
- +262153
- +2
- +61
- +262153
- 2
- 60
- -9
- +262153
- 2
- 56
- -9
- -2
- -61
- -9
- +262153
- 2
- -59
- -9
- +69
- +262153
- 2
- 61
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 79
- -9
- +262153
- 2
- 75
- -9
- +262153
- 2
- 62
- -9
- +262153
- 2
- -62
- -9
- +60
- +262153
- +2
- +54
- +262153
- 2
- 62
- -9
- +262153
- 2
- 56
- -9
- -2
- -54
- -9
- +262153
- 2
- -60
- -9
- +62
- +262153
- 2
- 57
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 81
- -9
- +262153
- 2
- 77
- -9
- +262153
- 2
- 64
- -9
- +262153
- 2
- -68
- -9
- +64
- +262153
- +2
- +59
- +262153
- 2
- 60
- -9
- +262153
- 2
- 56
- -9
- -2
- -59
- -9
- +262153
- 2
- -64
- -9
- +68
- +262153
- 2
- 57
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 79
- -9
- +262153
- 2
- 75
- -9
- +262153
- 2
- 66
- -9
- +262153
- 2
- -71
- -9
- +62
- +262153
- +2
- +62
- +262153
- 2
- 63
- -9
- +262153
- 2
- 60
- -9
- -2
- -62
- -9
- +262153
- 2
- -62
- -9
- +71
- +262153
- 2
- 61
- 16
- -9
- -9
- +10
- +262153
- +2
- +NM
- +262153
- 2
- 81
- -9
- +262153
- 2
- 76
- -9
- +262153
- 2
- 65
- -9
- +262153
- 2
- -64
- -9
- +56
- +262153
- +2
- +61
- +262153
- 2
- 63
- -9
- +262153
- 2
- 61
- -9
- +262153
- 2
- -61
- -9
- +64
- +262153
- +1
- +#
- +16
- +10
- +262153
- 2
- -56
- -9
- +81
- +262153
- +2
- +77
- +262153
- +2
- +74
- +262153
- +2
- +67
- +262153
- +2
- +65
- +262153
- +2
- +65
- +262153
- +2
- +64
- +262153
- +2
- +62
- +262153
- 1
- #
- +262153
- +0
- +
- 16
- -9
- -9
- -3
- -2.5
- -9
- +10
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +262153
- +0
- +
- +16
- +10
- +262153
- 3
- -1.3
- -9
- +N/A
- +262153
- 4
- --1.5
- -9
- +-4.9
- +262153
- 4
- --9.9
- -9
- -3
- -0.0
- -9
- +-2.6
- +262153
- 3
- -1.7
- -9
- -4
- --1.6
- -9
- +3.1
- +262153
- 4
- --9.7
- -9
- +16.1
- +262153
- +3
- +6.6
- +262153
- +3
- +1.6
- +262153
- +3
- +1.6
- +262153
- +3
- +N/A
- +262153
- 3
- N/A
- 16
- -9
- -9
- +10
- +262153
- 3
- -3.8
- -9
- +N/A
- +262153
- +4
- +-1.3
- +262153
- 3
- -8.6
- -9
- +5.7
- +262153
- +4
- +-6.9
- +262153
- +5
- +-15.6
- +262153
- 4
- -9.7
- -9
- +262153
- 4
- --4.5
- -9
- -5
- --10.0
- -9
- -5
- --14.1
- -9
- -5
- --15.3
- -9
- +-8.6
- +262153
- 5
- --27.3
- -9
- +-12.7
- +262153
- +3
- +N/A
- +262153
- 3
- N/A
- 1026
- 1
- -9
- +262153
- 5
- names
- 16
- -21
- -9
- +23
- +262153
- 0
- -9
- +262153
- 9
- Base-line
- -9
- +262153
- 2
- 95
- -9
- +262153
- 2
- 96
- -9
- +262153
- 2
- 97
- -9
- +262153
- 2
- 98
- -9
- +262153
- 2
- 99
- -9
- +262153
- 2
- 00
- -9
- +262153
- 2
- 01
- -9
- +262153
- 2
- 02
- -9
- +262153
- 2
- 03
- -9
- +262153
- 2
- 04
- -9
- +262153
- 2
- 05
- -9
- +262153
- 2
- 06
- -9
- +262153
- 2
- 07
- -9
- +262153
- 2
- 08
- -9
- +262153
- 2
- 09
- -9
- +262153
- 2
- 10
- -9
- +262153
- 2
- 11
- -9
- +262153
- +2
- +12
- +262153
- +2
- +13
- +262153
- 19
- PreviousYear%Change
- -9
- +262153
- 16
- FirstYear%Change
- 1026
- 1
- -9
- +262153
- 9
- row.names
- 13
- 2
- NA
- --9
- +-10
- 1026
- 1
- -9
- +262153
- 5
- class
- 16
- 1
- -9
- +262153
- 10
- data.frame
- 254
- diff --git a/output/twitter_acsi_comparison.pdf b/output/twitter_acsi_comparison.pdf
- index fc66b1d..73b4c04 100644
- --- a/output/twitter_acsi_comparison.pdf
- +++ b/output/twitter_acsi_comparison.pdf
- @@ -1,11 +1,12 @@
- - airline
- - q
- - 80 q American
- - q Delta
- - q Southwest
- - q US Airways
- - q United
- - 75
- + airline
- + q
- + 80
- + q American
- + q Delta
- + q JetBlue
- + q Southwest
- + q United
- + 75 q US Airways
- @@ -19,16 +20,16 @@ score.acsi
- 65
- - q
- + q
- - q q
- + q q
- 60
- - q
- -
- - 40 45 50 55 60 65 70
- - score.twitter
- + q
- + 55
- + 40 60 80 100
- + score.twitter
- \ No newline at end of file
- diff --git a/output/twitter_acsi_comparison_with_fit.pdf b/output/twitter_acsi_comparison_with_fit.pdf
- index 2905f0a..4a43e36 100644
- --- a/output/twitter_acsi_comparison_with_fit.pdf
- +++ b/output/twitter_acsi_comparison_with_fit.pdf
- @@ -1,11 +1,12 @@
- - airline
- - q
- - 80 q American
- - q Delta
- - q Southwest
- - q US Airways
- - q United
- - 75
- + airline
- + q
- + 80
- + q American
- + q Delta
- + q JetBlue
- + q Southwest
- + q United
- + 75 q US Airways
- @@ -19,16 +20,16 @@ score.acsi
- 65
- - q
- + q
- - q q
- + q q
- 60
- - q
- -
- - 40 45 50 55 60 65 70
- - score.twitter
- + q
- + 55
- + 40 60 80 100
- + score.twitter
- \ No newline at end of file
- diff --git a/output/twitter_score_histograms.pdf b/output/twitter_score_histograms.pdf
- index 55f4562..7ce1a1d 100644
- --- a/output/twitter_score_histograms.pdf
- +++ b/output/twitter_score_histograms.pdf
- @@ -1,66 +1,52 @@
- + American
- + 1000
- 500
- + 0
- + 1000
- - American
- - 400
- - 300
- - 200
- - 100
- - 0
- +
- + Delta
- 500
- - 400
- + 0
- + airline
- - Delta
- - 300
- - 200
- - 100
- - 0
- - 500 airline
- + JetBlue
- + 1000 American
- + 500 Delta
- +count
- - JetBlue
- - 400
- - 300 American
- - 200
- - 100 Delta
- - 0
- -count
- + 0 JetBlue
- - JetBlue
- + Southwest
- + 1000 Southwest
- + 500 United
- + 0 US Airways
- - Southwest US Airways
- - 500
- - 400 Southwest
- - 300
- - 200
- - 100 US Airways
- - 0
- - United
- +
- +
- + United
- + 1000
- 500
- - 400
- - 300
- - 200
- - 100
- 0
- + US Airways
- + 1000
- 500
- - 400 United
- - 300
- - 200
- - 100
- 0
- - −6 −4 −2 0 2 4 6
- - score
- + −5.0 −2.5 0.0 2.5 5.0 7.5
- + score
- \ No newline at end of file
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement