commit 3460700d98c26b1e0e255a54e10be722489b9391
Author: unknown <dwmcqueen@gmail.com>
Date: Sun Apr 21 12:41:01 2013 -0500
Updated Twitter to use OAuth and fixed UTF conversion
diff --git a/R/2_run.R b/R/2_run.R
index 164c0cf..0f7e3cc 100644
--- a/R/2_run.R
+++ b/R/2_run.R
@@ -9,20 +9,18 @@ if (VERBOSE)
# we do end up with lots of objects in memory to play with (it _is_
# a tutorial, after all :)
-american.text = laply(american.tweets, function(t) t$getText() )
-delta.text = laply(delta.tweets, function(t) t$getText() )
-jetblue.text = laply(jetblue.tweets, function(t) t$getText() )
-southwest.text = laply(southwest.tweets, function(t) t$getText() )
-united.text = laply(united.tweets, function(t) t$getText() )
-us.text = laply(us.tweets, function(t) t$getText() )
-
+american.text = laply(american.tweets, function(t) iconv(t$getText(), to="UTF8"))
+delta.text = laply(delta.tweets, function(t) iconv(t$getText(), to="UTF8") )
+jetblue.text = laply(jetblue.tweets, function(t) iconv(t$getText(), to="UTF8") )
+southwest.text = laply(southwest.tweets, function(t) iconv(t$getText(), to="UTF8") )
+united.text = laply(united.tweets, function(t) iconv(t$getText(), to="UTF8") )
+us.text = laply(us.tweets, function(t) iconv(t$getText(), to="UTF8") )
american.scores = score.sentiment(american.text, pos.words, neg.words, .progress='text')
delta.scores = score.sentiment(delta.text, pos.words, neg.words, .progress='text')
jetblue.scores = score.sentiment(jetblue.text, pos.words, neg.words, .progress='text')
southwest.scores = score.sentiment(southwest.text, pos.words, neg.words, .progress='text')
united.scores = score.sentiment(united.text, pos.words, neg.words, .progress='text')
us.scores = score.sentiment(us.text, pos.words, neg.words, .progress='text')
-
american.scores$airline = 'American'
american.scores$code = 'AA'
delta.scores$airline = 'Delta'
diff --git a/R/scrape.R b/R/scrape.R
index 56dcf2b..a138e92 100644
--- a/R/scrape.R
+++ b/R/scrape.R
@@ -9,23 +9,51 @@ if (VERBOSE)
print("Searching Twitter for airline tweets and saving to disk")
require(twitteR)
-
-american.tweets = searchTwitter('@americanair', n=1500)
+library(RCurl)
+library(ROAuth)
+
+# Need to make sure we have a CA certificate bundle (cacert.pem)
+
+if (!file.exists("cacert.pem"))
+ download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
+
+if (file.exists("twitter authentication.Rdata")){
+ load("twitter authentication.Rdata")
+} else
+{
+ requestURL <- "https://api.twitter.com/oauth/request_token"
+ accessURL = "http://api.twitter.com/oauth/access_token"
+ authURL = "http://api.twitter.com/oauth/authorize"
+ consumerKey = "FILLINWITHCONSUMERKEY"
+ consumerSecret = "FILLINWITHCONSUMERSECRET"
+ Cred <- OAuthFactory$new(consumerKey=consumerKey,
+ consumerSecret=consumerSecret,
+ requestURL=requestURL,
+ accessURL=accessURL,
+ authURL=authURL)
+ # The next command prints a URL which you will need to copy and paste into your favourite browser.
+ # Assuming you are logged into Twitter, you will then be given a PIN to type into the R command line.
+ Cred$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl") )
+ # Save the authorised credentials so that later runs can load them instead of repeating the handshake
+ save(Cred, file="twitter authentication.Rdata")
+}
+registerTwitterOAuth(Cred)
+american.tweets = searchTwitter('@americanair', n=1500, cainfo="cacert.pem")
save(american.tweets, file=file.path(dataDir, 'american.tweets.RData' ), ascii=T)
-delta.tweets = searchTwitter('@delta', n=1500)
+delta.tweets = searchTwitter('@delta', n=1500, cainfo="cacert.pem")
save(delta.tweets, file=file.path(dataDir, 'delta.tweets.RData' ), ascii=T)
-jetblue.tweets = searchTwitter('@jetblue', n=1500)
+jetblue.tweets = searchTwitter('@jetblue', n=1500, cainfo="cacert.pem")
save(jetblue.tweets, file=file.path(dataDir, 'jetblue.tweets.RData' ), ascii=T)
-southwest.tweets = searchTwitter('@southwestair', n=1500)
+southwest.tweets = searchTwitter('@southwestair', n=1500, cainfo="cacert.pem")
save(southwest.tweets, file=file.path(dataDir, 'southwest.tweets.RData' ), ascii=T)
-united.tweets = searchTwitter('@united', n=1500)
+united.tweets = searchTwitter('@united', n=1500, cainfo="cacert.pem")
save(united.tweets, file=file.path(dataDir, 'united.tweets.RData' ), ascii=T)
-us.tweets = searchTwitter('@usairways', n=1500)
+us.tweets = searchTwitter('@usairways', n=1500, cainfo="cacert.pem")
save(us.tweets, file=file.path(dataDir, 'us.tweets.RData' ), ascii=T)
@@ -45,7 +73,7 @@ acsi.df = acsi.raw.df[,c(1,19)]
colnames(acsi.df) = c('airline', 'score')
# add codes for later matching, and make sure score is treated as a number (not a string)
-acsi.df$code = c('WN', NA, NA, 'CO', 'AA', 'UA', 'US', 'DL', 'NW')
+acsi.df$code = c('B6', 'WN', NA, NA, 'DL', 'US', 'AA', 'UA', NA, 'NW')
acsi.df$score = as.numeric(acsi.df$score)
save(acsi.raw.df, file=file.path(dataDir, 'acsi.raw.df.RData'), ascii=T)
diff --git a/R/sentiment.R b/R/sentiment.R
index 4b2be84..f389fff 100644
--- a/R/sentiment.R
+++ b/R/sentiment.R
@@ -14,7 +14,7 @@ score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
{
require(plyr)
require(stringr)
-
+
# we got a vector of sentences. plyr will handle a list or a vector as an "l" for us
# we want a simple array of scores back, so we use "l" + "a" + "ply" = laply:
scores = laply(sentences, function(sentence, pos.words, neg.words) {
diff --git a/data/acsi.df.RData b/data/acsi.df.RData
index 97d6f46..8b7511a 100644
--- a/data/acsi.df.RData
+++ b/data/acsi.df.RData
@@ -1,115 +1,121 @@
RDA2
A
2
-134400
+196608
131840
1026
1
-9
+262153
7
acsi.df
787
3
16
-9
-9
+10
+262153
+7
+JetBlue
+262153
9
Southwest
-9
+262153
10
All\040Others
-9
+262153
8
Airlines
-9
-11
-Continental
-9
+262153
+5
+Delta
+262153
+10
+US\040Airways
+262153
8
American
-9
+262153
6
United
-9
-10
-US\040Airways
-9
-5
-Delta
-9
+262153
+11
+Continental
+262153
18
Northwest\040Airlines
14
-9
+10
+NA
81
76
65
-64
-63
+56
61
+63
61
-56
+64
NA
16
-9
-9
+10
+262153
+2
+B6
+262153
2
WN
9
-1
9
-1
-9
+262153
2
-CO
-9
+DL
+262153
+2
+US
+262153
2
AA
-9
+262153
2
UA
9
-2
-US
-9
-2
-DL
-9
+-1
+262153
2
NW
1026
1
-9
+262153
5
names
16
3
-9
+262153
7
airline
-9
+262153
5
score
-9
+262153
4
code
1026
1
-9
+262153
9
row.names
13
2
NA
--9
+-10
1026
1
-9
+262153
5
class
16
1
-9
+262153
10
data.frame
254
diff --git a/data/acsi.raw.df.RData b/data/acsi.raw.df.RData
index 546910e..c9e98ff 100644
--- a/data/acsi.raw.df.RData
+++ b/data/acsi.raw.df.RData
@@ -1,711 +1,844 @@
RDA2
A
2
-134400
+196608
131840
1026
1
-9
+262153
11
acsi.raw.df
787
-21
+23
16
-9
-9
+10
+262153
+7
+JetBlue
+262153
9
Southwest
-9
+262153
10
All\040Others
-9
+262153
8
Airlines
-9
-11
-Continental
-9
+262153
+5
+Delta
+262153
+10
+US\040Airways
+262153
8
American
-9
+262153
6
United
-9
-10
-US\040Airways
-9
-5
-Delta
-9
+262153
+11
+Continental
+262153
18
Northwest\040Airlines
16
-9
-9
+10
+262153
+2
+NM
+262153
2
78
-9
+262153
2
NM
-9
+262153
2
72
-9
+262153
2
-67
-9
+77
+262153
+2
+72
+262153
2
70
-9
+262153
2
71
-9
-2
-72
-9
+262153
2
-77
-9
+67
+262153
2
69
16
-9
-9
+10
+262153
+2
+NM
+262153
2
76
-9
+262153
2
70
-9
+262153
2
69
-9
-2
-64
-9
+262153
2
-71
-9
+72
+262153
2
67
-9
+262153
+2
+71
+262153
2
67
-9
+262153
2
-72
-9
+64
+262153
2
71
16
-9
-9
+10
+262153
+2
+NM
+262153
2
76
-9
+262153
2
74
-9
+262153
2
69
-9
+262153
+2
+67
+262153
2
66
-9
+262153
2
71
-9
+262153
2
70
-9
+262153
2
66
-9
-2
-67
-9
+262153
2
67
16
-9
-9
+10
+262153
+2
+NM
+262153
2
76
-9
+262153
2
70
-9
+262153
2
67
-9
+262153
2
-64
-9
-2
-62
-9
+69
+262153
2
68
-9
+262153
+2
+62
+262153
2
68
-9
+262153
2
-69
-9
+64
+262153
2
64
16
-9
-9
+10
+262153
+2
+NM
+262153
2
74
-9
+262153
2
62
-9
+262153
2
65
-9
-2
-66
-9
-2
-67
-9
+262153
2
65
-9
+262153
2
65
-9
+262153
+2
+67
+262153
2
65
-9
+262153
+2
+66
+262153
2
63
16
-9
-9
+10
+262153
+2
+NM
+262153
2
72
-9
+262153
2
67
-9
+262153
2
63
-9
+262153
2
-64
-9
+68
+262153
+2
+61
+262153
2
64
-9
+262153
2
62
-9
-2
-61
-9
+262153
2
-68
-9
+64
+262153
2
53
16
-9
-9
+10
+262153
+2
+NM
+262153
2
70
-9
+262153
2
63
-9
+262153
2
63
-9
+262153
+2
+66
+262153
2
62
-9
+262153
2
63
-9
+262153
2
62
-9
+262153
2
62
-9
-2
-66
-9
+262153
2
62
16
-9
-9
+10
+262153
+2
+NM
+262153
2
70
-9
+262153
2
64
-9
+262153
2
61
-9
+262153
2
-67
-9
+61
+262153
+2
+60
+262153
2
62
-9
+262153
2
59
-9
+262153
2
-60
-9
-2
-61
-9
+67
+262153
2
56
16
-9
-9
+10
+262153
+2
+NM
+262153
2
74
-9
+262153
2
72
-9
+262153
2
66
-9
+262153
2
-68
-9
+66
+262153
2
63
-9
-2
-64
-9
+262153
2
63
-9
+262153
2
-66
-9
+64
+262153
+2
+68
+262153
2
65
16
-9
-9
+10
+262153
+2
+NM
+262153
2
75
-9
+262153
2
74
-9
+262153
2
67
-9
-2
-68
-9
+262153
2
67
-9
-2
-63
-9
+262153
2
64
-9
+262153
2
67
-9
+262153
+2
+63
+262153
+2
+68
+262153
2
64
16
-9
-9
+10
+262153
+2
+NM
+262153
2
73
-9
+262153
2
73
-9
+262153
2
66
-9
+262153
2
67
-9
+262153
+2
+62
+262153
2
66
-9
+262153
2
64
-9
-2
-62
-9
+262153
2
67
-9
+262153
2
64
16
-9
-9
+10
+262153
+2
+NM
+262153
2
74
-9
+262153
2
74
-9
+262153
2
66
-9
+262153
2
-70
-9
+65
+262153
+2
+57
+262153
2
64
-9
+262153
2
61
-9
-2
-57
-9
+262153
2
-65
-9
+70
+262153
2
64
16
-9
-9
+10
+262153
+2
+NM
+262153
2
74
-9
+262153
2
74
-9
+262153
2
65
-9
+262153
2
-67
-9
+64
+262153
2
62
-9
-2
-63
-9
+262153
2
62
-9
+262153
2
-64
-9
+63
+262153
+2
+67
+262153
2
61
16
-9
-9
+10
+262153
+2
+NM
+262153
2
76
-9
+262153
2
75
-9
+262153
2
63
-9
+262153
2
-69
-9
+59
+262153
+2
+61
+262153
2
60
-9
+262153
2
56
-9
-2
-61
-9
+262153
2
-59
-9
+69
+262153
2
61
16
-9
-9
+10
+262153
+2
+NM
+262153
2
79
-9
+262153
2
75
-9
+262153
2
62
-9
+262153
2
-62
-9
+60
+262153
+2
+54
+262153
2
62
-9
+262153
2
56
-9
-2
-54
-9
+262153
2
-60
-9
+62
+262153
2
57
16
-9
-9
+10
+262153
+2
+NM
+262153
2
81
-9
+262153
2
77
-9
+262153
2
64
-9
+262153
2
-68
-9
+64
+262153
+2
+59
+262153
2
60
-9
+262153
2
56
-9
-2
-59
-9
+262153
2
-64
-9
+68
+262153
2
57
16
-9
-9
+10
+262153
+2
+NM
+262153
2
79
-9
+262153
2
75
-9
+262153
2
66
-9
+262153
2
-71
-9
+62
+262153
+2
+62
+262153
2
63
-9
+262153
2
60
-9
-2
-62
-9
+262153
2
-62
-9
+71
+262153
2
61
16
-9
-9
+10
+262153
+2
+NM
+262153
2
81
-9
+262153
2
76
-9
+262153
2
65
-9
+262153
2
-64
-9
+56
+262153
+2
+61
+262153
2
63
-9
+262153
2
61
-9
+262153
2
-61
-9
+64
+262153
+1
+#
+16
+10
+262153
2
-56
-9
+81
+262153
+2
+77
+262153
+2
+74
+262153
+2
+67
+262153
+2
+65
+262153
+2
+65
+262153
+2
+64
+262153
+2
+62
+262153
1
#
+262153
+0
+
16
-9
-9
-3
-2.5
-9
+10
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+262153
+0
+
+16
+10
+262153
3
-1.3
-9
+N/A
+262153
4
--1.5
-9
+-4.9
+262153
4
--9.9
-9
-3
-0.0
-9
+-2.6
+262153
3
-1.7
-9
-4
--1.6
-9
+3.1
+262153
4
--9.7
-9
+16.1
+262153
+3
+6.6
+262153
+3
+1.6
+262153
+3
+1.6
+262153
+3
+N/A
+262153
3
N/A
16
-9
-9
+10
+262153
3
-3.8
-9
+N/A
+262153
+4
+-1.3
+262153
3
-8.6
-9
+5.7
+262153
+4
+-6.9
+262153
+5
+-15.6
+262153
4
-9.7
-9
+262153
4
--4.5
-9
-5
--10.0
-9
-5
--14.1
-9
-5
--15.3
-9
+-8.6
+262153
5
--27.3
-9
+-12.7
+262153
+3
+N/A
+262153
3
N/A
1026
1
-9
+262153
5
names
16
-21
-9
+23
+262153
0
-9
+262153
9
Base-line
-9
+262153
2
95
-9
+262153
2
96
-9
+262153
2
97
-9
+262153
2
98
-9
+262153
2
99
-9
+262153
2
00
-9
+262153
2
01
-9
+262153
2
02
-9
+262153
2
03
-9
+262153
2
04
-9
+262153
2
05
-9
+262153
2
06
-9
+262153
2
07
-9
+262153
2
08
-9
+262153
2
09
-9
+262153
2
10
-9
+262153
2
11
-9
+262153
+2
+12
+262153
+2
+13
+262153
19
PreviousYear%Change
-9
+262153
16
FirstYear%Change
1026
1
-9
+262153
9
row.names
13
2
NA
--9
+-10
1026
1
-9
+262153
5
class
16
1
-9
+262153
10
data.frame
254
diff --git a/output/twitter_acsi_comparison.pdf b/output/twitter_acsi_comparison.pdf
index fc66b1d..73b4c04 100644
--- a/output/twitter_acsi_comparison.pdf
+++ b/output/twitter_acsi_comparison.pdf
@@ -1,11 +1,12 @@
- airline
- q
- 80 q American
- q Delta
- q Southwest
- q US Airways
- q United
- 75
+ airline
+ q
+ 80
+ q American
+ q Delta
+ q JetBlue
+ q Southwest
+ q United
+ 75 q US Airways
@@ -19,16 +20,16 @@ score.acsi
65
- q
+ q
- q q
+ q q
60
- q
-
- 40 45 50 55 60 65 70
- score.twitter
+ q
+ 55
+ 40 60 80 100
+ score.twitter
\ No newline at end of file
diff --git a/output/twitter_acsi_comparison_with_fit.pdf b/output/twitter_acsi_comparison_with_fit.pdf
index 2905f0a..4a43e36 100644
--- a/output/twitter_acsi_comparison_with_fit.pdf
+++ b/output/twitter_acsi_comparison_with_fit.pdf
@@ -1,11 +1,12 @@
- airline
- q
- 80 q American
- q Delta
- q Southwest
- q US Airways
- q United
- 75
+ airline
+ q
+ 80
+ q American
+ q Delta
+ q JetBlue
+ q Southwest
+ q United
+ 75 q US Airways
@@ -19,16 +20,16 @@ score.acsi
65
- q
+ q
- q q
+ q q
60
- q
-
- 40 45 50 55 60 65 70
- score.twitter
+ q
+ 55
+ 40 60 80 100
+ score.twitter
\ No newline at end of file
diff --git a/output/twitter_score_histograms.pdf b/output/twitter_score_histograms.pdf
index 55f4562..7ce1a1d 100644
--- a/output/twitter_score_histograms.pdf
+++ b/output/twitter_score_histograms.pdf
@@ -1,66 +1,52 @@
+ American
+ 1000
500
+ 0
+ 1000
- American
- 400
- 300
- 200
- 100
- 0
+
+ Delta
500
- 400
+ 0
+ airline
- Delta
- 300
- 200
- 100
- 0
- 500 airline
+ JetBlue
+ 1000 American
+ 500 Delta
+count
- JetBlue
- 400
- 300 American
- 200
- 100 Delta
- 0
-count
+ 0 JetBlue
- JetBlue
+ Southwest
+ 1000 Southwest
+ 500 United
+ 0 US Airways
- Southwest US Airways
- 500
- 400 Southwest
- 300
- 200
- 100 US Airways
- 0
- United
+
+
+ United
+ 1000
500
- 400
- 300
- 200
- 100
0
+ US Airways
+ 1000
500
- 400 United
- 300
- 200
- 100
0
- −6 −4 −2 0 2 4 6
- score
+ −5.0 −2.5 0.0 2.5 5.0 7.5
+ score
\ No newline at end of file