View difference between Paste ID: xGbKrV6K and 3A5aLM0P
SHOW: | | - or go back to the newest paste.
1
# Data file was retrieved on March 14, 2014, from
2
# http://hkupop.hku.hk/chinese/popexpress/ce2012/dataset/tp1403013.sav
3
4
############################
5
# To load in the data file #
6
############################
7
8
# Install the "foreign" package (used to read in SPSS files) only when it is
# not already available, so re-running the script does not reinstall it
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}

# Load in the "foreign" package
library("foreign")

# Read in the data file as a data frame; the path is relative, so the .sav
# file must be in the current working directory.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
cy201403 <- read.spss("tp1403013.sav", to.data.frame = TRUE)

# Check if file has been read in correctly
head(cy201403)
19
20
21
######################
22
# Basic descriptions #
23
######################
24
25
# Number of subjects (rows in the data set) = 1017
nrow(cy201403)

# Number of raters (subjects with a non-missing rating) = 998
sum(!is.na(cy201403$CE_rating))
30
31
32
33
##################
34
# On raw ratings #
35
##################
36
37
# A quick scan of the distribution of the raw ratings
table(cy201403$CE_rating)

# Mean of raw ratings = 47.4008
# (stored once so the plot below reuses exactly the value printed here)
rawMean <- mean(cy201403$CE_rating, na.rm = TRUE)
rawMean

# Median of raw ratings = 50
rawMedian <- median(cy201403$CE_rating, na.rm = TRUE)
rawMedian

# Histogram of the raw ratings, in bins of width 10 that are closed on the
# left (right = FALSE). The default axes are suppressed so that custom tick
# positions can be drawn afterwards.
hist(cy201403$CE_rating, main = "CY Leung's rating (raw), \nMarch 2014",
     xlab = "Raw Scores", ylab = "Frequency", right = FALSE, axes = FALSE,
     breaks = seq(0, 100, 10), col = "lightgray")
axis(1, at = seq(0, 100, 10))
axis(2, at = seq(0, 300, 50))

# Indicate mean on plot
abline(v = rawMean, col = "red", lwd = 4)

# Indicate median on plot
abline(v = rawMedian, col = "blue", lwd = 4)

# Add labels: "Mean" to the left of its line (pos = 2) and "Median" to the
# right of its line (pos = 4) so the two labels do not overlap
text(rawMean, 200, "Mean", col = "red", pos = 2)
text(rawMedian, 200, "Median", col = "blue", pos = 4)
62
63
64
######################
65
# On weighed ratings #
66
######################
67
68
# Calculate weighed ratings: each rating multiplied by the respondent's weight
cy201403$weighedRating <- cy201403$CE_rating * cy201403$weight

# Mean of weighed ratings = 47.48411
# (stored once so the plot below reuses exactly the value printed here)
weighedMean <- mean(cy201403$weighedRating, na.rm = TRUE)
weighedMean

# NOTE(review): the mean of rating * weight equals the weighted mean rating
# only if the weights of the respondents who gave a rating average exactly 1.
# The normalised weighted mean (sum of rating * weight divided by the sum of
# the weights of those who rated) is therefore reported as well.
rated <- !is.na(cy201403$CE_rating) & !is.na(cy201403$weight)
weighted.mean(cy201403$CE_rating[rated], cy201403$weight[rated])

# Median of weighed ratings = 41.9557
weighedMedian <- median(cy201403$weighedRating, na.rm = TRUE)
weighedMedian

# Histogram of the weighed ratings, in bins of width 10 that are closed on
# the left (right = FALSE). The default axes are suppressed so that custom
# tick positions can be drawn afterwards.
hist(cy201403$weighedRating, main = "CY Leung's rating (weighed), \nMarch 2014",
     xlab = "Weighed Scores", ylab = "Frequency", right = FALSE, axes = FALSE,
     breaks = seq(0, 500, 10), col = "lightgray")
axis(1, at = seq(0, 500, 50))
axis(2, at = seq(0, 140, 20))

# Indicate mean on plot
abline(v = weighedMean, col = "red", lwd = 4)

# Indicate median on plot
abline(v = weighedMedian, col = "blue", lwd = 4)

# Add labels at y = 100 so they fall inside the frequency axis drawn above
# (ticks up to 140). "Mean" goes to the right of its line (pos = 4) and
# "Median" to the left of its line (pos = 2).
text(weighedMean, 100, "Mean", col = "red", pos = 4)
text(weighedMedian, 100, "Median", col = "blue", pos = 2)