# -*- coding: utf-8 -*-

###################
#                 #
#  CONFIGURATION  #
#                 #
###################

# Libraries
library('ggplot2')
library('maps')
library('reshape2')
library('RColorBrewer')

# Future movie configuration
setwd('/path/to/working/directory')
start.date <- '2012-12-31 00:00:00'
finish.date <- '2013-01-01 12:00:00'
seconds.in.frame <- 30
ma.period <- 60 * 30  # Moving average period (seconds) for tweet stats in a frame
texttime <- 3 * 24    # Base time for text demonstration (in frames in final movie)

# Loop computations.
# Times are kept as POSIXct (not the list-based POSIXlt returned by strptime)
# so they can live safely in data frames and be compared as vectors.
timestamp.format <- '%Y-%m-%d %H:%M:%S'
start.date <- as.POSIXct(strptime(start.date, format = timestamp.format))
finish.date <- as.POSIXct(strptime(finish.date, format = timestamp.format))
frames <- as.numeric(difftime(finish.date, start.date, units = 'secs')) /
  seconds.in.frame
# Whole number of frames actually rendered (never negative, so the loops
# below simply do nothing when the date range is empty).
n.frames <- max(0, floor(frames))

# ggplot palette & geometry setup
twits.colors <- brewer.pal(9, 'YlOrRd')
roundcoef <- .48  # Vertical squeeze applied to circles drawn in map coordinates
textcolors <- c('deeppink3', 'coral3', 'chocolate3', 'chartreuse3', 'brown3',
                'blue3', 'aquamarine3', 'dodgerblue3', 'deepskyblue3',
                'deeppink3', 'darkorchid3', 'darkorange3', 'darkgreen',
                'cyan4', 'gold3', 'forestgreen', 'firebrick3', 'green3',
                'indianred3', 'hotpink3', 'maroon3', 'palegreen3', 'limegreen',
                'orange3', 'olivedrab3', 'red3', 'royalblue3', 'springgreen3')

# Points of a circle outline (an ellipse once the y-scale factor is applied).
#
# center   - numeric vector c(x, y) with the circle center.
# diameter - circle diameter along the x axis.
# npoints  - number of points generated; the first and last point coincide
#            because the angle runs from 0 to 2*pi inclusive.
# yscale   - multiplier applied to the vertical radius (defaults to the
#            script-wide roundcoef).
#
# Returns a data.frame with columns x and y.
draw.circle <- function(center, diameter = 1, npoints = 100,
                        yscale = roundcoef) {
  r <- diameter / 2
  tt <- seq(0, 2 * pi, length.out = npoints)
  data.frame(
    x = center[1] + r * cos(tt),
    y = center[2] + r * sin(tt) * yscale
  )
}

# Shift longitudes west of -169 degrees by +360 so they continue past +180
# instead of wrapping to the left edge of the map. Vectorized; NA values are
# passed through unchanged instead of raising an error.
wrap.longitude <- function(lon) {
  west <- !is.na(lon) & lon < -169
  lon[west] <- lon[west] + 360
  lon
}


##################
#                #
#  LOADING DATA  #
#                #
##################

# Load tweets with coordinates & convert values to proper format.
# Coordinates are rounded to 0.1 degree, which defines the grid cells that
# tweets are later counted in.
twits <- read.csv2('ny_tweets.csv', header = FALSE)
colnames(twits) <- c('Link', 'Longitude', 'Latitude', 'Timestamp')
twits$Timestamp <- as.POSIXct(strptime(twits$Timestamp,
                                       format = timestamp.format))
twits$Latitude <- round(as.numeric(as.character(twits$Latitude)), digits = 1)
twits$Longitude <- round(as.numeric(as.character(twits$Longitude)), digits = 1)
twits$Longitude <- wrap.longitude(twits$Longitude)

# Load tweets with texts & convert date-time
twit.texts <- read.csv2('ny_text_tweets.csv', header = FALSE)
colnames(twit.texts) <- c('Link', 'Latitude', 'Longitude', 'Timestamp', 'Text')
twit.texts$Timestamp <- as.POSIXct(strptime(twit.texts$Timestamp,
                                            format = timestamp.format))
twit.texts <- twit.texts[, c('Timestamp', 'Text')]

# Each text is shown for a randomized number of frames around its timestamp,
# at a random position, size and colour.
n.texts <- nrow(twit.texts)
twit.texts$t.delta <- rnorm(n.texts, mean = 1, sd = .1) * texttime
# Half of the display window, in seconds.
twit.texts$half.window <- twit.texts$t.delta * seconds.in.frame / 2
twit.texts$t.start <- twit.texts$Timestamp - twit.texts$half.window
twit.texts$t.end <- twit.texts$Timestamp + twit.texts$half.window
# sample() picks every colour with equal probability (rounding a uniform
# draw would give the first and last colour only half the weight).
twit.texts$color <- sample(textcolors, n.texts, replace = TRUE)
twit.texts$x <- rnorm(n.texts, mean = 100, sd = 30)
twit.texts$y <- rnorm(n.texts, mean = 56, sd = 15)
twit.texts$size <- rnorm(n.texts, mean = 10, sd = 2)
twit.texts$opacity <- 0
# Label encoding does not depend on the frame, so convert once here.
twit.texts$label <- iconv(as.character(twit.texts$Text), to = 'UTF-8')

# Countries to show on map
countries <- c('USSR', 'Albania', 'Andorra', 'Austria', 'Belgium', 'Bulgaria',
               'Czechoslovakia', 'Denmark', 'Finland', 'France', 'Germany',
               'Hungary', 'Ireland', 'Italy', 'Liechtenstein', 'Luxembourg',
               'Malta', 'Monaco', 'Netherlands', 'Norway', 'Poland',
               'Portugal', 'Romania', 'San Marino', 'Sicily', 'Spain',
               'Sweden', 'Switzerland', 'UK', 'Wales', 'Yugoslavia')

# Load cities to mark on map (capitals etc)
cities <- read.csv2('cities.csv', header = FALSE)
colnames(cities) <- c('name', 'Lat', 'Long')
cities$Lat <- as.numeric(as.character(cities$Lat))
cities$Long <- as.numeric(as.character(cities$Long))
cities$Long <- wrap.longitude(cities$Long)

# Load base map data
full_map <- map_data('world')
table(full_map$region)  # Region names available in the map data
need.map <- subset(full_map,
                   region %in% countries & long > -25 & long < 190 & lat > 25)


#################
#               #
#   HISTOGRAM   #
#               #
#################

# Hourly histogram of tweet timestamps (binwidth is in seconds).
# Axis labels / title are in Russian: "Count", "Time (by hour)",
# "Distribution of 'New Year' tweets".
p <- ggplot()
p <- p + geom_histogram(data = twits, aes(x = Timestamp, fill = ..count..),
                        binwidth = 3600)
p <- p + ylab('Количество') + xlab('Время (по часам)')
p <- p + theme(legend.position = 'none')
p <- p + ggtitle(expression('Распределение "новогодних" твитов'))
p


###################
#                 #
#  UTILITY LOOP   #
#    (PALETTE)    #
#                 #
###################

# Count tweets per (Lat, Long) grid cell for one frame.
#
# frame.time - POSIXct time of the frame.
# tweets     - data.frame with Timestamp, Latitude and Longitude columns.
# window     - length in seconds of the trailing window that is counted.
#
# Returns a data.frame with columns Lat, Long and Volume holding only
# non-empty cells inside the plotted area (possibly zero rows).
count.frame.twits <- function(frame.time, tweets = twits, window = ma.period) {
  # which() drops rows whose timestamp could not be parsed (NA).
  recent <- which(tweets$Timestamp <= frame.time &
                    tweets$Timestamp > frame.time - window)
  if (length(recent) == 0) {
    return(data.frame(Lat = numeric(0), Long = numeric(0),
                      Volume = integer(0)))
  }
  cells <- melt(table(tweets$Latitude[recent], tweets$Longitude[recent]))
  colnames(cells) <- c('Lat', 'Long', 'Volume')
  cells$Lat <- as.numeric(as.character(cells$Lat))
  cells$Long <- as.numeric(as.character(cells$Long))
  keep <- cells$Volume > 0 &
    cells$Long >= -25 & cells$Long <= 190 &
    cells$Lat >= 25 & cells$Lat <= 85
  cells[keep, ]
}

# First pass: bin every frame once, remember the result for the rendering
# pass, and find the largest log-volume to normalize the colour scale.
frame.counts <- vector('list', n.frames)
max.color <- 0
for (i in seq_len(n.frames)) {
  counts <- count.frame.twits(start.date + i * seconds.in.frame)
  frame.counts[[i]] <- counts
  # Skip empty frames: max() of nothing is -Inf and raises a warning.
  if (nrow(counts) > 0) {
    max.color <- max(max.color, log(counts$Volume))
  }
}


#################
#               #
#   MAIN LOOP   #
#               #
#################

# Everything below up to the loop does not depend on the frame and is
# therefore built only once.

# Clock geometry
clock.center <- c(180, 35)  # Clock center
arrow.r <- c(5.5, 8.8)      # Arrows length (hour, minute)
circdat <- draw.circle(clock.center, diameter = 20)     # Clock background - 1st layer
circdat2 <- draw.circle(clock.center, diameter = 19.7)  # Clock background - 2nd layer
circdat3 <- draw.circle(clock.center, diameter = 18, npoints = 13)  # Hour points on clock

# Contour map with city points
base.map <- ggplot() +
  geom_polygon(data = need.map, aes(x = long, y = lat, group = group),
               colour = 'white', fill = 'grey20', alpha = .5) +
  geom_point(data = cities, aes(x = Long, y = Lat),
             colour = 'skyblue', size = 1.5)

# Clock face; added after the tweet layer so it is drawn on top of it.
clock.face <- list(
  geom_polygon(data = circdat, aes(x = x, y = y),
               colour = 'grey100', fill = 'grey100', alpha = .5),
  geom_polygon(data = circdat2, aes(x = x, y = y),
               colour = 'grey80', fill = 'grey80', alpha = .5),
  geom_point(data = circdat3, aes(x = x, y = y), colour = 'skyblue')
)

# Clear axis, legend; fixed limits and title
frame.decor <- list(
  theme(axis.line = element_blank(), axis.text.x = element_blank(),
        axis.text.y = element_blank(), axis.ticks = element_blank(),
        axis.title.x = element_blank(), axis.title.y = element_blank(),
        legend.position = 'none',
        text = element_text(family = 'mono', size = 20, face = 'bold',
                            colour = 'dodgerblue3')),
  scale_x_continuous(limits = c(-15, 190)),
  scale_y_continuous(limits = c(30, 82)),
  ggtitle(expression('#HappyNewYear in Russian Twitter - 2013'))
)

# Frame files are zero-padded to a common width so they sort in order.
pad.width <- nchar(format(n.frames, scientific = FALSE))
dir.create('frames', showWarnings = FALSE)  # ggsave needs the directory to exist

for (i in seq_len(n.frames)) {

  ################
  # CALCULATIONS #
  ################

  frame.time <- start.date + i * seconds.in.frame
  frame.twits <- frame.counts[[i]]

  # Map log-volume onto the 9 palette steps. When no cell ever exceeded one
  # tweet (max.color == 0) the ratio would be 0/0, so use the first colour.
  if (max.color > 0) {
    frame.colors <- round(1 + (8 * log(frame.twits$Volume) / max.color),
                          digits = 0)
  } else {
    frame.colors <- rep(1, nrow(frame.twits))
  }

  # Clock calculations: angles are measured from 12 o'clock, clockwise.
  cur.hour <- as.numeric(format(frame.time, '%H')) %% 12
  cur.minute <- as.numeric(format(frame.time, '%M'))
  hourval <- pi * (.5 - (cur.hour + (cur.minute / 60)) / 6)
  minval <- pi * (.5 - cur.minute / 30)
  hour.x <- clock.center[1] + arrow.r[1] * cos(hourval)
  hour.y <- clock.center[2] + arrow.r[1] * sin(hourval) * roundcoef
  minute.x <- clock.center[1] + arrow.r[2] * cos(minval)
  minute.y <- clock.center[2] + arrow.r[2] * sin(minval) * roundcoef

  # Texts opacity calculation: 0.7 at the tweet's own timestamp, fading
  # linearly to 0 at the edges of its display window, 0 outside of it.
  time.dist <- abs(as.numeric(difftime(twit.texts$Timestamp, frame.time,
                                       units = 'secs')))
  in.window <- !is.na(time.dist) &
    frame.time >= twit.texts$t.start & frame.time <= twit.texts$t.end
  twit.texts$opacity <- ifelse(in.window,
                               0.7 * (1 - time.dist / twit.texts$half.window),
                               0)
  # Fully transparent texts draw nothing, so only the visible ones are plotted.
  visible.texts <- twit.texts[twit.texts$opacity > 0, ]

  ############
  # PLOTTING #
  ############

  p <- base.map

  # Plot twitter data
  if (nrow(frame.twits) > 0) {
    p <- p + geom_point(data = frame.twits,
                        aes(x = Long, y = Lat, size = Volume * 5),
                        colour = twits.colors[frame.colors], alpha = .75)
  }

  # Plot clock
  p <- p + clock.face
  # clock arrows
  p <- p + geom_segment(aes(x = clock.center[1], y = clock.center[2],
                            xend = hour.x, yend = hour.y),
                        size = 3, colour = 'dodgerblue3')
  p <- p + geom_segment(aes(x = clock.center[1], y = clock.center[2],
                            xend = minute.x, yend = minute.y),
                        size = 1.5, colour = 'dodgerblue4')
  p <- p + geom_point(aes(clock.center[1], clock.center[2]), colour = 'blue4')

  # Tweet texts plotting
  if (nrow(visible.texts) > 0) {
    p <- p + geom_text(data = visible.texts, aes(x = x, y = y, label = label),
                       colour = visible.texts$color,
                       size = visible.texts$size,
                       alpha = visible.texts$opacity)
  }

  p <- p + frame.decor

  # Generate filename with standard length
  f.name <- formatC(i, width = pad.width, format = 'd', flag = '0')

  # Save frame
  ggsave(filename = file.path('frames', paste0('img', f.name, '.png')),
         plot = p, width = 6.4, height = 3.6, scale = 3, dpi = 100)
}