Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- ###################
- # #
- # CONFIGURATION #
- # #
- ###################
- # Libraries
- library('ggplot2')
- library('maps')
- library('reshape2')
- library('RColorBrewer')
# Future movie configuration
# NOTE(review): placeholder path - point this at the directory that holds the
# input CSV files and the frames/ output folder before running.
setwd('/path/to/working/directory')
start.date <- '2012-12-31 00:00:00'
finish.date <- '2013-01-01 12:00:00'
seconds.in.frame <- 30  # Real-time seconds covered by one movie frame
ma.period <- 60 * 30    # Moving average period (seconds) for tweet-stats in frame
texttime <- 3 * 24      # Base time for text demonstration (in frames in final movie)

# Loop computations
start.date <- strptime(start.date, format = '%Y-%m-%d %H:%M:%S')
finish.date <- strptime(finish.date, format = '%Y-%m-%d %H:%M:%S')

# Fail early on unparsable dates or an empty/negative interval instead of
# silently producing NA or nonsensical frame counts further down.
stopifnot(
  !is.na(start.date),
  !is.na(finish.date),
  finish.date > start.date,
  seconds.in.frame > 0
)

# Whole number of frames: a trailing partial frame is dropped so that `frames`
# is always integer-valued (it is used both as a loop bound and to derive the
# zero-padding width of the frame file names).
frames <- floor(
  as.numeric(difftime(finish.date, start.date, units = 'secs')) / seconds.in.frame
)
# ggplot palette & geometry setup

# Factor applied to every vertical offset of the clock drawing
# (presumably compensates for the plot's aspect ratio - confirm).
roundcoef <- .48

# Nine-step yellow-orange-red ramp used to colour tweet-density points
twits.colors <- brewer.pal(n = 9, name = 'YlOrRd')

# Colours randomly assigned to tweet texts.
# NOTE(review): 'deeppink3' is listed twice - confirm whether that is intended.
textcolors <- c(
  'deeppink3', 'coral3', 'chocolate3', 'chartreuse3',
  'brown3', 'blue3', 'aquamarine3', 'dodgerblue3',
  'deepskyblue3', 'deeppink3', 'darkorchid3', 'darkorange3',
  'darkgreen', 'cyan4', 'gold3', 'forestgreen',
  'firebrick3', 'green3', 'indianred3', 'hotpink3',
  'maroon3', 'palegreen3', 'limegreen', 'orange3',
  'olivedrab3', 'red3', 'royalblue3', 'springgreen3'
)
# Function for circle points
#
# Computes points along the outline of a circle (an ellipse when yscale != 1).
#
# Arguments:
#   center   - numeric vector; element 1 is the x centre, element 2 the y centre
#   diameter - diameter of the circle along the x axis
#   npoints  - number of points generated over [0, 2*pi]; both end points are
#              included, so the first and last point coincide
#   yscale   - multiplier applied to the vertical offsets; defaults to the
#              script-level `roundcoef`, which keeps existing calls unchanged
#
# Returns a data.frame with numeric columns `x` and `y` (npoints rows).
draw.circle <- function(center, diameter = 1, npoints = 100, yscale = roundcoef) {
  radius <- diameter / 2
  angles <- seq(0, 2 * pi, length.out = npoints)
  data.frame(
    x = center[1] + radius * cos(angles),
    y = center[2] + radius * sin(angles) * yscale
  )
}
##################
#                #
#  LOADING DATA  #
#                #
##################
# Load tweets with coordinates & convert values to proper format
twits <- read.csv2('ny_tweets.csv', header = FALSE)
colnames(twits) <- c('Link', 'Longitude', 'Latitude', 'Timestamp')

# Store timestamps as POSIXct: POSIXlt is a list-based class that is awkward
# inside data frames, while comparisons and arithmetic work identically.
twits$Timestamp <- as.POSIXct(
  strptime(as.character(twits$Timestamp), format = '%Y-%m-%d %H:%M:%S')
)

# Coordinates are rounded to 0.1 degree; the frame loops later count tweets
# per (Latitude, Longitude) pair, so this defines the aggregation grid.
twits$Latitude <- round(as.numeric(as.character(twits$Latitude)), digits = 1)
twits$Longitude <- round(as.numeric(as.character(twits$Longitude)), digits = 1)

# Shift longitudes west of -169 by +360 so they land on the eastern edge of
# the map. Vectorized and NA-safe: unparsable coordinates stay NA instead of
# aborting the script.
wrap.west <- !is.na(twits$Longitude) & twits$Longitude < -169
twits$Longitude[wrap.west] <- twits$Longitude[wrap.west] + 360
# Load tweets with texts & convert date-time
# NOTE(review): Latitude precedes Longitude here, whereas ny_tweets.csv is
# labelled Longitude first. The coordinates are dropped just below, so this has
# no effect in this script - but confirm the file layouts.
twit.texts <- read.csv2('ny_text_tweets.csv', header = FALSE)
colnames(twit.texts) <- c('Link', 'Latitude', 'Longitude', 'Timestamp', 'Text')
twit.texts$Timestamp <- as.POSIXct(
  strptime(as.character(twit.texts$Timestamp), format = '%Y-%m-%d %H:%M:%S')
)

# Only the time and the text are needed; select by name rather than position.
twit.texts <- twit.texts[, c('Timestamp', 'Text')]
n.texts <- nrow(twit.texts)

# Display duration of each text (in frames), jittered around `texttime`, and
# the resulting visibility window centred on the tweet's timestamp.
twit.texts$t.delta <- rnorm(n.texts, mean = 1, sd = .1) * texttime
twit.texts$t.start <- twit.texts$Timestamp - twit.texts$t.delta * seconds.in.frame / 2
twit.texts$t.end <- twit.texts$Timestamp + twit.texts$t.delta * seconds.in.frame / 2

# Uniform random colour per text. sample() gives every palette entry the same
# probability; rounding a uniform draw would halve the chance of the first and
# last entries.
twit.texts$color <- sample(textcolors, n.texts, replace = TRUE)

# Random placement (map coordinates) and font size
twit.texts$x <- rnorm(n.texts, mean = 100, sd = 30)
twit.texts$y <- rnorm(n.texts, mean = 56, sd = 15)
twit.texts$size <- rnorm(n.texts, mean = 10, sd = 2)

# Recomputed for every frame in the main loop
twit.texts$opacity <- 0
# Regions of the 'world' map database that are drawn as the background map.
# NOTE(review): names such as 'USSR', 'Czechoslovakia' and 'Yugoslavia' look
# like they target an older world database - verify they still match the
# region names returned by map_data('world') in the installed maps version.
countries <- c(
  'USSR', 'Albania', 'Andorra', 'Austria', 'Belgium', 'Bulgaria',
  'Czechoslovakia', 'Denmark', 'Finland', 'France', 'Germany', 'Hungary',
  'Ireland', 'Italy', 'Liechtenstein', 'Luxembourg', 'Malta', 'Monaco',
  'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'San Marino',
  'Sicily', 'Spain', 'Sweden', 'Switzerland', 'UK', 'Wales', 'Yugoslavia'
)
# Load cities to mark on map (capitals etc)
cities <- read.csv2('cities.csv', header = FALSE)
colnames(cities) <- c('name', 'Lat', 'Long')
cities$Lat <- as.numeric(as.character(cities$Lat))
cities$Long <- as.numeric(as.character(cities$Long))

# Shift longitudes west of -169 by +360 so they land on the eastern edge of
# the map. Vectorized and NA-safe: an unparsable coordinate stays NA instead of
# aborting the script.
wrap.west <- !is.na(cities$Long) & cities$Long < -169
cities$Long[wrap.west] <- cities$Long[wrap.west] + 360
# Base map: world polygons restricted to the selected countries and to the
# longitude/latitude window shown in the movie.
full_map <- map_data(map = 'world')
# Tabulates polygon points per region (handy for checking region names)
table(full_map$region)
in.window <- with(full_map, long > -25 & long < 190 & lat > 25)
need.map <- full_map[which(full_map$region %in% countries & in.window), ]
#################
#               #
#   HISTOGRAM   #
#               #
#################
# Distribution of tweet timestamps in one-hour bins (3600 s); each bar is
# filled according to its count and the legend is hidden.
p <- ggplot() +
  geom_histogram(aes(x = twits$Timestamp, fill = ..count..), binwidth = 3600) +
  ylab('Количество') +
  xlab('Время (по часам)') +
  theme(legend.position = 'none') +
  ggtitle(expression('Распределение "новогодних" твитов'))
p
###################
#                 #
#  UTILITY LOOP   #
#    (PALETTE)    #
#                 #
###################
# Pre-pass over every frame: find the largest log(tweet count) of any map cell
# so the main loop can scale all frames against one common colour maximum.
max.color <- 0
# seq_len(floor(.)) is safe for zero or fractional frame counts, unlike 1:frames
for (i in seq_len(floor(frames))) {
  # Select only tweets inside the moving-average window ending at this frame
  frame.time <- start.date + i * seconds.in.frame
  frame.twits <- subset(
    twits,
    Timestamp <= frame.time & Timestamp > frame.time - ma.period
  )
  if (nrow(frame.twits) == 0) {
    next # nothing tweeted in this window
  }

  # Count tweets per (Latitude, Longitude) cell
  frame.twits <- melt(table(frame.twits$Latitude, frame.twits$Longitude))
  colnames(frame.twits) <- c('Lat', 'Long', 'Volume')
  frame.twits$Lat <- as.numeric(as.character(frame.twits$Lat))
  frame.twits$Long <- as.numeric(as.character(frame.twits$Long))

  # Keep non-empty cells inside the map window
  frame.twits <- frame.twits[frame.twits$Volume > 0 &
                               frame.twits$Long >= -25 & frame.twits$Long <= 190 &
                               frame.twits$Lat >= 25 & frame.twits$Lat <= 85, ]

  # Guard against max() of an empty vector (warning and -Inf)
  if (nrow(frame.twits) > 0) {
    max.color <- max(max.color, max(log(frame.twits$Volume)))
  }
}
#################
#               #
#   MAIN LOOP   #
#               #
#################
# Renders one PNG per frame into frames/: background map and cities, tweet
# density points for the moving-average window, an analogue clock showing the
# frame time, and tweet texts that fade in and out around their timestamps.

# ---- Loop-invariant setup (built once instead of once per frame) ----

n.frames <- as.integer(floor(frames))        # safe loop bound for 0 / fractional counts
name.width <- nchar(as.character(n.frames))  # zero-padding width of frame numbers
n.colors <- length(twits.colors)
dir.create('frames', showWarnings = FALSE)   # ggsave needs the target directory

# Clock geometry
clock.center <- c(180, 35)  # Clock center
arrow.r <- c(5.5, 8.8)      # Arrows length (hour, minute)
circdat <- draw.circle(clock.center, diameter = 20)                # Clock background - 1st layer
circdat2 <- draw.circle(clock.center, diameter = 19.7)             # Clock background - 2nd layer
circdat3 <- draw.circle(clock.center, diameter = 18, npoints = 13) # Hour points on clock

# Contour map + city points
base.plot <- ggplot() +
  geom_polygon(data = need.map, aes(x = long, y = lat, group = group),
               colour = 'white', fill = 'grey20', alpha = .5) +
  geom_point(data = cities, aes(x = Long, y = Lat), colour = 'skyblue', size = 1.5)

# Clock face (drawn above the tweet points, hence kept separate from base.plot)
clock.face <- list(
  geom_polygon(data = circdat, aes(x = x, y = y),
               colour = 'grey100', fill = 'grey100', alpha = .5),
  geom_polygon(data = circdat2, aes(x = x, y = y),
               colour = 'grey80', fill = 'grey80', alpha = .5),
  geom_point(data = circdat3, aes(x = x, y = y), colour = 'skyblue')
)

# Clear axis, legend; fixed map window and title
frame.style <- list(
  theme(axis.line = element_blank(), axis.text.x = element_blank(),
        axis.text.y = element_blank(), axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = 'none',
        text = element_text(family = 'mono', size = 20, face = 'bold',
                            colour = 'dodgerblue3')),
  scale_x_continuous(limits = c(-15, 190)),
  scale_y_continuous(limits = c(30, 82)),
  ggtitle(expression('#HappyNewYear in Russian Twitter - 2013'))
)

# Half of each text's display window, in seconds
text.half.span <- twit.texts$t.delta * seconds.in.frame / 2

for (i in seq_len(n.frames)) {
  ################
  # CALCULATIONS #
  ################
  # Select only tweets inside the moving-average window ending at this frame
  frame.time <- start.date + i * seconds.in.frame
  recent <- subset(
    twits,
    Timestamp <= frame.time & Timestamp > frame.time - ma.period
  )

  if (nrow(recent) > 0) {
    # Count tweets per (Latitude, Longitude) cell
    frame.twits <- melt(table(recent$Latitude, recent$Longitude))
    colnames(frame.twits) <- c('Lat', 'Long', 'Volume')
    frame.twits$Lat <- as.numeric(as.character(frame.twits$Lat))
    frame.twits$Long <- as.numeric(as.character(frame.twits$Long))
    # Filter empty cells and cells outside the map window
    frame.twits <- frame.twits[frame.twits$Volume > 0 &
                                 frame.twits$Long >= -25 & frame.twits$Long <= 190 &
                                 frame.twits$Lat >= 25 & frame.twits$Lat <= 85, ]
  } else {
    frame.twits <- data.frame(Lat = numeric(0), Long = numeric(0),
                              Volume = integer(0))
  }

  # Palette index per cell on a log scale relative to the global maximum.
  # When max.color is 0 (no cell ever exceeded one tweet) the ratio would be
  # 0/0, so fall back to the first colour; clamp to the valid index range.
  if (max.color > 0) {
    frame.colors <- round(1 + (n.colors - 1) * log(frame.twits$Volume) / max.color)
    frame.colors <- pmin(n.colors, pmax(1, frame.colors))
  } else {
    frame.colors <- rep(1, nrow(frame.twits))
  }

  # Clock arrows calculations (12-hour dial, angles measured from 12 o'clock)
  hour12 <- as.numeric(format(frame.time, '%H')) %% 12
  minute <- as.numeric(format(frame.time, '%M'))
  hourval <- pi * (.5 - (hour12 + minute / 60) / 6)
  minval <- pi * (.5 - minute / 30)
  hour.x <- clock.center[1] + arrow.r[1] * cos(hourval)
  hour.y <- clock.center[2] + arrow.r[1] * sin(hourval) * roundcoef
  minute.x <- clock.center[1] + arrow.r[2] * cos(minval)
  minute.y <- clock.center[2] + arrow.r[2] * sin(minval) * roundcoef

  # Texts opacity calculation: 0 outside [t.start, t.end], otherwise rising
  # linearly to 0.7 at the tweet's own timestamp. Vectorized over all texts.
  elapsed <- abs(as.numeric(difftime(twit.texts$Timestamp, frame.time,
                                     units = 'secs')))
  hidden <- frame.time < twit.texts$t.start | frame.time > twit.texts$t.end
  twit.texts$opacity <- ifelse(hidden, 0, 0.7 * (1 - elapsed / text.half.span))

  # Only texts that are actually visible need to be drawn
  visible <- twit.texts[which(twit.texts$opacity > 0), ]

  ############
  # PLOTTING #
  ############
  p <- base.plot

  # Plot twitter data
  # NOTE(review): size goes through ggplot's default size scale, which is
  # fitted to the data of each plot, so point sizes appear to be relative
  # within a frame rather than comparable across frames - confirm intended.
  if (nrow(frame.twits) > 0) {
    p <- p + geom_point(data = frame.twits,
                        aes(x = Long, y = Lat, size = Volume * 5),
                        colour = twits.colors[frame.colors], alpha = .75)
  }

  # Plot clock: face, hour hand, minute hand, hub
  p <- p + clock.face +
    annotate('segment', x = clock.center[1], y = clock.center[2],
             xend = hour.x, yend = hour.y, size = 3, colour = 'dodgerblue3') +
    annotate('segment', x = clock.center[1], y = clock.center[2],
             xend = minute.x, yend = minute.y, size = 1.5, colour = 'dodgerblue4') +
    annotate('point', x = clock.center[1], y = clock.center[2], colour = 'blue4')

  # Tweet texts plotting
  if (nrow(visible) > 0) {
    p <- p + geom_text(data = visible,
                       aes(x = x, y = y, label = iconv(Text, to = 'UTF-8')),
                       colour = visible$color, size = visible$size,
                       alpha = visible$opacity)
  }

  p <- p + frame.style

  # Generate filename with standard length (zero-padded frame number)
  f.name <- formatC(i, width = name.width, format = 'd', flag = '0')

  # Save frame
  ggsave(filename = paste0('frames/img', f.name, '.png'), plot = p,
         width = 6.4, height = 3.6, scale = 3, dpi = 100)
}
Add Comment
Please, Sign In to add comment