# new_year_map_movie
# Pasted to Pastebin by sobach on Jan 9th, 2013 (214 views, never expires)
  1. # -*- coding: utf-8 -*-
  2.  
  3. ###################
  4. #                 #
  5. #  CONFIGURATION  #
  6. #                 #
  7. ###################
  8.  
  9. # Libraries
  10. library('ggplot2')
  11. library('maps')
  12. library('reshape2')
  13. library('RColorBrewer')
  14.  
  # Movie configuration
  # NOTE(review): setwd() in a script is fragile; it is kept because the
  # relative paths used by this script ('ny_tweets.csv', 'frames/...') rely on it.
  setwd('/path/to/working/directory')
  start.date <- '2012-12-31 00:00:00'
  finish.date <- '2013-01-01 12:00:00'
  seconds.in.frame <- 30
  ma.period <- 60 * 30 # Moving-average window (seconds) for per-frame tweet stats
  texttime <- 3 * 24   # Base on-screen time of a tweet text (in frames of the final movie)

  # Loop computations
  # Parse once into POSIXct; a failed parse yields NA and is rejected right away
  # instead of surfacing later as an obscure error inside the frame loops.
  start.date <- as.POSIXct(strptime(start.date, format = '%Y-%m-%d %H:%M:%S'))
  finish.date <- as.POSIXct(strptime(finish.date, format = '%Y-%m-%d %H:%M:%S'))
  stopifnot(!is.na(start.date), !is.na(finish.date), finish.date > start.date)
  stopifnot(seconds.in.frame > 0)

  # Whole number of frames: a fractional value would be silently truncated by
  # the frame loops and would also inflate the zero-padding width of file names.
  frames <- floor(
    as.numeric(difftime(finish.date, start.date, units = 'secs')) / seconds.in.frame
  )
  27.  
  # ggplot palette & geometry setup
  # Nine-step 'YlOrRd' palette; indexed 1..9 by the scaled log tweet volume.
  twits.colors <- brewer.pal(9, 'YlOrRd')
  # Multiplier applied to every vertical offset of the clock geometry
  # (presumably compensates the plot's x/y aspect so circles look round).
  roundcoef <- .48
  # Colours that tweet texts are drawn in. Each colour is listed once so a
  # random pick is not weighted towards a repeated entry.
  textcolors <- c('deeppink3', 'coral3', 'chocolate3', 'chartreuse3',
                  'brown3', 'blue3', 'aquamarine3', 'dodgerblue3',
                  'deepskyblue3', 'darkorchid3', 'darkorange3', 'darkgreen',
                  'cyan4', 'gold3', 'forestgreen', 'firebrick3',
                  'green3', 'indianred3', 'hotpink3', 'maroon3',
                  'palegreen3', 'limegreen', 'orange3', 'olivedrab3',
                  'red3', 'royalblue3', 'springgreen3')
  59.  
  # Outline points of a vertically squashed circle.
  #
  # Args:
  #   center:   numeric vector; center[1] is the x and center[2] the y of the centre.
  #   diameter: horizontal diameter, in x-axis units.
  #   npoints:  number of points returned. Angles run from 0 to 2*pi inclusive,
  #             so the first and last point coincide.
  #   aspect:   multiplier applied to the vertical radius. Defaults to the
  #             global `roundcoef`, which keeps existing calls unchanged.
  # Returns:
  #   data.frame with numeric columns `x` and `y`, one row per point.
  draw.circle <- function(center, diameter = 1, npoints = 100, aspect = roundcoef) {
    # A short `center` would otherwise produce silent NA coordinates.
    stopifnot(is.numeric(center), length(center) >= 2)
    radius <- diameter / 2
    angles <- seq(0, 2 * pi, length.out = npoints)
    data.frame(
      x = center[1] + radius * cos(angles),
      y = center[2] + radius * sin(angles) * aspect
    )
  }
  68.  
  69. ##################
  70. #                #
  71. #  LOADING DATA  #
  72. #                #
  73. ##################
  74.  
  # Load tweets with coordinates & convert values to proper format
  twits <- read.csv2('ny_tweets.csv', header = FALSE)
  colnames(twits) <- c('Link', 'Longitude', 'Latitude', 'Timestamp')
  # Store timestamps as POSIXct: strptime() returns the list-based POSIXlt,
  # which does not behave well as a data-frame column.
  twits$Timestamp <- as.POSIXct(
    strptime(as.character(twits$Timestamp), format = '%Y-%m-%d %H:%M:%S')
  )
  # Coordinates are rounded to 0.1 degree; the rounded values later serve as
  # grid cells when tweets are counted per frame.
  twits$Latitude <- round(as.numeric(as.character(twits$Latitude)), digits = 1)
  twits$Longitude <- round(as.numeric(as.character(twits$Longitude)), digits = 1)
  # Shift longitudes below -169 by +360 so they continue past +180 on the map
  # (presumably to keep territory east of the antimeridian on the right edge).
  # which() skips NA longitudes, which would make a scalar `if` fail.
  wrap.idx <- which(twits$Longitude < -169)
  twits$Longitude[wrap.idx] <- twits$Longitude[wrap.idx] + 360
  87.  
  # Load tweets with texts & convert date-time
  twit.texts <- read.csv2('ny_text_tweets.csv', header = FALSE)
  # NOTE(review): Latitude/Longitude are named in the opposite order to the
  # coordinates file; harmless here because only Timestamp and Text are kept,
  # but confirm against the CSV layout.
  colnames(twit.texts) <- c('Link', 'Latitude', 'Longitude', 'Timestamp', 'Text')
  # POSIXct rather than the list-based POSIXlt returned by strptime()
  twit.texts$Timestamp <- as.POSIXct(
    strptime(as.character(twit.texts$Timestamp), format = '%Y-%m-%d %H:%M:%S')
  )
  # Select by name so a column reshuffle cannot silently pick the wrong fields
  twit.texts <- twit.texts[, c('Timestamp', 'Text')]

  n.texts <- nrow(twit.texts)
  # Display duration per text, in frames: texttime jittered by about 10 %
  twit.texts$t.delta <- rnorm(n.texts, mean = 1, sd = .1) * texttime
  # Visibility window, centred on the tweet's own timestamp
  half.window <- twit.texts$t.delta * seconds.in.frame / 2
  twit.texts$t.start <- twit.texts$Timestamp - half.window
  twit.texts$t.end <- twit.texts$Timestamp + half.window
  # Uniform colour choice. round(runif(n, 1, k)) gives the first and last
  # palette entries only half the probability of the others.
  twit.texts$color <- sample(textcolors, n.texts, replace = TRUE)
  # Random placement (map coordinates) and font size of each text
  twit.texts$x <- rnorm(n.texts, mean = 100, sd = 30)
  twit.texts$y <- rnorm(n.texts, mean = 56, sd = 15)
  twit.texts$size <- rnorm(n.texts, mean = 10, sd = 2)
  # Recomputed for every frame by the main loop
  twit.texts$opacity <- 0
  101.  
  # Countries to show on map.
  # Entries are matched against the `region` column of map_data('world').
  # The first group holds the names used by the legacy `maps` world database;
  # the second group adds present-day names of the same territories so the
  # selection keeps matching with newer `maps` releases. A name missing from
  # the installed database simply never matches.
  # NOTE(review): verify the second group against
  # unique(map_data('world')$region) for the installed maps version.
  countries <- c(
    # Legacy world database names
    'USSR', 'Albania', 'Andorra', 'Austria', 'Belgium', 'Bulgaria',
    'Czechoslovakia', 'Denmark', 'Finland', 'France', 'Germany', 'Hungary',
    'Ireland', 'Italy', 'Liechtenstein', 'Luxembourg', 'Malta', 'Monaco',
    'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'San Marino',
    'Sicily', 'Spain', 'Sweden', 'Switzerland', 'UK', 'Wales', 'Yugoslavia',
    # Present-day names: former USSR
    'Russia', 'Ukraine', 'Belarus', 'Moldova', 'Estonia', 'Latvia',
    'Lithuania', 'Georgia', 'Armenia', 'Azerbaijan', 'Kazakhstan',
    'Uzbekistan', 'Turkmenistan', 'Kyrgyzstan', 'Tajikistan',
    # Present-day names: former Czechoslovakia and Yugoslavia
    'Czech Republic', 'Slovakia', 'Slovenia', 'Croatia',
    'Bosnia and Herzegovina', 'Serbia', 'Montenegro', 'Kosovo',
    'Macedonia', 'North Macedonia'
  )
  134.  
  # Load cities to mark on map (capitals etc)
  cities <- read.csv2('cities.csv', header = FALSE)
  colnames(cities) <- c('name', 'Lat', 'Long')
  cities$Lat <- as.numeric(as.character(cities$Lat))
  cities$Long <- as.numeric(as.character(cities$Long))
  # Same longitude wrap as applied to the tweets: values below -169 are moved
  # by +360 so they plot past +180. which() ignores NA coordinates, which a
  # scalar `if` inside sapply() would fail on.
  wrap.idx <- which(cities$Long < -169)
  cities$Long[wrap.idx] <- cities$Long[wrap.idx] + 360
  146.  
  # Load base map data and keep only the polygons of the selected countries
  # that fall inside the plotted window (longitude -25..190, latitude above 25).
  # The exploratory table(full_map$region) call was dropped: it only dumped the
  # region list to the console and had no effect on the result.
  full_map <- map_data('world')
  need.map <- subset(
    full_map,
    region %in% countries & long > -25 & long < 190 & lat > 25
  )
  151.  
  152. #################
  153. #               #
  154. #   HISTOGRAM   #
  155. #               #
  156. #################
  157.  
  # Hourly histogram of tweet timestamps (bin width 3600 seconds).
  # ggplot2 needs POSIXct on the x aesthetic, so the column is converted in
  # case it still holds the POSIXlt values produced by strptime().
  hist.data <- data.frame(Timestamp = as.POSIXct(twits$Timestamp))

  p <- ggplot(hist.data, aes(x = Timestamp, fill = ..count..)) +
    geom_histogram(binwidth = 3600) +
    # Axis labels: "Count" / "Time (hourly)"
    ylab('Количество') + xlab('Время (по часам)') +
    theme(legend.position = 'none') +
    # Title: 'Distribution of "New Year" tweets'
    ggtitle(expression('Распределение "новогодних" твитов'))
  # Explicit print so the plot is also drawn when the script is source()d
  print(p)
  164.  
  165. ###################
  166. #                 #
  167. #   UTILITY LOOP  #
  168. #    (PALETTE)    #
  169. #                 #
  170. ###################
  171.  
  # Palette calibration pass: find the largest log(tweet count) reached by any
  # 0.1-degree grid cell in any frame. The main loop divides by this value to
  # map per-cell volumes onto the colour palette.
  max.color <- 0

  # Loop invariants: timestamps as POSIXct and the map-window filter
  twit.stamps <- as.POSIXct(twits$Timestamp)
  in.window <- twits$Longitude >= -25 & twits$Longitude <= 190 &
    twits$Latitude >= 25 & twits$Latitude <= 85

  for (i in seq_len(floor(frames))) {
    # Tweets inside the moving-average window that ends at this frame
    frame.time <- start.date + i * seconds.in.frame
    in.frame <- which(in.window &
                        twit.stamps <= frame.time &
                        twit.stamps > frame.time - ma.period)

    # Nothing to count: max() on an empty set would warn and return -Inf
    if (length(in.frame) == 0) {
      next
    }

    # Tweets per (Latitude, Longitude) cell. log() is monotone, so the largest
    # log-volume is the log of the largest count.
    cell.counts <- table(twits$Latitude[in.frame], twits$Longitude[in.frame])
    max.color <- max(max.color, log(max(cell.counts)))
  }
  191.  
  192. #################
  193. #               #
  194. #   MAIN LOOP   #
  195. #               #
  196. #################
  197.  
  # Main rendering loop: builds one ggplot per frame (map, city markers, tweet
  # volume bubbles, analogue clock, fading tweet texts) and saves it as
  # frames/img<zero-padded index>.png.

  # ggsave() does not create directories
  dir.create('frames', showWarnings = FALSE)

  # ---- Loop invariants ----
  frame.count <- floor(frames)
  name.width <- nchar(as.character(frame.count))  # zero-padding width of file names

  twit.stamps <- as.POSIXct(twits$Timestamp)
  text.stamps <- as.POSIXct(twit.texts$Timestamp)
  text.half <- twit.texts$t.delta * seconds.in.frame / 2  # half display window, seconds

  clock.center <- c(180, 35)  # Clock center
  arrow.r <- c(5.5, 8.8)      # Hand lengths: hour, minute
  circdat <- draw.circle(clock.center, diameter = 20)    # Clock background - 1st layer
  circdat2 <- draw.circle(clock.center, diameter = 19.7) # Clock background - 2nd layer
  cirdat3 <- draw.circle(clock.center, diameter = 18, npoints = 13) # Hour marks

  for (i in seq_len(frame.count)) {
    ################
    # CALCULATIONS #
    ################

    # Tweets inside the moving-average window that ends at this frame
    frame.time <- start.date + i * seconds.in.frame
    in.frame <- which(twit.stamps <= frame.time &
                        twit.stamps > frame.time - ma.period)

    # Tweets per (Lat, Long) grid cell; well-defined (zero rows) for an empty frame
    frame.twits <- as.data.frame(
      table(twits$Latitude[in.frame], twits$Longitude[in.frame]),
      stringsAsFactors = FALSE
    )
    colnames(frame.twits) <- c('Lat', 'Long', 'Volume')
    frame.twits$Lat <- as.numeric(frame.twits$Lat)
    frame.twits$Long <- as.numeric(frame.twits$Long)

    # Drop empty cells and cells outside the map window
    keep <- frame.twits$Volume > 0 &
      frame.twits$Long >= -25 & frame.twits$Long <= 190 &
      frame.twits$Lat >= 25 & frame.twits$Lat <= 85
    frame.twits <- frame.twits[which(keep), ]

    # Palette index from log-volume. When max.color is 0 (no cell ever held
    # more than one tweet) the ratio would be 0/0, so use the first colour.
    if (max.color > 0) {
      frame.colors <- round(1 + (8 * log(frame.twits$Volume) / max.color), digits = 0)
      frame.colors <- pmin(pmax(frame.colors, 1), length(twits.colors))
    } else {
      frame.colors <- rep(1, nrow(frame.twits))
    }

    # Clock hands. Angles are measured counter-clockwise from the x axis, so
    # 12 o'clock is pi/2 and time runs in the negative direction.
    clock.hours <- as.numeric(format(frame.time, '%H')) %% 12
    clock.minutes <- as.numeric(format(frame.time, '%M'))
    hourval <- pi * (.5 - (clock.hours + (clock.minutes / 60)) / 6)
    minval <- pi * (.5 - clock.minutes / 30)
    hour.x <- clock.center[1] + arrow.r[1] * cos(hourval)
    hour.y <- clock.center[2] + arrow.r[1] * sin(hourval) * roundcoef
    minute.x <- clock.center[1] + arrow.r[2] * cos(minval)
    minute.y <- clock.center[2] + arrow.r[2] * sin(minval) * roundcoef

    # Text opacity: 0 outside [t.start, t.end]; inside, a linear ramp that
    # peaks at 0.7 on the tweet's own timestamp. Computed for all rows at once.
    elapsed <- abs(as.numeric(difftime(text.stamps, frame.time, units = 'secs')))
    outside <- frame.time < twit.texts$t.start | frame.time > twit.texts$t.end
    twit.texts$opacity <- ifelse(outside, 0, 0.7 * (1 - elapsed / text.half))

    # Only texts that are actually visible in this frame get drawn
    shown <- which(twit.texts$opacity > 0)

    ############
    # PLOTTING #
    ############

    # Contour map
    p <- ggplot()
    p <- p + geom_polygon(data = need.map,
                          aes(x = long, y = lat, group = group),
                          colour = 'white', fill = 'grey20', alpha = .5)

    # Plot city points
    p <- p + geom_point(data = cities, aes(x = Long, y = Lat),
                        colour = 'skyblue', size = 1.5)

    # Plot twitter data
    if (nrow(frame.twits) > 0) {
      p <- p + geom_point(data = frame.twits,
                          aes(x = Long, y = Lat, size = Volume * 5),
                          colour = twits.colors[frame.colors], alpha = .75)
    }

    # Plot clock
    p <- p + geom_polygon(data = circdat, aes(x = x, y = y),
                          colour = 'grey100', fill = 'grey100', alpha = .5)
    p <- p + geom_polygon(data = circdat2, aes(x = x, y = y),
                          colour = 'grey80', fill = 'grey80', alpha = .5)
    p <- p + geom_point(data = cirdat3, aes(x = x, y = y), colour = 'skyblue')

    # Clock hands and hub
    p <- p + annotate('segment', x = clock.center[1], y = clock.center[2],
                      xend = hour.x, yend = hour.y,
                      size = 3, colour = 'dodgerblue3')
    p <- p + annotate('segment', x = clock.center[1], y = clock.center[2],
                      xend = minute.x, yend = minute.y,
                      size = 1.5, colour = 'dodgerblue4')
    p <- p + annotate('point', x = clock.center[1], y = clock.center[2],
                      colour = 'blue4')

    # Tweet texts plotting
    if (length(shown) > 0) {
      text.layer <- data.frame(
        x = twit.texts$x[shown],
        y = twit.texts$y[shown],
        label = iconv(as.character(twit.texts$Text[shown]), to = 'UTF-8'),
        stringsAsFactors = FALSE
      )
      p <- p + geom_text(data = text.layer, aes(x = x, y = y, label = label),
                         colour = twit.texts$color[shown],
                         size = twit.texts$size[shown],
                         alpha = twit.texts$opacity[shown])
    }

    # Clear axis, legend, title
    p <- p + theme(axis.line = element_blank(), axis.text.x = element_blank(),
                   axis.text.y = element_blank(), axis.ticks = element_blank(),
                   axis.title.x = element_blank(),
                   axis.title.y = element_blank(),
                   legend.position = 'none',
                   text = element_text(family = 'mono', size = 20, face = 'bold',
                                       colour = 'dodgerblue3'))

    p <- p + scale_x_continuous(limits = c(-15, 190))
    p <- p + scale_y_continuous(limits = c(30, 82))
    p <- p + ggtitle(expression('#HappyNewYear in Russian Twitter - 2013'))

    # File name with a fixed-width, zero-padded frame index
    f.name <- formatC(i, width = name.width, format = 'd', flag = '0')

    # Save frame
    ggsave(filename = paste0('frames/img', f.name, '.png'), plot = p,
           width = 6.4, height = 3.6, scale = 3, dpi = 100)
  }
RAW Paste Data