Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # visit: http://www.reakkt.com/2014/12/when-marginal-price-of-call-goes-to-zero.html
- library(MASS)
- library(plotrix)
- library(Hmisc) # http://stackoverflow.com/questions/6243088/find-out-the-number-of-days-of-a-month-in-r
- ###
- # http://stackoverflow.com/questions/2261079/how-to-trim-leading-and-trailing-whitespace-in-r
- trim <- function( x ) {
- gsub("(^[[:space:]]+|[[:space:]]+$)", "", x)
- }
- EventIdx <- function(event_name) {
- which(trim(as.character(njudata[,"event"]))==event_name)
- }
- TimeStrNum <- function(x) {
- as.numeric(substr(x,1,2))+as.numeric(substr(x,4,5))/60
- }
- CallLength <- function(x) {
- matched <- regexpr("[0-9]{1,2}:[0-9]{1,2}",x,perl=TRUE)
- s0 <- matched[1]
- s1 <- s0+attr(matched,"match.length")-1
- hhmm <- substr(x,s0,s1)
- len <- nchar(hhmm)
- as.numeric(substr(hhmm,1,len-3))+as.numeric(substr(hhmm,len-1,len))/60
- }
- ClearNumber <- function(x) {
- x <- gsub("\\+","",x)
- x <- gsub(" ","",x)
- return(x)
- }
- EncodeNumber <- function(x,p) {
- tmp <- NULL
- for (i in rev(1:p)) {
- d <- x %/% 26^(i-1)
- tmp <- paste(tmp,LETTERS[d],sep="")
- x <- x - d*26^(i-1)
- }
- return( tmp )
- }
- DataLoad <- function(x) {
- len <- nchar(x)
- as.numeric(substr(x,1,len-4))
- }
- ###
- njudata <- read.csv("nju mobile data.csv",header=TRUE,sep=";")
- colnames(njudata) <- c("time","phone","qty","event","operator","cost")
- head(njudata)
- unique(njudata[,"event"])
- ###
- voiceIdx <- EventIdx("Rozmowa glosowa")
- smsIdx <- EventIdx("Wiadomosc SMS")
- dataIdx <- EventIdx("Dane")
- ### voice
- length(voiceIdx)
- timeStr <- strptime(as.character(njudata[voiceIdx,"time"]),format="%Y.%m.%d %H:%M")
- voice.days <- substr(timeStr,1,10)
- voice.time <- substr(timeStr,12,16)
- voice.yymm <- substr(voice.days,1,7)
- voice.hour <- substr(voice.time,1,2)
- voice.time.numeric <- sapply(voice.time, function(x) TimeStrNum(x))
- summary(voice.time.numeric)
- plot(density(voice.time.numeric),xlim=c(0,24),
- main="Voice Call Init. distribution",
- xlab="hour of a day")
- abline(v=min(voice.time.numeric),col="Red")
- abline(v=max(voice.time.numeric),col="Red")
- voice.call.length <- CallLength(as.character(njudata[voiceIdx,"qty"]))
- sum(voice.call.length)
- sum(voice.call.length)/as.numeric(difftime(tail(timeStr,1),timeStr[1],units="days"))
- plot(voice.call.length,type="h")
- plot(cumsum(voice.call.length),type="l")
- abline(lm( (y=cumsum(voice.call.length)) ~ (x=1:length(voice.call.length)) ) , col="Blue")
- summary(voice.call.length)
- plot(density(voice.call.length),main="Voice Call Length [min]",xlab="minutes")
- boxplot(voice.call.length~factor(weekdays(timeStr,abbreviate=TRUE),c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")),
- main="Voice call lengths by weekdays",
- ylab="minutes")
- ### called
- called <- as.character(njudata[voiceIdx,"phone"])
- called <- sapply(called, function(x) ClearNumber(x))
- calledNo <- length(unique(called))
- required.letters <- ceiling(log(calledNo,26))
- called.code <- sample(27:26^required.letters, calledNo, replace=FALSE)
- called.symb <- sapply(called.code, function(x) EncodeNumber(x,required.letters))
- called.coded <- rep("",length(called))
- for (i in 1:calledNo) called.coded[ which(called==unique(called)[i]) ] <- called.symb[i]
- called.stat <- matrix(0,calledNo,4)
- colnames(called.stat) <- c("qty","len","avg","sd")
- rownames(called.stat) <- called.symb
- for (i in 1:length(called)) {
- calledId <- which(called.symb==called.coded[i])
- called.stat[calledId,"qty"] <- called.stat[calledId,"qty"]+1
- called.stat[calledId,"len"] <- called.stat[calledId,"len"]+voice.call.length[i]
- }
- for (i in 1:nrow(called.stat)) {
- idx <- which(called.coded==called.symb[i])
- called.stat[i,"avg"] <- called.stat[i,"len"]/called.stat[i,"qty"]
- called.stat[i,"sd"] <- sd(voice.call.length[idx])
- }
- par(mfrow=c(2,1))
- plot(sort(called.stat[,"qty"]),type="h",
- ylab="Number of calls to a number")
- plot(sort(called.stat[,"len"]),type="h",
- ylab="Number of minutes to a number")
- par(mfrow=c(1,1))
- boxplot(voice.call.length~called.coded,
- main="Call characteristic for different recipients",
- xlab="called",ylab="call length in minutes")
- plot(called.stat[,"qty"],called.stat[,"avg"],pch=16,
- main="Number of calls vs Time of call (to a number)",
- xlab="number of calls",ylab="average call length [min]")
- abline(lm(called.stat[,"avg"]~called.stat[,"qty"]),col="Blue")
- abline(h=mean(voice.call.length),col="Red",lty="dashed")
- lines(called.stat[,"qty"],called.stat[,"sd"],type="h",col="Green")
- plot(called.stat[,"qty"],called.stat[,"sd"],log="x")
- nrow(njudata[voiceIdx,])
- calledNo
- ### data
- timeStr <- strptime(as.character(njudata[dataIdx,"time"]),format="%Y.%m.%d %H:%M")
- data.days <- substr(timeStr,1,10)
- data.time <- substr(timeStr,12,16)
- data.yymm <- substr(data.days,1,7)
- data.hour <- substr(data.time,1,2)
- transmitted <- sapply(as.character(njudata[dataIdx,"qty"]), function(x) DataLoad(x))
- plot(transmitted/1000,type="h",
- main="Data transmission process",
- xlab="time [minutes]",ylab="MB")
- boxplot((transmitted/1000)~data.hour,
- main="Data transmission per hour",
- log="y",ylab="MB (log)",xlab="hour of a day")
- plot(cumsum(transmitted/1000),type="l",
- main="Cummulative data transmission",
- xlab="time [minutes]",ylab="MB")
- abline(lm((y=cumsum(transmitted/1000))~(x=1:length(transmitted))),col="Blue")
- sum(transmitted)/1000
- difftime(tail(timeStr,1),timeStr[1])
- transmission.day <- sum(transmitted)/1000/as.numeric(difftime(tail(timeStr,1),timeStr[1],units="days"))
- transmission.day
- transmission.day*30
- ### voice + data
- # hourly
- comb.stats <- matrix(0,24,2)
- colnames(comb.stats) <- c("voice","data")
- for (i in 1:24) {
- idx.voice <- which((as.numeric(voice.hour)+1)==i)
- idx.data <- which((as.numeric(data.hour)+1)==i)
- if (length(idx.voice)>0) comb.stats[i,"voice"] <- mean(voice.call.length[idx.voice])
- if (length(idx.data)>0) comb.stats[i,"data"] <- mean(transmitted[idx.data]/1000)
- }
- comb.stats[,"voice"] <- rescale(comb.stats[,"voice"],c(0,1))
- comb.stats[,"data"] <- rescale(comb.stats[,"data"],c(0,1))
- matplot(comb.stats,type="l",lty="solid",
- main="Voice vs. Data",
- xlab="hour of a day",ylab="average intensity %")
- legend("topright",c("voice","data"),text.col=c("Black","Red"),bty="n",bg="n")
- # monthly
- all.months <- sort(unique(union(voice.yymm,data.yymm)))
- month.stats <- matrix(0,length(all.months),2)
- colnames(month.stats) <- c("voice","data")
- rownames(month.stats) <- all.months
- timeStr.all <- strptime(as.character(njudata[,"time"]),format="%Y.%m.%d %H:%M")
- dayB <- head(timeStr.all,1) # begining
- dayE <- tail(timeStr.all,1) # end
- for (this.month in all.months) {
- month.idx <- which(all.months==this.month)
- daysN <- monthDays(as.Date(paste(this.month,"-01",sep="")))
- if (month.idx==1) daysN <- daysN-as.numeric(substr(dayB,9,10))+1
- if (month.idx==length(all.months)) daysN <- as.numeric(substr(dayE,9,10))
- voice.idx <- which(voice.yymm==this.month)
- data.idx <- which(data.yymm==this.month)
- month.stats[month.idx,"voice"] <- sum(voice.call.length[voice.idx])/daysN
- month.stats[month.idx,"data"] <- sum(transmitted[data.idx])/1000/daysN
- }
- month.stats
- # month.stats[,"voice"] <- rescale(month.stats[,"voice"],c(0,1))
- # month.stats[,"data"] <- rescale(month.stats[,"data"],c(0,1))
- matplot(month.stats,type="l",lty="solid",
- ylab="average daily usage",xaxt="n")
- axis(1,1:7,all.months)
- legend("topright",c("voice","data"),text.col=c("Black","Red"),bty="n",bg="n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement