Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(RMySQL)
- library(DBI)
- conn <- dbConnect(MySQL(), dbname = "stream", username = "streamusr" ,password = "00000000" , host = "140.120.55.97" , port = 3306)
- conn1 <- dbConnect(MySQL(), dbname = "topmso", username = "streamusr" ,password = "00000000" , host = "140.120.55.97" , port = 3306)
- dbSendQuery(conn,'set names utf8')
- dbSendQuery(conn1,'set names utf8')
- datapart1<-dbGetQuery(conn, "SELECT * FROM `stream_view_temp`")
- datapart2<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp`")
- datapart3<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp_rec`")
- datapart4<-dbGetQuery(conn1, "SELECT * FROM `live_stream`")
- datapart5<-dbGetQuery(conn1, "SELECT * FROM `boquet_assoc`")
- datapart6<-dbGetQuery(conn1, "SELECT * FROM `boquet_assoc_service`")
- datapart7<-dbGetQuery(conn1, "SELECT * FROM `ed_city`")
- datapart8<-dbGetQuery(conn1, "SELECT * FROM `ed_operator`")
- datapart9<-dbGetQuery(conn1, "SELECT * FROM `ed_town`")
- datapart10<-dbGetQuery(conn1, "SELECT * FROM `service_desc`")
- datapart11<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp_cloud_user`")
- datawip<-datapart2[,-9]
- colnames(datawip)<-c("session_id","cust_no","create_time","destroy_time","ei_start_time","ei_end_time","ei_event_name","SD_SERVICE_NAME")
- finaldata<-rbind(datapart1,datawip)
- #刪除sessionid重複的資料,(通常發生狀況為停留在同一頻道但節目變換)
- productdata<-finaldata[!duplicated(finaldata[,1]),]
- productdata<-productdata[order(productdata[,3]),]
- #去除記錄到的NA值
- productdata<-na.omit(productdata)
- record<-NULL
- i<-1
- #找出每一位有註冊的用戶所使用的資訊
- while(i<=nrow(datapart11)){
- temp<-subset(productdata,productdata[,2]==datapart11[i,1])
- if(length(temp)>0){
- j<-1
- datarec<-NULL
- #紀錄該用戶頻道類別轉移的資訊
- while(j<nrow(temp)){
- kx<-as.numeric(as.POSIXct(temp[j,4]))-as.numeric(as.POSIXct(temp[j+1,3]))
- if(kx<10){
- #紀錄上下兩相連節目
- transfer<-c(temp[j,8],temp[j+1,8],j)
- datarec<-rbind(datarec,transfer)}
- j<-j+1
- }}
- record<-rbind(record,datarec)
- i<-i+1}
- write.csv(record,file="record.csv",fileEncoding = "big5")
- data <- record[,1:2]
- CHcount<-matrix(0,nrow=16,ncol=16)
- #做空白矩陣紀錄轉移的機率,datapart5[2:17,1]為所有頻道的分類
- CHC<-cbind(datapart5[2:17,2],CHcount)
- CHC<-rbind(t(datapart5[1:17,2]),CHC)
- CHType<-datapart5[2:17,1]
- #利用 service_desc 提供的 頻道與其對應的編號合併為新表格,用作頻道屬性的分類
- CH<-cbind(datapart10[,2],datapart10[,4])
- gtemp<-matrix(0,ncol=198,nrow=1)
- trans<-datapart6[261:482,]
- ###將節目頻道與對應到的屬性編號合成,並刪除NULL值
- i<-1
- while(i<=198){
- gtemp[1,i]<-as.numeric(subset(trans,trans[,2]==CH[i,1])[1,1])
- i<-i+1}
- CH<-cbind(t(gtemp),CH)
- CH<-na.omit(CH)
- CHtab<-as.data.frame(table(CH[,1]))
- j<-1
- while(j<nrow(CHC)){
- #找出第j個屬性的所有頻道,這邊使用j+1原因在於當初為了製表清晰,因此CHC[1,1]值為全部,從CHC[2,1]後開始排序
- fircnum<-as.numeric(subset(datapart5,datapart5[,2]==CHC[j+1,1])[,1])
- processdata<-subset(datapart6,datapart6[,1]==fircnum)[,2]
- l<-1
- total<-0
- subtotal<-matrix(0,ncol=16,nrow=1)
- while(l<=length(processdata)){
- trans<-subset(CH,CH[,2]==processdata[l])[3]
- #挑出紀錄中,所有第一個觀看頻道為l頻道的組合
- subdata<-subset(data,data[,1]==trans)
- m<-1
- while(m<nrow(CHC)){
- #這邊則是根據第一個屬性觀看其中頻道的組合中,進行分類
- temp<-as.numeric(subset(datapart5,datapart5[,2]==CHC[m+1,1])[,1])
- tempprocess<-subset(datapart6,datapart6[,1]==temp)[,2]
- n<-1
- while(n<=length(tempprocess)){
- #從第一個觀看頻道為l頻道的組合,分類第二個觀看頻道的屬性,並累加
- findch<-subset(CH,CH[,2]==tempprocess[n])[,3]
- subsubtemp<-subset(subdata,subdata[,2]==findch)
- subtotal[1,m]<-subtotal[1,m]+nrow(subsubtemp)
- total<-total+nrow(subsubtemp)
- n<-n+1}
- m<-m+1}
- l<-l+1}
- #將累加次數轉換為比例,並放入CHC表格中
- if(total==0)CHC[j+1,2:17]<-subtotal else CHC[j+1,2:17]<-subtotal/total
- j<-j+1}
- #即可得初步馬柯夫轉移矩陣
- write.csv(CHC,file="CHC.csv",fileEncoding = "big5")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement