Advertisement
Guest User

Untitled

a guest
Jul 25th, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.20 KB | None | 0 0
  1. library(RMySQL)
  2. library(DBI)
  3.  
  4.  
  5. conn <- dbConnect(MySQL(), dbname = "stream", username = "streamusr" ,password = "00000000" , host = "140.120.55.97" , port = 3306)
  6. conn1 <- dbConnect(MySQL(), dbname = "topmso", username = "streamusr" ,password = "00000000" , host = "140.120.55.97" , port = 3306)
  7. dbSendQuery(conn,'set names utf8')
  8. dbSendQuery(conn1,'set names utf8')
  9. datapart1<-dbGetQuery(conn, "SELECT * FROM `stream_view_temp`")
  10. datapart2<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp`")
  11. datapart3<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp_rec`")
  12. datapart4<-dbGetQuery(conn1, "SELECT * FROM `live_stream`")
  13. datapart5<-dbGetQuery(conn1, "SELECT * FROM `boquet_assoc`")
  14. datapart6<-dbGetQuery(conn1, "SELECT * FROM `boquet_assoc_service`")
  15. datapart7<-dbGetQuery(conn1, "SELECT * FROM `ed_city`")
  16. datapart8<-dbGetQuery(conn1, "SELECT * FROM `ed_operator`")
  17. datapart9<-dbGetQuery(conn1, "SELECT * FROM `ed_town`")
  18. datapart10<-dbGetQuery(conn1, "SELECT * FROM `service_desc`")
  19. datapart11<-dbGetQuery(conn1, "SELECT * FROM `stream_view_temp_cloud_user`")
  20.  
  21. datawip<-datapart2[,-9]
  22. colnames(datawip)<-c("session_id","cust_no","create_time","destroy_time","ei_start_time","ei_end_time","ei_event_name","SD_SERVICE_NAME")
  23.  
  24. finaldata<-rbind(datapart1,datawip)
  25.  
  26. #刪除sessionid重複的資料,(通常發生狀況為停留在同一頻道但節目變換)
  27.  
  28. productdata<-finaldata[!duplicated(finaldata[,1]),]
  29. productdata<-productdata[order(productdata[,3]),]
  30. #去除記錄到的NA值
  31. productdata<-na.omit(productdata)
  32.  
  33. record<-NULL
  34. i<-1
  35. #找出每一位有註冊的用戶所使用的資訊
  36. while(i<=nrow(datapart11)){
  37. temp<-subset(productdata,productdata[,2]==datapart11[i,1])
  38. if(length(temp)>0){
  39. j<-1
  40. datarec<-NULL
  41. #紀錄該用戶頻道類別轉移的資訊
  42. while(j<nrow(temp)){
  43. kx<-as.numeric(as.POSIXct(temp[j,4]))-as.numeric(as.POSIXct(temp[j+1,3]))
  44. if(kx<10){
  45. #紀錄上下兩相連節目
  46. transfer<-c(temp[j,8],temp[j+1,8],j)
  47. datarec<-rbind(datarec,transfer)}
  48. j<-j+1
  49. }}
  50.  
  51. record<-rbind(record,datarec)
  52. i<-i+1}
  53.  
  54. write.csv(record,file="record.csv",fileEncoding = "big5")
  55.  
  56. data <- record[,1:2]
  57. CHcount<-matrix(0,nrow=16,ncol=16)
  58. #做空白矩陣紀錄轉移的機率,datapart5[2:17,1]為所有頻道的分類
  59. CHC<-cbind(datapart5[2:17,2],CHcount)
  60. CHC<-rbind(t(datapart5[1:17,2]),CHC)
  61.  
  62. CHType<-datapart5[2:17,1]
  63. #利用 service_desc 提供的 頻道與其對應的編號合併為新表格,用作頻道屬性的分類
  64. CH<-cbind(datapart10[,2],datapart10[,4])
  65. gtemp<-matrix(0,ncol=198,nrow=1)
  66. trans<-datapart6[261:482,]
  67.  
  68. ###將節目頻道與對應到的屬性編號合成,並刪除NULL值
  69. i<-1
  70. while(i<=198){
  71.  
  72. gtemp[1,i]<-as.numeric(subset(trans,trans[,2]==CH[i,1])[1,1])
  73. i<-i+1}
  74.  
  75. CH<-cbind(t(gtemp),CH)
  76. CH<-na.omit(CH)
  77. CHtab<-as.data.frame(table(CH[,1]))
  78.  
  79.  
  80. j<-1
  81. while(j<nrow(CHC)){
  82. #找出第j個屬性的所有頻道,這邊使用j+1原因在於當初為了製表清晰,因此CHC[1,1]值為全部,從CHC[2,1]後開始排序
  83. fircnum<-as.numeric(subset(datapart5,datapart5[,2]==CHC[j+1,1])[,1])
  84. processdata<-subset(datapart6,datapart6[,1]==fircnum)[,2]
  85.  
  86. l<-1
  87. total<-0
  88. subtotal<-matrix(0,ncol=16,nrow=1)
  89.  
  90. while(l<=length(processdata)){
  91. trans<-subset(CH,CH[,2]==processdata[l])[3]
  92. #挑出紀錄中,所有第一個觀看頻道為l頻道的組合
  93. subdata<-subset(data,data[,1]==trans)
  94.  
  95. m<-1
  96. while(m<nrow(CHC)){
  97. #這邊則是根據第一個屬性觀看其中頻道的組合中,進行分類
  98. temp<-as.numeric(subset(datapart5,datapart5[,2]==CHC[m+1,1])[,1])
  99. tempprocess<-subset(datapart6,datapart6[,1]==temp)[,2]
  100. n<-1
  101. while(n<=length(tempprocess)){
  102. #從第一個觀看頻道為l頻道的組合,分類第二個觀看頻道的屬性,並累加
  103. findch<-subset(CH,CH[,2]==tempprocess[n])[,3]
  104. subsubtemp<-subset(subdata,subdata[,2]==findch)
  105. subtotal[1,m]<-subtotal[1,m]+nrow(subsubtemp)
  106. total<-total+nrow(subsubtemp)
  107. n<-n+1}
  108. m<-m+1}
  109. l<-l+1}
  110.  
  111. #將累加次數轉換為比例,並放入CHC表格中
  112. if(total==0)CHC[j+1,2:17]<-subtotal else CHC[j+1,2:17]<-subtotal/total
  113.  
  114. j<-j+1}
  115. #即可得初步馬柯夫轉移矩陣
  116.  
  117. write.csv(CHC,file="CHC.csv",fileEncoding = "big5")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement