Advertisement
Guest User

Untitled

a guest
Jun 17th, 2016
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.55 KB | None | 0 0
  1. ##1. load data from PG
  2. library(RPostgreSQL)
  3. con<-dbConnect(PostgreSQL(),user='postgres',password='1',dbname='uchiru_login_20160303')
  4. query1<-"drop table cards_and_errors_g2"
  5. rs<-dbSendQuery(con,query)
  6. query<-"create table cards_and_errors_g2 as
  7. with t1 as (
  8. select a.student_id,
  9. f.value grade
  10. from sessions a
  11. inner join cards b
  12. on(a.content_card_id = b.content_card_id)
  13. inner join lessons c
  14. on(b.lesson_id = c.id)
  15. inner join chapters d
  16. on(c.chapter_id = d.id)
  17. inner join topics e
  18. on(d.topic_id = e.id)
  19. left join grades f
  20. on(e.grade_id = f.id)
  21. where f.value = 2 and
  22. a.content_card_id in (585,579,934,640,642,656,608,609,612,611,
  23. 616,615,617,613,614,756,573,758,620,757,
  24. 760,680,753,651,655,598,646,599,624,631,
  25. 627,628,681,670,666,683,704,597,759,618,
  26. 762,634,722,635,636,637,638,626,639,682,
  27. 627,629,630,632,684,671,676,675,797,798,
  28. 607,738,739,741,934,923,686,687,688,705,
  29. 689,692,700,720,706,709,667,703,685,782,
  30. 740,653,800,645,743,744,746,747,752,749,
  31. 851,764,767,679,678,639,673,726,725,784)
  32. group by a.student_id,
  33. f.value
  34. having count(distinct a.content_card_id) > 5
  35. ),
  36. t2 as(
  37. select a.student_id sid,
  38. t1.grade,
  39. a.content_card_id cid,
  40. a.total,
  41. a.total - a.right err,
  42. a.spent,
  43. a.created_at
  44. from sessions a
  45. inner join cards b
  46. on(a.content_card_id = b.content_card_id)
  47. inner join lessons c
  48. on(b.lesson_id = c.id)
  49. inner join chapters d
  50. on(c.chapter_id = d.id)
  51. inner join topics e
  52. on(d.topic_id = e.id)
  53. left join grades f
  54. on(e.grade_id = f.id)
  55. inner join t1
  56. on(a.student_id = t1.student_id and
  57. f.value = t1.grade)
  58. where a.completed = 't' and
  59. a.total > 0
  60. ),
  61. t3 as (
  62. select sid,
  63. cid,
  64. err
  65. from t2
  66. )
  67. select * from t3"
  68. rs<-dbSendQuery(con,query)
  69.  
  70. query<-"select *
  71. from crosstab('select sid, cid, err
  72. from cards_and_errors_g2
  73. order by 1, 2',
  74. $$VALUES (573),(579),(585),(597),(598),(599),(607),(608),(609),(611),
  75. (612),(613),(614),(615),(616),(617),(618),(620),(624),(626),
  76. (627),(628),(629),(630),(631),(632),(634),(635),(636),
  77. (637),(638),(639),(640),(642),(645),(646),(651),(653),
  78. (655),(656),(666),(667),(670),(671),(673),(675),(676),(678),
  79. (679),(680),(681),(682),(683),(684),(685),(686),(687),(688),
  80. (689),(692),(700),(703),(704),(705),(706),(709),(720),(722),
  81. (725),(726),(738),(739),(740),(741),(743),(744),(746),(747),
  82. (749),(752),(753),(756),(757),(758),(759),(760),(762),(764),
  83. (767),(782),(784),(797),(798),(800),(851),(923),(934)$$)
  84. as ct(sid integer, c573_17 integer,c579_2 integer,c585_1 integer,c597_38 integer,c598_26 integer,c599_28 integer,c607_61 integer,c608_7 integer,c609_8 integer,c611_10 integer,
  85. c612_9 integer,c613_14 integer,c614_15 integer,c615_12 integer,c616_11 integer,c617_13 integer,c618_40 integer,c620_19 integer,c624_29 integer,c626_48 integer,
  86. c627_31 integer,c628_32 integer,c629_52 integer,c630_53 integer,c631_30 integer,c632_54 integer,c634_42 integer,c635_44 integer,c636_45 integer,
  87. c637_46 integer,c638_47 integer,c639_49 integer,c640_4 integer,c642_5 integer,c645_84 integer,c646_27 integer,c651_24 integer,c653_82 integer,
  88. c655_25 integer,c656_6 integer,c666_35 integer,c667_77 integer,c670_34 integer,c671_56 integer,c673_97 integer,c675_58 integer,c676_57 integer,c678_95 integer,
  89. c679_94 integer,c680_22 integer,c681_33 integer,c682_50 integer,c683_36 integer,c684_55 integer,c685_79 integer,c686_67 integer,c687_68 integer,c688_69 integer,
  90. c689_71 integer,c692_72 integer,c700_73 integer,c703_78 integer,c704_37 integer,c705_70 integer,c706_75 integer,c709_76 integer,c720_74 integer,c722_43 integer,
  91. c725_99 integer,c726_98 integer,c738_62 integer,c739_63 integer,c740_81 integer,c741_64 integer,c743_85 integer,c744_86 integer,c746_87 integer,c747_88 integer,
  92. c749_90 integer,c752_89 integer,c753_23 integer,c756_16 integer,c757_20 integer,c758_18 integer,c759_39 integer,c760_21 integer,c762_41 integer,c764_92 integer,
  93. c767_93 integer,c782_80 integer,c784_100 integer,c797_59 integer,c798_60 integer,c800_83 integer,c851_91 integer,c923_66 integer,c934_3 integer)"
  94. rs<-dbSendQuery(con,query)
  95. d<-fetch(rs,n=-1) #d<-read.csv('stud_card_err_g2_5.csv',head=T,sep=',')
  96.  
  97. ##2. prepare data
  98. d[is.na(d)]<--1
  99. cOrder<-c('c585_1','c579_2','c934_3','c640_4','c642_5','c656_6',
  100. 'c608_7','c609_8','c612_9','c611_10','c616_11','c615_12',
  101. 'c617_13','c613_14','c614_15','c756_16','c573_17','c758_18',
  102. 'c620_19','c757_20','c760_21','c680_22','c753_23','c651_24',
  103. 'c655_25','c598_26','c646_27','c599_28','c624_29','c631_30',
  104. 'c627_31','c628_32','c681_33','c670_34','c666_35','c683_36',
  105. 'c704_37','c597_38','c759_39','c618_40','c762_41','c634_42',
  106. 'c722_43','c635_44','c636_45','c637_46','c638_47','c626_48',
  107. 'c639_49','c682_50','c629_52','c630_53','c632_54','c684_55',
  108. 'c671_56','c676_57','c675_58','c797_59','c798_60','c607_61',
  109. 'c738_62','c739_63','c741_64','c923_66','c686_67','c687_68',
  110. 'c688_69','c705_70','c689_71','c692_72','c700_73','c720_74',
  111. 'c706_75','c709_76','c667_77','c703_78','c685_79','c782_80',
  112. 'c740_81','c653_82','c800_83','c645_84','c743_85','c744_86',
  113. 'c746_87','c747_88','c752_89','c749_90','c851_91','c764_92',
  114. 'c767_93','c679_94','c678_95','c673_97','c726_98','c725_99',
  115. 'c784_100')
  116. d$R<-round(runif(nrow(d),1,1000))
  117. ##2.1. sanity checks
  118. ##2.2. modeling
  119. ##2.3. stats gathering
  120. library(gbm)
  121. library(ROCR)
  122. library(zoo)
  123. pb<-txtProgressBar(style=3,max=length(cOrder))
  124. mstat<-NULL
  125. #setTxtProgressBar(pb,0)
  126. for(i in c(6:length(cOrder))){
  127. ld<-d[d$R<200,cOrder[1:i]]
  128. td<-d[d$R>=800,cOrder[1:i]]
  129. names(ld)[ncol(ld)]<-'y'
  130. names(td)[ncol(td)]<-'y'
  131. #print(names(ld))
  132. ld$y<-1*(ld$y>5)
  133. td$y<-1*(td$y>5)
  134. if(max(ld$y,na.rm=T)+max(td$y,na.rm=T)<2){
  135. mstat<-rbind(mstat,data.frame(id=i,y=cOrder[i],lAUC=0,tAUC=0))
  136. setTxtProgressBar(pb,i-6)
  137. next
  138. }
  139. m1<-gbm(y~.,data=ld,n.trees=100,interaction.depth=2,shrinkage=.05,distribution='bernoulli')
  140. p<-predict(m1,ld,type='response',n.trees=100)
  141. pr<-prediction(p,ld$y)
  142. pe<-performance(pr,'sens','spec')
  143. lAUC<-sum(diff(pe@y.values[[1]])*rollmean(pe@x.values[[1]],2))
  144. #plot(pe,main=cOrder[i],col='blue')#write png
  145. p<-predict(m1,td,type='response',n.trees=100)
  146. pr<-prediction(p,td$y)
  147. pe<-performance(pr,'sens','spec')
  148. tAUC<-sum(diff(pe@y.values[[1]])*rollmean(pe@x.values[[1]],2))
  149. #plot(pe,add=T,col='red')
  150. mstat<-rbind(mstat,data.frame(id=i,y=cOrder[i],lAUC,tAUC))
  151. query<-paste("insert into mstat values(",i,",'",cOrder[i],"',",lAUC,",",tAUC,")",sep="")
  152. dbSendQuery(con, query)
  153. setTxtProgressBar(pb,i-6)
  154. }
  155. con.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement