Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##1. load data from PG
- library(RPostgreSQL)
- con<-dbConnect(PostgreSQL(),user='postgres',password='1',dbname='uchiru_login_20160303')
- query1<-"drop table cards_and_errors_g2"
- rs<-dbSendQuery(con,query)
- query<-"create table cards_and_errors_g2 as
- with t1 as (
- select a.student_id,
- f.value grade
- from sessions a
- inner join cards b
- on(a.content_card_id = b.content_card_id)
- inner join lessons c
- on(b.lesson_id = c.id)
- inner join chapters d
- on(c.chapter_id = d.id)
- inner join topics e
- on(d.topic_id = e.id)
- left join grades f
- on(e.grade_id = f.id)
- where f.value = 2 and
- a.content_card_id in (585,579,934,640,642,656,608,609,612,611,
- 616,615,617,613,614,756,573,758,620,757,
- 760,680,753,651,655,598,646,599,624,631,
- 627,628,681,670,666,683,704,597,759,618,
- 762,634,722,635,636,637,638,626,639,682,
- 627,629,630,632,684,671,676,675,797,798,
- 607,738,739,741,934,923,686,687,688,705,
- 689,692,700,720,706,709,667,703,685,782,
- 740,653,800,645,743,744,746,747,752,749,
- 851,764,767,679,678,639,673,726,725,784)
- group by a.student_id,
- f.value
- having count(distinct a.content_card_id) > 5
- ),
- t2 as(
- select a.student_id sid,
- t1.grade,
- a.content_card_id cid,
- a.total,
- a.total - a.right err,
- a.spent,
- a.created_at
- from sessions a
- inner join cards b
- on(a.content_card_id = b.content_card_id)
- inner join lessons c
- on(b.lesson_id = c.id)
- inner join chapters d
- on(c.chapter_id = d.id)
- inner join topics e
- on(d.topic_id = e.id)
- left join grades f
- on(e.grade_id = f.id)
- inner join t1
- on(a.student_id = t1.student_id and
- f.value = t1.grade)
- where a.completed = 't' and
- a.total > 0
- ),
- t3 as (
- select sid,
- cid,
- err
- from t2
- )
- select * from t3"
- rs<-dbSendQuery(con,query)
- query<-"select *
- from crosstab('select sid, cid, err
- from cards_and_errors_g2
- order by 1, 2',
- $$VALUES (573),(579),(585),(597),(598),(599),(607),(608),(609),(611),
- (612),(613),(614),(615),(616),(617),(618),(620),(624),(626),
- (627),(628),(629),(630),(631),(632),(634),(635),(636),
- (637),(638),(639),(640),(642),(645),(646),(651),(653),
- (655),(656),(666),(667),(670),(671),(673),(675),(676),(678),
- (679),(680),(681),(682),(683),(684),(685),(686),(687),(688),
- (689),(692),(700),(703),(704),(705),(706),(709),(720),(722),
- (725),(726),(738),(739),(740),(741),(743),(744),(746),(747),
- (749),(752),(753),(756),(757),(758),(759),(760),(762),(764),
- (767),(782),(784),(797),(798),(800),(851),(923),(934)$$)
- as ct(sid integer, c573_17 integer,c579_2 integer,c585_1 integer,c597_38 integer,c598_26 integer,c599_28 integer,c607_61 integer,c608_7 integer,c609_8 integer,c611_10 integer,
- c612_9 integer,c613_14 integer,c614_15 integer,c615_12 integer,c616_11 integer,c617_13 integer,c618_40 integer,c620_19 integer,c624_29 integer,c626_48 integer,
- c627_31 integer,c628_32 integer,c629_52 integer,c630_53 integer,c631_30 integer,c632_54 integer,c634_42 integer,c635_44 integer,c636_45 integer,
- c637_46 integer,c638_47 integer,c639_49 integer,c640_4 integer,c642_5 integer,c645_84 integer,c646_27 integer,c651_24 integer,c653_82 integer,
- c655_25 integer,c656_6 integer,c666_35 integer,c667_77 integer,c670_34 integer,c671_56 integer,c673_97 integer,c675_58 integer,c676_57 integer,c678_95 integer,
- c679_94 integer,c680_22 integer,c681_33 integer,c682_50 integer,c683_36 integer,c684_55 integer,c685_79 integer,c686_67 integer,c687_68 integer,c688_69 integer,
- c689_71 integer,c692_72 integer,c700_73 integer,c703_78 integer,c704_37 integer,c705_70 integer,c706_75 integer,c709_76 integer,c720_74 integer,c722_43 integer,
- c725_99 integer,c726_98 integer,c738_62 integer,c739_63 integer,c740_81 integer,c741_64 integer,c743_85 integer,c744_86 integer,c746_87 integer,c747_88 integer,
- c749_90 integer,c752_89 integer,c753_23 integer,c756_16 integer,c757_20 integer,c758_18 integer,c759_39 integer,c760_21 integer,c762_41 integer,c764_92 integer,
- c767_93 integer,c782_80 integer,c784_100 integer,c797_59 integer,c798_60 integer,c800_83 integer,c851_91 integer,c923_66 integer,c934_3 integer)"
- rs<-dbSendQuery(con,query)
- d<-fetch(rs,n=-1) #d<-read.csv('stud_card_err_g2_5.csv',head=T,sep=',')
- ##2. prepare data
- d[is.na(d)]<--1
- cOrder<-c('c585_1','c579_2','c934_3','c640_4','c642_5','c656_6',
- 'c608_7','c609_8','c612_9','c611_10','c616_11','c615_12',
- 'c617_13','c613_14','c614_15','c756_16','c573_17','c758_18',
- 'c620_19','c757_20','c760_21','c680_22','c753_23','c651_24',
- 'c655_25','c598_26','c646_27','c599_28','c624_29','c631_30',
- 'c627_31','c628_32','c681_33','c670_34','c666_35','c683_36',
- 'c704_37','c597_38','c759_39','c618_40','c762_41','c634_42',
- 'c722_43','c635_44','c636_45','c637_46','c638_47','c626_48',
- 'c639_49','c682_50','c629_52','c630_53','c632_54','c684_55',
- 'c671_56','c676_57','c675_58','c797_59','c798_60','c607_61',
- 'c738_62','c739_63','c741_64','c923_66','c686_67','c687_68',
- 'c688_69','c705_70','c689_71','c692_72','c700_73','c720_74',
- 'c706_75','c709_76','c667_77','c703_78','c685_79','c782_80',
- 'c740_81','c653_82','c800_83','c645_84','c743_85','c744_86',
- 'c746_87','c747_88','c752_89','c749_90','c851_91','c764_92',
- 'c767_93','c679_94','c678_95','c673_97','c726_98','c725_99',
- 'c784_100')
- d$R<-round(runif(nrow(d),1,1000))
- ##2.1. sanity checks
- ##2.2. modeling
- ##2.3. stats gathering
- library(gbm)
- library(ROCR)
- library(zoo)
- pb<-txtProgressBar(style=3,max=length(cOrder))
- mstat<-NULL
- #setTxtProgressBar(pb,0)
- for(i in c(6:length(cOrder))){
- ld<-d[d$R<200,cOrder[1:i]]
- td<-d[d$R>=800,cOrder[1:i]]
- names(ld)[ncol(ld)]<-'y'
- names(td)[ncol(td)]<-'y'
- #print(names(ld))
- ld$y<-1*(ld$y>5)
- td$y<-1*(td$y>5)
- if(max(ld$y,na.rm=T)+max(td$y,na.rm=T)<2){
- mstat<-rbind(mstat,data.frame(id=i,y=cOrder[i],lAUC=0,tAUC=0))
- setTxtProgressBar(pb,i-6)
- next
- }
- m1<-gbm(y~.,data=ld,n.trees=100,interaction.depth=2,shrinkage=.05,distribution='bernoulli')
- p<-predict(m1,ld,type='response',n.trees=100)
- pr<-prediction(p,ld$y)
- pe<-performance(pr,'sens','spec')
- lAUC<-sum(diff(pe@y.values[[1]])*rollmean(pe@x.values[[1]],2))
- #plot(pe,main=cOrder[i],col='blue')#write png
- p<-predict(m1,td,type='response',n.trees=100)
- pr<-prediction(p,td$y)
- pe<-performance(pr,'sens','spec')
- tAUC<-sum(diff(pe@y.values[[1]])*rollmean(pe@x.values[[1]],2))
- #plot(pe,add=T,col='red')
- mstat<-rbind(mstat,data.frame(id=i,y=cOrder[i],lAUC,tAUC))
- query<-paste("insert into mstat values(",i,",'",cOrder[i],"',",lAUC,",",tAUC,")",sep="")
- dbSendQuery(con, query)
- setTxtProgressBar(pb,i-6)
- }
- con.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement