Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /******************************************************************************
- Author, date: Paul Brown, JUL2015
- Macro name: simul_data, iterat_simul (run simul_data iteratively in order to
- obtain the desired correlations between the outcomes), nearestcorr
- Description: simulate random samples from a multivariate normal distribution,
- transform variables to required types. Create a dataset containing
- num random samples indicated by the variable samp=1,2,...num.
- Outcomes may be TTE, dichotmous, normal or log-normal (developed in SAS 9.4)
- Reference: Sun et al., Circulation: Heart Failure, 2012
- Austin, Generating survival times, Stat in Med 2012
- http://blogs.sas.com/content/iml/2012/11/28/computing-the-nearest-
- correlation-matrix.html
- ******************************************************************************/
- ********************************************************************;
- *** define macro parameters ***;
- ********************************************************************;
- %macro simul_data(n=100, /*total sample size ie n/2 in each grp*/
- num=1000, /*no. of random samples*/
- numvars=5, /*number of variables*/
- tte_fu=30, /*follow-up time for censoring of TTE endpts, corresponds
- to the response estimates assumed for TTE outcomes*/
- var1_tp=SURV, /*variable type: bino=dichotomous, norm=normal,
- logn=log-normal ie log(post baseline/baseline),
- or surv=time-to-event (exponential)*/
- var1_hi=0.93, /*hi=active grp, the base response*/
- var1_df=-0.02, /*df=treatment difference, defines response for Lo=contrl grp*/
- var1_sd=, /*if var1_tp=norm or lognorm (sd for log(post/base))*/
- var2_tp=SURV, var2_hi=0.86, var2_df=-0.02, var2_sd=,
- var3_tp=BINO, var3_hi=0.2, var3_df=0.02, var3_sd=,
- var4_tp=NORM, var4_hi=2500, var4_df=-500, var4_sd=2700,
- var5_tp=LOGN, var5_hi=0.5, var5_df=0.2, var5_sd=0.7,
- /*initial working correlations */
- corr12=0, corr13=0, corr14=0, corr15=0,
- corr23=0, corr24=0, corr25=0,
- corr34=0, corr35=0,
- corr45=0);
- %let gtype1=%upcase(&var1_tp) ;
- %let gtype2=%upcase(&var2_tp) ;
- %let gtype3=%upcase(&var3_tp) ;
- %let gtype4=%upcase(&var4_tp) ;
- %let gtype5=%upcase(&var5_tp) ;
- ********************************************************************;
- *** convert to normal variates ***;
- ********************************************************************;
- data _null_;
- %do i=1 %to &numvars;
- %if &>ype&i=SURV %then %do;
- surv_hi=&&var&i._hi;
- surv_lo=&&var&i._hi+&&var&i._df;
- *--hazards;
- survh_hi=-log(surv_hi)/&tte_fu;
- survh_lo=-log(surv_lo)/&tte_fu;
- *--log hazards;
- survz_hi=log(survh_hi);
- survz_lo=log(survh_lo);
- *--number of deaths;
- survd_hi=(&n/2)*(1-surv_hi);
- survd_lo=(&n/2)*(1-surv_lo);
- *--variance;
- survv_hi=1/survd_hi;
- survv_lo=1/survd_lo;
- call symput("var&i.v_hi",trim(left(put(survv_hi,best.))));
- call symput("var&i.v_lo",trim(left(put(survv_lo,best.))));
- call symput("var&i.z_hi",trim(left(put(survz_hi,best.))));
- call symput("var&i.z_lo",trim(left(put(survz_lo,best.))));
- %end;
- %else %if &>ype&i=BINO %then %do;
- bino_hi=&&var&i._hi;
- bino_lo=&&var&i._hi+&&var&i._df;
- *--log odds;
- binoz_hi=log(bino_hi/(1-bino_hi));
- binoz_lo=log(bino_lo/(1-bino_lo));
- *--variance of log odds;
- binov_hi=1/((&n/2)*bino_hi*(1-bino_hi));
- binov_lo=1/((&n/2)*bino_lo*(1-bino_lo));
- call symput("var&i.v_hi",trim(left(put(binov_hi,best.))));
- call symput("var&i.v_lo",trim(left(put(binov_lo,best.))));
- call symput("var&i.z_hi",trim(left(put(binoz_hi,best.))));
- call symput("var&i.z_lo",trim(left(put(binoz_lo,best.))));
- %end;
- %else %if &>ype&i=NORM %then %do;
- normv_hi=&&var&i._sd**2;
- normv_lo=&&var&i._sd**2;
- normz_lo=&&var&i._hi+&&var&i._df;
- normz_hi=&&var&i._hi;
- call symput("var&i.v_hi",trim(left(put(normv_hi,best.))));
- call symput("var&i.v_lo",trim(left(put(normv_lo,best.))));
- call symput("var&i.z_hi",trim(left(put(normz_hi,best.))));
- call symput("var&i.z_lo",trim(left(put(normz_lo,best.))));
- %end;
- %else %if &>ype&i=LOGN %then %do;
- *--log ratio (post baseline/baseline);
- lognz_lo=log(&&var&i._hi+&&var&i._df);
- lognz_hi=log(&&var&i._hi);
- *--variance of log ratio;
- lognv_hi=&&var&i._sd**2;
- lognv_lo=&&var&i._sd**2;
- call symput("var&i.v_hi",trim(left(put(lognv_hi,best.))));
- call symput("var&i.v_lo",trim(left(put(lognv_lo,best.))));
- call symput("var&i.z_hi",trim(left(put(lognz_hi,best.))));
- call symput("var&i.z_lo",trim(left(put(lognz_lo,best.))));
- %end;
- %end;
- run;
- ********************************************************************;
- *** simulate data from multivariate normal ***;
- ********************************************************************;
- proc iml;
- %nearestcorr();
- *specify parameters of multivariate normal dist;
- mean_hi=1:&numvars; varn_hi=1:&numvars;
- mean_lo=1:&numvars; varn_lo=1:&numvars;
- %do i=1 %to &numvars;
- mean_hi[&i]=&&var&i.z_hi;
- varn_hi[&i]=&&var&i.v_hi;
- mean_lo[&i]=&&var&i.z_lo;
- varn_lo[&i]=&&var&i.v_lo;
- %end;
- corr_tmp_={1 &corr12 &corr13 &corr14 &corr15,
- &corr12 1 &corr23 &corr24 &corr25,
- &corr13 &corr23 1 &corr34 &corr35,
- &corr14 &corr24 &corr34 1 &corr45,
- &corr15 &corr25 &corr35 &corr45 1};
- corr_tmp=corr_tmp_[1:&numvars, 1:&numvars]; /*in case numvar < 5*/
- eigval=eigval(corr_tmp);
- if all(eigval>0) then corr=corr_tmp; /*if positive definite*/
- else corr=NearestCorr(corr_tmp); /*otherwise find nearest pos def matrix*/
- print corr;
- covr_hi=corr#sqrt(varn_hi`*varn_hi);
- covr_lo=corr#sqrt(varn_lo`*varn_lo);
- print covr_hi; print covr_lo;
- *obtain random samples (seed=dec2014);
- call randseed(1214);
- hi=randnormal((&n/2)*&num,mean_hi,covr_hi);
- lo=randnormal((&n/2)*&num,mean_lo,covr_lo);
- *create sas datasets;
- samp=colvec(repeat(T(1:&num),1,&n/2));
- z=samp||hi;
- create rand_hi from z[c={"samp" "x1" "x2" "x3" "x4" "x5"}];
- append from z;
- close rand_hi;
- z=samp||lo;
- create rand_lo from z[c={"samp" "x1" "x2" "x3" "x4" "x5"}];
- append from z;
- close rand_lo;
- quit;
- *combine data for the 2 grps;
- data randztmp;
- format trt $10.;
- set rand_hi (in=h) rand_lo (in=l);
- *rename nominal Lo\Hi groups;
- if h then trt='Active';
- else if l then trt='Control';
- run;
- *sort data;
- proc sort data=randztmp;
- by samp trt;
- run;
- *finalise data, create patient number;
- data randz;
- retain subjno;
- set randztmp;
- by samp trt;
- if first.samp then subjno=1;
- else subjno=subjno+1;
- run;
- ********************************************************************;
- *** transform from normal variates ***;
- ********************************************************************;
- data randsamp;
- set randz;
- %do i=1 %to &numvars;
- %if &>ype&i=SURV %then %do;
- var&i=round(-log(ranuni(1412))/exp(x&i),1)+1;
- var&i.2=var&i; var&i.c=0;
- if var&i gt &tte_fu then do;
- var&i.2=&tte_fu;
- var&i.c=1;
- end;
- label var&i="Survival endpoint &i"
- var&i.2="Survival endpoint &i with censoring"
- var&i.c="Survival endpoint &i censoring indicator";
- %end;
- %else %if &>ype&i=BINO %then %do;
- if trt='Active' then do;
- x&i._=(x&i-&&var&i.z_hi)/sqrt(&&var&i.v_hi);
- x&i.__=tinv(1-&&var&i._hi,&n/2-1);
- if x&i._ gt x&i.__ then var&i=1;
- else var&i=0;
- end;
- else if trt='Control' then do;
- x&i._=(x&i-&&var&i.z_lo)/sqrt(&&var&i.v_lo);
- x&i.__=tinv(1-(&&var&i._hi+&&var&i._df),&n/2-1);
- if x&i._ gt x&i.__ then var&i=1;
- else var&i=0;
- end;
- drop x&i._ x&i.__;
- label var&i="Dichotomous endpoint &i";
- %end;
- %else %if &>ype&i=NORM %then %do;
- var&i=x&i;
- label var&i="Continuous endpoint &i";
- %end;
- %else %if &>ype&i=LOGN %then %do;
- var&i=100*(exp(x&i)-1);
- label var&i="Pct change endpoint &i";
- %end;
- %end;
- drop x1-x&numvars ;
- label trt='Treatment' samp='Rand sample no.' subjno='Patient no.';
- run;
- proc sort data=randsamp;
- by samp trt;
- run;
- %mend simul_data;
- ********************************************************************;
- *** iterations to obtain correlations ***;
- ********************************************************************;
- %macro iterat_simul(n_=100,num_=1000,numvars_=5,criterion=0.025,maxiter=50,
- aim12=0.1, aim13=-0.06, aim14=0.05, aim15=0, aim23=-0.03,
- aim24=0, aim25=0, aim34=-0.6, aim35=0, aim45=0,/*desired correlations*/
- out=finalsamp, out_iter=finaliter); /*output datasets*/
- %global gnumvars gnum gtype1 gtype2 gtype3 gtype4 gtype5 gprec ;
- %let gnumvars=&numvars_;
- %let gnum=&num_;
- %let t0 = %sysfunc(datetime());
- data iterations;
- set _null_;
- run;
- %simul_data(n=&n_,num=&num_,numvars=&numvars_);
- %let gprec=1;
- %let iterat=0;
- %do %while (&gprec gt &criterion and &iterat lt &maxiter);
- /*prec (precision) = max difference between actual
- correlation and the desired value*/
- %let iterat=%eval(&iterat+1);
- ods output pearsonCorr=corrns1
- (keep=variable var:
- rename=(var1-var&numvars_ = varc1-varc&numvars_));
- proc corr data=randsamp pearson; /*pearson used even for binary outcomes, produces the same corr
- as the biserial point correlation*/
- var var1-var&numvars_; /*careful to exclude var[]2 (survival vars)*/
- run;
- proc sort data=corrns1;
- by variable;
- run;
- data randzc;
- set randz;
- rename x1-x&numvars_ = var1-var&numvars_;
- run;
- ods output pearsonCorr=corrns2
- (keep=variable var:
- rename=(var1-var&numvars_ = varcz1-varcz&numvars_));
- proc corr data=randzc pearson;
- var var:; /*across random samples*/
- run;
- proc sort data=corrns2;
- by variable;
- run;
- data corrns;
- merge corrns1 corrns2;
- by variable;
- run;
- data iterations;
- format precision 7.5 ;
- retain prectmp 0;
- set iterations corrns (in=a);
- conv=0.005; /*conv=convergence limit*/
- if a then do;
- iteration=&iterat;
- %do p=1 %to 5;
- %do b=1 %to 5;
- if variable="var&p" then do;
- %if &b gt &p and &b le &numvars_ %then %do; /*corrns above diagonal*/
- diff&p&b=abs(varc&b-&&aim&p&b);
- prectmp=max(prectmp,diff&p&b);
- if diff&p&b lt conv then c&p&b=varcz&b; /*desired corrn attained*/
- else do; /*desired value not attained, thus adjust*/
- if &&aim&p&b gt 0 then c&p&b=min((&&aim&p&b/varc&b)*varcz&b,1); /*+ve corrns*/
- else c&p&b=max((&&aim&p&b/varc&b)*varcz&b,-1); /*-ve corrns*/
- end;
- call symput("c&p&b",trim(left(put(c&p&b,best.)))); /*corrn assigned its new value*/
- %end;
- end;
- %if &b gt &p and (&p gt &numvars_ or &b gt &numvars_) %then %do;
- call symput("c&p&b",trim(left(put(0,best.)))); /*superfluous correlations*/
- %end;
- %end;
- %end;
- if variable="var&numvars_" then call symput("gprec",trim(left(put(prectmp,7.5))));
- precision=prectmp; /*this precision will be compared against the criterion and the loop
- terminated if satisfied*/
- end;
- run;
- %simul_data(n=&n_,num=&num_,numvars=&numvars_,
- corr12=&c12, corr13=&c13, corr14=&c14, corr15=&c15,
- corr23=&c23, corr24=&c24, corr25=&c25,
- corr34=&c34, corr35=&c35,
- corr45=&c45);
- %end;
- %let speed=%sysfunc(datetime())-&t0;
- data &out;
- set randsamp;
- run;
- data &out_iter;
- retain iteration variable varc1-varc&numvars_ precision speed ; /*put vars in order*/
- format varc1-varc&numvars_ 5.2;
- set iterations;
- speed=put(&speed,6.2)||' secs';
- keep iteration variable varc1-varc&numvars_ precision speed;
- run;
- proc sort data=&out_iter;
- by iteration variable;
- run;
- %mend iterat_simul;
- *** end *****************************************;
RAW Paste Data