/********************************************************************************/
/* FILENAME: SampleCodes.txt                                                    */
/*                                                                              */
/* DATE/LAST MODIFIED: 05.27.02/05.27.02                                        */
/*                                                                              */
/* AUTHOR: Danh V. Nguyen, Department of Statistics, Texas A&M University.      */
/*                                                                              */
/* DESCRIPTION:                                                                 */
/* This file contains some sample SAS codes to run partial least squares (PLS)  */
/* and Cox's proportional hazard regression.                                    */
/*                                                                              */
/* NOTES:                                                                       */
/* Comments can be found below along with the codes. For details see the SAS    */
/* documentation for PROC PLS (pp. 2691-2734) and PROC PHREG (pp. 2561-2657),   */
/* SAS/STAT User's Guide, Version 8, NC, SAS Institute Inc. (1999). Many        */
/* examples are given there.                                                    */
/*                                                                              */
/* Have fun. Cheers. Danh                                                       */
/********************************************************************************/

options ls=80 ps=60;

/*------------------------------------------------------------------------------*/
/* Change these input parameters to suit your data set.                         */
/* The complete gene expression matrix (Xcomplete.txt) should have              */
/*     column 1, ..., column p                                                  */
/* for the expression values of                                                 */
/*     gene 1, ..., gene p.                                                     */
/* The survival data (Ycomplete.txt) should have at least two columns:          */
/*     col1=observed time                                                       */
/*     col2=censoring status (0=censored, 1=failure)                            */
/*     ...                                                                      */
/*------------------------------------------------------------------------------*/
%let p=3846;
%let Xfilename=Xcomplete.txt;
%let Yfilename=Ycomplete.txt;

/*------------------------------------------------------------------------------*/
/* Load data.                                                                   */
/*------------------------------------------------------------------------------*/

/* Load complete gene expression matrix, X: one variable x1..x&p per column. */
data Xcomp;
    infile "&Xfilename" delimiter=' ' lrecl=200000;
    input x1-x&p;
run;

/* Load survival data (matrix), Y: observed time and censoring status. */
data Ycomp;
    infile "&Yfilename" delimiter=' ' lrecl=20000;
    input surtime status;
run;

/* One-to-one merge (no BY statement): row i of X is paired with row i of Y, */
/* so both files must list the samples in the same order.                    */
data comb;
    merge Xcomp Ycomp;
run;

/*------------------------------------------------------------------------------*/
/* RUNPLS macro.                                                                */
/*   dsxin : input data set holding the predictors x1..x&p and the response     */
/*   dsyin : name of the response variable used on the MODEL statement          */
/*   p     : number of predictor columns (x1..x&p)                              */
/*   lv    : number of PLS factors (latent variables) to extract                */
/* Writes the model to data set EST1 and the per-observation output (including  */
/* the X-scores with prefix xscr) to data set OUTPLS.                           */
/*------------------------------------------------------------------------------*/
%macro RUNPLS(dsxin, dsyin, p, lv);
    proc pls data=&dsxin method=pls outmodel=EST1 lv=&lv;
        model &dsyin = x1-x&p;
        output out=OUTPLS
               p=yhat1
               yresidual=yres1
               xresidual=xres1-xres&p
               xscore=xscr
               yscore=yscr
               stdy=stdy
               stdx=stdx
               h=h
               press=press
               t2=t2
               xqres=xqres
               yqres=yqres;
    run;
%mend RUNPLS;

/*------------------------------------------------------------------------------*/
/* RUNPHREG macro.                                                              */
/*   dsin   : input data set containing the score variables xscr1..xscr&lv      */
/*            (the OUT= data set produced by RUNPLS)                            */
/*   dsnew  : data set of covariate values at which BASELINE evaluates the      */
/*            survival function; must contain xscr1..xscr&lv                    */
/*   time   : name of the observed-time variable                                */
/*   status : name of the censoring variable (value 0 marks a censored time)    */
/*   lv     : number of score variables used as covariates                      */
/* Writes the BASELINE estimates to data set a (variables s, ls, lls).          */
/*------------------------------------------------------------------------------*/
%macro RUNPHREG(dsin, dsnew, time, status, lv);
    /* Use the caller-supplied data set; it was previously hard-coded to     */
    /* OUTPLS, which silently ignored the dsin argument.                     */
    proc phreg data=&dsin;
        model &time*&status(0)=xscr1-xscr&lv / ties=efron;
        baseline covariates=&dsnew out=a survival=s logsurv=ls loglogs=lls;
    run;
%mend RUNPHREG;

/*------------------------------------------------------------------------------*/
/* Run PLSPH regression.                                                        */
/*------------------------------------------------------------------------------*/

/* Number of PLS factors; passed as lv to both %RUNPLS and %RUNPHREG below. */
%let varx=2;

/* Make data values of interest for evaluating the survival curves from PHR. */
/* NOTE(review): the row breaks below were reconstructed from a copy whose   */
/* line breaks were lost; one covariate pattern (xscr1..xscr&varx) per row   */
/* is assumed -- confirm against the original file.                          */
data predavg;
    input xscr1-xscr&varx;
    datalines;
-7.4339 -14.9736
0.0306 -2.2267
;
run;

/* Drive both steps from the parameters defined above so that changing p or  */
/* varx in one place keeps the load, the PLS fit and the Cox fit consistent. */
%RUNPLS(dsxin=comb, dsyin=surtime, p=&p, lv=&varx);
%RUNPHREG(dsin=OUTPLS, dsnew=predavg, time=surtime, status=status, lv=&varx);