****************************************************************************************;
* Import TREC accelerometry data.sas
*
* Author:       Nate Johnson
*               Division of Clinical Epidemiology
*               Case Western Reserve University
*
*
* Date:         10/24/06
* Updated:      11/29/06 (see notes)
*
* Purpose:      This program takes several steps to read in and process data from Physical
*               Activity Monitor for the TREC study and prepare data to be processed using
*               programs received from David Berrigan (berrigad@mail.nih.gov) that were used
*               to process NHANES data.
*
* Requirements: The program assumes that raw PAM data is stored as individual .dat files
*               in a single directory (specified by the user). The participant id should be
*               some function of the file name of the raw data file, and the syntax to create
*               that id is supplied by the user below.
*
* Process:      The program takes the following steps:
*               1) The user specifies three parameters that are used in processing.
*                  a) MYPATH is the full path where the raw PAM data files are stored.
*                     This can be a network path (\\servername\path) or local (c:\path).
*                  b) MYID is the name of the participant id variable.
*                  c) GETID is the sas syntax that is used to extract the participant id
*                     from the raw data file name.
*               2) Raw data (one record per person per minute) are read in from .dat files
*                  for each participant.
*               3) Raw data are processed to timestamp each minute and remove any artifact
*                  data.
*
* Next Steps:   Before processing with the NHANES programs, it is also necessary to include
*               participant age (as an integer) in the dataset in a variable called "ridageyr"
*               to be consistent with the NHANES datasets.
****************************************************************************************;
* Modification Notes
*   11/29/06 - nlj - program has been modified to be able to be used more generally to
*               import raw PAM data files from a specified directory.
****************************************************************************************;

****************************************************************************************;
* SET PARAMETERS FOR DATA PROCESSING:
*   MYPATH: the full path where raw data files (.dat) are stored.
*       For the Cleveland site, the files are stored on a network fileserver
*   ;
        %let mypath = \\ServerName\Path\To\Data\Folder;
*
*   MYID: the name of the variable holding the participant id.
*       For the Cleveland site, the variable is called "personid."
*   ;
        %let myid = personid;
*
*   GETID: the code necessary to extract the unique id from the file name (variable f2r).
*       For the Cleveland site, the files are stored as TR####.dat, with the participant
*       id being a four-digit number following the 'TR'.
*   ;
        %let getid = input(substr(f2r,3,4),4.);
*
*   BADCUT: any raw intensity value below this cutoff (or missing) is treated as artifact.
*       The same cutoff is used both when locating the neighbouring valid minutes and
*       when deciding which minutes get an imputed score.
*   ;
        %let badcut = -32700;
*
****************************************************************************************;

options source nodate nonumber nofmterr formdlim = ' ';

/* Store the session start date in two layouts as global macro variables.
   They are not referenced again in the visible part of this program. */
data _null_;
    call symput("datetoday",put("&sysdate"d,mmddyy8.));
    call symput("date6",put("&sysdate"d,mmddyy6.));
run;

* get time at start of processing to track program efficiency during testing;
data _null_;
    v = time();
    call symput('start',put(v,time8.));
run;

****************************************************************************************;
* BEGIN IMPORT
****************************************************************************************;

/*----------------------------------------------------------------------------------------
  GETFILELIST
  Builds WORK.START with one row per .dat file found in MYPATH:
      f2r        file name as listed by the operating system
      <MYID>     participant id derived from f2r through GETID
      starttime, startdate, sdt   recording start read from header lines 3 and 4
  Sets the global macro variable NREC to the number of files (0 when none are found).
  WORK.START is returned sorted by <MYID> because RUNIMPORT merges against it BY <MYID>.
----------------------------------------------------------------------------------------*/
%macro getfilelist;
    /* Preset NREC so that RUNIMPORT still resolves it when the directory is empty. */
    %global nrec;
    %let nrec = 0;

    /* Bare directory listing (one file name per line) piped straight into SAS. */
    filename accdata pipe "dir ""&mypath.\*.dat"" /b";

    data start;
        retain &myid;               /* puts the id first in the variable order */
        length f2r $256;            /* wide enough that long file names are not cut off */
        infile accdata truncover;
        input f2r $256.;
        f2r = left(f2r);
        &myid = &getid;
        fil2read = "&mypath.\" || f2r;

        /* Header lines 3 and 4 of each file hold the start time and the start date,
           each beginning in column 12. */
        infile n filevar=fil2read end=done firstobs=3 lrecl=500 obs=4;
        do while(not done);
            input @12 starttime: time8. #2 @12 startdate: mmddyy10.;
        end;

        /* Single datetime value marking when recording started. */
        sdt = dhms(startdate,hour(starttime),minute(starttime),second(starttime));
        format starttime time8. startdate mmddyy8. sdt datetime19.;

        /* Running count of files; the value from the last row is the total. */
        call symput('nrec',trim(left(put(_n_,8.))));
    run;

    filename accdata clear;

    /* The directory listing order is not guaranteed to be id order, and the BY-merge
       in RUNIMPORT requires it. */
    proc sort data=start;
        by &myid;
    run;
%mend;
%getfilelist;

************************************************************************************;
* IMPORT RAW DATA
************************************************************************************;

/*----------------------------------------------------------------------------------------
  RUNIMPORT
  For each of the NREC files listed in WORK.START:
      1) reads every intensity value from line 11 onward (one value per minute),
      2) time stamps each minute from the recording start,
      3) stores, per calendar day, the nearest valid value before (lastvalid) and
         after (nextvalid) each minute,
      4) stacks the result onto WORK.N1.
  Values below BADCUT, and missing values, are never used as lastvalid or nextvalid.
----------------------------------------------------------------------------------------*/
%macro runimport;
    %local i file;

    %if &nrec = 0 %then %put ERROR: No .dat files were found in &mypath.;

    %do i=1 %to &nrec;
        /* Fetch the name of the ith file from the header dataset. */
        data _null_;
            i = &i;
            set start point=i;
            call symput('file',trim(f2r));
            stop;
        run;

        /* Read the raw values of the ith file; N is the minute sequence number. */
        data n&i;
            f2r = "&file";
            &myid = &getid;
            fil2read="&mypath.\&file";
            infile n filevar=fil2read firstobs=11 lrecl=500;
            input paxinten_in @@;
            n = _n_;
            drop f2r;
        run;

        /* Attach the recording start and derive a time stamp for every minute.
           DAY holds the calendar date (a SAS date value) rather than the day of the
           month, so that days keep their chronological order across a month end. */
        data n&i (drop = sdt);
            merge n&i (in=a) start (keep=&myid sdt);
            by &myid;
            if a;
            timestamp = sdt + ((n-1)*60);
            format timestamp datetime19.;
            day = datepart(timestamp);
        run;

        proc sort data=n&i;
            by day n;
        run;

        /* Forward pass: carry the most recent valid value through each day.
           The carried value starts out missing at the beginning of every day. */
        data n&i;
            set n&i;
            by day;
            retain lastvalid;
            if first.day then lastvalid = .;
            if not (paxinten_in < &badcut) then lastvalid = paxinten_in;
        run;

        proc sort data=n&i;
            by day descending n;
        run;

        /* Backward pass: the same carry on the reversed minutes gives the next
           valid value within the day. */
        data n&i;
            set n&i;
            by day;
            retain nextvalid;
            if first.day then nextvalid = .;
            if not (paxinten_in < &badcut) then nextvalid = paxinten_in;
        run;

        proc sort data=n&i;
            by n;
        run;

        /* Stack every file after the first onto N1 and drop the per-file dataset. */
        %if &i > 1 %then %do;
            proc append base=n1 data=n&i;
            run;

            proc datasets library=work nolist;
                delete n&i;
            quit;
        %end;
    %end;
%mend;
%runimport;

* get time at end of import step to track program efficiency during testing;
data _null_;
    v = time();
    call symput('import',put(v,time8.));
run;

****************************************************************************************;
* END IMPORT
****************************************************************************************;

****************************************************************************************;
* PROCESS RAW DATA AND PREPARE FOR SCORING WITH NHANES PROGRAMS
****************************************************************************************;

* get time at beginning of processing step to track program efficiency during testing;
data _null_;
    v = time();
    call symput('processing',put(v,time8.));
run;

* HANDLE INVALID DATA;
* in the case of invalid data, impute the mean of the valid values before and after the
* invalid period
* eg. if one minute of invalid data, the intensity score for that minute is the mean of
* the adjacent minutes. If ten consecutive minutes of invalid data, the score for each of
* those minutes is the mean of the minutes immediately before and after that period of
* invalid data. If there is no valid minute on one side within the same day, the imputed
* score is missing;
data pax (keep=&myid n timestamp paxinten paxinten_in day);
    retain &myid n timestamp paxinten paxinten_in;
    set n1;
    if paxinten_in < &badcut then paxinten = round((lastvalid + nextvalid)/2);
    else paxinten = paxinten_in;
run;

proc sort data=pax;
    by &myid day;
run;

* number the calendar days of each participant consecutively starting at 1;
data pax2 (drop=day);
    set pax;
    by &myid day;
    retain paxday;
    if first.&myid then paxday = 1;
    else if first.day then paxday = paxday + 1;
run;

* add the variable names used here for the NHANES programs (seqn, paxn, day);
data pax3;
    set pax2;
    rename n = paxn;
    seqn = &myid;
    day = paxday;
run;

proc sort data=pax3;
    by seqn day paxn;
run;

* clean up datasets;
proc datasets library=work nolist;
    delete n1 pax pax2 start;
quit;

* get time at end of processing step to track program efficiency during testing;
data _null_;
    v = time();
    call symput('now',put(v,time8.));
run;

****************************************************************************************;
* END PROCESS RAW DATA
****************************************************************************************;

options nosource;

/* Durations are taken modulo 24 hours so that a run crossing midnight does not
   report a negative elapsed time. */
data _null_;
    st = input("&start",time8.);
    imp = input("&import",time8.);
    pro = input("&processing",time8.);
    end = input("&now",time8.);
    impdur = mod(imp - st + 86400, 86400);
    produr = mod(end - pro + 86400, 86400);
    totdur = mod(end - st + 86400, 86400);
    call symput('impdur',put(impdur,time8.));
    call symput('produr',put(produr,time8.));
    call symput('totdur',put(totdur,time8.));
run;

%put *********************************************************;
%put * NOTE: Program Start Time: &start;
%put *;
%put * NOTE: Import Completed: &import;
%put * NOTE: Import Duration: (&impdur.);
%put *;
%put * NOTE: Processing Started: &processing;
%put * NOTE: Processing Duration: (&produr.);
%put *;
%put * NOTE: Program End Time: &now;
%put * NOTE: Program Total Duration: (&totdur.);
%put *********************************************************;

options source;