*-------------------------------------------------------------------------;
* This program classifies each day of accelerometer wear as either meeting;
* or not meeting the definition of an `exercise day`. Then, the collection;
* of exercise and non-exercise days for each individual are analyzed to   ;
* determine if the probability of a particular day being an exercise day  ;
* is greater than some predefined cutoff. These estimated probabilities   ;
* may then be used to compute an estimated population prevalence of       ;
* `adherence` to exercise recommendations based on the predefined cutoff. ;
* The derivation of this technique is shown in the following document:    ;
* < put name of document here>                                            ;
*-------------------------------------------------------------------------;

*-------------------------------------------------------------------------;
* Set up SAS library for input data sets. Modify for your specific use.   ;
* Input data sets:pam_perperson_reweighted                                ;
*                 pam_perday_reweighted                                   ;
*                 from the reweighting program                            ;
*-------------------------------------------------------------------------;

libname in "&home\EATS_NHANES\sasdata\NHANES\03-04";

*-------------------------------------------------------------------------;
* NOTE(review): the path above depends on the macro variable HOME, which  ;
* is not defined in the visible part of this program. Confirm that it is  ;
* set before this statement runs.                                         ;
*                                                                         ;
* The NOFMTERR option lets SAS keep going when a variable in an input     ;
* data set carries a format that cannot be loaded.                        ;
*-------------------------------------------------------------------------;

options nofmterr;

*-------------------------------------------------------------------------;
* Run PROC FORMAT step to define formats for specific variables.          ;
*-------------------------------------------------------------------------;

proc format;
    * Labels for 0 and 1 indicator values;
    value yesno
        0 = 'No'
        1 = 'Yes'
    ;

    * Gender labels, where code 0 is labeled All;
    value gender
        0 = 'All'
        1 = 'Male'
        2 = 'Female'
    ;

    * Age grouping that splits ages 12-19 into 12-15 and 16-19;
    value agegrp
        0 = 'All'
        1 = '6-11'     2 = '12-15'    3 = '16-19'
        4 = '20-29'    5 = '30-39'    6 = '40-49'
        7 = '50-59'    8 = '60-69'    9 = '70+'
    ;

    * Alternative age grouping that keeps 12-19 together and ends at 80+;
    value age9f
        0 = 'All'
        1 = '6-11'     2 = '12-19'    3 = '20-29'
        4 = '30-39'    5 = '40-49'    6 = '50-59'
        7 = '60-69'    8 = '70-79'    9 = '80+'
    ;

    * Three race and ethnicity levels, with code 0 labeled All;
    value race3f
        0 = 'All'
        1 = 'Non-Hispanic White+Other'
        2 = 'Non-Hispanic Black'
        3 = 'Mexican American'
    ;

    * Day-of-week labels running from 1 = Sunday to 7 = Saturday;
    value wkday
        1 = 'Sunday'
        2 = 'Monday'
        3 = 'Tuesday'
        4 = 'Wednesday'
        5 = 'Thursday'
        6 = 'Friday'
        7 = 'Saturday'
    ;
run;

*-------------------------------------------------------------------------;
* Classify each record in the per day data set as either meeting or not   ;
* meeting the definition of an `exercise day`. The variable WEARHR1060    ;
* has value 1 if the individual wore the monitor for at least 10 hours of ;
* the particular day. Variable ADH_DAY1 has the value 1 if the monitor    ;
* was worn for at least 10 hours and the total duration of moderate and   ;
* vigorous activity was at least 60 minutes (counting every minute). The  ;
* variable ADH_DAY has the value 1 if the monitor was worn for at least 10;
* hours and total duration of moderate and vigorous exercise was at least ;
* 30 minutes (counting minutes in bouts where at least 8 of 10 minutes met;
* the threshold of moderate/vigorous activity). The only values possible  ;
* for WEARHR1060, ADH_DAY1, and ADH_DAY are 0 and 1.                      ;
*                                                                         ;
* If WEARHR1060=0, then the information from that day is ignored when     ;
* estimating the adherence probability.                                   ;
*                                                                         ;
* You may choose to use different criteria for determining what defines an;
* `exercise day` or even what defines a `potential exercise day`. The     ;
* criteria used in this example are those chosen for children and adults, ;
* respectively, as indicated below.                                       ;
*-------------------------------------------------------------------------;

* Copy the per day input data into WORK.PERDAY, ordered by the survey design;
* variables, then person (seqn), then day;
proc sort data=in.pam_perday_reweighted
          out=perday;
    by sdmvstra sdmvpsu riagendr seqn day;
run;

* Add the three 0 and 1 indicator variables to every per day record;
data perday;
    set perday;

    * Potential exercise day: wear time of at least 10 hours;
    * A missing wear_hr compares as less than 10 and therefore yields 0;
    if wear_hr >= 10 then wearhr1060 = 1;
    else wearhr1060 = 0;

    * Criterion selected for children: potential exercise day with;
    * tot_dur_mv1 of at least 60;
    if wearhr1060 = 1 and tot_dur_mv1 >= 60 then adh_day1 = 1;
    else adh_day1 = 0;

    * Criterion selected for adults: potential exercise day with;
    * tot_dur_mv of at least 30;
    if wearhr1060 = 1 and tot_dur_mv >= 30 then adh_day = 1;
    else adh_day = 0;
run;

*-------------------------------------------------------------------------;
* Compute the number of potential exercise days (VALDAYS = sum of         ;
* WEARHR1060) as well as the number of days that meet each of the         ;
* `exercise day` criteria: (DAYS_ADH1 and DAYS_ADH = sums of ADH_DAY1 and ;
* ADH_DAY). Use the WT_1 variable for weighting. The BY and ID statements ;
* ensure that the output data set ADHERE contains all the important survey;
* design information.                                                     ;
*-------------------------------------------------------------------------;

* Sum the daily indicators within each BY group, giving one output row per;
* combination of stratum, PSU, gender and person (seqn);
proc means data=perday noprint;
    by sdmvstra sdmvpsu riagendr seqn;
    * The ID statement copies wt_1 into the output data set alongside the;
    * BY variables. The sums themselves are not weighted in this step;
    id wt_1;
    var adh_day1 adh_day wearhr1060;
    * _type_ and _freq_ are dropped so that ADHERE holds only the BY and ID;
    * variables plus the three sums;
    output out=adhere(drop=_type_ _freq_)
           sum(adh_day1 adh_day wearhr1060)=days_adh1 days_adh valdays;
run;

*-------------------------------------------------------------------------;
* Now estimate the probability that a given person adheres to the         ;
* recommendation that at least 5 of every 7 days is an `exercise day`,    ;
* according to the two criteria.                                          ;
*-------------------------------------------------------------------------;

* For each row, take the upper tail beyond 5/7 of a beta distribution whose;
* parameters are (exercise days + 1) and (remaining valid days + 1);
data adhere(drop=_p_cut);
    set adhere;

    * Cutoff proportion: 5 exercise days out of every 7. This helper variable;
    * is dropped from the output data set;
    _p_cut = 5/7;

    * Criterion based on DAYS_ADH;
    p_adhere  = 1 - probbeta(_p_cut, days_adh + 1,  valdays - days_adh + 1);

    * Criterion based on DAYS_ADH1;
    p_adhere1 = 1 - probbeta(_p_cut, days_adh1 + 1, valdays - days_adh1 + 1);
run;

*-------------------------------------------------------------------------;
* The weighted average of the estimated adherence probabilities is the    ;
* estimated population prevalence of adherence. It is important to use a  ;
* procedure designed for analysis of survey data. Here we use PROC        ;
* SURVEYMEANS. Additional SAS programming is required to apply different  ;
* criteria to the appropriate age groups.                                 ;
*-------------------------------------------------------------------------;

proc surveymeans data=adhere;
    * Survey design: strata, clusters within strata, and the weight variable;
    strata  sdmvstra;
    cluster sdmvpsu;
    weight  wt_1;

    * Analysis variables, also reported separately for each riagendr level;
    var     p_adhere p_adhere1;
    domain  riagendr;
run;