/**************************************************************************** * * * SAS Macro Distrib_Bivariate estimates the bivariate distribution of true * * usual intake of two foods/nutrients using Monte Carlo simulation. * * The first food/nutrient can be episodically consumed or consumed every * * day, while the second food/nutrient is assumed to be consumed every day. * * * * Macro Distrib_Bivariate reads data sets parms_b and pred_x_b output by * * SAS macro NLMixed_Bivariate, and uses Monte Carlo simulation of the * * random effects to generate the distribution of true usual intake. * * * * The method for two foods/nutrients that are consumed every day is * * described in: * * Freedman LS, et al. The population distribution of ratios of usual * * intake of dietary components that are consume every day can be * * estimated from repeated 24-hour recalls. J Nutr. 2010;140:111-116. * * * ***************************************************************************** * * * Macro Parameters: * * * * Required Parameters: * * param = name of SAS data set containing the estimated model * * parameters from macro NLMixed_Bivariate. * * predicted = name of SAS data set containing the predicted values * * calculated by macro NLMixed_Bivariate. * * typically, each subject has only one predicted value, * * so has only one observation in the data set. * * some models, however, allow a subject to have * * different predicted values on different days (e.g., * * subjects may eat differently on weekends than on * * weekdays). for such models, each subject should have * * one observation for each unique predicted value. * * macro Distrib_Univariate will then calculate true * * usual intake as a weighted average of usual intake on * * the different days (see day_wht, below). * * subject = name of the variable that uniquely identifies each * * subject in the predicted data set (i.e., ID variable). * * modeltype = model for first food/nutrient: * * to fit the two-part (epsisodic) model, specify * * modeltype = TWOPART * * to fit the one-part (every day) model, specify * * modeltype = ONEPART * * * * Optional Parameters: * * link = link function for the probability part of the two- * * part model. to fit a logistic model, specify * * link = logit * * to fit a probit model, specify * * link = probit * * by default, link = logit. * * if modeltype = ONEPART, then link is ignored. * * nsim_mc = number of pseudo-individuals to simulate for each * * real individual in the data set. if the data set has * * n subjects, then the monte carlo distribution will * * have nsim_mc * n pesudo-individuals. * * by default, nsim_mc = 1. * * day_wgt = name of the "day weight" variable in the predicted * * data set. if the model allows subjects to have * * different predicted values on different days (see * * predicted, above), then macro distrib will calculate * * each subject's true usual intake as a weighted average * * of usual intake on the different days (using the * * weights in the "day weight" variable) . for example, * * if each subject has one predicted value for weekends * * (Friday-Sunday) and one predicted value for weekdays * * (Monday-Thursday), then the day weight variable should * * equal 3/7 for the weekend predicted value and 4/7 for * * the weekday predicted value. * * by default, if subjects have multiple predicted * * values, then true usual intake is calculated as an * * unweighted average of usual intake on different days. * * min_a1 = minimum true usual intake for a one-part model, or * * mininum true usual amount on consumption day for a * * two-part model, for the first food/nutrient. * * the monte carlo method generates amount consumed on * * a transformed scale, then back-transforms it to the * * original scale. occasionally, the generated amount is * * too small to be back-transformed. when this happens, * * amount on the original scale is set to min_a1. * * by default, min_a1 = 0. * * min_a2 = minimum true usual intake for the second food/nutrient.* * by default, min_a2 = 0. * * backtran = 1 to use a numerical integration method to integrate * * back-transformed reported intake over the distribution * * of within-person error. * * = 2 to use a Taylor linearization approximation to the * * integral (not recommended). * * = 3 to back-transform without integrating over the * * distribution of within-person error. (not recommended).* * by default, backtran = 1. * * print = Y to print summary of the monte carlo distribution. * * = N to supress printing summary of the distribution. * * by default, print = Y. * * ntitle = number of titles defined by the user. * * by default, ntitle = 2. * * * ***************************************************************************** * * * Output Data Set: * * * * _mcsim = data set containing the monte carlo distribution of true usual * * intake of the food/nutrient. if the the predicted data set * * has n subjects, _mcsim will have nsim_mc * n pseudo-subjects. * * _mcsim contains all the numeric variables in the predicted * * data set, plus the following variables: * * * * t1 = true usual intake of first food/nutrient. * * t2 = true usual intake of second food/nutrient. * * a1 = true usual amount on consumption day for first * * food/nutrient. * * bc_a1 = Box-Cox transformed true usual amount a1. * * bc_t2 = Box-Cox transformed true usual intake t2. * * * * if fitting the two-part model, then _mcsim also contains * * the following variables: * * * * p1 = true probability to consume. * * linear_pred_p1 = transformed true probability to consume: * * = log(p1/(1-p1)) for logistic model * * = probit(p1) for probit model * * * ****************************************************************************/ %macro Distrib_Bivariate (param = , predicted = , subject = , modeltype = , link = LOGIT, nsim_mc = 1, day_wgt = , min_a1 = 0, min_a2 = 0, backtran = 1, print = Y, ntitle = 2); %global seed; /* seed for random number generator. */ %let print = %upcase(%substr(&print,1,1)); %let modeltype = %upcase(&modeltype); %let link = %upcase(&link); %if (&backtran ^= 2 & &backtran ^= 3) %then %let backtran = 1; %if (&link ^= PROBIT) %then %let link = LOGIT; %if (&modeltype ^= ONEPART) %then %let modeltype = TWOPART; /* get predicted values and parameter estimates. */ data _predicted; set &predicted; run; proc sort data=_predicted; by &subject; data _predicted; set _predicted end=eof; by &subject; if (first.&subject) then _id + 1; retain multiobs 0; if (first.&subject & not last.&subject) then multiobs = 1; if (eof) then call symput("multiobs",trim(left(put(multiobs, 1.)))); run; /* determine number and order of random effects u. */ %let dim_u = 0; %let index_u = ; %let name_u = ; %if (&modeltype ^= ONEPART) %then %do; %let dim_u = %eval(&dim_u + 1); %let index_u = &index_u 1; %let name_u = &name_u u1; %end; %let dim_u = %eval(&dim_u + 1); %let index_u = &index_u 2; %let name_u = &name_u u2; %let dim_u = %eval(&dim_u + 1); %let index_u = &index_u 3; %let name_u = &name_u u3; /* generate monte carlo random effects u. */ %put initial seed = &seed; proc iml; seed = &seed; use _predicted; read all var _num_ into pred [colname=varnames]; read all var{_id} into id; close _predicted; use ¶m; read all var{%do i = 1 %to &dim_u; %let ii = %scan(&index_u,&i,%str( )); %do j = 1 %to %eval(&i-1); %let jj = %scan(&index_u,&j,%str( )); COV_U&jj.U&ii %end; VAR_U&ii %end; } into var_u; close ¶m; var_u = sqrsym(var_u); u_zero = (vecdiag(var_u) = 0); u_nonzero = (vecdiag(var_u) > 0); var_u = var_u # (u_nonzero * u_nonzero`); var_u = var_u + diag(u_zero); root_u = root(var_u); root_u = root_u # (u_nonzero * u_nonzero`); n = max(id); n_u = nrow(var_u); nsim = &nsim_mc; /* create sas data set of random effects. */ names = {"mc_id" "_sim"} || varnames || {%do i = 1 %to &dim_u; %let ii = %scan(&index_u,&i,%str( )); "u&ii" %end;}; rec = repeat(., 1, ncol(pred) + n_u + 2); create _mcsim from rec [colname=names]; offset = 0; do i = 1 to n; u = repeat(.,nsim,n_u); call rannor (seed, u); u = u * root_u; predi = pred[loc(id = i),]; npredi = nrow(predi); sim = (1:nsim)`; u = u @ repeat(1,npredi,1); sim = sim @ repeat(1,npredi,1); mc_id = offset + sim; rec = mc_id || sim || repeat(predi,nsim,1) || u; append from rec; offset = offset + nsim; end; close _mcsim; call symput("seed", trim(left(char(seed)))); quit; %put final seed = &seed; /* generate monte carlo distribution of true usual intake. */ /* add random effects to predicted values to generate true usual intake. */ data _mcsim (drop=a1_lambda a2_lambda var_e2 var_e3 temp %if (&backtran = 2) %then ginv_a1 ginv_a2 d2ginv_a1 d2ginv_a2; %if (&backtran = 1) %then c1-c9 w1-w9 ai sumw; ); if (_n_ = 1) then set ¶m(keep=a1_lambda a2_lambda var_e2 var_e3); set _mcsim; by mc_id; /* generate p = probability to consume (probit or logit link). */ %if (&modeltype = ONEPART) %then %do; linear_pred_p1 = .; p1 = 1; %end; %else %do; linear_pred_p1 = pred_x_p1 + u1; %if (&link = PROBIT) %then p1 = probnorm(linear_pred_p1) %str(;); %else p1 = 1 / (1 + exp(-linear_pred_p1)) %str(;); %end; /* generate A = amount on consumption day. */ bc_a1 = pred_x_a1 + u2; bc_a2 = pred_x_a2 + u3; /* back-transform amount to original scale. */ /* integrate back-transformation over distr. of within-person error (exact for log transformation). */ %if (&backtran ^= 3) %then %do; if (bc_a1 > .Z & a1_lambda = 0) then do; a1 = exp(bc_a1 + var_e2 / 2); end; if (bc_a2 > .Z & a2_lambda = 0) then do; a2 = exp(bc_a2 + var_e3 / 2); end; %end; /* integrate back-transformation over distr. of within-person error (gauss-hermite quadrature). */ %if (&backtran = 1) %then %do; c1 = -2.1; w1 = 0.063345; c2 = -1.3; w2 = 0.080255; c3 = -0.8; w3 = 0.070458; c4 = -0.5; w4 = 0.159698; c5 = 0.0; w5 = 0.252489; c6 = 0.5; w6 = 0.159698; c7 = 0.8; w7 = 0.070458; c8 = 1.3; w8 = 0.080255; c9 = 2.1; w9 = 0.063345; array c (*) c1-c9; array w (*) w1-w9; if (bc_a1 > .Z & a1_lambda ^= 0) then do; std_e2 = sqrt(var_e2); a1 = 0; sumw = 0; do i = 1 to dim(c); bc_ai = bc_a1 + std_e2 * c(i); temp = a1_lambda * bc_ai + 1; if (temp > 0) then ai = temp**(1/a1_lambda); else ai = 0; ai = max(ai, &min_a1); a1 = a1 + w(i) * ai; sumw = sumw + w(i); end; a1 = a1 / sumw; end; if (bc_a2 > .Z & a2_lambda ^= 0) then do; std_e3 = sqrt(var_e3); a2 = 0; sumw = 0; do i = 1 to dim(c); bc_ai = bc_a2 + std_e3 * c(i); temp = a2_lambda * bc_ai + 1; if (temp > 0) then ai = temp**(1/a2_lambda); else ai = 0; ai = max(ai, &min_a2); a2 = a2 + w(i) * ai; sumw = sumw + w(i); end; a2 = a2 / sumw; end; %end; /* %if (&backtran = 1) %then %do */ /* integrate back-transformation over distr. of within-person error (taylor linearization). */ %if (&backtran = 2) %then %do; if (bc_a1 > .Z & a1_lambda ^= 0) then do; temp = a1_lambda * bc_a1 + 1; if (temp > 0) then do; ginv_a1 = temp**(1/a1_lambda); d2ginv_a1 = (1 - a1_lambda) * temp**(1/a1_lambda - 2); a1 = ginv_a1 + d2ginv_a1 * var_e2 / 2; end; else a1 = 0; end; if (bc_a2 > .Z & a2_lambda ^= 0) then do; temp = a2_lambda * bc_a2 + 1; if (temp > 0) then do; ginv_a2 = temp**(1/a2_lambda); d2ginv_a2 = (1 - a2_lambda) * temp**(1/a2_lambda - 2); a2 = ginv_a2 + d2ginv_a2 * var_e3 / 2; end; else a2 = 0; end; %end; /* %if (&backtran = 2) %then %do */ /* don't integrate back-transformation over distr. of within-person error. */ %if (&backtran = 3) %then %do; if (bc_a1 > .Z) then do; if (a1_lambda = 0) then a1 = exp(bc_a1); else do; temp = a1_lambda * bc_a1 + 1; if (temp > 0) then a1 = temp**(1/a1_lambda); else a1 = 0; end; end; if (bc_a2 > .Z) then do; if (a2_lambda = 0) then a2 = exp(bc_a2); else do; temp = a2_lambda * bc_a2 + 1; if (temp > 0) then a2 = temp**(1/a2_lambda); else a2 = 0; end; end; %end; /* %if (&backtran = 3) %then %do */ /* ensure positive amounts. */ if (a1 ^= . & a1_lambda ^= 0) then a1 = max(a1, &min_a1); if (a2 ^= . & a2_lambda ^= 0) then a2 = max(a2, &min_a2); /* generate t1 = p1 * a1. */ t1 = p1 * a1; t2 = a2; bc_t2 = bc_a2; drop a2 bc_a2; label %if (&modeltype ^= ONEPART) %then pred_x_p1 = " "; pred_x_a1 = " " pred_x_a2 = " "; run; /* if multiple observations per subject, then average them. */ %if (&multiobs = 1) %then %do; proc means data=_mcsim noprint; by mc_id; %if (&day_wgt ^= %str()) %then weight &day_wgt%str(;); var t1 t2 a1 bc_a1 bc_t2 %if (&modeltype ^= ONEPART) %then p1 linear_pred_p1 pred_x_p1; pred_x_a1 pred_x_a2 &name_u; output out=_outmeans(keep=mc_id t1 t2 a1 bc_a1 bc_t2 %if (&modeltype ^= ONEPART) %then p1 linear_pred_p1 pred_x_p1; pred_x_a1 pred_x_a2 &name_u) mean=; run; data _mcsim; merge _mcsim(drop=t1 t2 a1 bc_a1 bc_t2 %if (&modeltype ^= ONEPART) %then p1 linear_pred_p1 pred_x_p1; pred_x_a1 pred_x_a2 &name_u) _outmeans; by mc_id; if (first.mc_id); run; %end; /* %if (&multiobs = 1) %then %do */ %if (&print = Y) %then %do; title%eval(&ntitle + 1) "Monte Carlo Distribution of True Usual Intake"; proc means data=_mcsim n mean var std stderr min max; var t1 t2 %if (&modeltype ^= ONEPART) %then p1; a1 %if (&modeltype ^= ONEPART) %then linear_pred_p1; bc_a1 bc_t2 %if (&modeltype ^= ONEPART) %then pred_x_p1; pred_x_a1 pred_x_a2 &name_u; run; proc corr data=_mcsim; var t1 t2 &name_u; run; proc univariate data=_mcsim plot normal nextrval=10 nextrobs=0; var t1 t2; run; title%eval(&ntitle + 1); %end; /* %if (&print = Y) %then %do */ %mend Distrib_Bivariate; /*** end of macro Distrib_Bivariate ****************************************/