*****************************************************************************************
*                           ASA24_Beta_Update_Nutrients.SAS                             *
*                                                                                       *
*This SAS program can be used to update INF and TN files obtained from the Beta         *
*version of ASA24.  The program updates nutrient data from version 1.0 of the Food      *
*and Nutrient Database for Dietary Surveys (FNDDS) to version 4.1, which was used by    *
*USDA to process the dietary intake data from the 2007-08 National Health and Nutrition *
*Examination Survey.  The program includes steps to replace food codes from FNDDS 1.0   *
*with FNDDS 4.1 codes where necessary, updates data for all nutrients, and adds data    *
*for 4 nutrients (vitamin D, choline, added vitamin E, and added vitamin B12).  A food  *
*description variable is also added.  The resulting updated INF and TN files are        *
*consistent with the output files available from ASA24 Version 1.0.  The updated        *
*files can be further analyzed by the researcher or if desired, merged with data from   *
*ASA24 Version 1. See the Version 1 sample output files and data dictionaries available *
*from the Researcher Web site for more details on the variables.                        *
*                                                                                       *
*NOTE:  A second program that updates nutrients AND MyPyramid Equivalent variables is   *
*also available.  If you wish to also update MyPyramid Equivalent values, please refer  *
*to the ASA24_Beta_Update_Nutrients_MyP.SAS program.                                    *
*                                                                                       *
*This program has been tested using SAS, version 9.2, and uses the following datasets:  *
*                                                                                       *
*1) INF and TN CSV files to be updated - request and download from the Beta             *
*   Researcher Web site.                                                                *
*                                                                                       *
*2) Sub_Codes XLS file - included in the ZIP folder at                                  *
*   riskfactor.cancer.gov/tools/instruments/asa24                                       *
*   This file includes the data indicating substitutions of FNDDS 1.0 food codes with   *
*   FNDDS 4.1 food codes where necessary.                                               *
*                                                                                       *
*3) FNDDS 4.1 SAS files - these files include the updated nutrient information that     *
*   will be applied to the data in the INF and TN files and can be found in the         *
*   ZIP folder at                                                                       *
*   riskfactor.cancer.gov/tools/instruments/asa24                                       *
*   The specific FNDDS files used are:                                                  *
*   'fnddsnutval.sas7bdat'                                                              *
*   'mainfooddesc.sas7bdat'                                                             *
*   'modnut.sas7bdat'                                                                   *
*   'moddesc.sas7bdat'                                                                  *
*                                                                                       *
*Details on FNDDS can be found at www.ars.usda.gov/Services/docs.htm?docid=12089        *
*                                                                                       *
*The program creates 2 new files - updated INF and TN CSV files.                        *
*                                                                                       *
*There are 3 steps.  In step 1, modifications are needed to specify the location and    *
*names of the datasets and files.  Steps 2 and 3 can then be run without modification.  *
*                                                                                       *
*The program was written by Lisa Kahle, Information Management Services, Inc. and       *
*documented by Sharon Kirkpatrick, National Cancer Institute.                           *
*                                                                                       *
******************************************************************************************;


/* Report titles applied to any printed output produced by this program */
title1 'UPDATE OF ASA24 BETA OUTPUT';
title2 'FNDDS 4.1';

*****************************************************************************************
*STEP 1 - Specify the required datasets and create the new INF and TN output files      *
*****************************************************************************************;

/* FNDDS 4.1 SAS library: folder that holds the fnddsnutval, mainfooddesc,
   modnut, and moddesc datasets */
libname FNDDS 'C:\FNDDS';

/* Excel workbook (Sub_Codes.xls) listing the ASA24 FNDDS 1.0 and 4.1 food
   codes and the substitutions to apply */
filename subfile 'C:\ASA24code\sub_codes.xls';

/* Beta-version ASA24 input files */
/* INF file to be updated */
filename inf 'C:\ASA24code\Beta_943_INF.csv';
/* TN file to be updated */
filename tn 'C:\ASA24code\Beta_944_TN.csv';

/* Output files created by this program */
/* updated INF file */
filename inf4 'C:\ASA24code\B943_fndds4_INF.csv';
/* updated TN file */
filename tn4 'C:\ASA24code\B944_fndds4_TN.csv';


******NOTE: IF ALL DATASETS AND FILES ARE NAMED AND SPECIFIED CORRECTLY, NO MODIFICATIONS ARE NEEDED FROM THIS POINT FORWARD;


*********************************************************************************************
*STEP 2:  This step prepares the files containing the FNDDS modification codes and food     *
*codes, along with their corresponding nutrients per 100 grams of food,                     *
*into the required format and creates variables for merging.                                *
*                                                                                           *
*NOTE:  Every food has a food code that is associated with nutrient information.  Some      *
*foods also have a modification code (modcode) associated with a modified recipe that more  *
*closely matches the food reported by the respondent (e.g., egg fried in butter instead     *
*of margarine).  This part of the program includes code specifying when modcodes are        *
*available and should be used in place of a food code.  This is part of the automated       *
*coding process that takes place within ASA24 and is reflected in any nutrient analyses     *
*downloaded from the Researcher Web site.                                                   *
*********************************************************************************************;

/* Lookup table linking each FNDDS nutrient code to the ASA24 nutrient
   variable name.  Each data line below is one code/name pair; nutname is
   stored as an 8-character variable. */
data nutname;
  length Nutrient_code 8 nutname $8;
  input Nutrient_code nutname $;
  datalines;
208 KCAL
203 PROT
204 TFAT
205 CARB
255 MOIS
221 ALC
262 CAFF
263 THEO
269 SUGR
291 FIBE
301 CALC
303 IRON
304 MAGN
305 PHOS
306 POTA
307 SODI
309 ZINC
312 COPP
317 SELE
401 VC
404 VB1
405 VB2
406 NIAC
415 VB6
417 FOLA
431 FA
432 FF
435 FDFE
418 VB12
320 VARA
319 RET
321 BCAR
322 ACAR
334 CRYP
337 LYCO
338 LZ
323 ATOC
430 VK
601 CHOLE
606 SFAT
607 S040
608 S060
609 S080
610 S100
611 S120
612 S140
613 S160
614 S180
645 MFAT
626 M161
617 M181
628 M201
630 M221
646 PFAT
618 P182
619 P183
627 P184
620 P204
629 P205
631 P225
621 P226
328 VITD
421 CHOLN
573 VITE_ADD
578 B12_ADD
;
run;

/* Order the lookup table by nutrient code so it can be merged BY
   Nutrient_code with the FNDDS nutrient value datasets */
proc sort data=nutname;
  by Nutrient_code;
run;

/* Copy the FNDDS modification-code nutrient values into WORK.modf4, ordered
   by nutrient code so they can be merged with the nutrient name lookup */
proc sort data=fndds.modnut out=modf4;
  by Nutrient_code;
run;

/* Attach the ASA24 nutrient name to every modcode nutrient record.
   usemod is the modification code (multiplied by 1) and is the key used for
   later merges.  Records that end up without a nutrient name are removed. */
data modf4;
  merge modf4 nutname;
  by Nutrient_code;
  usemod = 1*Modification_code;
  if missing(nutname) then delete;
run;

/* Order the modcode nutrient records by modcode (usemod), as required by the
   BY-group transpose that follows */
proc sort data=modf4;
  by usemod;
run;

/* Reshape to one observation per modcode: each nutname value becomes a
   column holding that nutrient's Nutrient_value */
proc transpose data=modf4 out=allmod;
  id nutname;
  var Nutrient_value;
  by usemod;
run;

/* Build a modcode -> description lookup (usemod, food_description) from the
   FNDDS modification description dataset */
data modnames;
  set fndds.moddesc;
  usemod = 1*Modification_code;
  food_description = Modification_description;
  keep usemod food_description;
run;

/* Order the description lookup by modcode for the merge below */
proc sort data=modnames;
  by usemod;
run;

/* Add the description to the per-modcode nutrient values.  No IN= filter is
   applied, so observations from either dataset are kept.  usefd (the food
   code merge key) is set to 0 on every modcode-based observation. */
data allmod;
  merge allmod modnames;
  by usemod;
  usefd = 0;
run;

/* Copy the FNDDS food-code nutrient values into WORK.foodf4, ordered by
   nutrient code so they can be merged with the nutrient name lookup */
proc sort data=fndds.fnddsnutval out=foodf4;
  by Nutrient_code;
run;

/* Attach the ASA24 nutrient name to every food-code nutrient record.
   usefd is the food code (multiplied by 1) and is the key used for later
   merges.  Records that end up without a nutrient name are removed. */
data foodf4;
  merge foodf4 nutname;
  by Nutrient_code;
  usefd = 1*food_code;
  if missing(nutname) then delete;
run;

/* Order the food-code nutrient records by food code (usefd), as required by
   the BY-group transpose that follows */
proc sort data=foodf4;
  by usefd;
run;

/* Reshape to one observation per food code: each nutname value becomes a
   column holding that nutrient's Nutrient_value */
proc transpose data=foodf4 out=allfood;
  id nutname;
  var Nutrient_value;
  by usefd;
run;

/* Build a food code -> description lookup (usefd, food_description) from the
   FNDDS main food description dataset */
data fdnames;
  set fndds.mainfooddesc;
  usefd = 1*Food_code;
  food_description = Main_food_description;
  keep usefd food_description;
run;

/* Order the description lookup by food code for the merge below */
proc sort data=fdnames;
  by usefd;
run;

/* Add the description to the per-food-code nutrient values.  No IN= filter
   is applied, so observations from either dataset are kept.  usemod (the
   modcode merge key) is set to 0 on every food-code-based observation. */
data allfood;
  merge allfood fdnames;
  by usefd;
  usemod = 0;
run;

/* Stack the modcode-based observations (usefd=0) on top of the
   food-code-based observations (usemod=0) to form one FNDDS 4.1 lookup */
data fndds4;
  set allmod
      allfood;
run;

/* Order the lookup by the two merge keys used against the intake data */
proc sort data=fndds4;
  by usefd usemod;
run;

*********************************************************************************************
*STEP 3:  This step prepares the files containing the intake information to be merged       *
*with FNDDS 4.1 data by reported foodcode or modcode OR by substitute modcode or foodcode   *
*if indicated in the sub_codes file, then produces the updated INF and TN files.            *
*                                                                                           *
*NOTE:  Every food has a food code that is associated with nutrient information.  Some      *
*foods also have a modification code (modcode) associated with a modified recipe that more  *
*closely matches the food reported by the respondent (e.g., egg fried in butter instead     *
*of margarine).  This part of the program includes code specifying when modcodes are        *
*available and should be used in place of a food code.  This is part of the automated       *
*coding process that takes place within ASA24 and is reflected in any nutrient analyses     *
*downloaded from the Researcher Web site.                                                   *
*********************************************************************************************;

/* Input the Beta ASA24 INF file.
   The file is imported twice: the first pass (default type guessing) is used
   only to learn how many rows the file has; the second pass sets
   GUESSINGROWS to that count so every row is scanned when variable types
   and lengths are determined. */
proc import datafile=inf
  out=inf
  dbms=csv
  replace;
  getnames=yes;
  run;

/* Store the row count in macro variable INF_nobs.
   NOBS= supplies the count without reading any observations, and CALL
   SYMPUTX writes it without leading blanks or a numeric-to-character
   conversion note.  The count is floored at 1 so that an empty input file
   still yields a valid GUESSINGROWS value.
   NOTE(review): some SAS releases cap GUESSINGROWS (32767 is the documented
   limit for older releases) - confirm for files with more rows than that. */
data _null_;
  if 0 then set inf nobs=n_rows;
  call symputx("INF_nobs", max(n_rows, 1));
  stop;
run;

/* Second pass: re-import, scanning all rows to guess types and lengths */
proc import datafile=inf
  out=inf
  dbms=csv
  replace;
  getnames=yes;
  guessingrows=&INF_nobs;
  run;

/* From the original INF file retain only the non-nutrient variables (the
   positional range username--foodamt, plus foodcomp) and record the original
   row position in origord so the file order can be restored later */
data inf;
  set inf;
  origord = _n_;
  keep username--foodamt foodcomp origord;
run;

/* Order the INF records by food code and modcode for the merge with the
   Sub_Codes data */
proc sort data=inf;
  by foodcode modcode;
run;

/* Input the Sub_Codes Excel file that shows which FNDDS 4.1 codes to use
   for FNDDS 1.0 codes.
   The file is imported twice: the first pass is used only to learn how many
   rows it has; the second pass sets GUESSINGROWS to that count so every row
   is scanned when variable types are determined. */
proc import datafile=subfile
  out=subfile
  dbms=xls
  replace;
  getnames=yes;
  run;

/* Store the row count in macro variable Sub_nobs.
   NOBS= supplies the count without reading any observations, and CALL
   SYMPUTX writes it without leading blanks or a numeric-to-character
   conversion note.  The count is floored at 1 so that an empty workbook
   still yields a valid GUESSINGROWS value. */
data _null_;
  if 0 then set subfile nobs=n_rows;
  call symputx("Sub_nobs", max(n_rows, 1));
  stop;
run;

/* Second pass: re-import, scanning all rows to guess types */
proc import datafile=subfile
  out=subfile
  dbms=xls
  replace;
  getnames=yes;
  guessingrows=&Sub_nobs;
  run;

/* Order the Sub_Codes data by the same keys as the INF data */
proc sort data=subfile;
  by foodcode modcode;
run;

/* Match each INF record to the Sub_Codes data by food code and modcode.
   Only records that come from the INF file are kept.  subflag is 1 when the
   foodcode/modcode pair is present in Sub_Codes and 0 when it is not. */
data inf;
  merge inf (in=from_inf) subfile (in=from_sub);
  by foodcode modcode;
  if not from_inf then delete;
  subflag = from_sub;
run;

/* Decide which food code (usefd) and modcode (usemod) are used to look up
   FNDDS 4.1 nutrient values for each INF record */
data inf;
  set inf;

  select (subflag);
    /* No substitution listed: use the codes reported on the INF file */
    when (0) do;
      usefd = foodcode;
      usemod = modcode;
    end;
    /* Substitution listed: use the replacement codes from Sub_Codes */
    when (1) do;
      usefd = Foodcode4;
      usemod = ModCode4;
    end;
    otherwise;
  end;

  /* Exactly one key drives the FNDDS merge:
     a positive modcode means the record is modcode based, so the food code
     key is zeroed - otherwise a positive food code means the record is
     food-code based, so the modcode key is zeroed */
  if usemod > 0 then do;
    usefd = 0;
  end;
  else if usefd > 0 then do;
    usemod = 0;
  end;
run;

/* Order the INF records by the two FNDDS merge keys */
proc sort data=inf;
  by usefd usemod;
run;

/* Bring the FNDDS 4.1 nutrient values (per 100 grams) and food description
   onto each INF record.  Only records that come from the INF file are kept. */
data inf;
  merge inf (in=from_inf) fndds4;
  by usefd usemod;
  if not from_inf then delete;
run;

/* Calculate the amount of each nutrient for every item from the grams of
   food consumed: value per 100 grams * foodamt / 100.
   DO NOT MODIFY THE VARIABLE LIST - prot--pfat is a positional name range
   that selects every variable stored between prot and pfat in the dataset.
   NOTE(review): this relies on prot being the first and pfat the last
   nutrient column in inf - presumably true because the transposes upstream
   see the records in nutrient code order (203 first, 646 last); confirm with
   PROC CONTENTS if the nutrient list is ever changed.
   An explicit array with DIM() replaces the legacy implicit-subscript array
   and DO OVER, and the loop index is dropped so it does not remain in inf. */
data inf;
  set inf;

  array nuts {*} prot--pfat;
  do j = 1 to dim(nuts);
    nuts{j} = nuts{j}*foodamt/100;
  end;
  drop j;
run;

/* Sort the INF data back into the original file order.
   The dataset is named explicitly rather than relying on the most recently
   created dataset, so the step keeps working if other steps are inserted
   before it. */
proc sort data=inf;
  by origord;
  run;

/* Use PROC SQL to build the final INF table: the columns are put in the
   required output order and only the variables listed here are retained */
proc sql;
  create table inf4 as
    select
      /* non-nutrient columns */
      UserName, UserID, RecallNo, RecallAttempt, RecallStatus,
      IntakeDate, IntakeDay, CompletionDate, Lang,
      Occ_No, Occ_Time, Occ_Name, EatWith, WatchTV, Location,
      FoodNum, FoodType, FoodSrce, CodeNum, FoodCode, ModCode,
      HowMany, SubCode, PortionCode, FoodAmt,
      /* updated nutrient values */
      KCAL, PROT, TFAT, CARB, MOIS, ALC, CAFF, THEO, SUGR, FIBE,
      CALC, IRON, MAGN, PHOS, POTA, SODI, ZINC, COPP, SELE,
      VC, VB1, VB2, NIAC, VB6, FOLA, FA, FF, FDFE, VB12,
      VARA, RET, BCAR, ACAR, CRYP, LYCO, LZ, ATOC, VK, CHOLE,
      SFAT, S040, S060, S080, S100, S120, S140, S160, S180,
      MFAT, M161, M181, M201, M221,
      PFAT, P182, P183, P184, P204, P205, P225, P226,
      /* nutrients added by this update */
      VITD, CHOLN, VITE_ADD, B12_ADD,
      /* trailing columns */
      foodcomp, food_description
    from inf;
quit;

/* Write the updated INF table (FNDDS 4.1 values) to the CSV file assigned
   to fileref inf4, replacing the file if it already exists */
proc export data=inf4 outfile=inf4 dbms=csv replace;
run;

/* Order the item-level data by person (UserName) and recall (RecallNo) so
   totals can be computed per BY group */
proc sort data=inf;
  by UserName RecallNo;
run;

/* Sum every updated nutrient within each person/recall and save the totals
   in dataset tot.
   DO NOT MODIFY THE VARIABLE LIST - prot--pfat is a positional range that
   covers all nutrient columns stored between prot and pfat. */
proc summary data=inf;
  by UserName RecallNo;
  var prot--pfat;
  output out=tot sum=;
run;

/* Input the ASA24 Beta total nutrient (TN) file.
   The file is imported twice: the first pass (default type guessing) is used
   only to learn how many rows the file has; the second pass sets
   GUESSINGROWS to that count so every row is scanned when variable types
   and lengths are determined. */
proc import datafile=tn
  out=tn
  dbms=csv
  replace;
  getnames=yes;
  run;

/* Store the row count in macro variable TN_nobs.
   NOBS= supplies the count without reading any observations, and CALL
   SYMPUTX writes it without leading blanks or a numeric-to-character
   conversion note.  The count is floored at 1 so that an empty input file
   still yields a valid GUESSINGROWS value. */
data _null_;
  if 0 then set tn nobs=n_rows;
  call symputx("TN_nobs", max(n_rows, 1));
  stop;
run;

/* Second pass: re-import, scanning all rows to guess types and lengths */
proc import datafile=tn
  out=tn
  dbms=csv
  replace;
  getnames=yes;
  guessingrows=&TN_nobs;
  run;

/* From the original TN file retain only the non-nutrient variables (the
   positional range username--saltused, plus datacomp) and record the
   original row position in origord so the file order can be restored later */
data tn;
  set tn;
  origord = _n_;
  keep username--saltused datacomp origord;
run;

/* Order the TN records by person and recall to match the daily totals */
proc sort data=tn;
  by username recallno;
run;

/* Merge the original TN variables with the new daily totals.
   Only rows that come from the original TN file are kept, in line with the
   INF merges earlier in this program.  Without this filter, a person/recall
   present only in the totals would be added as an extra row with no
   origord and none of the original TN variables. */
data tn;
  merge tn (in=from_tn) tot;
  by username recallno;
  if from_tn;
  run;

/* Sort back into the original file order */
proc sort data=tn;
  by origord;
  run;

/* Use PROC SQL to build the final TN table: the columns are put in the
   required output order and only the variables listed here are retained */
proc sql;
  create table tn4 as
    select
      /* non-nutrient columns */
      UserName, UserID, RecallNo, RecallAttempt, RecallStatus,
      IntakeDate, IntakeDay, CompletionDate, Lang,
      NumFoods, NumCodes, AMTUsual, SaltType, SaltFreq, SaltUsed,
      /* updated daily nutrient totals */
      KCAL, PROT, TFAT, CARB, MOIS, ALC, CAFF, THEO, SUGR, FIBE,
      CALC, IRON, MAGN, PHOS, POTA, SODI, ZINC, COPP, SELE,
      VC, VB1, VB2, NIAC, VB6, FOLA, FA, FF, FDFE, VB12,
      VARA, RET, BCAR, ACAR, CRYP, LYCO, LZ, ATOC, VK, CHOLE,
      SFAT, S040, S060, S080, S100, S120, S140, S160, S180,
      MFAT, M161, M181, M201, M221,
      PFAT, P182, P183, P184, P204, P205, P225, P226,
      /* nutrients added by this update */
      VITD, CHOLN, VITE_ADD, B12_ADD,
      /* trailing column */
      datacomp
    from tn;
quit;

/* Write the updated TN table (FNDDS 4.1 values) to the CSV file assigned to
   fileref tn4, replacing the file if it already exists */
proc export data=tn4 outfile=tn4 dbms=csv replace;
run;

*If the program ran successfully, you now have 2 new files - INF and TN - with updated nutrient information based on FNDDS 4.1;

*END;
