**************************************************************************************************************************************
**** THIS CODE DEALS WITH INITIAL CLEANING FOLLOWING DATA ENTRY ACCORDING TO INSTRUCTIONS DESCRIBED IN "Data entry SIT-Q-7d.pdf";	 *
**** AND DEALS WITH VARIABLES WHICH REQUIRE CLEANING																				 *
*** THIS CODE FOLLOWS THE CLEANING INSTRUCTIONS AS DESCRIBED IN "Guidelines primary cleaning SIT-Q-7d.pdf"							 *
** USE THE ADDITIONAL .DO FILE FOR ID-BASED, STUDY SPECIFIC CLEANING (E.G. FOR (-10) OR (-66) VARIABLES; SEE BELOW)				 *
* Use the secondary cleaning .do file (2_Sec_clean_summ_SIT-Q-7d_Generic.do) for further summarisation of variables				 *
*																																     *
*	  Authors: Katrien Wijndaele & Kate Westgate 																					 *					
*local Version = "v2.0 - 25/07/2013"																								 *					
**************************************************************************************************************************************	

*--------------------------------------------------------------------------------------------------------
* STUDY SETTINGS - edit the values below to match your own study
*--------------------------------------------------------------------------------------------------------
*Working folder (the script changes directory to it before reading the data)
local FOLDER "V:\P5_PhysAct\"
*Name of the file holding the entered raw data (.csv format), given without the extension
local INPUT_FILE "SIT-Q-7d_INPUT_FILE"
*Name used for the saved, primary-cleaned dataset
local OUTPUT_FILE "`INPUT_FILE'_primary_cleaned"
*Name of the log file that lists the entries needing manual cleaning
local LOG_FILE "Additional Variables to be cleaned_(-10)_logfile"
*Name of the additional .do file for study specific cleaning
local ADDITIONAL_DO_FILE "Additional_studyspecific_cleaning_SIT-Q-7d.do"
*Name of the secondary cleaning .do file that is run once this script has finished
local SECONDARY_CLEANING_DO_FILE "2_Sec_clean_summ_SIT-Q-7d_Generic.do"

*Set to 0 for non-stop, 1: to run to check for any variables requiring manual edits
local STOP 1
***************************************************************************************************************************************
*Switch off output paging, move to the working folder and read the comma separated raw data
*(the "case" option keeps the capitalisation of the variable names found in the file)
set more off
cd "`FOLDER'"
insheet using "`INPUT_FILE'.csv", comma case clear


*Drop any IDs necessary if you have any duplicate files (Example below)
*drop if StudyID=="xxxx"


*Remove the WD_Batch column and convert dD14a_dur_occ1 to numeric where its content allows
drop WD_Batch
destring dD14a_dur_occ1, replace

***********************************************************************************************************
***THIS SECTION FLAGS ALL THE (-10) VARIABLES THAT WILL REQUIRE MANUAL CLEANING						 	***
***********************************************************************************************************
* Opens the log file and, for every string variable in the dataset, reports and lists the entries that
* start with "(-10)". Entries equal to "(-10) n/a" (in any capitalisation) are left out of the listing,
* and the count is computed with the same condition so that the reported number always matches the rows
* shown underneath it. The log is switched off (not closed) at the end; it is switched on again later.

cap log close
log using "`FOLDER'/`LOG_FILE'.smcl", replace
foreach var of varlist * {
	/*A string variable means at least one non-numeric entry (eg. a "(-x) ..." flag) was read in*/
	capture confirm string variable `var'
	if !_rc {
		qui count if substr(`var',1,5)=="(-10)" & lower(`var')!="(-10) n/a"
		local count=r(N)
		di "THERE WERE `count' (-10) VARIABLES TO BE CLEANED FOR `var':"
		list StudyID `var' if substr(`var',1,5)=="(-10)" & lower(`var')!="(-10) n/a", abbreviate(30)
	}
}
log off

***********************************************************************************************************
***DEALING WITH SECTION 1: SLEEPING & NAPPING VARIABLES (Error types (-4), (-6), (-10), (-1 mins only) 	***
***********************************************************************************************************
* Each go-to-sleep / get-up variable that was read in as a string (ie. it holds at least one flagged
* "(-x) value" entry) is renamed to <name>_orig and a cleaned <name> is rebuilt from it:
*   (-4) range "hh:mm to hh:mm" -> mid-point of the range (hour part in _hr, minute part in _min)
*   (-6) am/pm box not ticked   -> the entered value is used as it stands
*   no flag                     -> the original value is used
*   any other flag (eg. (-10))  -> left empty here
* The _hr variable of each pair has to come before its _min variable in the list: the flag code (hourcode)
* and the mid-point (diffhr/diffmin) worked out for the hour are reused when the minutes are processed.
* Variables that were read in as numeric are left untouched.
* NOTE(review): the "-1 minutes -> 0" rule below is only reached when the _min column is a string - confirm
* whether it should also apply to numeric _min columns.

foreach var of varlist dA1_1_go_sle_wd_hr dA1_1_go_sle_wd_min dA1_2_get_up_wd_hr dA1_2_get_up_wd_min ///
		dA1_3_go_sle_wndd_hr dA1_3_go_sle_wndd_min dA1_4_get_up_wndd_hr dA1_4_get_up_wndd_min {

	local i=substr("`var'",-3,.) /*"_hr" for an hour variable, "min" for a minutes variable*/

	*A new hr/min pair starts here: discard helpers left behind when the previous _min variable was numeric
	*and therefore never consumed (and dropped) them
	if `"`i'"'==`"_hr"' {
		foreach helper in hourcode diffhr diffmin {
			cap drop `helper'
		}
	}

	capture confirm string variable `var' /*a string variable contains some (-) values: if so, do the following*/
	if !_rc {

		cap drop temp*
		di "READING IN VARIABLE NAME: `var'"

		*Split "(-x) value" into the flag code (temp2) and the entered value (temp3).
		*(NB: can't just substring alone due to different length of entered values)
		gen temp1=strpos(`var'," ")
		gen temp2=substr(`var',1,temp1)
		gen temp3=substr(`var',temp1+1,.)
		replace temp2=subinstr(temp2,"(","",.)
		replace temp2=subinstr(temp2,")","",.)
		replace temp2=trim(temp2) /*temp2 becomes the indicator ie. -4 or -10 for instance*/
		destring temp2, replace
		split temp3, p(" to ") /*splits the values if it is a range variable*/

		rename `var' `var'_orig /*Keep the original (raw) variable & values under "_orig"*/
		local newvar "`var'" /*The cleaned variable takes over the original name*/
		di "NEWVAR=`newvar'"

		***DEALING WITH "(-4)" (RANGES) & "(-6)" (MISSING AM/PM BOX ENTRY) VARIABLES***
		if `"`i'"'==`"_hr"' { /*hour variables (this is where the range is entered)*/

			*split only creates temp32 when at least one entry contains " to "; make sure both parts exist
			*so that a column with flags but no range does not stop the script
			foreach part in temp31 temp32 {
				capture confirm variable `part'
				if _rc {
					gen `part'=""
				}
			}

			gen hourcode=temp2

			*Both ends of the range as clock values (stored as double, as recommended for %tc values)
			gen double range1=clock(temp31,"hm")
			gen double range2=clock(temp32,"hm")

			*Mid-point of the range, used for subsequent analysis
			gen double diff=(range2-range1)/2+range1
			replace diff=(diff+msofhours(12)) if range2<range1 /*Add 12hrs if range crosses midnight for correct calculation*/

			gen diffhr=hhC(diff)
			gen diffmin=mmC(diff)
			tostring diffhr diffmin, replace

			gen `newvar'=diffhr if temp2==-4 /*range: use the hr calculated above*/
			replace `newvar'=temp3 if temp2==-6 /*query am/pm: use the entered value, dealt with later*/
			replace `newvar'=`var'_orig if temp2==. /*no flag: use the original value*/
			destring `newvar', replace
			order `var'_orig `newvar'
			drop diff range1 range2
		}

		if `"`i'"'==`"min"' { /*minutes variables*/

			*The paired hour variable was numeric (no flags), so no range information was produced for it
			capture confirm variable hourcode
			if _rc {
				gen hourcode=.
				gen diffhr=""
				gen diffmin=""
			}

			gen `newvar'=diffmin if hourcode==-4 /*range: use the mins calculated above*/
			order `var'_orig `newvar'
			replace `newvar'=`var'_orig if temp2==. & hourcode!=-4 /*no flag and not a range: use the original value*/
			replace `newvar'=temp3 if temp2==-6 /*am/pm problem only: the minutes are unaffected*/
			destring `newvar', replace

			local newvarhr=subinstr("`newvar'","_min","_hr",.) /*the equivalent hr variable*/
			di "NEWVARhr=`newvarhr'"
			replace `newvar'=0 if `newvar'==-1 & `newvarhr'!=-1 /*valid hour but no valid min entered: set the minutes to 0*/
			drop diffhr diffmin hourcode
		}

		drop temp*
	}
}

*Helpers remain when the last pair had a string _hr column but a numeric _min column
foreach helper in hourcode diffhr diffmin {
	cap drop `helper'
}

*Put StudyID first, then for each time the raw (_orig) columns followed by the cleaned columns.
*The _orig columns only exist for variables that were read in as strings, so they are added only when present.
local sleep_order StudyID
foreach stem in dA1_1_go_sle_wd dA1_2_get_up_wd dA1_3_go_sle_wndd dA1_4_get_up_wndd {
	foreach rawvar in `stem'_hr_orig `stem'_min_orig {
		capture confirm variable `rawvar'
		if !_rc {
			local sleep_order `sleep_order' `rawvar'
		}
	}
	local sleep_order `sleep_order' `stem'_hr `stem'_min
}
order `sleep_order'

*Correcting for 24hr clock mistakes (same for weekday and weekend)
* For each go-to-sleep hour that is not earlier than the matching get-up hour:
*   1) entries flagged "(-6)" (no am/pm defined) are moved to the evening by adding 12 hours;
*   2) unflagged entries of 10, 11 or 12 are treated the same way (for those people who don't know
*      what time 10, 11 or 12 am/pm is);
*   3) a resulting 24 is stored as 0 (midnight).
* Rule 1 is limited to hours of 12 or less: a flagged entry that was already written in 24hr notation
* (eg. 22) would otherwise become 34. The raw _orig column only exists when the variable was read in as
* a string, so rule 1 is skipped when it is absent (there are then no "(-6)" entries to correct).
local go1 dA1_1_go_sle_wd_hr
local up1 dA1_2_get_up_wd_hr
local go2 dA1_3_go_sle_wndd_hr
local up2 dA1_4_get_up_wndd_hr

forvalues k=1/2 {
	local go `go`k''
	local up `up`k''

	capture confirm string variable `go'_orig
	if !_rc {
		replace `go' = `go'+12 if `go'>=`up' & `go'<=12 & substr(`go'_orig,1,4)=="(-6)"
	}
	replace `go' = `go'+12 if `go'>=`up' & `go'>=10 & `go'<=12
	replace `go' = 0 if `go'==24
}

***DEALING WITH (-10) SLEEP VARIABLES***
* ==> ON AN INDIVIDUAL ID BASIS, MAKE DECISION & CHANGE ALL (-10) SLEEP VARIABLES BASED ON THE WRITTEN ENTRY WHERE NECESSARY

/*
*EXAMPLE
sort dA1_1_go_sle_wd_hr_orig
replace  dA1_1_go_sle_wd_hr = -1 if StudyID=="1234" 

*/
*Runs the study specific .do file with manual changes (Section 1).
*do_manual_clean_stage is 1 while that file runs here and is set to 2 afterwards; the same file is run a
*second time further down (presumably it reads this variable to decide which section to apply - confirm
*against the additional .do file).
gen do_manual_clean_stage=1
do "`ADDITIONAL_DO_FILE'"
replace do_manual_clean_stage=2

***FLAGGING UNREALISTIC GO-SLEEP & GET-UP TIMES BASED ON DURATION CALCULATION***
*Convert the times for go-sleep & get-up to decimal hours
gen go_sle_wd_time_dec = dA1_1_go_sle_wd_hr + (dA1_1_go_sle_wd_min/60)
gen get_up_wd_time_dec = dA1_2_get_up_wd_hr + (dA1_2_get_up_wd_min/60)
gen go_sle_wndd_time_dec = dA1_3_go_sle_wndd_hr + (dA1_3_go_sle_wndd_min/60)
gen get_up_wndd_time_dec = dA1_4_get_up_wndd_hr + (dA1_4_get_up_wndd_min/60)

*Duration of sleep: when the go-sleep time is not earlier than the get-up time the night crosses midnight,
*otherwise the duration is the plain difference. (A go-sleep time of exactly 12.00 that precedes the get-up
*time used to fall between the two conditions and was left without a duration.)
foreach day in wd wndd {
	gen `day'_dur_sleep_dec=.
	replace `day'_dur_sleep_dec=(24-go_sle_`day'_time_dec) + get_up_`day'_time_dec if go_sle_`day'_time_dec>=get_up_`day'_time_dec
	replace `day'_dur_sleep_dec=get_up_`day'_time_dec - go_sle_`day'_time_dec if go_sle_`day'_time_dec<get_up_`day'_time_dec
}

/*Flagging durations (using "-66") if duration <=3hrs sleep & >=14hrs sleep*/
*NB: Stata orders missing values above every number, so a duration that could not be calculated also
*satisfies ">=14" and is flagged.
gen FLAG_Sleep=.
replace FLAG_Sleep = -66 if wd_dur_sleep_dec<=3 | wd_dur_sleep_dec>=14 | wndd_dur_sleep_dec<=3 | wndd_dur_sleep_dec>=14

drop go_sle_wd_time_dec get_up_wd_time_dec go_sle_wndd_time_dec get_up_wndd_time_dec wd_dur_sleep_dec wndd_dur_sleep_dec
sort StudyID
log on
di "The following IDs have been flagged for duration of sleep reported. Deal with these individuals accordingly (if necessary)...these may be column swaps at time of reporting"
list StudyID dA1_1_go_sle_wd_hr dA1_2_get_up_wd_hr dA1_3_go_sle_wndd_hr dA1_4_get_up_wndd_hr FLAG_Sleep if FLAG_Sleep ==-66 
log off
/*
* ==> MANUALLY CORRECT -66 CASES AS NECESSARY  
*     ENTER THESE CHANGES IN YOUR ADDITIONAL .DO FILE FOR STUDY SPECIFIC CLEANING
*EXAMPLE (manual entry of times to use)
replace  dA1_1_go_sle_wd_hr = 6 if StudyID=="1234" 
replace  dA1_1_go_sle_wd_min = 0 if StudyID=="1234" 
etc...
*/
*********

*Close the log and open it in the viewer. With STOP set to 1 the run ends here (with a non-zero return
*code) so the logged entries can be dealt with before the rest of the cleaning is run.
log close
view "`FOLDER'/`LOG_FILE'.smcl"
if `STOP'==1 {
	display as error `"STOP 1 - STEP 1: DEAL WITH ANY ISSUES REQUIRING MANUAL EDITS EXPORTED IN LOG (ADDRESS THESE IN THE "ADDITIONAL_DO_FILE.DO" FILE)"'
	exit 1
}




*drop dA1_1_temp dA1_2_temp dA1_3_temp dA1_4_temp dA1_2_temp2 dA1_3_temp2

*********************************************************************************************
***DEALING WITH "(-4)" RANGE ISSUES IN WHOLE QUESTIONNAIRE (EXCEPT SLEEPING): ***
*********************************************************************************************
* Each listed frequency variable that was read in as a string is renamed to <name>_orig and a cleaned
* <name> is rebuilt from it:
*   "(-10) n/a" (any capitalisation)  -> -1
*   "(-4) a to b"                     -> average of a and b, rounded to an integer except for
*                                        dD13a_freq_occ1 / dD13b_freq_occ2 where the decimal is kept
*   upper end of a range above 7      -> -1, for all variables except the three "break" variables
*   anything else                     -> the original entry
* The out-of-range recode is applied AFTER the average has been filled in; in the previous order the
* average overwrote the -1 again. Variables that were read in as numeric are left untouched.
foreach var of varlist  dC5_1_freq_to_from_occ1 dC5_2_freq_to_from_occ2 dC7_1_freq_part_of_occ1 dC7_2_freq_part_of_occ2 dD13a_freq_occ1 dD13b_freq_occ2 dD15a_freq_break_occ1 dD15b_freq_break_occ2 dE18_freq_break_tv {
	capture confirm string variable `var' /*a string variable contains some (-) values: if so, do the following*/
	if !_rc {

		cap drop temp*

		*Split "(-x) value" into the flag code (temp2) and the entered value (temp3)
		gen temp1=strpos(`var'," ")
		gen temp2=trim(subinstr(subinstr(substr(`var',1,temp1),"(","",.),")","",.))
		gen temp3=substr(`var',temp1+1,.)
		split temp3, p(" to ")

		rename `var' `var'_orig /*Keep the original variable & values untouched under _orig*/
		local newvar "`var'" /*The cleaned variable takes over the original name*/
		di "NEWVAR=`newvar'"
		*The raw column is referred to by its full _orig name rather than through name abbreviation
		gen `newvar'=`var'_orig

		*REPLACE WITH -1 IF (-10) N/A
		replace `newvar'="-1" if lower(`var'_orig)=="(-10) n/a"

		*DEALING WITH (-4) RANGE VARIABLES
		di "VAR=`var'"
		capture confirm variable temp32 /*temp32 only exists when at least one entry is a range*/
		if !_rc {
			destring temp31 temp32, force replace
			gen diff=(temp31+temp32)/2

			*Variables are treated differently: the number of days (dD13a/dD13b) keeps the decimal
			*average, every other variable uses the average rounded to an integer
			if !inlist("`var'","dD13a_freq_occ1","dD13b_freq_occ2") {
				replace diff=round(diff)
			}
			tostring diff, replace
			replace `newvar'=diff if temp2=="-4" /*Replace the range with its average*/

			*Replace with missing (-1) if the upper end of the range is outside the valid range
			if !inlist("`var'","dD15a_freq_break_occ1","dD15b_freq_break_occ2","dE18_freq_break_tv") {
				replace `newvar'="-1" if temp32>7 & temp32!=.
			}
			drop diff
		}

		drop temp*
		destring `newvar', replace
		order `var'_orig `newvar'
	}
}

*********************************************************************************************
***DEALING WITH ALL (-5) VARIABLES (when more than one specified) ***
*********************************************************************************************
* Each listed variable that was read in as a string is renamed to <name>_orig and a cleaned <name>
* (still a string at this point) is rebuilt from it:
*   "(-10) n/a" (any capitalisation) -> -1
*   "(-5) a and b"                   -> floor of the average of a and b (middle or lowest category);
*                                       for the type of occupation (dD11a/dD11b) both answers are kept,
*                                       eg. "(-5) 1 and 2" is recoded as 1.2
*   anything else                    -> the original entry
* Variables that were read in as numeric are left untouched.

foreach var of varlist  dA2_1_dur_nap_wd dA2_2_dur_nap_wndd dB3_1_dur_meal_wd_br dB3_2_dur_meal_wd_lu dB3_3_dur_meal_wd_di /*
*/dB3_4_dur_meal_wndd_br dB3_5_dur_meal_wndd_lu dB3_6_dur_meal_wndd_di dC6_1_dur_to_from_occ1 dC6_2_dur_to_from_occ2 /*
*/dC8_1_dur_part_of_occ1 dC8_2_dur_part_of_occ2 dC9_1_dur_apart_from_wd dC9_2_dur_apart_from_wndd dD14a_dur_occ1 dD14b_dur_occ2 /*
*/dE17_1_dur_tv_wd dE17_2_dur_tv_wndd dE17_3_dur_comp_wd dE17_4_dur_comp_wndd dE17_5_dur_game_wd dE17_6_dur_game_wndd dE19_1_freq_savor /*
*/dE19_2_freq_sweet dE19_3_freq_ice dE19_4_freq_yoghurt dE19_5_freq_soda dE19_6_freq_frjuice dE19_7_freq_squash dE19_8_freq_milk dE19_9_freq_tea /*
*/dE19_10_freq_alcoh dE19_12_freq_other dE20_1_dur_read_wd dE20_2_dur_read_wndd dE20_3_dur_house_wd dE20_4_dur_house_wndd /*
*/dE20_5_dur_cari_wd dE20_6_dur_cari_wndd dE20_7_dur_hobb_wd dE20_8_dur_hobb_wndd dE20_9_dur_soc_wd dE20_10_dur_soc_wndd /*
*/dE20_11_dur_music_wd dE20_12_dur_music_wndd dE20_14_dur_other_wd dE20_15_dur_other_wndd dD11a_type_occ1 dD11b_type_occ2{

	capture confirm string variable `var'
	if !_rc {

		rename `var' `var'_orig /*Keep the original variable & values untouched under _orig*/
		local newvar "`var'" /*The cleaned variable takes over the original name*/
		di "NEWVAR=`newvar'"

		cap drop temp*

		*Split "(-x) value" into the flag code (temp2) and the entered value (temp3); the raw column is
		*referred to by its full _orig name rather than through name abbreviation
		gen temp1=strpos(`var'_orig," ")
		gen temp2=trim(subinstr(subinstr(substr(`var'_orig,1,temp1),"(","",.),")","",.))
		gen temp3=substr(`var'_orig,temp1+1,.)
		replace temp3=subinstr(temp3,"AND","and",.)
		split temp3, p(" and ") /*Splitting (-5) entries to allow for calculations later*/

		*Start from the original entries; REPLACE WITH -1 IF (-10) N/A
		gen `newvar'=`var'_orig
		replace `newvar'="-1" if lower(`var'_orig)=="(-10) n/a"

		di "VAR=`var'"
		capture confirm variable temp32 /*temp32 only exists when at least one entry names two answers*/
		if !_rc {
			*force: free-text entries (eg. a written "(-10) ..." comment) become missing instead of
			*leaving temp31 as a string, which would stop the calculation below with a type mismatch
			destring temp31 temp32, force replace

			*Replacing category with middle or lowest category
			gen newvalue=floor((temp31+temp32)/2)
			tostring newvalue temp31 temp32, replace

			if inlist("`var'","dD11a_type_occ1","dD11b_type_occ2") {
				*if more than one type of occupation is selected, keep information by coding eg. (-5)1 and 2 recode as 1.2
				replace `newvar'=temp31 + "." + temp32 if temp2=="-5"
			}
			else {
				replace `newvar'=newvalue if temp2=="-5"
			}
			list StudyID dD11a_type_occ1 dD11b_type_occ2 if temp2=="-5"

			drop newvalue
		}

		order `newvar' `var'_orig
		drop temp*
	}
}

* ==> ON AN INDIVIDUAL ID BASIS, MAKE DECISION & CHANGE ALL (-10) VARIABLES BASED ON THE WRITTEN ENTRY WHERE NECESSARY
* example: 
/*
sort dC5_1_freq_to_from_occ1_orig
replace  dC5_1_freq_to_from_occ1 = "-1" if StudyID=="1234" | StudyID=="5678" 
*/

*Runs the study specific .do file with manual changes (Section 2), then removes the stage marker
do "`ADDITIONAL_DO_FILE'"
drop do_manual_clean_stage

*Convert the cleaned variables to numeric. Without "force" a variable that still holds a non-numeric
*entry is left as a string.
destring dA2_1_dur_nap_wd dA2_2_dur_nap_wndd dC5_1_freq_to_from_occ1 dB3_5_dur_meal_wndd_lu dC5_2_freq_to_from_occ2 dC6_1_dur_to_from_occ1 dC6_2_dur_to_from_occ2/*
*/ dC7_1_freq_part_of_occ1 dC7_2_freq_part_of_occ2 dC8_2_dur_part_of_occ2 dD11a_type_occ1 dD11b_type_occ2 dD13a_freq_occ1 dD14a_dur_occ1 dD15a_freq_break_occ1 dD13b_freq_occ2 /*
*/ dD15b_freq_break_occ2 dE18_freq_break_tv dE19_2_freq_sweet dE19_4_freq_yoghurt dE19_7_freq_squash dE19_8_freq_milk dE19_9_freq_tea /*
*/dE19_12_freq_other dE20_1_dur_read_wd dE20_2_dur_read_wndd dE20_10_dur_soc_wndd, replace


*Preferred column order of the saved file. The list used to be split over two lines without a
*continuation marker, so its second half was run as a (non-existent) command.
*The _orig columns only exist for variables that were read in as strings, and some columns are study
*specific, so every name is checked first: names that are not in the dataset are reported and skipped.
local FINAL_ORDER StudyID d_questionnaire d_repres_typ_w ///
	dA1_1_go_sle_wd_hr_orig dA1_1_go_sle_wd_min_orig dA1_1_go_sle_wd_hr dA1_1_go_sle_wd_min ///
	dA1_2_get_up_wd_hr_orig dA1_2_get_up_wd_min_orig dA1_2_get_up_wd_hr dA1_2_get_up_wd_min ///
	dA1_3_go_sle_wndd_hr_orig dA1_3_go_sle_wndd_min_orig dA1_3_go_sle_wndd_hr dA1_3_go_sle_wndd_min ///
	dA1_4_get_up_wndd_hr_orig dA1_4_get_up_wndd_min_orig dA1_4_get_up_wndd_hr dA1_4_get_up_wndd_min ///
	dA2_1_dur_nap_wd dA2_2_dur_nap_wndd ///
	dB3_1_dur_meal_wd_br dB3_2_dur_meal_wd_lu dB3_3_dur_meal_wd_di dB3_4_dur_meal_wndd_br ///
	dB3_5_dur_meal_wndd_lu_orig dB3_5_dur_meal_wndd_lu dB3_6_dur_meal_wndd_di ///
	dC4_status_occ ///
	dC5_1_freq_to_from_occ1_orig dC5_1_freq_to_from_occ1 dC5_2_freq_to_from_occ2_orig dC5_2_freq_to_from_occ2 ///
	dC6_1_dur_to_from_occ1_orig dC6_1_dur_to_from_occ1 dC6_2_dur_to_from_occ2 ///
	dC7_1_freq_part_of_occ1_orig dC7_1_freq_part_of_occ1 dC7_2_freq_part_of_occ2_orig dC7_2_freq_part_of_occ2 ///
	dC8_1_dur_part_of_occ1 dC8_2_dur_part_of_occ2 dC9_1_dur_apart_from_wd dC9_2_dur_apart_from_wndd ///
	dD10_status_occ dD11a_type_occ1 dD12a_name_occ1 dD13a_freq_occ1_orig dD13a_freq_occ1 dD14a_dur_occ1 ///
	dD15a_freq_break_occ1_orig dD15a_freq_break_occ1 ///
	dD16_status_occ2 dD11b_type_occ2 dD12b_name_occ2 dD13b_freq_occ2_orig dD13b_freq_occ2 dD14b_dur_occ2 ///
	dD15b_freq_break_occ2_orig dD15b_freq_break_occ2 ///
	dE17_1_dur_tv_wd dE17_2_dur_tv_wndd dE17_3_dur_comp_wd dE17_4_dur_comp_wndd dE17_5_dur_game_wd dE17_6_dur_game_wndd ///
	dE18_freq_break_tv_orig dE18_freq_break_tv ///
	dE19_1_freq_savor dE19_2_freq_sweet_orig dE19_2_freq_sweet dE19_3_freq_ice ///
	dE19_4_freq_yoghurt_orig dE19_4_freq_yoghurt dE19_5_freq_soda dE19_6_freq_frjuice ///
	dE19_7_freq_squash_orig dE19_7_freq_squash dE19_8_freq_milk_orig dE19_8_freq_milk ///
	dE19_9_freq_tea_orig dE19_9_freq_tea dE19_10_freq_alcoh dE19_11_name_other ///
	dE19_12_freq_other_orig dE19_12_freq_other ///
	dE20_1_dur_read_wd_orig dE20_1_dur_read_wd dE20_2_dur_read_wndd_orig dE20_2_dur_read_wndd ///
	dE20_3_dur_house_wd dE20_4_dur_house_wndd dE20_5_dur_cari_wd dE20_6_dur_cari_wndd ///
	dE20_7_dur_hobb_wd dE20_8_dur_hobb_wndd dE20_9_dur_soc_wd ///
	dE20_10_dur_soc_wndd_orig dE20_10_dur_soc_wndd dE20_11_dur_music_wd dE20_12_dur_music_wndd ///
	dE20_13_name_other dE20_14_dur_other_wd dE20_15_dur_other_wndd ///
	Diabetes_Questionnaire4Test_addi

local present
foreach v of local FINAL_ORDER {
	capture confirm variable `v'
	if !_rc {
		local present `present' `v'
	}
	else {
		di "NOTE: `v' is not in the dataset and is left out of the column order"
	}
}
if "`present'"!="" {
	order `present'
}
sort StudyID

*Save the primary-cleaned data (the name is quoted so that a file name containing spaces also works),
*then carry on with the secondary cleaning .do file
save "`OUTPUT_FILE'.dta", replace

do "`SECONDARY_CLEANING_DO_FILE'"
