/** @file ms_createdistbaselinetable.sas @brief This macro creates a baseline table, and an optional covariate profile table. @details - Create baseline profile. - Create baseline table. - Strip age group of special characters. - Square age for reporting. - Create age group dummies. - Create sex dummies. - Create race dummies. - Create hispanic dummies. - Create year dummies. - For type 4: - Create PrePostInd dummies. - Create preg_outcome dummies. - Get list of requested outcomes for squaring. - Create riskscore categories dummies. - Convert scores categories to cat1, cat2, ... catN. - Create lab covariates additional variables. - Because we do not know which covariates have additional lab variables, we need to get this information from the list of variables available in _RawData. - Sorting by name to make sure covar#lab_result variable is always before covar#lab_unit for the algorithm. - Loop through lab additional variables and get distinct values for each one. - Restrict to character variables. Numeric variables are linked to a character variable (i.e. covar#lab_unit1 is linked to covar#lab_result1). - If variable is character, loop through distinct values. - Remove forbidden characters from new variable names. - If the variable is linked to a numeric variable, add it to the list to compute mean and std. - Merge back dummies. - Create a new patient indicator to count distinct patients. - Process discrete and continuous variables and merge the two. - Square with missing variables for sex, race, hispanic, year and PrePostInd (for type 4). - Square riskscores categories (if applicable). - Add missing groups (if any). - Create switchstep indicator for when stacking datasets and remove dummy covariates (if any). @par Program inputs - dplocal.&RUNID._ads_&DATA._&I.(Dataset containing one record per GROUP, PATID, and INDEXDT.) - infolder.&COHORTFILE. (Dataset used to define enrollment and demographic requirements, type of cohort identification strategy.) @par Program outputs - msoc.&RUNID._baseline&OUTCOHORT._&I. (Dataset including metrics for cohorts of interest during baseline.) - msoc.&RUNID._profile_&I. (Dataset containing patients and episodes by their unique combination of covariates observed.) * **Usage** %ms_createdistbaselinetable(WHERE=(&groupvar. in("&&prev&groupvar.s.") and previnc_def in("Both","Prev")), T3OUT=); @param [in] WHERE Where clause to restrict DPLocal.&RUNID._ads_&data._&i. for computations. @param [out] T3OUT For Type 3 analyses, it takes the value of _an and _an_censor for analysis and analysis_censor datasets. It is blank for all non-Type 3 analyses. <h4> SAS Macros Dependencies </h4> None. @author Sentinel Coordinating Center (info@sentinelsystem.org) **/ %macro ms_createdistbaselinetable(where=, t3out=); %put =====> MACRO CALLED: ms_createdistbaselinetable ; proc sql noprint; select count(*) into: nusers from DPLocal.&RUNID._ads_&data._&i.(where=(&where.)); quit; %if %eval(&nusers.>0) %then %do; *************************** * Create baseline profile * ***************************; %if "&profile." = "y" %then %do; data _tempcovar; set DPLocal.&RUNID._ads_&data._&i.(where=(&where.)); length patient 3; patient = 1; run; /*covariates not applicable b/c anchored on indexdt_exp will appear in table as missing*/ proc means data=_tempcovar noprint nway missing; var patient; class &groupvar. patid %do j=1 %to &numcovars.; &&covarnum&j. %end;; id patient; output out=_covarprofile(drop=_:) max(patient) = npts sum(patient) = n_episodes; run; proc means data=_covarprofile nway noprint missing; var Npts n_episodes; class &groupvar. %do j=1 %to &numcovars.; &&covarnum&j. %end;; output out=msoc.&runid._profile&outcohort.&t3out._&i.(drop=_:) sum=; run; proc datasets nowarn noprint lib=work; delete _tempcovar _covarprofile; quit; %end; ************************* * Create Baseline table * *************************; *Strip agegroup of special characters; data _RawData; set DPLocal.&RUNID._ads_&data._&i.(drop = agegroupnum where=(&where.)); by &groupvar. patid indexdt &delnum.; length patient 3; agegroup=TRANSLATE(agegroup,"_","-"); agegroup=upcase(compress(agegroup,' <>+-')); patient=1; run; *Square Age for reporting; data _tempage(keep=agegroup agegroup2); set infolder.&cohortfile.(keep=agestrat); if missing(agestrat) then agestrat='00-01 02-04 05-09 10-14 15-18 19-21 22-44 45-64 65-74 75+'; c = countw(agestrat,' '); do i = 1 to c; agegroup= scan(agestrat, i, ' '); agegroup=TRANSLATE(agegroup,"_","-"); agegroup=upcase(compress(agegroup,' <>+-')); agegroup2="AGE" || strip(agegroup); output; end; run; *Get dynamic list of age groups and years; proc sql noprint; create table _tempyear as select distinct year, "YEAR_" || strip(put(year,best.)) as year2 format $10. from _RawData; quit; proc sql noprint; select agegroup, agegroup2 into :agegrouplist separated by " ", :agegroupvars separated by " " from _tempage; select year, year2 into :yearlist separated by " ", :yearvars separated by " " from _tempyear; quit; proc datasets nowarn noprint lib=work; delete _tempage _tempyear; quit; %if %eval(&type.=4) %then %do; *Get list of requested outcomes for squaring; proc sql noprint; select distinct upcase(code) into :preg_outcome_list separated by "|" from &cohortcodes._po; quit; %end; %if %length(&riskscores_cat_list.) > 0 %then %do; *Create riskscore categories dummies; %do rskscore = 1 %to %sysfunc(countw(&riskscores_cat_list.)); %let riskscore=%upcase( %sysfunc(tranwrd(%scan(&riskscores_cat_list., &rskscore.),%str(_cat),%str( )))); proc sql noprint; select riskscorecat into :riskscorecat trimmed from RiskScorefile where upcase(riskscore) = "&riskscore."; quit; *Convert scores categories to cat1, cat2, ... catN; data _RawDataScores; set _RawData(where=(&riskscore._cat ne "") keep=&groupvar. patid indexdt &delnum. patient &riskscore._cat); %do loop = 1 %to %sysfunc(countw(&riskscorecat, ' ')); if &riskscore._cat = "%scan(&riskscorecat., &loop., %str( ))" then &riskscore._cat = "cat&loop."; %end; run; %isdata(dataset=_RawDataScores); %if %eval(&nobs.>0) %then %do; *Create riskscore categories dummies - can be multiple index per patient; proc transpose data=_RawDataScores out=_&riskscore._Cat(keep=&groupvar. patid indexdt &riskscore._: &delnum.) prefix=&riskscore._; var patient; by &groupvar. patid indexdt &delnum.; id &riskscore._cat; run; %end; %else %do; *Create empty dataset with one category (there will always be at least one); data _&riskscore._Cat; set _RawDataScores(obs=0 keep=&groupvar. patid indexdt &delnum.); &riskscore._cat1=0; run; %end; %end; %end; /* Create lab covariates additional variables */ %let numlabvars = 0; * Number of additional lab variables (covar#lab_result and covar#lab_unit); %let n_numericlabvars = 0; * Number of distinct numeric lab variables to create (one per distinct unit per numeric lab covariate); %let numericlabvars=; * List of lab related numeric variables to add to MSOC.&RUNID._baseline&outcohort._&i.; %let categoricallabvars=; * List of lab related categorical variables to add to MSOC.&RUNID._baseline&outcohort._&i.; %if %str("&run_covlabs.") = %str("Y") %then %do; * Because we do not know which covariates have additional lab variables, we need to get this information from the list of variables available in _RawData; proc contents data=_RawData out=_vars noprint; quit; * Sorting by name to make sure covar#lab_result variable is always before covar#lab_unit for the algorithm below; proc sort data=_vars(where=(index(upcase(NAME), "COVAR") > 0 and (index(upcase(NAME), "LAB_RESULT") > 0 or index(upcase(NAME), "LAB_UNIT") > 0)) keep=name type); by NAME; run; proc sql noprint; select count(*) into :numlabvars from _vars; select name into :labvarnames separated by ' ' from _vars; select type into :labvartypes separated by ' ' from _vars; quit; %if &numlabvars. > 0 %then %do; %put &=numlabvars; %put &=labvarnames; %put &=labvartypes; /* Loop through lab additional variables and get distinct values for each one*/ %do j=1 %to &numlabvars.; %let labvarname = %scan(&labvarnames., &j.); %let labvartype = %scan(&labvartypes., &j.); %put &=labvarname; /* Restrict to character variables. Numeric variables are linked to a character variable (i.e. covar#lab_unit1 is linked to covar#lab_result1) and will be processed below */ %if &labvartype. = 2 %then %do; proc sql noprint; select count(distinct &labvarname.) into :numlabvalues&j. from _RawData where not missing(&labvarname.); select distinct &labvarname. into :&&labvarname.&j. separated by '!' from _RawData where not missing(&labvarname.); quit; %put &&numlabvalues&j.; %if %eval(&&numlabvalues&j.>0) %then %put &&&labvarname.&j.; %end; %end; data _covarlabs; set _RawData(keep=&groupvar. patid indexdt &labvarnames. &delnum. patient); by &groupvar. patid indexdt &delnum.; %do j=1 %to &numlabvars.; %if &j. > 1 %then %let prevlabvarname = %scan(&labvarnames., &j.-1); %else %let prevlabvarname=; %if &j. > 1 %then %let prevlabvartype = %scan(&labvartypes., &j.-1); %else %let prevlabvartype=; %let labvarname = %scan(&labvarnames., &j.); %let labvartype = %scan(&labvartypes., &j.); %let newlabvarname = %sysfunc(tranwrd(&labvarname., lab_result, lbres)); %let newlabvarname = %sysfunc(tranwrd(&newlabvarname., lab_unit, lbunit)); %let prevnewlabvarname = %sysfunc(tranwrd(&prevlabvarname., lab_result, lbres)); %let prevnewlabvarname = %sysfunc(tranwrd(&prevnewlabvarname., lab_unit, lbunit)); /* If variable is character, loop through distinct values */ %if &labvartype. = 2 %then %do; %do k=1 %to &&numlabvalues&j.; %let labvalue = %scan(&&&labvarname.&j., &k., !); %put &=labvalue; /* Remove forbidden characters from new variable names */ %let suffix=_%sysfunc(prxchange(s/[^a-zA-Z0-9]/_/,-1,%lowcase(&labvalue))); if &labvarname. = "&labvalue." then do; N_&&newlabvarname.&suffix. = patient; end; label N_&&newlabvarname.&suffix. = &labvalue.; %let categoricallabvars = &categoricallabvars. N_&&newlabvarname.&suffix.; /* If the variable is linked to a numeric variable, add it to the list to compute mean and std later */ %if &prevlabvartype. = 1 %then %do; if &labvarname. = "&labvalue." then do; N_&&prevnewlabvarname.&suffix. = &prevlabvarname.; end; label N_&&prevnewlabvarname.&suffix. = &labvalue.; %let n_numericlabvars = %eval(&n_numericlabvars. + 1); %let numericlabvars = &numericlabvars. N_&&prevnewlabvarname.&suffix.; %end; %end; /* loop through number of distinct lab values for the current additional lab variable */ %end; /* if additional lab variable is character */ %end; /* loop through number of additional lab variables */ drop &labvarnames.; run; %end; /* Number of additional lab variables > 0 */ %put &=n_numericlabvars; %put &=numericlabvars; %put &=categoricallabvars; %end; /* Create lab covariates additional variables */ *Create dummies; data _RawData; %if &numlabvars. > 0 or %length(&riskscores_cat_list.) > 0 %then %do; merge %end; %else %do; set %end; _RawData(drop=patient) %if &numlabvars. > 0 %then %do; _covarlabs %end; %if %length(&riskscores_cat_list.) > 0 %then %do; %do rskscore = 1 %to %sysfunc(countw(&riskscores_cat_list.)); _%scan(&riskscores_cat_list., &rskscore.) %end; %end; ; by &groupvar. patid indexdt &delnum.; *Create agegroup dummies; %do agegrp=1 %to %sysfunc(countw(&agegrouplist.)); %let agegroupvar = %scan(&agegroupvars., &agegrp.); if agegroup = "%scan(&agegrouplist., &agegrp.)" then &agegroupvar. = 1; else &agegroupvar. = .; %end; *Create sex dummies; Sex_F = (SEX = 'F'); Sex_M = (SEX = 'M'); Sex_O = (SEX = 'O'); *Create race dummies; Race_0 = (RACE='0'); Race_1 = (RACE='1'); Race_2 = (RACE='2'); Race_3 = (RACE='3'); Race_4 = (RACE='4'); Race_5 = (RACE='5'); Race_M = (RACE='M'); *Create hispanic dummies; Hispanic_U = (HISPANIC = 'U'); Hispanic_Y = (HISPANIC = 'Y'); Hispanic_N = (HISPANIC = 'N'); *Create year dummies; %do yr=1 %to %sysfunc(countw(&yearlist.)); %let yearvar = %scan(&yearvars., &yr.); if year = %scan(&yearlist., &yr.) then &yearvar. = 1; else &yearvar. = 0; %end; %if %eval(&type.=4) %then %do; *Create PrePostInd dummies; length PrePostInd_PRE PrePostInd_EARL PrePostInd_FULL PrePostInd_LATE PrePostInd_POST PrePostInd_NONE PrePostInd_NA 3; PrePostInd_PRE = (PREPOSTIND= 'PRE'); PrePostInd_EARL = (PREPOSTIND= 'EARL'); PrePostInd_FULL = (PREPOSTIND= 'FULL'); PrePostInd_LATE = (PREPOSTIND= 'LATE'); PrePostInd_POST = (PREPOSTIND= 'POST'); PrePostInd_NONE = (PREPOSTIND= 'NONE'); PrePostInd_NA = (PREPOSTIND= 'NA'); *Create preg_outcome dummies; %do outcome=1 %to %sysfunc(countw(&preg_outcome_list.)); %let preg_outcome = %upcase(%scan(&preg_outcome_list., &outcome., |)); preg_outcome_&preg_outcome. = (preg_outcome="&preg_outcome."); %end; %end; *create a new patient indicator to count distinct patients; length patient 3; patient=0; if first.patid then patient=1; if not first.patid then do; array Sexes Sex_:; do over Sexes; Sexes=.; end; array races race_:; do over races; races=.; end; array hispanics hispanic_:; do over hispanics; hispanics=.; end; end; run; *Process discrete vars; proc means data=_RawData(drop=age agegroup &riskscores_cat_list.) nway noprint; var patient Age: Sex_: year_: race_: hispanic_: %if %eval(&numcovars. ge 1) %then %do; %do j=1 %to &numcovars.; covar&j. %end; %end; %if %eval(&type.=4) %then %do; prepostind_: preg_outcome_: %end; %if "&data." = "mstr_mi" %then %do; exp_: %end; &categoricallabvars. %if %length(&riskscores_cat_list.) > 0 %then %do; %do rskscore = 1 %to %sysfunc(countw(&riskscores_cat_list.)); %scan(&riskscores_cat_list., &rskscore.): %end; %end; ; class &groupvar.; output out=_Discrete(drop=_:) sum=; run; *Process continuous vars; proc means data=_RawData nway noprint; var Age &riskscoreslist. NumAV NumOA NumIP NumIS NumED NumGeneric NumClass NumRx %if %eval(&type.=4) %then %do; ga_birth %end; %if "&data." = "mstr_mi" %then %do; ga_first adjusteddisp_pre adjusteddisp_t1 adjusteddisp_t2 adjusteddisp_t3 birth_enroll enroll_diff %end; %if &n_numericlabvars. > 0 %then %do; &numericlabvars. %end; ; class &groupvar.; output out=_Continuous(rename=_freq_=N_episodes drop=_type_) mean= mean_Age %if &riskscoreslist. ne %str() %then %do; %do co_list = 1 %to %sysfunc(countw(&riskscoreslist.)); mean_%scan(&riskscoreslist., &co_list.) %end; %end; mean_NumAV mean_NumOA mean_NumIP mean_NumIS mean_NumED mean_NumGeneric mean_NumClass mean_NumRx %if %eval(&type.=4) %then %do; mean_ga_birth %end; %if "&data." = "mstr_mi" %then %do; mean_ga_first mean_disp_pre mean_adjusteddisp_t1 mean_adjusteddisp_t2 mean_adjusteddisp_t3 mean_birth_enroll mean_enroll_diff %end; %if &n_numericlabvars. > 0 %then %do; %do k = 1 %to &n_numericlabvars.; mean_%scan(%sysfunc(tranwrd(&numericlabvars., N_, %str())), &k.) %end; %end; std= std_Age %if &riskscoreslist. ne %str() %then %do; %do co_list = 1 %to %sysfunc(countw(&riskscoreslist.)); std_%scan(&riskscoreslist., &co_list.) %end; %end; std_NumAV std_NumOA std_NumIP std_NumIS std_NumED std_NumGeneric std_NumClass std_NumRx %if %eval(&type.=4) %then %do; std_ga_birth %end; %if "&data." = "mstr_mi" %then %do; std_ga_first std_disp_pre std_adjusteddisp_t1 std_adjusteddisp_t2 std_adjusteddisp_t3 std_birth_enroll std_enroll_diff %end; %if &n_numericlabvars. > 0 %then %do; %do k = 1 %to &n_numericlabvars.; std_%scan(%sysfunc(tranwrd(&numericlabvars., N_, %str())), &k.) %end; %end; ; run; *merge the two; data msoc.&RUNID._baseline&outcohort.&t3out._&i.; merge _discrete _continuous; by &groupvar.; * Square with missing variables for Sex, Race, Hispanic, Year and PrePostInd(for type4); dsid = open("_discrete"); %if %eval(&SQUARE_BASELINE_SEX_F.>0) %then %do; if varnum(dsid,"Sex_F") = 0 then Sex_F=0; %end; %if %eval(&SQUARE_BASELINE_SEX_M.>0) %then %do; if varnum(dsid,"Sex_M") = 0 then Sex_M=0; %end; %if %eval(&SQUARE_BASELINE_SEX_O.>0) %then %do; if varnum(dsid,"Sex_O") = 0 then Sex_O=0; %end; %do race_iter=1 %to &racecount; %let race_value = %scan(&racelist,&race_iter); %if %eval(&&SQUARE_BASELINE_RACE_&race_value.>0) %then %do; if varnum(dsid,"Race_&race_value.") = 0 then Race_&race_value.=0; %end; %end; %if %eval(&SQUARE_BASELINE_Hispanic_Y.>0) %then %do; if varnum(dsid,"Hispanic_Y") = 0 then Hispanic_Y=0; %end; %if %eval(&SQUARE_BASELINE_Hispanic_N.>0) %then %do; if varnum(dsid,"Hispanic_N") = 0 then Hispanic_N=0; %end; %if %eval(&SQUARE_BASELINE_Hispanic_U.>0) %then %do; if varnum(dsid,"Hispanic_U") = 0 then Hispanic_U=0; %end; %do year_iter=&StartYear. %to &EndYear.; if varnum(dsid,"Year_&year_iter.") = 0 then Year_&year_iter.=0; %end; %if %eval(&type.=4) %then %do; if varnum(dsid,"PrePostInd_PRE") = 0 then PrePostInd_PRE = 0; if varnum(dsid,"PrePostInd_EARL") = 0 then PrePostInd_EARL = 0; if varnum(dsid,"PrePostInd_FULL") = 0 then PrePostInd_FULL = 0; if varnum(dsid,"PrePostInd_LATE") = 0 then PrePostInd_LATE = 0; if varnum(dsid,"PrePostInd_POST") = 0 then PrePostInd_POST = 0; if varnum(dsid,"PrePostInd_NONE") = 0 then PrePostInd_NONE = 0; if varnum(dsid,"PrePostInd_NA") = 0 then PrePostInd_NA = 0; %do outcome=1 %to %sysfunc(countw(&preg_outcome_list., '|')); %let preg_outcome=%upcase(%scan(&preg_outcome_list., &outcome., %str(|))); if varnum(dsid,"preg_outcome_&preg_outcome.") = 0 then preg_outcome_&preg_outcome. = 0; %end; %end; rc= close(dsid); drop rc dsid; run; * Square riskscores categories if applicable; %if %length(&riskscores_cat_list.) > 0 %then %do; %do rskscore = 1 %to %sysfunc(countw(&riskscores_cat_list.)); %let riskscore= %upcase( %sysfunc(tranwrd(%scan(&riskscores_cat_list., &rskscore.),%str(_cat),%str( )))); proc sql noprint; select riskscorecat into :riskscorecat trimmed from RiskScorefile where upcase(riskscore) = "&riskscore."; quit; data data msoc.&RUNID._baseline&outcohort.&t3out._&i.; set msoc.&RUNID._baseline&outcohort.&t3out._&i.; dsid = open("msoc.&RUNID._baseline&outcohort.&t3out._&i."); %do loop = 1 %to %sysfunc(countw(&riskscorecat, ' ')); if varnum(dsid,"&riskscore._cat&loop.") = 0 then &riskscore._cat&loop.=0; else if &riskscore._cat&loop.=. then &riskscore._cat&loop.=0; %end; rc= close(dsid); drop rc dsid; run; %end; %end; *add missing groups if any; proc sql noprint; create table _MissingGroups as %if "&groupvar" = "group" %then %do; select distinct (cohortgrp) as &groupvar., %end; %if "&groupvar" = "analysisgrp" %then %do; select distinct (analysisgrp) as &groupvar., %end; 0 as patient, 0 as N_Episodes from baselinegrps where &groupvar. not in (select distinct (&groupvar.) from msoc.&RUNID._baseline&outcohort.&t3out._&i.) ; quit; /* Create switchstep indicator for when stacking datasets and remove dummy covariates if any */ data msoc.&RUNID._baseline&outcohort.&t3out._&i.; set msoc.&RUNID._baseline&outcohort.&t3out._&i. %if %length(&dummycovarlist.) > 0 %then %do; (drop=&dummycovarlist.) %end; _MissingGroups %if %sysfunc(index(&outcohort.,_prev)) > 0 %then %do; (where = (&groupvar. in ("&&prev&groupvar.s."))) %end;; %if %eval(&type.=6) %then %do; length switchstep 3; switchstep = &suffixvarnum.; %end; run; /*clean up*/ proc datasets nowarn noprint lib=work; delete _agegroups _continuous _discrete _genders _hispanic _missingagegroups _missinggroups _prepostind _PregOutcomes _races _rawdata _years _covarlabs _vars %if %length(&riskscores_cat_list.) > 0 %then %do; %do rskscore = 1 %to %sysfunc(countw(&riskscores_cat_list.)); _%scan(&riskscores_cat_list., &rskscore.) %end; %end; ; quit; %end; %else %do; %put WARNING: (Sentinel) No users. Baseline table &RUNID._baseline&outcohort.&t3out._&i. will not be produced.; %end; %put NOTE: ******** END OF MACRO: ms_createdistbaselinetable ********; %mend ms_createdistbaselinetable;