****************************************************************************************************
*                                           PROGRAM OVERVIEW
****************************************************************************************************
*
* PROGRAM: aggregate_l2_datasets.sas
* Created (mm/dd/yyyy): 02/03/2021
*
*--------------------------------------------------------------------------------------------------
* PURPOSE:
*   This macro aggregates L2 MSOC datasets from each DP into one dataset;
*
*  Program inputs:
*   - infile        = the name of the file to aggregate across DPs
*   - outfile       = the name of the final file
*   - pscsfile      = QRP input file that defines analysis
*   - whereclause   = condition to restrict infile
*   - convrule      = comma or space delimited list of indicator numbers to consider model
*                     having converged
*   - convdata      = dataset that contains convergence status
*                     (QRP [runid]_estimates_[periodid])
*   - settomissvars = comma delimited variables to set to missing if model does not converge
*   - renameclause  = optional rename statement when reading in dataset
*   - runidvar      = optional flag. When non-blank a character variable runid (length 5) is
*                     added to the output and populated from the global macro variable runid
*
*  Program outputs:
*   - a dataset named by the outfile parameter containing DP data with DP identification variable
*   - when infile contains the text varinfo - a work dataset agg_varinfo holding the unmodified
*     varinfo rows plus the DP identification variable
*   - when output_agg_data is Y and leavebehindreport is N - a dataset
*     msocdata.agg_[second underscore-delimited token of infile]_[periodid]
*
*  Programming Notes:
*   - Reads these macro variables from an enclosing scope - num_dp random_dplist masked_dplist
*     psestimategrp analysisgrp periodid runid output_agg_data leavebehindreport
*   - Calls the helper macro isdata and then reads the macro variable nobs. It is assumed that
*     isdata sets nobs to the observation count of the dataset it is given - TODO confirm
*
*--------------------------------------------------------------------------------------------------
* CONTACT INFO:
*  Sentinel Coordinating Center
*  info@sentinelsystem.org
*
***************************************************************************************************;

%macro aggregate_l2_datasets(infile=, outfile=, pscsfile=, whereclause=, convrule=, convdata=,
                             settomissvars=, renameclause=, runidvar=);

    %put =====> MACRO CALLED: aggregate_l2_datasets;

    /* Scratch variables used only inside this macro. Declaring them local keeps the loop
       counters and the convergence work variables from overwriting same-named variables in a
       calling scope. dpidsiteid, maskedID and nobs are intentionally NOT declared here:
       NOTE(review) other code may read them after the call, and nobs is set by %isdata. */
    %local dps bc converge convergeclause orclause estimatessubgroup estimatessubgroupcat;

    /* Start from a clean output dataset, except for the aggwd output and varinfo inputs,
       which are left in place so that rows accumulate across calls */
    %if &outfile ^= aggwd and %index(&infile.,varinfo) = 0 %then %do;
        proc datasets library = work nolist nowarn;
            delete &outfile.;
        quit;
    %end;

    %do dps = 1 %to %eval(&num_dp.);
        /* Library reference of the current DP and the masked ID written to the output */
        %let dpidsiteid = %scan(&random_dplist,&dps);
        %let maskedID = %scan(&masked_dplist,&dps);

        /* Manage convergence status - if model did not meet convergence status, then set
           variables listed in SETTOMISSVARS to missing */
        %let convergeclause = ;
        %let converge = 1;

        %if %length(&convrule.)>0 %then %do;
            %if &pscsfile. = psmatchfile | &pscsfile. = stratificationfile | &pscsfile. = iptwfile %then %do;
                %if %sysfunc(exist(&dpidsiteid..&convdata))=1 %then %do;

                    /* Keep the rows of the convergence dataset whose status is not in CONVRULE;
                       any such row flips CONVERGE to 0 */
                    data _non_converged_models;
                        set &dpidsiteid..&convdata.(where=(lowcase(psestimategrp)="&psestimategrp" | lowcase(analysisgrp) = "&analysisgrp"));
                        if status not in(%quote(&convrule.)) then do;
                            call symputx('converge', 0);
                            output;
                        end;
                    run;

                    %isdata(dataset=_non_converged_models);
                    %if %eval(&nobs.>0) %then %do;
                        /* Build the IF clause: one (subgroup, subgroupcat) condition per
                           non-converged row, joined with OR */
                        %do bc = 1 %to &nobs.;
                            data _null_;
                                set _non_converged_models(keep=subgroup subgroupcat);
                                if _n_ = &bc. then do;
                                    call symputx('estimatessubgroup', subgroup);
                                    call symputx('estimatessubgroupcat', subgroupcat);
                                    /* no leading OR in front of the first condition */
                                    if &bc. >1 then call symputx('orclause', 'or');
                                    else call symputx('orclause', '');
                                end;
                            run;
                            %let convergeclause = &convergeclause. &orclause. (subgroup = "&estimatessubgroup" and subgroupcat = "&estimatessubgroupcat.");
                        %end;
                    %end;

                    proc datasets nowarn noprint lib=work;
                        delete _non_converged_models;
                    quit;
                %end;
            %end;
        %end;

        %if %sysfunc(exist(&dpidsiteid..&infile))=1 %then %do;

            data _temp_&dps.;
                set &dpidsiteid..&infile.(where=(&whereclause.)&renameclause.);
                length dpidsiteid $4.;
                dpidsiteid = "&maskedID.";
                dum0=1;
                /* numeric position of this DP in the DP list */
                dp=input("&dps.",best.);

                %if %length(&runidvar) > 0 %then %do;
                    length runid $5.;
                    runid="&runid.";
                %end;

                /* Assign codecat and codetype for HDPS Vars */
                %if %index(&infile.,varinfo) > 0 %then %do;
                    length ranking 8 frequency $18 codetype $5 codecat $2 periodid 3;

                    if index(dimension,'ICD') > 0 then do;
                        /* last two characters of DIMENSION give codetype, the two
                           characters before them give codecat */
                        codetype = reverse(substr(strip(reverse(dimension)),1,2));
                        codecat = reverse(substr(strip(reverse(dimension)),3,2));
                    end;
                    else if index(dimension,'DRUGCLASS') > 0 then do;
                        codetype = 'CLASS';
                        codecat = 'RX';
                    end;
                    else do;
                        codetype = scan(dimension,-1,'_');
                        codecat = 'PX';
                    end;

                    if index(var_name,'Frequent') > 0 then frequency = 'Frequent';
                    else if index(var_name,'Any') > 0 then frequency = 'Any';
                    else if index(var_name,'Often') > 0 then frequency = 'Often';
                    else frequency = substr(var_name, index(var_name, '_Q')+1);

                    periodid = &periodid.;

                    keep psestimategrp analysisgrp subgroup subgroupcat codecat codetype dpidsiteid
                         frequency ranking code periodid runid;
                %end;

                /*set variables to missing if convergence not met*/
                %if %eval(&converge.=0) %then %do;
                    if &convergeclause. then call missing(&settomissvars.);
                %end;
            run;

            /*Append to &outfile*/
            proc append data=_temp_&dps. base=&outfile. force;
            run;

            /*If varinfo then append for msocdata output*/
            %if %index(&infile.,varinfo) > 0 %then %do;
                data _temp_varinfo_&dps.;
                    set &dpidsiteid..&infile.(where=(&whereclause.));
                    length dpidsiteid $4.;
                    dpidsiteid = "&maskedID.";

                    %if %length(&runidvar) > 0 %then %do;
                        length runid $5.;
                        runid="&runid.";
                    %end;
                run;

                proc append data=_temp_varinfo_&dps. base=agg_varinfo force;
                run;
            %end;

            /* Write warning to log if the file exists but no rows survived the where clause.
               This check lives inside the "file exists" branch so that the message is never
               written for a DP whose file is missing (that case has its own warning below). */
            %isdata(dataset=_temp_&dps);
            %if %eval(&nobs.=0) %then %do;
                %put WARNING: (Sentinel) File &infile exists for DP &DPIDSITEID., but analysisgrp &analysisgrp. is missing;
            %end;
        %end;
        %else %do;
            %put WARNING: (Sentinel) &infile does not exist for &dpidsiteid..;
        %end;

        /*Delete temporary dataset*/
        proc datasets nowarn noprint nolist lib=work;
            delete _temp_&dps. _temp_varinfo_&dps.;
        quit;

    %end; /* loop through DPs */

    /* Optionally persist the aggregated data to the msocdata library: create the dataset when
       it does not exist yet (or when the output is aggwd), otherwise stack the new rows
       underneath the existing ones */
    %if &output_agg_data. = Y and &leavebehindreport. = N %then %do;
        %if %sysfunc(exist(msocdata.agg_%scan(&infile.,2,_)_&periodid.))=0 | &outfile = aggwd %then %do;
            data msocdata.agg_%scan(&infile.,2,_)_&periodid.;
                %if %index(&infile.,varinfo) > 0 %then %do;
                    set agg_varinfo;
                %end;
                %else %do;
                    set &outfile.;
                %end;
            run;
        %end;
        %else %do;
            data msocdata.agg_%scan(&infile.,2,_)_&periodid.;
                set msocdata.agg_%scan(&infile.,2,_)_&periodid.
                %if %index(&infile.,varinfo) > 0 %then %do;
                    agg_varinfo;
                %end;
                %else %do;
                    &outfile.;
                %end;
            run;
        %end;
    %end;

    %put NOTE: ******** END OF MACRO: aggregate_l2_datasets ********;

%mend aggregate_l2_datasets;