**************************************************************************************************** * PROGRAM OVERVIEW **************************************************************************************************** * * PROGRAM: ms_processInputfiles.sas * * Created (mm/dd/yyyy): 12/19/2014 * Last modified: 12/21/2017 * Version: 2.4 * *-------------------------------------------------------------------------------------------------- * PURPOSE: * This macro will process the program input files variables. * * Program inputs: * - a dataset containing the cohort definition codes * - a dataset containing inclusion/exclusion codes * - a dataset containing the monitoring file period * - a dataset containing the cohort definition parameters * * Program outputs: * - datasets with formatted input files variables * - _diag, _proc, _ndc and _labs that will be used for code extraction * * PARAMETERS: * * Programming Notes: * * * *-------------------------------------------------------------------------------------------------- * CONTACT INFO: * Mini-Sentinel Coordinating Center * info@mini-sentinel.org * *-------------------------------------------------------------------------------------------------- * CHANGE LOG: * * Version Date Initials Comment (reference external documentation when available) * ------- -------- -------- --------------------------------------------------------------- * 1.3 04/13/15 DM Added defensive coding to remove true duplicates in the input files * * 1.4 04/21/15 EM Created the looks dataset to have looks with non overlapping dates * Added defensive coding checks to check surveillance parameters accuracy * * 1.5 06/26/2015 DM Added code to ensure HDPS variable integrity. * Removed the code from 1.4 related to Surveillance activity * * 1.6 09/14/2015 DM Added code to process new Type 4 analysis input files * * 1.7 01/15/2016 VC PreExisting conditions enhancements * * 1.8 06/27/2016 DM Added the code removed in 1.5 related to Surveillance activity * * 1.9 11/30/2016 AP Added code to adjust EndDate if DP Max Date < EndDate * * 2.0 03/23/2017 DM Changes for the QRP 4.0 version * * 2.1 05/05/2017 AP Added code to compile list of groups to output race/hispanic info * * 2.2 06/26/2017 AP Modified for the additional of Type 5 analysis * Updated codetype length in ComborbsCodes to avoid mismatched length warnings * Hardcoded length of code to be the same across all files (QRP-294) * Added _aniv and _dte output tables for cohorts identified by age or calendar date * * 2.3 10/03/2017 AP Changed PregCodes to PregDur (QRP-388) * * 2.4 12/21/2017 AP Implemented CODEDAYS parameter in inclusion/exclusion and covariate files (QRP-455) * ***************************************************************************************************; %macro ms_processInputfiles(); %put =====> MACRO CALLED: ms_processInputfiles v2.4; /**********************************************/ /* Diagnosis, Procedure, and Dispensing codes */ /**********************************************/ *Cohortcodes; data &COHORTCODES.; set infolder.&COHORTCODES.; Code=compress(Code,'. '); run; *InclusionCodes; %IF %SYSFUNC(exist(infolder.&INCLUSIONCODES.))=1 %THEN %DO; data &INCLUSIONCODES.; set infolder.&INCLUSIONCODES.; if CondInclusion=1 then do; T1_INDEX="INC"; T2_INDEX="INC"; T3_INDEX="INC"; T4_INDEX="INC"; T5_INDEX="INC"; end; if CondInclusion=0 then do; T1_INDEX="EXC"; T2_INDEX="EXC"; T3_INDEX="EXC"; T4_INDEX="EXC"; T5_INDEX="EXC"; end; T2_FUP="NOT"; T3_COMP="NOT"; T3_RISK="NOT"; T4_FUP="NOT"; *Defensive coding: upcase condlevel and subcondlevel; condlevel=upcase(condlevel); subcondlevel=upcase(subcondlevel); if missing(codedays) then codedays = 1; run; *Ensure only 1 codedays value per group condlevel subcondlevel; proc sql noprint; create table _codedays_check as select distinct group , condlevel , subcondlevel from &INCLUSIONCODES. group by group, condlevel, subcondlevel having count(distinct codedays) > 1 ; quit; %isdata(dataset=_codedays_check); %if %eval(&nobs.>0) %then %do; %put WARNING: Different CODEDAYS value specified within the same SUBCOND. Check Inclusion/Exclusion code file integrity; %end; proc sort data=&INCLUSIONCODES. out=&INCLUSIONCODES.; by group CondLevel SubCondLevel; run; *Create numeric Condlevel and SubCondlevel variables for looping; data &INCLUSIONCODES. (drop=l_subcond l_cond); set &INCLUSIONCODES.; format cond subcond 8.; by group CondLevel; l_cond=lag(CondLevel); l_subcond=lag(SubCondLevel); if first.group then do; cond=1; call missing(l_cond); end; else do; if CondLevel=l_cond then do; cond=cond; end; else if CondLevel^=l_cond then do; cond=cond+1; end; end; *Create numeric subcondlevel variable; if first.CondLevel then do; subcond=1; call missing(l_subcond); end; else do; if SubCondLevel=l_subcond then do; subcond=subcond; end; else if SubCondLevel^=l_subcond then do; subcond=subcond+1; end; end; retain cond subcond; run; *Append inclusion and Exclusion codes to work.&COHORTCODES.; data &COHORTCODES.; set &COHORTCODES. &INCLUSIONCODES.; Code=compress(Code,'. '); run; %END; /***********************************************/ /* TYPE4 Pregnancy Pre/Post term codes */ /***********************************************/ %IF %SYSFUNC(exist(infolder.&PREGDUR.))=1 %THEN %DO; data &PREGDUR.; set infolder.&PREGDUR.; Code=compress(Code,'. '); T1_INDEX="NOT"; T2_INDEX="NOT"; T3_INDEX="NOT"; T4_INDEX="PCY"; T2_FUP="NOT"; T3_COMP="NOT"; T3_RISK="NOT"; T4_FUP="NOT"; run; * Make sure pre/post term codes with the same priorities have the same duration; proc sql noprint; create table _DurationFreq as select count(distinct Duration) as NumDuration, PriorityGroup1, PriorityGroup2, Priority from &PREGDUR. group by PriorityGroup1, PriorityGroup2, Priority; select max(NumDuration) into :NumDuration from _DurationFreq; quit; %if (&NumDuration. gt 1) %then %do; %put WARNING: Some PriorityGroup1 or PriorityGroup2 codes have the same priority but different duration; %end; data &COHORTCODES.; set &COHORTCODES. &PREGDUR.; run; %END; /***********************************************/ /* TYPE4 Medication of interest/drug use codes */ /***********************************************/ proc sort nodupkey data= infolder.&COHORTCODES. out=MPNList; by T4_INDEX; where T4_INDEX =:"MP"; run; %ISDATA(dataset=MPNList); %IF %EVAL(&NOBS.>0) %THEN %DO; proc sql noprint; select count(T4_INDEX) into :NUM_MPN from MPNList; quit; %END; %put &NUM_MPN.; *Defensive coding - remove input files true duplicates; proc sort nodupkey data=&COHORTCODES.; by _ALL_; run; * Make sure CodeSupply is the same within a particular group; proc sql noprint; create table _CodeSupplyFreq as select count(distinct CodeSupply) as NumCodeSupply, Group from &COHORTCODES. group by Group; select max(NumCodeSupply) into :NumCodeSupply from _CodeSupplyFreq; quit; %if (&NumCodeSupply. gt 1) %then %do; %put WARNING: Some group(s) in the &COHORTCODES. input file have more than one value specified for the CodeSupply variable; %end; /*******************/ /* Monitoring File */ /*******************/ %IF %SYSFUNC(exist(infolder.&Monitoringfile.))=1 %THEN %DO; *limiting to periodid set by user; *Default period selection; data _null_; %IF %STR(&PERIODIDSTART.) = %STR() or %LENGTH(&PERIODIDSTART.) = 0. %THEN %DO; call symputx("PERIODIDSTART",1); %END; %IF %STR(&PERIODIDEND.) = %STR() or %LENGTH(&PERIODIDEND.) = 0. %THEN %DO; call symputx("PERIODIDEND",99999); %END; run; *Taking min startfollowup and max enddate; proc means data=infolder.&Monitoringfile. nway noprint; var startfollowup enddate ; where &PERIODIDSTART. <= PeriodId <= &PERIODIDEND.; output out=&Monitoringfile._agg min(startfollowup)=startfollowup max(enddate)=enddate; run; data &Monitoringfile.; set infolder.&Monitoringfile.; run; %IF %SYSFUNC(exist(infolder.&TYPE1FILE.))=1 %THEN %DO; data _null_; set infolder.&TYPE1FILE.; if _n_ = 1 then do; call symputx('Censor_DPEnd',upcase(strip(Censor_DPEnd))); end; run; %end; %IF %SYSFUNC(exist(infolder.&TYPE2FILE.))=1 %THEN %DO; data _null_; set infolder.&TYPE2FILE.; if _n_ = 1 then do; /*Only 1 Censor_dpend value allowed*/ call symputx('Censor_DPEnd',upcase(strip(Censor_DPEnd))); end; run; %end; /*Turn off CENSOR_DPEND if using SURVEILLANCEMODE*/ %IF "&SURVEILLANCEMODE."="f" or "&SURVEILLANCEMODE."="p" %THEN %DO; %if &Censor_DPEnd. = Y %then %do; %let Censor_DPEnd = N; %put WARNING: Censor_DPEnd turned off because Surveillance mode is being used; %end; %end; %if &Censor_DPEnd. = Y %then %do; /*add dp_maxdate to monitoring file*/ data &Monitoringfile._agg(drop=dp_maxdate); set &Monitoringfile._agg; format dp_maxdate date9.; dp_maxdate = &DP_MaxDate.; if dp_maxdate < enddate then do; enddate = dp_maxdate; /*recode enddate*/ call symputx("DPMax_Adjust", 1); /*Set DPMax_adjust flag to track adjustment*/ end; run; data &Monitoringfile.(drop=dp_maxdate); set &Monitoringfile.; format dp_maxdate date9.; dp_maxdate = &DP_MaxDate.; if dp_maxdate < enddate then do; enddate = dp_maxdate; /*recode enddate*/ end; run; %end; data &Monitoringfile._agg; set &Monitoringfile._agg; call symputx("startfollowup",startfollowup); call symputx("enddate",enddate); run; %put &startfollowup. &enddate. &numperiodid.; * Create Non Overlapping Looks dates; proc sort data=&Monitoringfile. out=looks; by PERIODID; run; data looks; set looks; format lenddate LookStart LookEnd date9.; LookStart=startfollowup; LookEnd=enddate; if periodid>1 then LookStart=lenddate+1; lenddate=enddate; retain lenddate; rename PeriodId = Look; keep PeriodId LookStart LookEnd; run; %END; %IF "&SURVEILLANCEMODE."="f" or "&SURVEILLANCEMODE."="p" %THEN %DO; %if &PERIODIDSTART. ne &PERIODIDEND. %then %do; %put WARNING: Surveillance mode runs for only one period, i.e. PERIODIDSTART must be equal to PERIODIDEND.; %abort; %end; * Prior data existence for Surveillance; %IF %EVAL(&PERIODIDSTART.>=2) %THEN %DO; %let DPLPrior = %ms_check_path(&DPLPrior); %put &DPLPrior.; libname DPLPrior "&DPLPrior." access=readonly; %ISDATA(dataset=DPLPRIOR.&RUNID._mstr); %put &NOBS.; %IF %EVAL(&NOBS.=0) %THEN %DO; %put WARNING: DPLPRIOR.&RUNID._mstr not found or is empty.; %abort; %END; %ISDATA(dataset=DPLPRIOR.&RUNID._signature); %put &NOBS.; %IF %EVAL(&NOBS.>0) %THEN %DO; data _null_; set DPLPRIOR.&RUNID._signature; if var="PERIODIDEND" then call symput("LASTRUN",strip(VALUE)); run; %put &LASTRUN.; %if %eval(&PERIODIDSTART.-1 ne &LASTRUN.) %then %do; %put WARNING: DPLPrior is not pointing to the last run.; %abort; %end; %END; %if %str(%UPCASE("&ANALYSIS."))=%str("PS") %then %do; proc sql noprint; select max(comp_order) into :Max_comp_order from infolder.&COMPARISON.; quit; %do k=1 %to &Max_comp_order.; %ISDATA(dataset=DPLPRIOR.&runID._matched_&k._%eval(&PERIODIDSTART.-1)); %IF %EVAL(&NOBS.=0) %THEN %DO; %put WARNING: DPLPRIOR.&runID._matched_&k._%eval(&PERIODIDSTART.-1)) not found or is empty.; %abort; %END; %end; %end; %END; /*PERIODIDSTART>=2*/ %END; /*********************************/ /* Preprare Raw extraction files */ /*********************************/ %ISDATA(dataset=infolder.&Comorbfile.); %IF %EVAL(&NOBS.>0) %THEN %DO; *Adapt for different lengths to avoid w a r n i n g s later; data ComborbsCodes; length codetype $3. code $11. CODECAT $2.; format code $11.; set infolder.Comorbcodes; if code_grp="DIAG" then CODECAT="DX"; Comorbcode=1; output; if wildcard="Y" then do; if Length(code)=3 then do; code=strip(code)||"*"; output; end; if Length(code)<=4 then do; code=strip(code)||"*"; output; end; end; run; %let ComborbsCodes=ComborbsCodes(keep=Code CodeCat Codetype Comorbcode); %END; %ISDATA(dataset=infolder.&COVARIATECODES.); %IF %EVAL(&NOBS.>0) %THEN %DO; *Select covariates codes (not combination of covatiates); *to prevent w a r n i n g when covariatecodes contains >11 length for CC codes, reset length of code*; data _&COVARIATECODES.; length code $11; format code $11.; set infolder.&COVARIATECODES.(rename=code=code1); where codecat not in ("CC"); code=code1; drop code1; /*Defensive if blank*/ if codedays = . then codedays = 1; run; /*codedays variable should be specified at the covariate level. Run a check to ensure value is unique within covariate*/ %let cov_codedays_check = ; proc sql noprint; select max(count) into: cov_codedays_check from (select covarnum, count(distinct codedays) as count from _&COVARIATECODES. group by covarnum) group by count; quit; %if %eval(&cov_codedays_check.) >1 %then %do; %put WARNING: Multiple values of codedays per Covarnum. Revise input file; %end; data CovarCodes; set _&Covariatecodes.; Comorbcode=0; run; %END; %ELSE %DO; data CovarCodes; studyname = ""; StockGroup=""; Code = ""; CodeCat = ""; Codetype = ""; CARESETTINGPRINCIPAL = ""; CovarNum = 0; Comorbcode = 0; CovFrom=.; CovTo=.; codedays=.; run; /*check consistency with PROFILE parameter*/ %if "&profile" = "y" %then %do; %put WARNING: PROFILE set to Y, however covariatecodes not specified. PROFILE will be set to N.; %let profile = n; %end; %END; %let CovarCodes=CovarCodes(keep=Code CodeCat Codetype CARESETTINGPRINCIPAL studyname StockGroup CovarNum Comorbcode covfrom covto codedays); %let CovarCodes2=if CodeCat in("PX","DX") then output CovMeds; proc sort nodupkey data=CovarCodes out=StudyNames(keep=studyname CovarNum); by CovarNum; run; *Combine COHORTCODES, ComborbsCodes and CovarCodes codes to pull data from CDM only once; data _diag(drop=comb) _proc(drop=comb) _ndc _dte _aniv; set &COHORTCODES. &ComborbsCodes. &CovarCodes.; Code=compress(Code,'. '); length=length(code); CodeCat=upcase(CodeCat); comb=0; if CodeCat in('RX') then output _ndc; if CodeCat in('DX') then output _diag; if CodeCat in('PX') then output _proc; if codecat in('DT') then output _dte; if codecat in('AN') then output _aniv; keep Code CodeCat Codetype comb; run; *Checking for the presence of optional lab vars; data _null_; dset=open("&COHORTCODES."); call symput ('Labvars',put(varnum(dset,'RawLabResult'),best.)); run; %put &Labvars.; %IF %EVAL(&Labvars.>=1) %THEN %DO; data _lab; set &COHORTCODES.; Code=compress(Code,'. '); length=length(code); CodeCat=upcase(CodeCat); RawCaresetting=Caresettingprincipal; RawGroup=Group; if CodeCat in('LB') then output _lab; keep Code CodeCat Codetype RawLabResult RawLabDateType RawCaresetting RawGroup;* RawCaresetting; run; %END; %ELSE %DO; data _lab; set &COHORTCODES.(obs=0); RawGroup=Group; keep Code CodeCat Codetype RawGroup;* RawLabResult RawLabDateType Caresettingprincipal;* RawCaresetting; run; %END; *Creating the number of groups to create cohorts; %isdata(dataset=infolder.&COHORTFILE.); data _null_; call symputx("Ngroup","&nobs."); run; *Determine if the chart variable exists in MSDD enrollment table; data _null_; dset=open("indata.&ENRTABLE."); call symput ('chartvar',put(varnum(dset,'CHART'),best.)); run; %put &chartvar.; /*List of groups where race_out = Y or hispanic_out = Y*/ proc sql noprint; select group into: race_groups separated by ' ' from infolder.&COHORTFILE. where upcase(race_out) = "Y"; select group into: hispanic_groups separated by ' ' from infolder.&COHORTFILE. where upcase(hispanic_out) = "Y"; quit; %global race_groups2 hispanic_groups2; %let race_groups2= ''; %let hispanic_groups2 = ''; %if %length(&race_groups) ne 0 %then %do; %create_comma_charlist(inlist=&race_groups., outlist=race_groups2); %end; %if %length(&hispanic_groups) ne 0 %then %do; %create_comma_charlist(inlist=&hispanic_groups., outlist=hispanic_groups2); %end; *Creating rows for neverexposed group for type2; *Assign same cohortfile to the unexposed group; %ISDATA(dataset=infolder.&TYPE2FILE.); %IF %EVAL(&NOBS.>0) %THEN %DO; proc sql; create table _cohorttype as select a.*, b.NEVEREXPOSEDCOHORT from infolder.&cohortfile. a, infolder.&TYPE2FILE. b where a.group = b.group; quit; data cohortfile (drop = NEVEREXPOSEDCOHORT); length group $40; set _cohorttype; output; if upcase(strip(NEVEREXPOSEDCOHORT)) = "Y" then do; group = cats(group,"_nvrexp"); output; end; run; %END; %ELSE %DO; data cohortfile; length group $40; set Infolder.&cohortfile.; run; %END; /*********************************************/ /* Utilization File and Comorbidity File */ /*********************************************/ %macro createnewinput(indata=, outdata=); %ISDATA(dataset=infolder.&indata.); %IF %EVAL(&NOBS.>0) %THEN %DO; %ISDATA(dataset=infolder.&TYPE2FILE.); %IF %EVAL(&NOBS.>0) %THEN %DO; proc sql; create table _&outdata.type as select a.*, b.NEVEREXPOSEDCOHORT from infolder.&indata. a, infolder.&TYPE2FILE. b where upcase(strip(a.group)) = upcase(strip(b.group)); quit; data &outdata. (drop = NEVEREXPOSEDCOHORT); length group $40; set _&outdata.type; output; if upcase(strip(NEVEREXPOSEDCOHORT)) = "Y" then do; group = cats(group,"_nvrexp"); output; end; run; %END; %ELSE %DO; data &outdata.; length group $40; set Infolder.&indata.; run; %END; %END; %mend createnewinput; %createnewinput(indata = &UTILFILE., outdata = utilfile); %createnewinput(indata = &COMORBFILE., outdata = COMORBFILE); /************************/ /* HDPS Integrity Check */ /************************/ %if (%UPCASE("&ANALYSIS.") eq "PS") %then %do ; %let message=; *Get groups that are not compared with a predefined model; data _NotPredefined; set infolder.&COMPARISON.(rename=comp=group where=(upcase(HDPS) ne "N")) infolder.&COMPARISON.(rename=control=group where=(upcase(HDPS) ne "N")); keep group HDPSWin: HDPS; run; proc means data=_NotPredefined nway noprint; var HDPSWinfrom HDPSWinTo; output out=HDPS_WIN min(HDPSWinfrom)= max(HDPSWinTo)=; run; data _NULL_; set HDPS_WIN; call symputx("MinHDPSWinfrom",put(HDPSWinfrom,best.)); call symputx("MaxHDPSWinTo",put(HDPSWinTo,best.)); run; %put &MinHDPSWinfrom. &MaxHDPSWinTo.; proc sort nodupkey data=_NotPredefined; by group; run; *Create dataset to id which group for HDPS; data HDPSSettings; set _NotPredefined(in=b); by group; drop HDPSWin:; run; %end; %ISDATA(dataset=infolder.&comparison.); %IF %EVAL(&NOBS.>0) %THEN %DO; data HDPS; set infolder.&comparison.; where upcase(HDPS)="Y"; format group $100.; if control ne "" then do; group=control; output; end; if comp ne "" then do; group=comp; output; end; keep group HDPSWinFrom HDPSWinTo; run; proc sort nodupkey data=HDPS; by group HDPSWinFrom HDPSWinTo; run; *check if the same group is there more than once (then it is associated with mon than one HDPS window); proc means data=HDPS nway noprint; var HDPSWinFrom HDPSWinTo; class group; output out=HDPS min(HDPSWinFrom)= max(HDPSWinTo)=; run; %let HDPSPROB=0; data HDPS; set HDPS; if _FREQ_>1 then call symput("HDPSPROB","1"); run; %put &HDPSPROB.; %IF %EVAL(&HDPSPROB. = 1) %THEN %DO; %put WARNING: The same group is associated with more than one HDPS Window; %END; %END; %put NOTE: ******** END OF MACRO: ms_processInputfiles v2.4 ********; %mend ms_processInputfiles;