**************************************************************************************************** * PROGRAM OVERVIEW **************************************************************************************************** * * PROGRAM: figure_survivalcurves_createdata.sas * Created (mm/dd/yyyy): 07/14/2021 * *-------------------------------------------------------------------------------------------------- * PURPOSE: This macro computes Kaplan-Meier and Cumulative Distribution Function (CDF) estimates * * Program inputs: * - MSOC dataset containing 1 row per day and columns containing counts of episodes censored * * Program outputs: * * * PARAMETERS: * - dataset: aggregate dataset from %aggregate_report_tables * - rename: syntax to rename variables on input dataset * - curve: KM, CDF or CIF * - whereclause: where statement to restrict &dataset * - dayvar: variable name for censor day variable * - includegroups: Groups to include in report * - includevars: For KM and CDF curves, censor reasons to include in final dataset. * For CIF curves, censor reason to use as competing risk * - eventvar: For CIF curves, censor reason to use as event of interest * - transposedata: Y/N/T whether all groups are in 1 plot (Y=Yes, N=No, T=only execute transpose) * - discardnegativetimegroups: list of groups to remove negative ttswitch (Relevant for T6 only) * - figure: standard figure # from FIGUREFILE * * Programming Notes: * - Curve = CDF will produce a 1-CDF plot * - Requires group names to be unique across runIDs * - Can specify transposedata = T if prior dataset exists with stacked data - used to produce * 2nd plot with the same data * *-------------------------------------------------------------------------------------------------- * CONTACT INFO: * Sentinel Coordinating Center * info@sentinelsystem.org * ***************************************************************************************************; %macro figure_survivalcurves_createdata(dataset=, rename=, curve=, whereclause=, dayvar=, includegroups=, includevars=, eventvar=, transposedata=, discardnegativetimegroups=, figure=); %put =====> MACRO CALLED: figure_survivalcurves_createdata; /*--------------------------------------------------------------------------------------------*/ /* Confirm input dataset exists */ /*--------------------------------------------------------------------------------------------*/ %isdata(dataset=&dataset.); %if %eval(&nobs.<1) %then %do; %put WARNING: SENTINEL: plot &figure. requested, however dataset &dataset. does not exist; %goto skipfigure; %end; /*--------------------------------------------------------------------------------------------*/ /* If transposedata T then do no execute aggregation and computation */ /*--------------------------------------------------------------------------------------------*/ %if &transposedata. ne T %then %do; /*--------------------------------------------------------------------------------------------*/ /* Select rows and columns and aggregate data */ /*--------------------------------------------------------------------------------------------*/ proc means data=&dataset.(&rename. where=(&whereclause.)) nway noprint; var episodes &includevars. %if "&curve" = "CIF" %then %do; &eventvar. %end;; class runid group &dayvar. / missing; output out=_survivaldata(drop=_: where=(missing(day)=0) rename=&dayvar.=day) sum=; run; %isdata(dataset=_survivaldata); %if %eval(&nobs.<1) %then %do; %put WARNING: SENTINEL: plot &figure. requested, however no observations available to compute curve; %goto skipfigure; %end; /*--------------------------------------------------------------------------------------------*/ /* Remove negative time for Type 6 */ /*--------------------------------------------------------------------------------------------*/ %let minday = 0; %if %str(&discardnegativetimegroups.) ne %str() %then %do; data _survivaldata; set _survivaldata; if group in (&discardnegativetimegroups.) and day <0 then delete; run; %end; /*--------------------------------------------------------------------------------------------*/ /* Square data to include 1 row per day */ /*--------------------------------------------------------------------------------------------*/ /*find minimum day (can be negative for Type 6)*/ %if &reporttype. = T6 %then %do; proc sql noprint; select min(day) into: minday from _survivaldata; quit; %let minday = %sysfunc(min(-1,&minday.-1)); %end; %else %do; %let minday = 0; %end; data _squaresurvival(rename=i=day); set _survivaldata(keep=runid group day); by runid group day; if last.group then do; /*type 6 - length = 4*/ %if &reporttype.=T6 %then %do; length i 4; %end; do i = &minday. to day; output; end; end; drop day; run; data _survivaldata; merge _survivaldata _squaresurvival _squaregroup; by runid group day; *set missing to 0; array change episodes &includevars. %if "&curve" = "CIF" %then %do; &eventvar. %end;; do over change; if change=. then change=0; end; run; proc sql noprint undo_policy=none; create table _survivaldata as select x.*, y.order from _survivaldata x inner join groupsfile(where=(includeinfigure='Y')) y on x.runid = y.runid and x.group = y.group %if &reporttype. = T6 %then %do; where y.switchanalysis = 'Y' %if &figure = F5 or &figure = F7 %then %do; and y.switch2indicator = 'Y' %end; %end; order by group, day; quit; /*--------------------------------------------------------------------------------------------*/ /* Build macro variables */ /*--------------------------------------------------------------------------------------------*/ %let censorreasonnum = %sysfunc(countw(&includevars.)); /*number of censor reasons*/ %let survival_cumlist = %sysfunc(prxchange(s/(\w+)/cum_$1/,-1,&includevars.)); %let survival_sumlist = %sysfunc(prxchange(s/(\w+)/sum_$1/,-1,&includevars.)); %let survival_cdflist = %sysfunc(prxchange(s/(\w+)/cdf_$1/,-1,&includevars.)); /*--------------------------------------------------------------------------------------------*/ /* Compute total number of episodes for each censoring criteria */ /*--------------------------------------------------------------------------------------------*/ data cumulative_totals(keep=group &survival_cumlist. cum_episodes); array cum{*} &survival_cumlist. cum_episodes; array censorcriteria{*} &includevars. episodes; do i = 1 to dim(cum); cum{i} = 0; end; do until(last.group); set _survivaldata; by group; do a = 1 to dim(cum); cum{a} = cum{a} + censorcriteria{a}; end; end; run; /*--------------------------------------------------------------------------------------------*/ /* Compute KM or 1-CDF estimate */ /*--------------------------------------------------------------------------------------------*/ %isdata(dataset=labelfile); /*if labelfile exists*/ data figure&figure.(rename=lag_episodes_atrisk=episodes_atrisk); set _survivaldata; by group day; array sum{*} &survival_sumlist. sum_episodes; array varlist{*} &includevars. episodes; array cum{*} &survival_cumlist.; %if "&curve" = "CDF" %then %do; array cdflist{*} &survival_cdflist. ; %end; if first.group then do; merge cumulative_totals; by group; do i = 1 to dim(varlist); sum{i} = varlist{i}; end; end; else do; do i = 1 to dim(varlist); sum{i} = varlist{i} + sum{i} ; end; end; retain &survival_sumlist. sum_episodes; /*Episodes_atrisk used in atrisk table in plot and for KM curve*/ episodes_atrisk = cum_episodes - sum_episodes; lag_episodes_atrisk = lag(episodes_atrisk); if first.group then lag_episodes_atrisk = episodes_atrisk; %if "&curve" = "CDF" %then %do; /*------1-CDF plot-----*/ do a = 1 to dim(cdflist); if cum{a} = 0 then cdflist{a} = 1; else cdflist{a} = 1-(sum{a} / cum{a}); end; drop a i; %end; /*-----KM Plot---------*/ %else %if "&curve" = "KM" %then %do; if first.group then do; %do cns = 1 %to %eval(&censorreasonnum.); km_%scan(&includevars., &cns.) = 1; %end; end; else do; %do cns = 1 %to %eval(&censorreasonnum.); if %scan(&includevars., &cns.) > 0 then do; km_%scan(&includevars., &cns.) = km_%scan(&includevars., &cns.)*(1-(%scan(&includevars., &cns.)/lag_episodes_atrisk)); end; else do; km_%scan(&includevars., &cns.) = km_%scan(&includevars., &cns.); end; %end; end; retain %do cns = 1 %to %eval(&censorreasonnum.); km_%scan(&includevars., &cns.) %end; ; %end; /*assign raw group label as grouplabel if no label file - next step assigns label from labelfile*/ %if %eval(&nobs.<1) %then %do; length grouplabel $40; grouplabel = group; %end; /*Assign censoring criteria labels. CIF label will be assigned after the cumulative incidence computation*/ %if "&curve" ne "CIF" %then %do; %do lbl = 1 %to %eval(&censorreasonnum.); %let s=; %if %scan(&includevars., &lbl.) = cens_switch %then %do; %if &dataset. = agg_t6plota %then %let s = 1; %else %if &dataset. = agg_t6plotb %then %let s = 2; %end; %let var = %scan(&includevars., &lbl.); label &&curve._%scan(&includevars., &lbl.) = "&&&var.&s._label"; %end; %end; /*-----CIF Plot--------*/ %else %do; rename &includevars. = competingrisk; %end; keep runid group: order day lag_episodes_atrisk %if "&curve" = "CIF" %then %do; &includevars. &eventvar. %end; %else %do; &curve._: %end;; run; %if "&curve" = "CIF" %then %do; proc sort data=figure&figure.; by group day; run; * Step 1: Calculate the Kaplan-Meier estimate using both the event of interest and the competing risk as event; data figure&figure.; set figure&figure.; by group day; if first.group then do; survival=1; prev_survival=survival; end; else do; survival=prev_survival*((episodes_atrisk - (&eventvar. + competingrisk))/episodes_atrisk); prev_survival=survival; end; retain prev_survival; drop prev_survival; run; * Step 2: Calculate the cumulative incidence using only the event of interest as event; data figure&figure.; set figure&figure.; by group day; prev_survival=lag(survival); if first.group then do; prev_survival=survival; cumincidence=0; end; else do; failure=&eventvar./episodes_atrisk; incidence=failure*prev_survival; cumincidence=cumincidence+incidence; end; retain cumincidence; drop competingrisk &eventvar. survival prev_survival failure incidence; rename cumincidence=cif_&eventvar.; %if &dataset. = agg_t6plota %then %let s = 1; %else %if &dataset. = agg_t6plotb %then %let s = 2; label cumincidence = &&&eventvar.&s._label.; run; %end; /* CIF curve computation */ /*--------------------------------------------------------------------------------------------*/ /* Assign Group label */ /*--------------------------------------------------------------------------------------------*/ %if %eval(&nobs.>0) %then %do; proc sql noprint undo_policy = none; create table figure&figure. as select a.*, case when not missing(b.label) then b.label else a.group end as grouplabel length=&label_length. from figure&figure. as a left join labelfile(where=(labeltype = 'grouplabel')) as b on a.group = b.group and a.runid = b.runid order by a.group, a.day; quit; %end; /*--------------------------------------------------------------------------------------------*/ /* if transposedata=N then create 1 dataset per group */ /*--------------------------------------------------------------------------------------------*/ %if &transposedata. = N %then %do; data %do g = 1 %to &numgroups.; figure&figure._group&g. %end; ; set figure&figure.; %do g = 1 %to &numgroups.; if order = &g. then output figure&figure._group&g.; %end; run; /*delete extraneous datasets*/ %do g = 1 %to &numgroups.; %isdata(dataset=(figure&figure._group&g.)); %if %eval(&nobs.<1) %then %do; proc datasets nowarn noprint lib=work; delete figure&figure._group&g.; quit; %end; %end; %end; %end; /*end transposedata ne T*/ /*--------------------------------------------------------------------------------------------*/ /* When all groups in 1 plot (only 1 censor reason per plot), reformat data */ /*--------------------------------------------------------------------------------------------*/ %if &transposedata. = Y | &transposedata.=T %then %do; proc sort data=figure&figure.; by day order; run; proc transpose data=figure&figure. out=_tempfigure1&figure.(drop=_name_ _label_) prefix=&curve._group; by day; id order; idlabel grouplabel; var &curve._&includevars.; run; proc transpose data=figure&figure. out=_tempfigure2&figure.(drop=_name_) prefix=episodes_atrisk; by day; id order; idlabel grouplabel; var episodes_atrisk; run; /*merge both datasets together and fill in missing at risk values (when 1 group has longer followup)*/ data figure&figure.; merge _tempfigure1&figure. _tempfigure2&figure.; by day; array epiatrisk{*} episodes_atrisk:; do i = 1 to dim(epiatrisk); if missing(epiatrisk{i}) then epiatrisk{i} = 0; end; order=1; drop i; run; %end; %skipfigure: proc datasets nowarn nolist noprint lib=work; delete _squaresurvival _survivaldata _cumulative_totals _tempfigure:; quit; %put =====> END MACRO: figure_survivalcurves_createdata; %mend;