**************************************************************************************************** * PROGRAM OVERVIEW **************************************************************************************************** * * PROGRAM: ms_nearestneighbormatch.sas * * Created (mm/dd/yyyy): 01/26/2015 * Last modified: 06/30/2019 * Version: 1.3 * *-------------------------------------------------------------------------------------------------- * PURPOSE: * This program will perform propensity score nearest neighbor optimal matching between two groups * * Program inputs: * -dataset containing the propensity scores * * * PARAMETERS: * InFile= The name of the dataset containing the propensity scores. * OutFile= The name of the dataset containing the matches and their controls. * MatchVars= The combination of variables that defines the records match level. * PSVar= The name of the variable containing the propensity scores. * MatchRatio= The maximum number of controls for one match. * WithReplacement= Matching with or without replacement. * Caliper= The maximum allowable distance between match and control propensity scores. * MatchAnalysis=PS or MS for PS or MFM tool * * Program outputs: * -dataset with the matches and their controls * * Programming Notes: * * *-------------------------------------------------------------------------------------------------- * CONTACT INFO: * Sentinel Coordinating Center * info@sentinelsystem.org * *-------------------------------------------------------------------------------------------------- * CHANGE LOG: * * Version Date Initials Comment (reference external documentation when available) * ------- -------- -------- --------------------------------------------------------------- * 1.1 03/09/16 AH Added Digits as a macro parameter instead of calculating it * from Caliper. * * 1.2 03/19/18 RR Added ability to run with MFM algorithm. * * 1.3 06/30/19 AP Changed MFM parameter to MatchAnalysis * ***************************************************************************************************; %macro ms_NearestNeighborMatch(InFile=, OutFile=, MatchVars=, PSVar=, MatchRatio=2, WithReplacement=N, Caliper=1.1, Digits=0.0000000000000001, matchanalysis=); %put =====> MACRO CALLED: ms_NearestNeighborMatch v1.3; %macro findInCtrl(dir,key); rc=ictrl.setcur(key:&key.); * dist=0; do while(CTRL_YN=0 and rc=0 /*and dist<MinDist*/); rc=ictrl.&dir.(); *Skip Treatment placholders - will always start at a trt placeholder; %if "&matchanalysis" eq "MS" %then %do; if Ctrl_PS ne trt_PS then leave; %end; %else %do; if abs(Ctrl_PS - trt_PS) >= MinDist then leave; %end; * if floor(abs(Ctrl_PS - trt_PS)/&digits.)*&digits. >= MinDist then leave; end; %mend; %macro findInTrt(dir,key); rc=itrt.setcur(key:&key.); * dist=0; do while(TRT_YN=0 and rc=0 /*and dist <MinDist*/); rc=itrt.&dir.(); /*Skip Control placholders - will always start at a ctrl placeholder*/ %if "&matchanalysis" eq "MS" %then %do; if Ctrl_PS ne trt_PS then leave; %end; %else %do; if abs(Ctrl_PS - trt_PS) >= MinDist then leave; %end; * if floor(abs(Ctrl_PS - trt_PS)/&digits.)*&digits. >= MinDist then leave; end; %mend; %macro AddToHeap(YesNoVar); %if "&matchanalysis" eq "MS" %then %do; dist = Ctrl_PS - TRT_PS; if &YesNoVar.=1 and dist = minDist then do; %end; %else %do; dist = floor(abs(Ctrl_PS - trt_PS)/&digits.)*&digits.; if &YesNoVar.=1 and dist < minDist then do; %end; heap_Ctrl_PS=Ctrl_PS; heap_trt_PS=trt_PS; Rand=ranuni(123455); N=N+1; heap.add(); *output this pair of neighbors to the heap space; end; %mend; %macro DeleteFromHeap; *bring the hash iterator to a null pointer thereby allowing for the clear() method on the hash object itself to be succesful.; rc0=iheap.first(); rc0=iheap.prev(); rc0=heap.remove(); %mend; %macro DeleteFrom(type1,type2,key); *one key only; key0=&key.; *bring the hash iterator to a null pointer thereby allowing for the clear() method on the hash object itself to be succesful.; rc0=&type1..first(); rc0=&type1..prev(); rc0=&type2..remove(key: key0); %mend; *Sort raw data; proc sort data=&InFile. out=_raw; by &PSVar. exposure; run; %LET match_Error=; %do i=1 %to &MatchRatio.; data treated(rename=CTRL_N=TRT_CTRL_N) controls(rename=TRT_N=CTRL_TRT_N); set _raw; TRT_YN=exposure=1; CTRL_YN=exposure=0; lastExposure=lag(Exposure); if _N_=1 then do; TRT_N=1; CTRL_N=1; end; else do; if exposure=1 then TRT_N=TRT_N+1; else if exposure=0 and lastExposure=1 then TRT_N=TRT_N+1; if exposure=0 then CTRL_N=CTRL_N+1; else if exposure=1 and lastExposure=0 then CTRL_N=CTRL_N+1; end; retain TRT_N CTRL_N; run; * post process placeholders for direct access; data treated; set treated; by TRT_N; if _N_=1 then Set_num=0; if first.TRT_N; if TRT_YN=1 then Set_num=Set_num+1; ForSort=ranuni(1250); rename &PSVar.=TRT_PS; retain Set_num; keep &MatchVars. &PSVar. TRT_N TRT_CTRL_N TRT_YN Set_num ForSort; run; data Controls; set Controls; by CTRL_N; if first.CTRL_N; ForSort=ranuni(1251); rename &PSVar.=CTRL_PS; keep &MatchVars. &PSVar. CTRL_N CTRL_TRT_N CTRL_YN ForSort exposure; run; %if "&matchanalysis" eq "MS" %then %do; proc sort data = treated; by ForSort; run; proc sort data = controls; by ForSort; run; %end; data _NULL_; *Reading the "Augmented" Controls space into memory; if _N_= 1 then do; if 0 then set controls; declare hash ctrl(hashexp:20, dataset: "Controls", ordered: 'a'); *Note: here speed increase with ordered - Ctrl_N sequential so no worries; declare hiter ictrl('ctrl'); ctrl.defineKey('Ctrl_N'); ctrl.defineData('Ctrl_N','Ctrl_PS','Ctrl_TRT_N','CTRL_YN','PatId','exposure'); ctrl.defineDone(); call missing(Ctrl_N,Ctrl_PS,Ctrl_TRT_N,CTRL_YN); end; *The Match space - Memory for Storing Final Selected Matches; if _N_= 1 then do; declare hash mat(); mat.defineKey('Trt_N','MatchNumber'); mat.defineData('Trt_N','Ctrl_N','MatchNumber'); mat.defineDone(); call missing(Trt_N,Ctrl_N,N,MatchNumber); end; *The Treated space - reading all Treated into Memory; if _N_= 1 then do; declare hash trt(hashexp:20, dataset: "Treated", ordered: 'a'); *Note: here speed increase with ordered - Ctrl_N sequential so no worries; declare hiter itrt('trt'); trt.defineKey('TRT_N'); trt.defineData('TRT_N','TRT_PS','TRT_CTRL_N','TRT_YN'); trt.defineDone(); call missing(TRT_N,TRT_PS,TRT_CTRL_N,TRT_YN); end; *The Heap space - temporary space to store potential matches or matched pairs; if _N_= 1 then do; declare hash heap(ordered: 'a'); heap.defineKey('Dist','Rand','N'); *n is to ensure uniqueness; heap.defineData('Dist','Rand','N','Trt_N','Ctrl_N','heap_trt_PS','heap_Ctrl_PS'); *rand is for ties; heap.defineDone(); call missing(Dist,Ctrl_N,N,Rand,heap_trt_PS,heap_Ctrl_PS); end; *Variable initialization; N=0; MatchNumber=&i.; MinDist=&caliper.; MatchRatio=&MatchRatio.; *Fill the heap with neighbor pairs; rc1=itrt.first(); do while(rc1=0); if TRT_YN=1 then do; %findInCTRL(prev,TRT_CTRL_N); * means the control number in control for this treatment : TRT_CTRL_N; %AddToHeap(CTRL_YN); %findInCTRL(next,TRT_CTRL_N); %AddToHeap(CTRL_YN); end; rc1=itrt.next(); end; *poll -Top-Down or sorted FILO); declare hiter iheap('heap'); rc2=iheap.first(); do while(rc2=0); *Are both trt_N and ctrl_N still available to create a pair?; IsTrtStillAvail=trt.check(key:TRT_N)=0; IsCtrlStillAvail=ctrl.check(key:CTRL_N)=0; dist1=.; dist2=.; *dist = abs(heap_trt_PS - heap_Ctrl_PS); * dist = round(abs(heap_trt_PS - heap_Ctrl_PS),&digits.); if IsTrtStillAvail=1 and IsCtrlStillAvail=1 /*and (dist < mindist)*/ then do; *caliper was aldready checked by AddToHeap; rc0=mat.add(); %DeleteFrom(itrt,trt,TRT_N); *one key only; %DeleteFrom(ictrl,ctrl,Ctrl_N); *one key only; %DeleteFromHeap; end; else if IsTrtStillAvail=1 and IsCtrlStillAvail=0 then do; *caliper was aldready checked by AddToHeap;; *REFERENCE PS FOR DISTANCE CALCULATIONS; DeletedPS=heap_Ctrl_PS; *Control not available for matching; %DeleteFromHeap; rc=trt.find(key: TRT_N); * load TRT_CTRL_N and TRT_PS into active memory; *Look left; %findInCTRL(prev,TRT_CTRL_N); if CTRL_YN=1 then do; dist1=abs(Ctrl_PS - DeletedPS); Ctrl_PS1=Ctrl_PS; Ctrl_N1=Ctrl_N; end; *Look Right; %findInCTRL(next,TRT_CTRL_N); if CTRL_YN=1 then do; dist2=abs(Ctrl_PS - DeletedPS); Ctrl_PS2=Ctrl_PS; Ctrl_N2=Ctrl_N; end; *which is better; if dist1 ne . and dist2 eq . then do; * dist=dist1; Ctrl_PS=Ctrl_PS1; Ctrl_N=Ctrl_N1; end; else if dist1 eq . and dist2 ne . then do; * dist=dist2; Ctrl_PS=Ctrl_PS2; Ctrl_N=Ctrl_N2; end; else if . < dist1 <= dist2 then do; * dist=dist1; Ctrl_PS=Ctrl_PS1; Ctrl_N=Ctrl_N1; end; else if . < dist2 < dist1 then do; * dist=dist2; Ctrl_PS=Ctrl_PS2; Ctrl_N=Ctrl_N2; end; if dist1 ne . or dist2 ne . then do; CTRL_YN=1; %AddToHeap(CTRL_YN); end; end; else if IsTrtStillAvail=0 and IsCtrlStillAvail=1 then do; *caliper was aldready checked by AddToHeap;; *REFERENCE PS FOR DISTANCE CALCULATIONS; DeletedPS=heap_trt_PS; *treated not available for matching; %DeleteFromHeap; rc=ctrl.find(key: CTRL_N); * load CTRL_TRT_N and CTRL_PS into active memory; *Look left; %findInTRT(prev,CTRL_TRT_N); if TRT_YN=1 then do; dist1=abs(Trt_PS - DeletedPS); Trt_PS1 =Trt_PS; trt_N1=trt_N; end; *Look Right; %findInTRT(next,CTRL_TRT_N); if TRT_YN=1 then do; dist2=abs(Trt_PS - DeletedPS); Trt_PS2 =Trt_PS; trt_N2=Trt_N; end; *which is better; if dist1 ne . and dist2 eq . then do; * dist=dist1; trt_PS=trt_PS1; trt_N=trt_N1; end; else if dist1 eq . and dist2 ne . then do; * dist=dist2; trt_PS=trt_PS2; trt_N=trt_N2; end; else if . < dist1 <= dist2 then do; * dist=dist1; trt_PS=trt_PS1; trt_N=trt_N1; end; else if . < dist2 < dist1 then do; * dist=dist2; trt_PS=trt_PS2; trt_N=trt_N2; end; if dist1 ne . or dist2 ne . then do; TRT_YN=1; %AddToHeap(TRT_YN); end; end; else do; %DeleteFromHeap; end; *poll; rc2 = iheap.first(); end; /* itrt.delete(); trt.delete(); iheap.delete(); heap.delete(); */ mat.output(dataset:"res1"); %if %eval(&MatchRatio. >1 and &i. <&MatchRatio.) %then %do; ctrl.output(dataset:"ctrl"); %end; run; *Reassinging PatIds and PS values; *Assign Matchvars of treated; proc sql noprint undo_policy=none; create table res2 as select base.*, match.ctrl_N, match.MatchNumber from Treated as base, res1 as match where match.Trt_N=base.Trt_N; quit; *Assign Matchvars of controls; proc sql noprint undo_policy=none; create table _outfile(drop=Trt_N Ctrl_N) as select match.*, ctrl.patid as ctrl_Patid, ctrl.Ctrl_PS, abs(Ctrl_PS-Trt_PS) as match_distance from res2 as match, Controls as ctrl where ctrl.ctrl_N=match.ctrl_N order by Patid, MatchNumber; quit; %if &i.=1 %then %do;*overwrite; data &outfile.; set _outfile; run; %end; %else %do; data &outfile.; set &outfile. _outfile; by Patid MatchNumber; run; %end; %if %eval(&MatchRatio. >1 and &i. <&MatchRatio.) %then %do; *Assign Matchvars of controls; data _null_; dsid=open("ctrl"); call symputx("NOBSCTRL",attrn(dsid,"NLOBS")); run; %if &NOBSCTRL.=0 %then %do; %let i=&MatchRatio.+1; %end; %else %do; data _raw; set _raw(where=(exposure=1)) ctrl(where=(exposure=0) rename=Ctrl_PS=&PSVar.); by &PSVar. exposure; *if b then exposure=0; keep PatId &PSVar. exposure; run; %end; %end; *Cleaning up in case of multiple runs; proc datasets library=work nowarn nolist; delete _outfile; *delete Treated Controls res1 res2; quit; %end; *if outfile is empty, no matches were found; %ISDATA(dataset=&outfile.); %IF %EVAL(&NOBS.=0) %THEN %DO; %let match_Error=1; %END; %put NOTE: ********END OF MACRO: ms_NearestNeighborMatch v1.3********; %mend;