****************************************************************************************************
*                                           PROGRAM OVERVIEW
****************************************************************************************************
*
* PROGRAM: ms_processwildcards.sas
*
* Created (mm/dd/yyyy): 07/31/2014
* Last modified: 07/31/2014
* Version: 1.1
*
*--------------------------------------------------------------------------------------------------
* PURPOSE:
*   This program will expand codes that were supplied by the user using ranges (-) and/or wildcards (*).
*   This program can only be used with diagnosis claims.
*
*  Program inputs:
*
*  Program outputs:
*
*  PARAMETERS:
*   -InFile   = The name of the sas dataset containing the codes to expand
*   -CodeVar  = The name of the sas variable containing the codes to expand
*   -CodeType = The name of the sas variable containing the code types of the codes to expand
*   -OutFile  = The name of the output sas dataset containing the expanded codes
*
*  Programming Notes:
*   (1) When using the range operator, the lengths of the codes on both sides of the hyphen (-) MUST be the same
*       and the use of * must also be the same on both sides
*   (2) Wildcards (*) are always indicative of one character. For example, 250* will always expand into a
*       four-digit code, never a five-digit code,
*       while 250** will always expand into a five-digit code. Similarly, if a requestor would like ALL
*       permutations of three-, four- and five-digit codes,
*       they will need to specify it as such in the input file (i.e., 250, 250*, 250**).
*   (3) If a digit value should be capped, multiple single entries should be specified instead of ranges.
For example, if AMI codes where the 5th digit is
*       always 0, 1 or 2 are required, 3 entries (410*0, 410*1 and 410*2) should be specified instead of a range (410*0-410*2)
*
*  Supported Examples:
*       Code="310-312"         *Expected: 310 to 312
*       Code="E123-E128"       *Expected: E123 to E128
*       Code="250*"            *Expected: 2500 to 2509
*       Code="250*-252*"       *Expected: 2500 to 2529
*       Code="410*0-410*2"     *Expected: 41000 to 41092
*       Code="410*0"           *Expected: 41000 to 41090 5th digit always 0
*       Code="25*0*"           *Expected: 25000 to 25909 4th digit always 0
*       Code="99**-9962"       *Expected: 9900 to 9962
*       Code="140-208"         *Expected: 140 to 208
*       Code="140**-208**"     *Expected: 14000 to 20899
*
*  Not Supported Examples:
*       Code="2841*-285*"
*       Code="140*-15*"
*
*
*--------------------------------------------------------------------------------------------------
* CONTACT INFO:
*  Mini-Sentinel Coordinating Center
*  info@mini-sentinel.org
*
*--------------------------------------------------------------------------------------------------
*  CHANGE LOG:
*
*   Version   Date       Initials      Comment (reference external documentation when available)
*   -------   --------   --------   ---------------------------------------------------------------
*             mm/dd/yy
*
***************************************************************************************************;

/*
   ms_processwildcards

   Expands range (-) and wildcard (*) code specifications into explicit codes.

   Parameters
     InFile   : input dataset holding the codes to expand.
     CodeVar  : character variable holding the code specification (default Code).
     CodeType : character variable holding the code type (default CodeType). Only rows
                whose code type equals "09" are expanded, every other row is passed
                through to the output unchanged.
     OutFile  : output dataset (expanded "09" rows plus pass-through rows, de-duplicated
                on all variables).

   Processing outline
     1. Build _Loops, a lookup of the text of nested DO loops (one per code digit).
     2. Split the sorted input into "09" rows (OutFile._tmp) and other rows (_combo_tmp).
     3. Compute CodeLength = number of digits to enumerate (E and V prefixes removed, a
        trailing wildcard is not counted because it is expanded in the last step).
     4. For each code length, generate every digit combination with the nested loops and
        keep the combinations that fall between the numeric lower and upper bounds.
     5. Expand a trailing wildcard into the ten digits 0 to 9.
     6. Append the pass-through rows and remove duplicates.

   Side effects
     Creates and then deletes WORK datasets named OutFile._tmp, OutFile._tmp2,
     OutFile._work, _tilt, _loops and _combo_tmp, and (re)defines the macro LoopCodes.
     NOTE(review): the sorted copy _tmp is left in WORK, as in the original program. It is
     not deleted here because a caller could legitimately pass a dataset named _tmp as InFile.
*/
%MACRO ms_processwildcards(InFile=, CodeVar=Code, CodeType=CodeType, OutFile=);

    %put =====> MACRO CALLED: ms_processwildcards v1.1;

    /* Keep work macro variables out of the caller scope. */
    %local NOBS HasExpanded;
    %let HasExpanded = 0;

    *Dataset with instruction to manage code ranges dynamically;
    data _Loops;
        format StartLoop $30. Instruction $50. EndLoop $5.;
        do i = 1 to 20;
            StartLoop   = "do Iter" || strip(put(i,best.)) || " = Min" || strip(put(i,best.)) || " to Max" || strip(put(i,best.)) || ";";
            Instruction = "strip(put(Iter" || strip(put(i,best.)) || ",best.))";
            EndLoop     = "end;";
            output;
        end;
        keep StartLoop Instruction EndLoop;
    run;

    *Mantis 1174;
    proc sort data=&InFile. out=_tmp;
        by _ALL_;
    run;

    /* Only code type "09" is expanded. Everything else is set aside untouched. */
    data &OutFile._tmp
         _combo_tmp;
        set _tmp;
        if &CodeType. not in("09") then output _combo_tmp;
        else output &OutFile._tmp;
    run;

    * Determine codes length;
    data &OutFile._tmp;
        set &OutFile._tmp;

        *Ranges of codes;
        if index(&CodeVar.,'-') then do;
            lBound = strip(scan(compress(&CodeVar.,'EV'),1,'-'));
            uBound = strip(scan(compress(&CodeVar.,'EV'),2,'-'));
            CodeLength = max(length(lBound), length(uBound));
            *Last digit is a wildcard;
            if substr(uBound,length(uBound),1) eq "*" then CodeLength = CodeLength-1;
        end;
        else do;
            CodeLength = length(compress(&CodeVar.,'EV'));
            *Last digit is a wildcard;
            if substr(&CodeVar.,length(&CodeVar.),1) eq "*" then CodeLength = CodeLength-1;
        end;
    run;

    *Mantis #1153. Must have at least one code to process...;
    proc sql noprint;
        select count(&CodeVar.) into: NOBS
        from &OutFile._tmp;
    quit;
    %put &NOBS.;

    %IF %EVAL(&NOBS.>=1) %THEN %DO;

        %macro LoopCodes();

            /* The loop index and the generated code fragments must not overwrite
               same-named macro variables owned by a calling program. */
            %local i NbLoops StartLoops Instructions EndLoops nobs;

            proc datasets nolist nowarn library=work;
                delete &OutFile._work;
            quit;

            proc sql noprint;
                select max(CodeLength) into: NbLoops
                from &OutFile._tmp;
            quit;
            %put &NbLoops.;

            %do i = 1 %to &NbLoops.;

                *Get dynamic instructions to execute for code ranges;
                proc sql noprint;
                    select StartLoop into :StartLoops separated by ' '
                    from _Loops(obs=&i.);
                quit;
                proc sql noprint;
                    select Instruction into :Instructions separated by '||'
                    from _Loops(obs=&i.);
                quit;
                proc sql noprint;
                    select EndLoop into :EndLoops separated by ' '
                    from _Loops(obs=&i.);
                quit;

                * Output Codes;
                data &OutFile._tmp2;
                    set &OutFile._tmp;
                    where CodeLength=&i.;

                    *Ranges of codes;
                    if index(&CodeVar.,'-') then do;
                        lBound = strip(scan(compress(&CodeVar.,'EV'),1,'-'));
                        uBound = strip(scan(compress(&CodeVar.,'EV'),2,'-'));
                    end;
                    *No ranges of codes;
                    else do;
                        lBound = strip(compress(&CodeVar.,'EV'));
                        uBound = lBound;
                    end;

                    prefixcode = substr(strip(scan(&CodeVar.,1,'-')),1,1);

                    /* A trailing wildcard is carried as a suffix and expanded later. */
                    suffix = "";
                    if substr(uBound,length(uBound),1) eq "*" then suffix = "*";

                    /* Numeric bounds: wildcards become 0 in the lower and 9 in the upper bound. */
                    MinVal = input(substr(translate(lBound,'0','*'),1,CodeLength),best.);
                    MaxVal = input(substr(translate(uBound,'9','*'),1,CodeLength),best.);

                    array Min{20} Min1-Min20;
                    array Max{20} Max1-Max20;

                    do j = 1 to CodeLength;
                        *No ranges of codes;
                        if lBound = uBound then do;
                            /* Fixed digits iterate over a single value, wildcards over 0 to 9. */
                            if j <= length(lBound) and substr(lBound,j,1) ne "*" then Min{j} = input(substr(lBound,j,1),best.);
                            else Min{j} = 0;
                            if j <= length(uBound) and substr(uBound,j,1) ne "*" then Max{j} = input(substr(uBound,j,1),best.);
                            else Max{j} = 9;
                        end;
                        *Ranges of codes;
                        else do;
                            /* Enumerate every digit, the MinVal and MaxVal test below does the filtering. */
                            Min{j} = 0;
                            Max{j} = 9;
                        end;
                    end;

                    *Dynamic processing according to Code Length;
                    &StartLoops.;
                        if prefixcode in ('E' 'V') then &CodeVar. = strip(prefixcode) || &Instructions. || strip(suffix);
                        else &CodeVar. = &Instructions. || strip(suffix);

                        if input(&Instructions.,best.)>=MinVal and input(&Instructions.,best.)<=MaxVal then output;
                    &EndLoops.;

                    drop j Min: Max: Iter: CodeLength MinVal MaxVal;
                run;

                /* Append only when this code length produced at least one row. */
                proc contents data=&OutFile._tmp2 noprint out=_tilt;
                run;
                data _null_;
                    set _tilt;
                    call symput('nobs',trim(left(put(nobs,15.))));
                run;
                %put &nobs.;

                %if %eval(&nobs.>0) %then %do;
                    proc append base = &OutFile._work
                                data = &OutFile._tmp2 force;
                    run;
                %end;
            %end;
        %mend LoopCodes;
        %LoopCodes();

        /* OutFile._work only exists when at least one expanded row was appended. */
        %IF %SYSFUNC(exist(&OutFile._work)) %THEN %DO;

            *Process wildcards;
            data &OutFile.(rename=(CodeNew=&CodeVar.));
                set &OutFile._work;

                Exact = index(&CodeVar.,'*')=0;
                &CodeVar. = compress(&CodeVar.,'*');
                CodeNew = &CodeVar.;

                /* A trailing wildcard stands for exactly one more digit. */
                if Exact=0 then do;
                    do i = 0 to 9;
                        CodeNew = strip(&CodeVar.) || strip(put(i,1.));
                        output;
                    end;
                end;
                else output;

                drop &CodeVar. Exact i lBound uBound PrefixCode Suffix;
            run;

            %let HasExpanded = 1;
        %END;
    %END;

    /* Combine expanded and pass-through rows. When nothing was expanded the output is
       built from the pass-through rows alone, so the step neither fails on a missing
       OutFile nor picks up rows from a stale OutFile left by an earlier run. */
    %IF &HasExpanded. %THEN %DO;
        data &OutFile.;
            set &OutFile.
                _combo_tmp;
        run;
    %END;
    %ELSE %DO;
        data &OutFile.;
            set _combo_tmp;
        run;
    %END;

    proc sort nodupkey data=&OutFile.;
        by _All_;
    run;

    proc datasets library=work nolist nowarn;
        delete &OutFile._: _tilt _loops _combo_tmp;
    quit;

    %put NOTE: ********END OF MACRO: ms_processwildcards v1.1********;

%MEND ms_processwildcards;