****************************************************************************************************
*                                           PROGRAM OVERVIEW
****************************************************************************************************
*
* PROGRAM: ms_mfu.sas  
*
* Created (mm/dd/yyyy): 05/15/2018
* Last modified:
* Version: 1.0
*
*--------------------------------------------------------------------------------------------------
* PURPOSE:
*  This macro performs a Most Frequent Use (MFU) analysis                                         
*   
*  Program inputs:   
*
*                                     
*  Program outputs:  
*
*                                                                                                  
*  PARAMETERS:                                                                       
*            
*  Programming Notes:                                                                                
*                                                                           
*
*--------------------------------------------------------------------------------------------------
* CONTACT INFO: 
*  Sentinel Coordinating Center
*  info@sentinelsystem.org
*
*--------------------------------------------------------------------------------------------------
*  CHANGE LOG: 
*
*   Version   Date       Initials      Comment (reference external documentation when available)
*   -------   --------   --------   ---------------------------------------------------------------
*
***************************************************************************************************;

%macro ms_mfu(where=, t3out=);
		
	%put =====> MACRO CALLED: ms_mfu v1.0;

	/*Determine number of analyses*/
	proc sql noprint;
		select max(ANALYSISNUM) into: mfucnt
		from infolder.&mfufile;
	quit;

	/*loop through analyses*/
	%do a = 1 %to &mfucnt.;

		/*number of CodeCat/CodeType within analysis*/
		data _currentMFU;
			set infolder.&mfufile.(where=(ANALYSISNUM=&a.));
			if _n_ = 1 then do;
					/*These parameters are set for the analysis and not unique to CODECAT/CODETYPE*/
					call symputx('TOPXX',TOPXX);
					call symputx('MFUFROM',MFUFROM);
					call symputx('MFUTO',MFUTO);
					call symputx('COUNTMETHOD',upcase(COUNTMETHOD));
			end;
		run;
		%put &TOPXX. &MFUFROM. &MFUTO.;

		/*CODECAT list*/
		proc sql noprint;
			select distinct codecat into: codecatlist separated by ' '
			from _currentMFU;
		quit;
		%put &codecatlist.;

		*DX/PX codes;
		%if %index(&codecatlist., DX)>0 | %index(&codecatlist., PX)>0 %then %do;
			data _mfudxpx;
				set _currentMFU;
				where codecat in('DX', 'PX');
			run;
			
 			*Break out CaresettingPrincipal into Caresetting and Principal;
    		%ms_caresettingprincipal(InFile=_mfudxpx, 
         		                     Var=CareSettingPrincipal, 
                   			         OutFile=_mfudxpx);

			*Select all codetypes;
			%let dx_codetypelist = 'XX'; /*Dummy code*/
			%let px_codetypelist = 'XX';
				proc sql noprint;
					select distinct "'"||(compress(codetype))||"'" into: dx_codetypelist separated by "," 
					from _mfudxpx
					where codecat = 'DX';
					select distinct "'"||(compress(codetype))||"'" into: px_codetypelist separated by "," 
					from _mfudxpx
					where codecat = 'PX';
				quit;
				%put &dx_codetypelist &px_codetypelist.;

			*Stack Claims datasets;
			data claims_dxpx(where=(&where.));
                length code $&codelength.;
				set DPLocal.&RUNID._Claims_icddx09(rename=dx_codetype = codetype where=(codetype in (&dx_codetypelist.)) in=a)
					DPLocal.&RUNID._Claims_icddx10(rename=dx_codetype = codetype where=(codetype in (&dx_codetypelist.)) in=b)
					DPLocal.&RUNID._Claims_dxOT(rename=dx_codetype = codetype where=(codetype in (&dx_codetypelist.)) in=c)

					DPLocal.&RUNID._Claims_icdpx09(rename=px_codetype = codetype where=(codetype in (&px_codetypelist.)) in=d)
					DPLocal.&RUNID._Claims_icdpx10(rename=px_codetype = codetype where=(codetype in (&px_codetypelist.)) in=e)
					DPLocal.&RUNID._Claims_pxOT(rename=px_codetype = codetype where=(codetype in (&px_codetypelist.)) in=f)
					DPLocal.&RUNID._Claims_cpt(rename=px_codetype = codetype where=(codetype in (&px_codetypelist.)) in=g)
					DPLocal.&RUNID._Claims_hcpcs(rename=px_codetype = codetype where=(codetype in (&px_codetypelist.)) in=h);

					if a or b or c then codecat = 'DX';
					if d or e or f or g or h then codecat = 'PX';	
			run;
			
			*Envelope will not be run for MFU analysis;

			*Extract records;
			proc sql noprint;
				create table _dxpxcodes as
				select claim.patid,
					   compress(claim.code,' .') as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
					   claim.codetype format=$3. length=3,
					   claim.codecat,
					   claim.group
				from claims_dxpx as claim, _mfudxpx as mfu
				where claim.codetype = mfu.codetype and   
					((mfu.EncType = claim.EncType and mfu.Pdx = claim.Pdx) or  /*No Wilcards*/
		           	(mfu.EncType = "**"          and mfu.Pdx = claim.Pdx) or  /*Any EncType*/
		          	(mfu.EncType = claim.EncType and mfu.Pdx = "*") or        /*Any Pdx*/
		           	(mfu.EncType = "**"          and mfu.Pdx = "*"))
					and %MS_PeriodsOverlap(period1=IndexDt+coalesce(&MFUFROM.,-99999) IndexDt+coalesce(&MFUTO.,99999),                                             
                                           period2=ADate);
        	quit;

			proc append data=_dxpxcodes base=_allmfucodes force; run;
		%end; *DX/PX codes;

		*RX codes;
		%if %index(&codecatlist., RX)>0 %then %do;
			data _mfurx;
				set _currentMFU(where=(codecat='RX'));
			run;

			*Determine whether to extract 09, 11, or both;
			proc sql noprint;
				select distinct codetype into: rx_codetypelist separated by ' ' 
				from _mfurx;
			quit;
			%put &rx_codetypelist.;

			*Extract 9-digit NDC codes;
			%if %index(&rx_codetypelist, 09) > 0 %then %do;
				proc sql noprint;
					create table _rxcodes09 as
					select claim.patid,
						   compress(substr(claim.code, 1,9)) as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
						   '09' as codetype format=$3. length=3,
						   'RX' as codecat,
						   claim.group
					from DPLocal.&RUNID._Claims_rx(where=(&where.)) as claim,
						 _mfurx as mfu
					where %MS_PeriodsOverlap(period1=IndexDt+coalesce(&MFUFROM.,-99999) IndexDt+coalesce(&MFUTO.,99999),                                             
	                                         period2=ADate);
	        	quit;
				proc append data=_rxcodes09 base=_allmfucodes force; run;
			%end;

			*Extract 11-digit NDC codes;
			%if %index(&rx_codetypelist, 11) > 0 %then %do;
				proc sql noprint;
					create table _rxcodes11 as
					select claim.patid,
						   compress(claim.code) as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
						   '11' as codetype format=$3. length=3,
						   'RX' as codecat,
						   claim.group
					from DPLocal.&RUNID._Claims_rx(where=(&where.)) as claim,
						 _mfurx as mfu
					where %MS_PeriodsOverlap(period1=IndexDt+coalesce(&MFUFROM.,-99999) IndexDt+coalesce(&MFUTO.,99999),                                             
                                            period2=ADate);
        		quit;
				proc append data=_rxcodes11 base=_allmfucodes force; run;
				%end; 
			%end; *RX codes;
			  		
			*Lab codes;
			%if %index(&codecatlist., LB)>0 %then %do;
				data _mfulb_01_02
					 _mfulb;
					set _currentMFU(where=(codecat='LB'));

					if substr(codetype,1,2) in ('01','02') then output _mfulb_01_02;
						else output _mfulb; 
				run;

				%isdata(dataset=_mfulb_01_02);
				%if %eval(&nobs.>0) %then %do;

					/*Select codetypes*/
					proc sql noprint;
						select distinct "'"||(compress(codetype))||"'" into: lb_codetypelist separated by ' ' 
						from _mfulb_01_02;
					quit;
					%put &lb_codetypelist;

					%let result_type_n = 'XXX';
					%let result_type_c = 'XXX';

					%macro assign_lab_resulttype(cat=);
						*Determine whether to restrict to N, C, or both;
						%if %index(&lb_codetypelist., &cat.N) > 0 %then %do;
							%let result_type_n = 'N';
						%end;
						%if %index(&lb_codetypelist., &cat.C) > 0 %then %do;
							%let result_type_c = 'C';
						%end;
					%mend;

					/*Count SOC-defined lab codes (codetype = 01N, 01C)*/
					%if %index(&lb_codetypelist., 01) > 0 %then %do;

						%assign_lab_resulttype(cat=01);

						*Code in infolder.&LABSCODEMAP. should be unique;
	        			proc sort nodupkey data=infolder.&LABSCODEMAP. out=_Map;
	        			by Code;
	        			run;

	        			*Extract Lab Records;
	        		    proc sql noprint;
	        			create table _mfulab01 as
	        			select claim.patid,
							   claim.group,
	        			       compress(_map.Code,' .') as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
	                           cat('01',claim.result_type) as codetype format=$3. length=3,
							   'LB' as codecat
	        			from _Map as look,
	        			     DPLocal.&RUNID._Claims_lab(where=(&where.)) as claim
	        			where upcase(strip(look.ms_test_name))  		= upcase(strip(claim.ms_test_name)) and 
	                          upcase(strip(look.ms_test_sub_category)) 	= upcase(strip(claim.ms_test_sub_category)) and 
	                          upcase(strip(look.specimen_source))      	= upcase(strip(claim.specimen_source)) and 
	        			      upcase(strip(look.ms_result_unit))      	= upcase(strip(claim.ms_result_unit)) and
							  upcase(strip(look.result_type))			= upcase(strip(claim.result_type)) and
		    				  upcase(strip(look.fast_ind))				= upcase(strip(claim.fast_ind))and
	        			      upcase(strip(look.pt_loc))				= upcase(strip(claim.pt_loc)) and
							  upcase(strip(claim.result_type))			in (&result_type_n, &result_type_c) and 
							  %MS_PeriodsOverlap(period1=claim.IndexDt+coalesce(&MFUFROM.,-99999) claim.IndexDt+coalesce(&MFUTO.,99999),                                             
	                                            period2=claim.ADate);
	        			quit;

						proc append data=_mfulab01 base=_allmfucodes force; run;
					%end;

					/*Count LOINC lab codes (codetype = 02N, 02 C)*/
					%if %index(&lb_codetypelist., 02) > 0 %then %do;

						%assign_lab_resulttype(cat=02);

	        			*Extract Lab Records;
	        		    proc sql noprint;
	        			create table _mfulab02 as
	        			select claim.patid,
							   claim.group,
	        			       compress(LOINC) as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
	                           cat('02',claim.result_type) as codetype format=$3. length=3,
							   'LB' as codecat
	        			from  DPLocal.&RUNID._Claims_lab(where=(&where.)) as claim
	        			where upcase(strip(claim.result_type)) in (&result_type_n, &result_type_c) and 
							  LOINC is not missing and 
							  %MS_PeriodsOverlap(period1=claim.IndexDt+coalesce(&MFUFROM.,-99999) claim.IndexDt+coalesce(&MFUTO.,99999),                                             
	                                            period2=claim.ADate);
	        			quit;

						proc append data=_mfulab02 base=_allmfucodes force; run;
					%end;
				%end;

				%isdata(dataset=_mfulb);
				%if %eval(&nobs.>0) %then %do;
					/*Count PX lab codes)*/
						data _lkup03;
							set _mfulb(rename=codetype=codetype1);
								codetype = substr(codetype1,1,2);
								result_nc = substr(codetype1,3,1);
						run;

	        			*Extract Lab Records;
	        		    proc sql noprint;
	        			create table _mfulab03 as
	        			select claim.patid,
							   claim.group,
	        			       compress(px) as code format=%sysfunc(cats($,&codelength.,.)) length=&codelength.,
	                           lkup.codetype format=$3. length=3,
							   'LB' as codecat
	        			from  DPLocal.&RUNID._Claims_lab(where=(&where.)) as claim,
							  _lkup03 as lkup
	        			where upcase(strip(lkup.codetype)) = upcase(strip(claim.px_codetype)) and
							  PX is not missing and 
							  upcase(strip(lkup.result_nc)) = upcase(strip(claim.result_type)) and 
							  %MS_PeriodsOverlap(period1=claim.IndexDt+coalesce(&MFUFROM.,-99999) claim.IndexDt+coalesce(&MFUTO.,99999),                                             
	                                            period2=claim.ADate);
	        			quit;

						proc append data=_mfulab03 base=_allmfucodes force; run;
				%end;
			%end;

		*Calculate Top XX and output aggregate dataset;
		%let countmethodvar = codecount;
		%if "&countmethod" = "P" %then %do;
			%let countmethodvar = patcount;
		%end;

		proc sql noprint;
			create table _aggregate_counts as
			select group,
				   code,
				   codecat, 
				   codetype, 
				   count(*) as codecount,
				   count(distinct patid) as patcount
			from _allmfucodes
			group by group, code, codecat, codetype
			order by group, &countmethodvar. desc;
		quit;

		data _codecounts;
			set _aggregate_counts;
			by group descending &countmethodvar.;
			analysisnum =&a.;
			if first.group then rank = 0;
			rank = rank +1;
			retain rank;

			if rank <= &topxx. then output;
		run;

		%if %eval(&a.=1) %then %do;
			data msoc.&runid._mfu&t3out.;
				set _codecounts;
			run;
		%end;
		%else %do;
			proc append data=_codecounts base=msoc.&runid._mfu&t3out. force; run;
		%end;

		proc datasets lib=work nowarn noprint;
			delete _aggregate_counts _codecounts _allmfucodes _rxxcodes11 _rxxcodes09 _dxpxcodes _mfudxpx _mfurx _mfulab _currentMFU;
		quit;
	%end; *Analysisnum loop;

	%put NOTE: ******** END OF MACRO: ms_mfu  v1.0 ********;

%mend ms_mfu;