****************************************************************************************************
*                                           PROGRAM OVERVIEW
****************************************************************************************************
*
* PROGRAM: ms_createpov1.sas  
*
* Created (mm/dd/yyyy): 02/02/2017
* Last modified: 10/03/2017
* Version: 1.3
*
*--------------------------------------------------------------------------------------------------
* PURPOSE:
*	This program will find all potential index dates and create the POV1 dataset 
*
*  Program inputs:                                                                                   
*	-Dataset with claims defined as being potential index dates   
* 
*  Program outputs:                                                                                                                                       
* 	-Dataset with all potential index dates (POV1)
* 
* PARAMETERS:
*  	-CohortId = Identifier for the cohort being processed (primary, secondary)
*
*  Programming Notes:                                                                                
*                                                                           
*
*
*--------------------------------------------------------------------------------------------------
* CONTACT INFO: 
*  Mini-Sentinel Coordinating Center
*  info@mini-sentinel.org
*
*--------------------------------------------------------------------------------------------------
*  CHANGE LOG: 
*
*   Version   Date       Initials	   Comment (reference external documentation when available)
*   -------   --------   --------   ---------------------------------------------------------------
*   1.1		  05/04/2017	AP		1. Extracted hispanic variable from &demtable
*									2. Converted GEOG to a group level parameter
*
*	1.2		  07/11/2017	AP		Fixed typo in _groupindex&cohortid.
*
*   1.3		  10/03/2017	AP		1. Added HHS and CB region geographic stratifications
*									2. Commented out code to calculate age at each dispensing (QRP-447)
*									3. Recode A/U sex values (QRP-453)
*
***************************************************************************************************;

%macro ms_createpov1(CohortId=);

%put =====> MACRO CALLED: ms_createpov1 v1.3;

	%let paramsuffix=;
	%if %str("&CohortId.") ne %str("") %then %do;
		%let paramsuffix=2;
	%end;

    *Add Cohort defining parameters to potential index dates;
    data _GroupIndex&CohortId.;
    set _GroupIndex&CohortId.;
    if _N_=1 then set _CohortDef;
    run;	

	%if %eval(&&itt&paramsuffix. ne 0) %then %do; *ITT;
		data _GroupIndexITT&CohortId.;
	   		set _GroupIndex&CohortId.;        
        run;
    %end;

	*Find list of all potential index dates meeting washout;		 
	%if %eval(&type. eq 1) %then %do;
		%let NoFindGapDuplicate=N;
	%end;

	*Apply Washout considering exposure;
	%ms_findgap(InFile=_GroupIndex&CohortId.,
	            OutFile=_POV1&CohortId., 
	            sort=PatId Adate descending expiredt,
	            NoDup=&NoFindGapDuplicate., 
	            gapby=PatId, 
	            gap=WashPer&paramsuffix.,
				dupby=patid adate);		

/**********/
/* Demogs */
/**********/

	proc sort nodupkey data=_POV1&CohortId. out=_UniquePts&CohortId.(keep=PatId);
	by PatId;
	run;

	*Age and demogs;		
    proc sql noprint;        
		create table _DemoCount&CohortId. as
        select Dem.PatId, 
			   Count(Dem.PatId) as numdemo
        from indata.&demtable as DEM 
             inner join 
             _UniquePts&CohortId. as pts
        on DEM.patid = pts.patid
		group by Dem.PatId;

		create table _Demo&CohortId. as
        select distinct pts.*, 
                        BIRTH_DATE, 
                      	case when sex = 'F' then 'F'
							 when sex = 'M' then 'M'
							 else 'O'						
						end as sex,
                        RACE,
						HISPANIC,
                        (ADate-birth_date)/365.25 as Age ,
						zip,
						zip_date
        from indata.&demtable as DEM 
             inner join 
             _POV1&CohortId. as pts			 
        on DEM.patid = pts.patid
			 inner join _DemoCount&CohortId. (where=(numdemo=1)) as cnt
		on cnt.patid = pts.patid;

		create table _POV1&CohortId.
		as select * 
		from _demo&CohortId.;        
    quit;

	*Process Zip Codes;
	%IF %STR(&GEOG.) ne %STR() & %LENGTH(&GEOG.) > 0. %then %do;
    %ISDATA(dataset=infolder.&ZIPFILE.);	
	    %IF %EVAL(&NOBS.>0) %THEN %DO;
			proc sql noprint;
				create table _zip&CohortId. as
				select pts.*,
					   lkup.statecode,
					   lkup.hhs_region,
					   lkup.cb_region
				from _POV1&CohortId. as pts
				left join infolder.&zipfile. as lkup
				on pts.zip = lkup.zip
				order by pts.patid, adate;
			quit;

			data _POV1&CohortId.(drop=statecode hhs_region cb_region);
			length zip3 state hhs_reg cb_reg $7;
			set _zip&CohortId.;
				if missing(zip) then do;
					zip3 = 'Missing';
					state = 'Missing';
					hhs_reg = 'Missing';
					cb_reg = 'Missing';
				end;
				else if missing(statecode) then do;
					zip3 = 'Invalid';
					state = 'Invalid';
					hhs_reg = 'Invalid';
					cb_reg = 'Invalid';
				end;
				else do;
					zip3 = substr(strip(zip),1,3);
					state = statecode;
					cb_reg = cb_region;
					hhs_reg = hhs_region;
					if statecode ne '' and missing(hhs_region) then do;
						hhs_reg = 'Other';
					end;
					if statecode ne '' and missing(cb_reg) then do;
						cb_reg = 'Other';
					end;
				end;
			if missing(zip_date) or adate < zip_date then Zip_uncertain = 'Y';
			else zip_uncertain = 'N';
			run;
		%END;
		%else %do;
			%put WARNING: Geographic stratifications requested but ZIPFILE not specified.;
		%end;
	%END;

    *CALCULATE AGE and AGE STRATA;
    %ms_agestrat(infile=_POV1&CohortId.,
                 outfile=_POV1&CohortId.,
                 startdt=birth_date,
                 enddt=ADate,
                 timestrat=&agestrat.);


	/*
	This code extracts demographic criteria (age) at each dispensing. 
	Not needed for Type 5 because age is calculated at index date

	%if %eval(&type.=5 & &cohortid.=2) %then %do;
		proc sql;
			create table _Demo_util&CohortId. as
	        select distinct pts.*, 
	                        BIRTH_DATE, 
	                        case when sex = 'F' then 'F'
							 when sex = 'M' then 'M'
							 else 'O'						
							end as sex,
	                        RACE,
							HISPANIC,
	                        (ADate-birth_date)/365.25 as Age ,
							zip,
							zip_date
	        from indata.&demtable as DEM 
	             inner join 
	             _groupindex&CohortId. as pts			 
	        on DEM.patid = pts.patid
				 inner join _DemoCount&CohortId. (where=(numdemo=1)) as cnt
			on cnt.patid = pts.patid;

			create table _groupindex&CohortId.
			as select * 
			from _Demo_util&CohortId.;    
		quit;

   	 	%ms_agestrat(infile=_groupindex&CohortId.,
                 outfile=_groupindex&CohortId.,
                 startdt=birth_date,
                 enddt=ADate,
                 timestrat=&agestrat.);

		data _groupindex&CohortId.(drop=Age AgeGroup MinAgeDate MaxAgeDate)
	         _AgeGroups_util(keep=PatId Adate Age AgeGroup MinAgeDate MaxAgeDate birth_date sex Race hispanic rename=Adate=IndexDt);
	    set _POV1&CohortId.;
			%IF %STR("&CohortId.") = %STR("") %THEN %DO;
				if strip(AgeGroup) ne "";
			%END;
		  	%IF %STR(&GEOG.) = %STR() or %LENGTH(&GEOG.) = 0. %then %do;
				length zip3 state cb_reg hhs_reg $7 Zip_uncertain $1;
				zip3 = '';
				state = '';
				cb_reg = '';
				hhs_reg = '';
				Zip_uncertain = '';
			%end; 
		run;

		proc sort nodupkey data=_AgeGroups_util;
	    by PatId IndexDt;
	    run;
	%end;
	*/


	data _POV1&CohortId.(drop=Age AgeGroup MinAgeDate MaxAgeDate)
         _AgeGroups&CohortId.(keep=PatId Adate Age AgeGroup MinAgeDate MaxAgeDate birth_date sex Race hispanic zip: state hhs_reg cb_reg rename=Adate=IndexDt);
    set _POV1&CohortId.;
	%IF %STR("&CohortId.") = %STR("") %THEN %DO;
		if strip(AgeGroup) ne "";
	%END;
  	%IF %STR(&GEOG.) = %STR() or %LENGTH(&GEOG.) = 0. %then %do;
		length zip3 state cb_reg hhs_reg $7 Zip_uncertain $1;
		zip3 = '';
		state = '';
		cb_reg = '';
		hhs_reg = '';
		Zip_uncertain = '';
	%end; 
	run;

	proc sort nodupkey data=_AgeGroups&CohortId.;
    by PatId IndexDt;
    run;

	*Add information relative to death and death censoring for later use;
	%if %str("&CohortId.") = %str("") %then %do;
		proc sql noprint undo_policy=none;
		    create table _POV1&CohortId. as
		    select b.*,
		           e.EnrEpisodeCensoredAtDeath,
				   e.DeathDt,
				   e.enr_end_equal_death
		    from _POV1&CohortId. as b left join
		          ENR_&GROUP. as e
		    on b.PatId = e.PatId and 
		       b.Enr_Start = e.Enr_Start and 
		       b.Enr_End = e.Enr_End
		    order by b.PatId,
		             b.ADate;
	    quit;

		proc sql noprint undo_policy=none;
		    create table _groupindex&CohortId. as
		    select b.*,
		           e.EnrEpisodeCensoredAtDeath,
				   e.DeathDt
		    from _groupindex&CohortId. as b left join
		          ENR_&GROUP. as e
		    on b.PatId = e.PatId and
			   b.Enr_Start = e.Enr_Start and 
		       b.Enr_End = e.Enr_End
		    order by b.PatId,
		             b.ADate;
	    quit;
	%end;

	%IF %EVAL(&type=1 | &type=2 | &type.=5) %THEN %DO;
	    *Add looking period related variables to POV1;
        proc sql noprint undo_policy=none;
        create table _POV1&CohortId. as
        select dat.*,
               look.LookEnd as IndexDtLookEndDt,
               Look.Look as IndexLook
        from _POV1&CohortId. as dat,
             looks as look
        where look.LookStart<=dat.Adate<=look.LookEnd
        order by Patid,Adate;
        quit;
	%END;

%put NOTE: ********END OF MACRO: ms_createpov1 v1.3********;

%mend ms_createpov1;