**********************************;
*001.Cohort definition - part1    ;
**********************************;


 options ps = 54 ls = 72 obs = max pageno = 1;

 libname out       '/PHShome/rl037/rishi_medicare_rpdr/may2022';

 libname in '/storage2/cdm_data/MS_DUA52264_CMS-MCR-C4IHDR/Ver2';

 libname rpdr    '/storage/storage2/cdm_data/MS_RPDR_DATAMART_C4IHDR/RPDR_C4IHDR_Mart_rcvd_2020-08-29';
 libname x_mcare '/storage/storage2/cdm_data/MS_DUA52264_CMS-MCR-C4IHDR/Ver2/restricted_xwalk/';
 libname new XLSX "/storage1/PHShome/rl037/rishi_medicare_rpdr/appendix_ci2_programmer.xlsx";

**********************************;
* step1- exclude control patients ;
* and DM drug users               ;
**********************************;

data partners_pts;
   set in.demographic2007_2019(keep = patid flag_controls);
   if Flag_controls = 'Partners';
keep patid;
run;


 %macro temp;

 %do year = 2012 %to 2019;

 data rx&year;
    set in.dispensing&year(keep = patid generic Dispense_Date);
 generic = left(lowcase(generic));

 if substr(generic,1,  11 )  =  'sitagliptin'       or
     substr(generic,1, 11 )  =  'saxagliptin'       or
     substr(generic,1, 11 )  =  'linagliptin'       or
     substr(generic,1, 10 )  =  'alogliptin'        then class = 'DPP4 ';

 if substr(generic,1,  13 )  =  'empagliflozin'       or
     substr(generic,1, 13 )  =  'canagliflozin'       or
     substr(generic,1, 13 )  =  'dapagliflozin'       or
     substr(generic,1, 13 )  =  'ertugliflozin'       then class ='SGLT2';
 if class ^= ' ';
 keep patid Dispense_Date generic class;
 run;

 %end;

 data dm_meds;
    set  %do year = 2012 %to 2019;
                  rx&year
         %end;
        ;
 run;

 %mend;
 %temp;

 proc sort nodup;
   by patid dispense_date generic;
 run;

data index_meds;
     merge dm_meds(in = in1)
           partners_pts(in = in2);
     by patid;
     if in1 and in2;
run;

data index(rename=(dispense_date = indexdt));
   set index_meds;
if '01Jan2013'd <= dispense_date <= '31dec2019'd;
keep patid dispense_date;
run;

 proc sort nodupkey data = index;
      by patid indexdt;
 run;


 proc sort nodupkey data = index  out = ids1(keep = patid);
      by patid;
 run;

 *************************************;
 * step3 - eligible in 180d prior     ;
 *************************************;

proc sql;
     create table elig(keep = patid indexdt enr_end) as
     select * from  in.enrollment_abd_2007_2019_rollup(keep = patid enr_start enr_end) elig,
              index(keep = patid indexdt)
     where index.patid    = elig.patid and
           enr_start <= (indexdt - 181)   and
           enr_end > indexdt;
  quit;


proc sort nodupkey data = elig;
   by patid indexdt;
run;

 proc sort nodupkey data = elig out = ids2(keep = patid);
    by patid;
 run;


proc sql;
   create table death(keep = patid indexdt death_date rename=(death_date = deathdt)) as
     select * from in.death2007_2019(keep = patid death_date) dem,
              elig(keep = patid indexdt) ids
     where dem.patid    = ids.patid ;
quit;

proc sort data = death;
  by patid indexdt deathdt;
run;

proc sort nodupkey data = death;
  by patid indexdt;
run;

 data ids_elig;
    merge elig(in = in1)
          death;
    by patid indexdt;
    if in1;
 if deathdt = . or deathdt > indexdt;
 keep patid indexdt deathdt enr_end;
 run;


proc sort nodupkey data = ids_elig out = ids2(keep = patid indexdt);
   by patid;
run;

 ***********************************;
 * step 3- include type2 dm patients;
 ***********************************;

 libname new XLSX "/storage1/PHShome/rl037/rishi_medicare_rpdr/march2022/ci2_codebook_updated.xlsx";


 data dm_dx(rename=(code_type = type));
    set new.cohort_creation;
 if variable_name = 'type 2 diabetes';
 dx = compress(code, '.');
 keep variable_name dx code_type;
 run;

 proc sort nodupkey;
   by dx;
 run;

 data dm09
      dm10;
    set dm_dx;
if type ='09' then output dm09;
if type ='10' then output dm10;
keep dx;
run;

%macro temp;

%do year = 2012 %to 2019;

proc sql;
   create table dx&year(keep = patid indexdt dx admit_date enc_type dx_type) as
   select * from  in.diagnosis&year(keep = patid dx admit_date enc_type dx_type) dx,
            ids_elig(keep = patid indexdt) ids
   where ids.patid    = dx.patid and
        (indexdt - 180) <= admit_date <= indexdt;
quit;

%end;


data dx;
   set  dx2019 %do year = 2012 %to 2018;
                 dx&year
               %end;
       ;
run;

%mend;
%temp;

 data dx09
      dx10;
   set dx;
if dx_type = '09' then output dx09;
if dx_type = '10' then output dx10;
keep patid dx admit_date indexdt;
run;

proc sql;
      create table dx9_dm(keep= patid admit_date indexdt) as
      select * from  dm09(keep = dx) temp,
               dx09(keep = patid dx admit_date indexdt) claims
      where temp.dx    = claims.dx ;
   quit;


 proc sort nodup;
   by patid ;
run;


proc sql;
      create table dx10_dm(keep= patid admit_date indexdt) as
      select * from  dm10(keep = dx) temp,
               dx10(keep = patid dx admit_date indexdt) claims
      where temp.dx    = claims.dx ;
   quit;


 proc sort nodup;
   by patid ;
 run;

 data dm_dx_prior;
      set dx10_dm
          dx9_dm;
keep patid indexdt;
run;

proc sort nodupkey data = dm_dx_prior;
    by patid indexdt;
run;

 data ids_dm_dx;
    merge ids_elig(in = in1)
          dm_dx_prior(in = in2);
    by patid indexdt;
    if in1 and in2;
 keep patid indexdt;
 run;


proc sort nodupkey data = ids_dm_dx out = ids3(keep = patid indexdt);
   by patid;
run;

 ***********************************;
 * step 4- exclude prior users      ;
 ***********************************;

proc sql;
      create table prior_users(keep= patid indexdt) as
      select * from  index_meds(keep=patid dispense_date) rx,
               ids_dm_dx(keep = patid indexdt) ids
      where ids.patid    = rx.patid and
      (indexdt - 180) <= dispense_date < indexdt;
quit;

 proc sort nodupkey data = prior_users;
     by patid indexdt;
run;


 data ids_first_users;
    merge ids_dm_dx(in = in1)
          prior_users(in = in2);
    by patid indexdt;
    if in1 and NOT in2;
 keep patid indexdt;
 run;


proc sort nodupkey data = ids_first_users out = ids4(keep = patid indexdt);
   by patid;
run;


 data ids_demo;
       merge ids_first_users(in = in1)
             in.demographic2007_2019(in = in2 keep = patid birth_date sex race );
      by patid;
      if in1 and in2;
   age = int((indexdt - birth_date)/365.25);
   if age => 65;
if sex ^= ' ';
run;

proc sort nodupkey data = ids_demo out = ids5(keep = patid);
   by patid ;
run;

data rpdr;
   set rpdr.demographic(keep = Medicare_PatID PATIENT_NUM);
run;

proc sort nodupkey data = rpdr;
  by Medicare_PatID;
run;

data ids_demo1;
  merge ids_demo(in = in1)
        rpdr(in = in2 rename=(Medicare_PatID = patid));
  by patid;
  if in1 and in2;
run;

proc sort nodupkey data = ids_demo1 out = ids6(keep = patid);
   by patid ;
run;

 data class;
      merge ids_demo1(in = in1)
            index_meds(in = in2 rename=(dispense_date = indexdt));
     by patid indexdt;
     if in1 and in2;
  point = indexc(generic,'/', ' ');
  generic1 = substr(generic,1,(point-1));
 keep patid indexdt generic1 class generic;
 run;

 proc sort nodupkey data = class;
    by patid indexdt generic1;
 run;

 data one two;
     set class;
     by patid indexdt;
 if first.indexdt and last.indexdt then output one;
 else                                   output two;
 run;

 proc sort nodupkey data = one out = ids7(keep= patid);
     by patid;
run;


***********************************;
* cohort definition - part2        ;
* excluisons                       ;
***********************************;

 %macro temp;

 %do year = 2012 %to 2019;

proc sql;
   create table gen&year(keep = patid dispense_date) as
     select * from in.dispensing&year(keep = patid generic Dispense_Date) rx,
              out.list_generic(keep = generic) list
     where rx.generic    = list.generic ;
quit;


 %end;

 data exc_meds;
    set  %do year = 2012 %to 2019;
                  gen&year
         %end;
        ;
 run;

 %mend;
 %temp;


 proc sort nodupkey;
   by patid dispense_date;
 run;

proc sql;
   create table exc_rx_prior(keep = patid indexdt) as
   select * from  exc_meds(keep = patid dispense_date) dx,
            one(keep = patid indexdt) ids
   where ids.patid    = dx.patid and
        (indexdt - 180) <= dispense_date <= indexdt;
quit;


proc sort nodupkey data = exc_rx_prior;
    by patid indexdt;
run;


 data dm09
      dm10;
    set out.dx_list_name;
if variable_name = 'cancer' or
   variable_name = 'any fungal infection' then delete;
if type ='09' then output dm09;
if type ='10' then output dm10;
keep dx variable_name;
run;

proc freq;
  table variable_name;
run;




%macro temp;

%do year = 2012 %to 2019;

proc sql;
   create table dx&year(keep = patid indexdt dx admit_date enc_type dx_type) as
   select * from  in.diagnosis&year(keep = patid dx admit_date enc_type dx_type) dx,
            one(keep = patid indexdt) ids
   where ids.patid    = dx.patid and
        (indexdt - 180) <= admit_date <= indexdt;
quit;

      proc sql;
        create table px&year(keep=patid indexdt px_date px px_type enc_type) as
        select * from in.procedures&year(keep= patid px_date px px_type enc_type ) dx,
                 one(keep = patid indexdt) ids
        where ids.patid    = dx.patid     and
              (indexdt - 180) <= px_date <= indexdt;
     quit;


      proc sort nodup data = px&year;
         by patid indexdt px_date px;
      run;
%end;


data dx;
   set  dx2019 %do year = 2012 %to 2018;
                 dx&year
               %end;
       ;
run;


data px;
   set  px2019 %do year = 2012 %to 2018;
                 px&year
               %end;
       ;
run;

%mend;
%temp;


 data dx09
      dx10;
   set dx;
if dx_type = '09' then output dx09;
if dx_type = '10' then output dx10;
keep patid dx admit_date indexdt;
run;

proc sql;
      create table dx9_exc(keep= patid admit_date indexdt) as
      select * from  dm09(keep = dx) temp,
               dx09(keep = patid dx admit_date indexdt) claims
      where temp.dx    = claims.dx ;
   quit;


 proc sort nodup;
   by patid ;
run;


proc sql;
      create table dx10_exc(keep= patid admit_date indexdt) as
      select * from  dm10(keep = dx) temp,
               dx10(keep = patid dx admit_date indexdt) claims
      where temp.dx    = claims.dx ;
   quit;


 proc sort nodup;
   by patid ;
 run;

 data exc_dx_prior;
      set dx10_exc
          dx9_exc;
keep patid indexdt;
run;

proc sort nodupkey data = exc_dx_prior;
    by patid indexdt;
run;



 data px09
      px10
      pxcpt;
   set px;
 if px_type = '09' then output px09;
 if px_type = '10' then output px10;
 if px_type = 'C4' or  px_type = 'HC' then output pxcpt;
 run;


 data exc09
      exc_cpt
      exc10;
    set out.px_list;
if type ='09'  then output exc09;
if type ='10'  then output exc10;
if type ='CPT' then output exc_cpt;
keep px;
run;


proc sql;
      create table px9_exc(keep= patid px_date indexdt) as
      select * from  exc09(keep = px) temp,
               px09(keep = patid px px_date indexdt) claims
      where temp.px    = claims.px ;
   quit;


 proc sort nodup;
   by patid indexdt;
run;

proc sql;
      create table px10_exc(keep= patid px_date indexdt) as
      select * from  exc10(keep = px) temp,
               px10(keep = patid px px_date indexdt) claims
      where temp.px    = claims.px ;
   quit;


 proc sort nodup;
   by patid indexdt;
run;


proc sql;
      create table pxcpt_exc(keep= patid px_date indexdt) as
      select * from  exc_cpt(keep = px) temp,
               pxcpt(keep = patid px px_date indexdt) claims
      where temp.px    = claims.px ;
   quit;


 proc sort nodup;
   by patid indexdt;
run;

data exc_px_prior;
      set pxcpt_exc
          px10_exc
          px9_exc ;
keep patid indexdt;
run;

data exclude;
    set exc_px_prior
        exc_dx_prior
        exc_rx_prior;
keep patid indexdt;
run;

proc sort nodupkey;
  by patid indexdt;
run;

 data ids_exclude;
     merge one(in = in1)
           exclude(in = in2);
     by patid indexdt;
     if in1 and NOT in2;
 run;

data ids_exclude;
      set ids_exclude;
if generic = 'empagliflozin/linagliptin' then delete;
run;

 proc sort nodupkey data = ids_exclude out = ids8(keep= patid);
     by patid;
 run;

 proc sort nodupkey data = ids_exclude;
     by patid;
 run;


data out.dm_cohort;
     merge ids_exclude(in = in1 keep= patid indexdt)
           one(keep= patid indexdt class generic generic1 rename=(generic = generic_full generic1 =generic))
           ids_demo(keep  = patid indexdt age sex race)
           ids_demo1(keep = patid indexdt PATIENT_NUM)
           ids_elig(keep  = patid indexdt deathdt enr_end);
     by patid indexdt;
     if in1;
run;

 proc sort nodupkey;
     by patid;
run;