/*-------------------------------------------------------------------------------------*\
|  PROGRAM NAME: scdm_data_qa_review-level3.sas                                         |
|                                                                                       |
|---------------------------------------------------------------------------------------|
|  PURPOSE:                                                                             |
|     The purpose of the program is to create cross-table level 3 output datasets for   |
|     all SCDM tables                                                                   |
|---------------------------------------------------------------------------------------|
|  PROGRAM INPUT:                                                                       |
|     see scdm_data_qa_review_master_file.sas                                           |
|                                                                                       |
|  PROGRAM OUTPUT:                                                                      |
|     see Workplan PDF                                                                  |
|---------------------------------------------------------------------------------------|
|  CONTACT:                                                                             |
|     Sentinel Coordinating Center                                                      |
|     info@sentinelsystem.org                                                           |
\*-------------------------------------------------------------------------------------*/

*-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-;
*  PLEASE DO NOT EDIT BELOW WITHOUT CONTACTING THE SENTINEL OPERATIONS CENTER           ;
*-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-;

/*---------------------------------------------------------------------------------*/
/* Combine temporary flags datasets                                                */
/*---------------------------------------------------------------------------------*/
%macro l2_flags_final;
  data all_l1_l2_flags(where=(count >= 1));
    set dplocal.l2_flags_: 
    %if %sysfunc(exist(dplocal.all_l1_flags,data)) %then %do;
      dplocal.all_l1_flags
    %end;
    ;
    keep flagid abortyn flagtype flag_descr count;
  run;

  proc sql noprint;
    create table dplocal.all_l1_l2_flags (drop = _sortType) as
    select distinct a.flagid, 
                    a.abortyn, 
                    a.flagtype, 
                    a.flag_descr, 
                    a.count format=comma15. informat=comma15.,
                    b.count_obs as records_evaluated label='',
                    divide(a.count, b.count_obs) as proportion_flagged,
                    CASE WHEN flagtype = 'Note' THEN 1 ELSE 0 end as _sortType                       
    from all_l1_l2_flags a
    left join dplocal.all_l1_record_count b
    on b.tabid = 
    /* Join record counts based on single or cross table check. If cross table, use second table in join */
    case when ^index(scan(a.flagid,1,'_'),'-') then scan(a.flagid,1,'_')
    else scan(scan(a.flagid,1,'_'),-1,'-')
    end 
    ORDER BY _sortType, flagid
    ;
    drop table all_l1_l2_flags
    ;
  quit;

  proc datasets lib=dplocal nolist nodetails nowarn;
    delete l2_flags_:;
  quit;

%mend l2_flags_final;
%l2_flags_final;

/*---------------------------------------------------------------------------------*/
/* Create level 3 summary dataset                                                  */
/*---------------------------------------------------------------------------------*/
%macro varcounts (var);
  %local i ds;
  %let ds=%upcase(all_l2_&var._match);

  proc sql noprint;
    select NAME into :tabidlist separated by ' '
    from dictionary.columns
    where LIBNAME="DPLOCAL" and MEMNAME=%str("&ds.") and upcase(NAME) ne "COUNT"
    ;
  quit;

  %do i=1 %to %sysfunc(countw(&tabidlist.));
    %let tabid=%scan(&tabidlist.,&i.);
    proc sql noprint;
      create table tmp_&var._&i. as
      select "&tabid." as TabID
           , sum(count) as &var. format=comma15.
      from dplocal.all_l2_&var._match (keep=&tabid. count where=(&tabid.='1'))
      ;
    quit;
  %end;

  %set_ds (libin=work, dsin_prefix=tmp_&var._, libout=work, dsout=temp_nobs_&var.);

  proc sort data=temp_nobs_&var.;
    by tabid;
  run;
%mend;
%varcounts (patid);
%varcounts (encounterid);
%varcounts (providerid);
%varcounts (facilityid);

data dplocal.all_l3_summary;
  merge dplocal.all_l1_nobs (rename=(count_obs=AllRec))
        dplocal.all_l2_nobs_dup (rename=(count_dup=DupRec))
        temp_nobs_patid
        temp_nobs_encounterid
        temp_nobs_providerid
        temp_nobs_facilityid
        dplocal.tmp_minmax_dates;
  by TabID;
run;

/* Combine all date distribution datasets into all_l3_dates_dist */
%set_ds (libin=dplocal, dsin_prefix=date_dist_, libout=dplocal, dsout=all_l3_dates_dist);

proc datasets lib=dplocal nowarn nolist nodetails;
  delete all_l1_nobs all_l2_nobs_dup tmp_:;
run;quit;

/*---------------------------------------------------------------------------------*/
/* Create DEM Level 3 Age tables using newly calculated &DP_maxdate value          */
/*---------------------------------------------------------------------------------*/
*calculate age and age groups;
%let qa_table= ;
%macro get_qa_table ;
  %if &do_partitions %then %do ;
    data t_view /view=t_view ;
      set qadata.&demtable.:(keep=birth_date sex) ;
      where upper(sex) = 'F' and floor((intck('month',birth_date,&DP_MaxDate.)-(day(&DP_MaxDate.)<day(birth_date)))/12)
            between 10 and 54 ;
    run;
    %let qa_table = t_view ;
  %end;
  %else %do;
    %let qa_table = qadata.&demtable(keep=birth_date sex)
      where upper(sex) = 'F' and floor((intck('month',birth_date,&DP_MaxDate.)-(day(&DP_MaxDate.)<day(birth_date)))/12)
            between 10 and 54 ;
  %end;
%mend;
%get_qa_table ;
proc sql noprint;
  create table temp as
  select floor((intck('month',birth_date,&DP_MaxDate.)-(day(&DP_MaxDate.)<day(birth_date)))/12) as age_years
       , sum(n) as count
  from dplocal.dem_birthd
  group by 1
  ;
  drop table dplocal.dem_birthd
  ;

  create table msoc.dem_l3_agecat as
  select put(age_years,agecat_years.) as Age_group length=50
       , sum(count) as count format=comma15.
  from temp
  group by 1
  union
  select '19. Pediatric Population I (0-18 yrs)' as age_group length=50,
          sum(count) as count format=comma15.
  from temp
  where age_years between 0 and 18
  group by 1
  union
  select '20. Pediatric Populations II (0-21 yrs)' as age_group length=50,
          sum(count) as count format=comma15.
  from temp
  where age_years between 0 and 21
  group by 1
  union
  select '21. Young Adult Coverage Eligible (0-<26 yrs)' as age_group length=50,
          sum(count) as count format=comma15.
  from temp
  where age_years between 0 and 25
  group by 1
  union
  select '22. Childbearing Age (10-54 yrs)' as age_group length=50,
         count(*) as count format=comma15.
  from &qa_table
  group by 1;
quit;

*distribution of age;
proc means nolabels data=temp missing StackODSOutput n mean std min p1 P5 P25 median P75 P95 p99 max vardef=weight;
  var age_years;
  weight count;
  ods output summary=msoc.dem_l3_ageyr_stats;
run;

proc datasets library=msoc nolist nodetails nowarn;
  modify dem_l3_ageyr_stats;
  rename stddev=std;
  format m: p: std  8.1;
  informat m: p: std  8.1;
quit;


/*---------------------------------------------------------------------------------*/
/* Add DP to all datasets in MSOC folder, except signature files                   */
/*---------------------------------------------------------------------------------*/
proc sql noprint;
    create table _msoc_metadata as
    select libname, memname
    from dictionary.tables
    where libname = 'MSOC' and memname not like '%_SIGNATURE';
quit;

data _null_;
  set _msoc_metadata;
  dsid = open(cats(libname,'.',memname));
  if varnum(dsid,'DP')=0 then do;
        call execute('data '||cats(libname,'.',memname)||';
                           length DP $6.;
                           retain DP "&dp";
                           set '||cats(libname,'.',memname)||';
                           run;');
  end;
  rc = close(dsid);
run;

/*---------------------------------------------------------------------------------*/
/* Create metadata file for use with QA Common Components (CC) package             */
/*---------------------------------------------------------------------------------*/
%macro cc_metadata;
  %local c i n var value cctablist;

  /* Obtain from work.control_flow since msoc.control_flow_3 does not contain all table names */
  proc sql noprint;
    select distinct lowcase(cc_table)
      into: cctablist separated by "|"
    from msoc.cc_control_flow
    where lowcase(cc_table) ne "x"
    ;
  quit;

  proc datasets lib=msoc nolist nowarn;
       delete cc_control_flow;
  quit;

  %let macro_var_list=%upcase(DP|ETL|Phase|SCDMVer|dp_mindate|dp_maxdate|NumSession|SASCMD|SASConnect|SASGrid|GridSrv|NumPartitions|ParTable|&cctablist.|miltable);

  data msoc.qa_cc_metadata;
    length Variable $32 Value $255;
  %do i=1 %to %sysfunc(countw(&macro_var_list.));
    %let var=%scan(&macro_var_list.,&i.,|);
      %if %index(&var.,DP_) %then %let value=%sysfunc(putn(&&&var.,date9.));
      %else %let value = &&&var.;
      Variable = "&var." ;
      Value =  "&value.";
      output;
    %end;
  run;

%mend cc_metadata;
%cc_metadata;


/*---------------------------------------------------------------------------------*/
/* Move specific files from DPLOCAL to MSOC                                        */
/*---------------------------------------------------------------------------------*/
%macro move_files;
  %local i filect filelist;
  proc sql;
    select memname, count(memname) into :filelist separated by ' ', :filect trimmed
    from dictionary.tables
    where libname="DPLOCAL" and memname like "____L__%" and memname ne "ALL_L1_FLAGS"
    ;
  quit;
  %do i=1 %to &filect.;
    %let file=%scan(&filelist.,&i.);

    proc sql noprint;
      create table msoc.&file. as
      select "&dp." as DP length=6
           , *
      from dplocal.&file.
      ;
      drop table dplocal.&file.
      ;
    quit;
  %end;
%mend;
%move_files;

/*---------------------------------------------------------------------------------*/
/* automate creation of DP Characteristics Sign-Off Report
/*   this assume that previous etl's all_l1_l2_flags dataset are located at inputfiles/qar
/*---------------------------------------------------------------------------------*/
%macro create_char_sign_off_report ;
  %local etlm1  ;
  %let etlm1 = %eval(&etl -1) ;

  proc sort data=msoc.all_l1_l2_flags
            out=all_l1_l2_flags;
  by flagID;
  run ;

  %if %sysfunc(exist(infolder.all_l1_l2_flags,data)) %then
  %do;
    proc sort data=infolder.all_l1_l2_flags
              out =all_l1_l2_flags_x;
    by flagID;
    run ;

    data all_l1_l2_flags_combined ;
      if 0 then set all_l1_l2_flags(drop=count) ;   /* bring var list from dataset except count */
      format count_&etlm1. count_&etl. diff_count_&etl._&etlm1. comma15. ;  /* order var */
      format pctchg_count_&etl._&etlm1. comma7.2 ;

      merge all_l1_l2_flags(in=in_new rename=(count=count_&etl.))
            all_l1_l2_flags_x(in=in_old rename=(count=count_&etlm1.
                 DP=O_DP AbortYN=O_AbortYN FlagType=O_FlagType Flag_Descr=O_flag_Descr ) ) ;
      by flagID;
      drop O_: ;
      if (in_old and not in_new) then do;
        DP = O_DP ;
        AbortYN = O_AbortYN ;
        FlagType = O_FlagType ;
        Flag_Descr = O_flag_Descr ;
      end;

      if missing(count_&etlm1.) then do;
          diff_count_&etl._&etlm1. = . ;
          pctchg_count_&etl._&etlm1. = . ;
        end ;
      else do;
          diff_count_&etl._&etlm1. = count_&etl. - count_&etlm1. ;
          pctchg_count_&etl._&etlm1. = diff_count_&etl._&etlm1. / count_&etlm1. * 100 ;
        end;
      length Sign_off $ 8 Comment $ 100. ;
      call missing(Sign_off, Comment) ;
    run;

    proc datasets lib=work nolist nodetails nowarn;
      delete all_l1_l2_flags_x ;
    quit;
  %end ;
  %else %do ;
    %put WARNING:previous version of all_l1_l2_flags.sas7bdat in inputfiles does NOT exist - program will continue;
    data all_l1_l2_flags_combined ;
      if 0 then set all_l1_l2_flags(drop=count) ;
      format count_&etlm1. count_&etl. diff_count_&etl._&etlm1. comma15. ;
      format pctchg_count_&etl._&etlm1. comma7.2 ;
      set all_l1_l2_flags(rename=(count=count_&etl.)) ;
      count_&etlm1. = . ;
      diff_count_&etl._&etlm1. = . ;
      pctchg_count_&etl._&etlm1. = . ;
      length Sign_off $ 8 Comment $ 100. ;
      call missing(Sign_off, Comment) ;
    run;
  %end ;

  ods _all_ close;
  ods excel file="&msoc./data_characteristics_sign_off_report_&dpid._etl&etl..xlsx" options(sheet_name='all_l1_l2_flags');

  proc print data=all_l1_l2_flags_combined noobs;
  run;

  ods _all_ close;

  proc datasets lib=work nolist nodetails nowarn;
    delete all_l1_l2_flags_combined;
  quit;
%mend;
%create_char_sign_off_report ;


/*---------------------------------------------------------------------------------*/
/* Clean up empty datasets from DPLOCAL                                            */
/*---------------------------------------------------------------------------------*/
proc sql noprint;
  select memname into :ds separated by " "
  from dictionary.tables where libname='DPLOCAL'
  having nobs-delobs=0
  ;
quit;

/*---------------------------------------------------------------------------------*/
/* Delete unnecessary DPLOCAL datasets                                             */
/*---------------------------------------------------------------------------------*/
proc datasets lib=dplocal nolist nowarn nodetails;
  delete &ds.
         l2_nodup_:
         lab_testdates_ym
         pro_adate_ym_count
         dia_adate_ym_count
         l2_distinctkey_enr
         enr_overlap_patid_dates;
quit;

*-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-;
* END scdm_data_qa_review_level3.sas                                                    ;
*-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-;