/**************************************************************************************** * SENTINEL PROGRAM ***************************************************************************************** * NAME: scdm_data_qa_review-maskresults.sas * * PURPOSE: This program evaluates select QA package output based on user-defined parameter * to identify whether conditions for masking apply (results specific to select TabID) * and numeric value between 1 and user defined max inclusive. If conditions met, * the numeric value will be masked alongside other related statistics to prevent back * calculation * * MAJOR STEPS: * 1) Evaluate QA package output to identify files where masking may be indicated * 2) Evaluate each file to define conditions for and variables to mask * 3) Evaluate masking conditions and if met, mask numeric variable values * 4) Evaluate count of masking updates made per file. If only one, mask additional records in file * per conditions previously defined to prevent back calculation * * KEY DEPENDENCIES/CONSTRAINTS/CAVEATS: * * PROGRAM INPUT: QA Package MSOC folder output * * PROGRAM OUTPUT: Masked QA Package Output (note file output to MSOC directory will be * evaluated for masking but only updated to masked if conditions met to do so * ****************************************************************************************/ /* Create temp subdirectories in dplocal for organizing masking process */ options DLCREATEDIR; libname mmsoc "&MSOC.mask"; options NODLCREATEDIR; /* create concatenated libname to reference data for masking, selecting mask version if it exists rather than original version from msoc */ libname maskin (mmsoc msoc) access=readonly; /* concatenated libref to mask multiple tabids - if masked version exists, this version will be selected when macro iterates otherwise dataset from qapackage msoc will be selected*/ %let maskMin = 10;/* max value for matching - masking will be triggered if value between 1 and this value inclusive */ /* known numeric variables that should not be used to determine masking requirement but masked if requirement met by other variable within row of results */ %LET EXACTMATCH_STR = %STR("MIN" "P1" "P5" "P25" "MEDIAN" "P75" "P95" "P99" "MAX" "MODE", "PROPORTION_FLAGGED"); /* if exact match on variable name, space delimited quoted list*/ *** process data to apply matching ***; /* Select list of datasets and variable where masking could apply i.e. not signature or metadata */ Proc sql NOPRINT ; create table work._DS_SELECT as select distinct upcase(memname) as memname , upcase(name) as name , upcase(type) as type , length from dictionary.columns where libname='MSOC' and memtype = 'DATA' and memname not in ('ALL_L1_SCDM_COMP', 'ALL_L1_CONT', 'DTH_MODULE_ALL_L1_CONT') order by memname, name; quit; *** Macro to define and apply masking conditions per qa dataset ***; %macro filter_memlist (tabid_list = ); %do m = 1 %to %sysfunc(countw(&tabid_list.)); %let mask_tabid = %scan(&tabid_list., &m.); %* Process list to define datasets that should be reviewed for masking and variables within that ; %* will be evaluated to do so ; DATA dplocal.mask_list_&mask_tabid.; length ind_tabid_memname ind_tabid_var ind_tabid_index ind_id_mask ind_apply_mask 3; SET work._DS_SELECT ; by memname; /* limit to datasets that are based on ALL SCDM tables or specific table specified by user parameter */ if not(index(memname, 'SIGNATURE')) and ( substr(memname,1,3) = 'ALL' or index(memname, %upcase("&mask_tabid._"))); /* dataset level indicators for whether masking logic should be applied to table */ /* filename references specific SCDM table */ ind_tabid_memname = (index(memname, %upcase("&mask_tabid._"))); /* variable name references specific SCDM table */ ind_tabid_var = ( not(ind_tabid_memname) and ( index(name, %upcase("&mask_tabid._")) or name = %upcase("&mask_tabid."))); /* variable value has potential to reference specific SCDM table - note additional processing required to ascertain */ ind_tabid_index = ( not(ind_tabid_var) and type = 'CHAR' and name ne 'DP' and length >= 3); /* variable level indicator */ /* variable value that should be used to identify whether masking required i.e. does not mean minimum n requirement */ ind_id_mask = ( type = 'NUM' and name not in (&EXACTMATCH_STR.) and not(index(name, 'DATE')) and not(index(name, 'LENGTH'))); /* indicator for variable value to be masked if ind_id_mask triggered */ ind_apply_mask = ( type = 'NUM' ); run; %* Create parameter with list of all datasets where masking may be required; %local list_memfilter; %let list_memfilter = ; proc sql NOPRINT; select distinct memname into :list_memfilter separated by ' ' from ( select memname , sum(ind_tabid_memname) as sum_ind_tabid_memname , sum(ind_tabid_var) as sum_ind_tabid_var , sum(ind_tabid_index) as sum_ind_tabid_index , sum(ind_id_mask) as sum_ind_id_mask from dplocal.mask_list_&mask_tabid. group by memname) where (sum_ind_tabid_memname or sum_ind_tabid_var or sum_ind_tabid_index) and sum_ind_id_mask; quit; %* iterate through each distinct memname to evaluate, mask as required, and save result to dplocal directory; %if %length(&list_memfilter.) > 0 %then %do i = 1 %to %sysfunc(countw(&list_memfilter.)); %let memfilter = %scan(&list_memfilter., &i.); %let list_ind = tabid_memname tabid_index tabid_var id_mask apply_mask; %* Select metadata for single dsn and populate parameters to identify and apply masking; %do j = 1 %to %sysfunc(countw(&list_ind.)); %let indtype = %scan(&list_ind., &j.); %let list_&indtype. =; proc sql NOPRINT; select name into :list_&indtype. separated by ' ' from dplocal.mask_list_&mask_tabid. where memname = "&memfilter." and ind_&indtype.; quit; %end; %* end j do loop; %* Create temp masked dataset; data _&memfilter. (drop = _ind:) ; set maskin.&memfilter. end = eof; /* dummy var */ _ind_dummy=.; /* if tabid in filename, set indicator */ _&mask_tabid._memname= 0; %if %length(&list_tabid_memname.) > 0 %then _&mask_tabid._memname= 1;; /* filter any variables based on variable name to identify if masking applies */ _&mask_tabid._varname=0; %if %length(&list_tabid_var.) > 0 %then %do; %do b = 1 %to %sysfunc(countw(&list_tabid_var.)); %let var_name = %scan(&list_tabid_var., &b.); _ind_var_name_&b. = (&var_name. = '1'); %end; %* end b do loop; _&mask_tabid._varname = sum(of _ind_var_name_:); %end; %* end tabid var assignment; /* filter any variable based on value to identify if masking applies and apply within if/then do loop */ _&mask_tabid._index =0; %if %length(&list_tabid_index.) > 0 %then %do; %do a = 1 %to %sysfunc(countw(&list_tabid_index.)); %let tabid_index = %scan(&list_tabid_index., &a.); _ind_tabid_index_&a. =( upcase(&tabid_index) = %upcase("&mask_tabid.") or index(upcase(&tabid_index), %upcase("&mask_tabid._")) or index(upcase(&tabid_index), %upcase("&mask_tabid.-"))); %end; _&mask_tabid._index =sum(of _ind_tabid_index_:); %end; %* end tabid variable value assignment; /* evaluate mask id variables and apply making to all applicable values if min threshold not met */ _masked_&mask_tabid. = .; if sum(of _&mask_tabid._memname, _&mask_tabid._varname, _&mask_tabid._index) > 0 then do; _masked_&mask_tabid. = 0; %do c = 1 %to %sysfunc(countw(&list_id_mask.)); %let id_mask = %scan(&list_id_mask., &c.); if 0< &id_mask. <= &maskMin. then do; _masked_&mask_tabid. = 1; %do d = 1 %to %sysfunc(countw(&list_apply_mask.)); %let apply_mask = %scan(&list_apply_mask., &d.); &apply_mask. = .M; %end; %* end d loop; end; %end; %* end c loop; end; run; %let SumMaskN = ; proc sql NOPRINT; select count(*) into :SumMaskN trimmed from _&memfilter. where _masked_&mask_tabid. = 1; quit; proc sort data = _&memfilter.; by &list_id_mask.; run; /* if single record masked, mask additional records to prevent ability to back calculate */ data mmsoc.&memfilter.; set _&memfilter.; /* dummy var */ _ind_dummy = .; %if &SumMaskN.= 1 %then %do; retain _addmaskn_&mask_tabid.; if _masked_&mask_tabid. > . then do; /* evaluate mask id variables and apply masking to all applicable values if min threshold not met */ %do c = 1 %to %sysfunc(countw(&list_id_mask.)); %let id_mask = %scan(&list_id_mask., &c.); if &id_mask. > 0 and _addmaskn_&mask_tabid. < 1 then do; _addmaskn_&mask_tabid. = sum(_addmaskn_&mask_tabid., 1); %do d = 1 %to %sysfunc(countw(&list_apply_mask.)); %let apply_mask = %scan(&list_apply_mask., &d.); &apply_mask. = .R; %end; %* end d loop; end; %end; %* end c loop; end; %end; %* end additional masking do statement; run; %end; %* end i do loop; %end; %* end m do loop; %mend filter_memlist; /* move files created in dplocal to msoc output */ %macro move_masked; %* identify intermediate dataset created in dplocal directory ; %let list_memmove = ; proc sql NOPRINT; select distinct memname into :list_memmove separated by ' ' from dictionary.columns where upcase(libname)='MMSOC' and upcase(memtype) = 'DATA'; quit; %* iterate through each dataset to create version in msoc directory renamed to add prefix indicating masking evaluated and applied if indicated; %if %length(&list_memmove.) > 0 %then %do k = 1 %to %sysfunc(countw(&list_memmove.)); %let memmove = %scan(&list_memmove., &k.); %* preserve unmasked/original version of dataset; data dplocal.&memmove.; set msoc.&memmove.; run; %* move reviewed and potentially masked dataset to msoc directory; data msoc.&memmove.; set MMSOC.&memmove. (drop = _:); run; %end; %* end k loop; %mend move_masked; %filter_memlist (tabid_list = &MaskTabid_List.); %move_masked; proc datasets lib=MMSOC nolist nowarn kill; run; quit; filename tempdir "&MSOC.mask"; data _null_; rc=fdelete('tempdir'); put rc=; msg=sysmsg(); put msg=; run; *** ---------------------------------- END PROGRAM ----------------------------------------------- ***;