/*-------------------------------------------------------------------------------------*\ | PROGRAM NAME: | | scdm_data_qa_review-demographic.sas | | | |---------------------------------------------------------------------------------------| | PURPOSE: | | The purpose of this program is to perform data quality checks on the Demographic | | table. | |---------------------------------------------------------------------------------------| | PROGRAM INPUT: | | see 00.0_scdm_data_qa_review_master_file.sas | | | | PROGRAM OUTPUT: | | see Workplan PDF | |---------------------------------------------------------------------------------------| | CONTACT: | | Sentinel Coordinating Center | | info@sentinelsystem.org | \*-------------------------------------------------------------------------------------*/ *-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-; * PLEASE DO NOT EDIT BELOW WITHOUT CONTACTING THE SENTINEL OPERATIONS CENTER ; *-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-; /*-------------------------------------------------------------------------------------*/ /* START Level 2 */ /*-------------------------------------------------------------------------------------*/ %level2; /*-------------------------------------------------------------------------------------*/ /* START Level 3 */ /*-------------------------------------------------------------------------------------*/ %let level=3; /* Define catvars_a and catvars_b from lkl_all_l1 **/ proc sql noprint; select Variable into: catvars_a separated by ", " from infolder.lkp_all_l1 where upcase(tabid)=upcase("&module.") and upcase(VarType)="C" and upcase(Variable) ne "POSTALCODE"; select Variable into: catvars_b separated by " " from infolder.lkp_all_l1 where upcase(tabid)=upcase("&module.") and upcase(VarType)="C" and upcase(Variable) ne "POSTALCODE"; quit; * birth_date: list with # of records for use in the L3 module (after DP maxdate is created); %let qa_table= ; %macro get_qa_table ; %if &do_partitions and not %eval(&module in &tabid_exc) %then %do ; data t_view /view=t_view ; set qadata.&&&module.table.: (keep=birth_date &catvars_b.) ; run; %let qa_table = t_view ; %end ; %else %do; %let qa_table = qadata.&&&module.table (keep=birth_date &catvars_b.) ; %end; %mend; %get_qa_table ; proc sql noprint; create table dplocal.&module._temp as select birth_date , &catvars_a. , count(*) as n from &qa_table group by birth_date, &catvars_a. ; quit; %remove_labels(dplocal,&module._temp); proc sql noprint; create table dplocal.&module._birthd as select birth_date , sum(n) as n from dplocal.&module._temp (keep=birth_date n) group by birth_date ; quit; proc sql noprint; create table msoc.&module._l3_catvars as select &catvars_a. , sum(n) as count format=comma15. from dplocal.&module._temp (keep=&catvars_b. n) group by &catvars_a. ; quit; /** Delete catvars_a and catvars_b from global symbol table */ data _null_; call symdel("catvars_a", "nowarn"); call symdel("catvars_b", "nowarn"); run; %macro l3_zip; /* Load the data from the zip lookup table into a hash table and merge SCDM table on key variable ZIP to assign STATECODE*/ %if %sysfunc(exist(infolder.lkp_dem_zip,data)) %then %do; %if &do_partitions and not %eval(&module in &tabid_exc) %then %let pn=: ; %else %let pn= ; data dplocal.zip1 (drop=rc); declare Hash postalstate (); /* declare the name POSTALSTATE for hash */ rc = postalstate.DefineKey ('postalcode'); /* identify fields to use as keys */ rc = postalstate.DefineData ('statecode'); /* identify fields to use as data */ rc = postalstate.DefineDone (); /* complete hash table definition */ do until (eof1) ; /* loop to read records from lookup file */ set infolder.lkp_dem_zip end = eof1; rc = postalstate.add (); /* add each record to the hash table */ end; do until (eof2) ; /* loop to read records from WORK.ZIP1 */ set qadata.&&&module.table.&pn (keep=postalcode postalcode_date) end = eof2; call missing(statecode); /* initialize the variable to fill */ rc = postalstate.find (); /* lookup each ZIP in hash POSTALSTATE */ output; /* write record WORK.POSTALSTATE*/ end; stop; run; /* create masked (pseudo) zip code variable ZIP-masked */ data work.&module._temp1; set dplocal.zip1; length z1-z5 $1 temp $20; temp=tranwrd(postalcode, " ", "_"); *change spaces to underscores; %do j=1 %to 5; z&j.= substr(temp,&j.,1); *parse zip characters into 5 separate variables; %end; run; proc datasets lib=dplocal memtype=data nolist; delete zip1 ; run; quit; data work.&module._temp2; set work.&module._temp1; %do i=1 %to 5; if anydigit(z&i.)=1 then z2_&i.="N"; *convert numeric characters to "N"; else if z&i="_" then z2_&i.="_"; *keep "-" for spaces; else if notdigit(z&i.)=1 then z2_&i.="a"; *convert alpha characters to "a"; %end; drop temp z1-z5; run; data work.zip; length Zip_masked $5; set work.&module._temp2; zip_masked=z2_1||z2_2||z2_3||z2_4||z2_5; *concatenate 5 characters create to a pseudo zip variable; drop z2_1-z2_5; run; proc sql noprint; create table work.&module._temp3 as select PostalCode_Date , count(*) as n from work.zip (keep = PostalCode_Date ) group by 1 ; quit; %remove_labels( work , &module._temp3 ) ; %date_percentiles (libin=work, dsin=&module._temp3, libout=dplocal, dsout=date_dist_&module., vars=PostalCode_Date); proc datasets lib=work nolist; delete &module._temp: ; run; quit; proc sql noprint; /* create new variables ZIP-CHECK and ZIP-STATE to characterize ZIP*/ create table work.zip1 as select put(postalcode_date,year4.) as Year , zip_masked , case when postalcode=' ' then 'Blank' when statecode ne ' ' then 'Valid' else 'Invalid' end as Zip_Check length=7 , case when calculated zip_check='Blank' then 'Missing' when calculated zip_check='Valid' then statecode when calculated zip_check='Invalid' then 'Invalid' end as Zip_State length=7 from work.zip ; /* output table to characterize ZIP by year */ create table msoc.dem_l3_zip_y as select * , count(*) as count format=comma15. from work.zip1 (keep=year zip_masked) group by year, zip_masked ; /* output table to characterize the validity of zip values by year*/ create table msoc.dem_l3_zip_verify_y as select * , count(*) as count format=comma15. from work.zip1 (keep=year zip_check) group by year, zip_check ; /* output table to characterize distribution of Zip State */ create table msoc.dem_l3_zip_state as select * , count(*) as count format=comma15. from work.zip1 (keep=zip_state) group by zip_state ; quit; %end; %mend; %l3_zip; proc sql ; drop table dplocal.&module._temp ; ;quit; proc datasets kill lib=work nolist nowarn nodetails; quit; *-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-; * END scdm-data-qa-review-demographic.sas ; *-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-;