**************************************************************************************************** * PROGRAM OVERVIEW **************************************************************************************************** * * PROGRAM: icd10tree_lookup.sas * Created (mm/dd/yyyy): 02/28/22 *-------------------------------------------------------------------------------------------------- * PURPOSE: * This program will create the ICD10 tree lookup table to accompany QRP. The program creates a * child_parent tree from a standard or user provided file as well as prunes the tree * based off of a user provided exclusion file. * * Program inputs: * - dx_icd10_full_lookup.sas7bdat * - Level1_2_lookup.csv * - ICD10TreeFile * - ICD10ExcludeFile * * Program outputs (msoc): * - icd10_child_parent * - icd10_child_parent.csv * * Programming Notes: * - If the ICD 10 tree is created using the Optum ICD 10 data files, the date in the file name * if the date the ICD 10 data was refreshed. If the user provides an ICD 10 tree OR exclusions * are applied, then the date in the file name is the date qrp_lookupfiles was executed * - utility macro %isdata() is called in this macro * *-------------------------------------------------------------------------------------------------- * CONTACT INFO: * Sentinel Coordinating Center * info@sentinelsystem.org * ***************************************************************************************************; %macro icd10tree_lookup(); %put =====> MACRO CALLED: icd10tree_lookup; /**********************************************************************************************/ /* Create ICD 10 Tree using Optum ICD 10 data files /**********************************************************************************************/ %if %upcase(&CreateICD10TreeFile) eq Y %then %do; /*location of ICD10 data files*/ libname ICD10 "&ICD10Path" access=readonly; /*read in Level1_2_lookup csv file - contains 1st and 2nd level codes*/ proc import out= _level1_2_lookup datafile ="&INFOLDER.level1_2_lookup.csv" dbms = csv replace; getnames = yes; run; %isdata(dataset=ICD10.DX_ICD10_FULL_LOOKUP); %if %eval(&nobs.<=0) %then %do; %put ERROR: (Sentinel) CreateICD10TreeFile = Y, however dataset dx_icd10_full_lookup.sas7bdat cannot be found or ICD10PATH is not specified correctly.; %abort; %end; /*create table with level from dx_icd10_full_lookup and level1_2_lookup*/ proc sql noprint; create table lookup as select distinct a.*, catx('_',compress(LevelStart),compress(LevelEnd)) as tree_l1 length=11, b.level from ICD10.dx_icd10_full_lookup as a left join _level1_2_lookup as b on levelstart<=substr(code,1,3)<=levelend; quit; /*last modified date*/ proc sql noprint; select lowcase(put(modate, dtdate.)) into: file_date trimmed from dictionary.tables where libname = 'ICD10' and memname = 'DX_ICD10_FULL_LOOKUP'; quit; /*create full tree*/ data icd10_wide_tree(rename=(code=node) keep=code tree_l7 tree_l6 tree_l5 tree_l4 tree_l3 tree_l1 level); length tree_l7 tree_l6 tree_l5 tree_l4 tree_l3 $11; set lookup; l=length(code); tree_l7=code; if l=7 then do; tree_l6=compress(substr(code,1,6),' ')||'grp'; tree_l5=compress(substr(code,1,5),' ')||'grp'; tree_l4=compress(substr(code,1,4),' ')||'grp'; tree_l3=compress(substr(code,1,3),' ')||'grp'; end; if l=6 then do; tree_l6=compress(code,' ')||'grp'; tree_l5=compress(substr(code,1,5),' ')||'grp'; tree_l4=compress(substr(code,1,4),' ')||'grp'; tree_l3=compress(substr(code,1,3),' ')||'grp'; end; if l=5 then do; tree_l6=compress(code,' ')||'n'||'grp'; tree_l5=compress(code,' ')||'grp'; tree_l4=compress(substr(code,1,4),' ')||'grp'; tree_l3=compress(substr(code,1,3),' ')||'grp'; end; if l=4 then do; tree_l6=compress(code,' ')||'nn'||'grp'; tree_l5=compress(code,' ')||'n'||'grp'; tree_l4=compress(code,' ')||'grp'; tree_l3=compress(substr(code,1,3),' ')||'grp'; end; if l=3 then do; tree_l6=compress(code,' ')||'nnn'||'grp'; tree_l5=compress(code,' ')||'nn'||'grp'; tree_l4=compress(code,' ')||'n'||'grp'; tree_l3=compress(code,' ')||'grp'; end; run; proc sql noprint undo_policy = none; create table icd10_wide_tree as select a.*, b.tree_l1 as tree_l2 from icd10_wide_tree as a left join lookup as b on a.node = b.code where b.level = 2; delete from icd10_wide_tree where level = 2; quit; /*Determine number of levels from the "level" variables in the dataset*/ /*Note - default tree has 7 levels*/ proc contents noprint data=icd10_wide_tree out=_treevars (keep=name); quit; proc sql noprint; select max(input(compress(name,'','F'),8.)) into: maxlevel trimmed from _treevars where index(name,"tree_l"); quit; %if %eval(&maxlevel ne 7) %then %do; %put WARNING: (Sentinel) ICD\-10 tree does not have 7 levels. Tree may have data integrity issues; %end; /*Convert horizontal tree to vertical tree*/ data _tmp; set icd10_wide_tree; tree_l0=" "; %do i=&maxlevel %to 1 %by -1; %let j=%eval(&i -1); child=tree_l&i.; parent=tree_l&j.; i=&i; if child ne '' and child ne parent then output; %end; keep child parent i; run; proc sort nodupkey data=_tmp out=icd10_child_parent (drop=i); by descending i child parent; quit; %end; /*end CreateICD10TreeFile = Y*/ /**********************************************************************************************/ /* if user provides an ICD 10 child/parent tree, read file in /**********************************************************************************************/ %if "&ICD10TreeFile." ne "" %then %do; %isdata(dataset=infolder.&ICD10TreeFile.); %if %eval(&nobs.>0) %then %do; /*using sysdate to notate when the file was created*/ %let file_date = %sysfunc(lowcase(&sysdate.)); data icd10_child_parent; set infolder.&ICD10TreeFile.; run; %end; %end; /*end read in of ICD10TREEFILE*/ /**********************************************************************************************/ /* Apply optional exclusions to ICD10 Tree File /**********************************************************************************************/ %if "&ICD10ExcludeFile." ne "" %then %do; /*check for existance of needed datasets*/ %isdata(dataset=infolder.&ICD10ExcludeFile.); %if %eval(&nobs.<=0) %then %do; %put ERROR: (Sentinel) ICD10ExcludeFile is specified, however the file cannot be found; %abort; %end; %isdata(dataset=icd10_child_parent); %if %eval(&nobs.<=0) %then %do; %put ERROR: (Sentinel) ICD10ExcludeFile is specified, however a ICD10TREEFILE was not provided or the ICD10 Tree file was not created.; %abort; %end; %let file_date = %sysfunc(lowcase(&sysdate.)); /*error checking - check to see there are no duplicates in ICD10EXCLUDEFILE*/ proc sql noprint; create table _exclude_dups as select * from infolder.&ICD10ExcludeFile. group by node, level having count(*) >= 2; quit; %isdata(dataset=_exclude_dups); %if %eval(&nobs.>0) %then %do; %put ERROR: (Sentinel) duplicates found on exclude file. Check for accuracy; %abort; %end; /*convert vertical tree to horizontal tree*/ *1st level - no parent; data leaf1 restoftree; set icd10_child_parent; id = _n_; /*row identifier*/ if missing(parent)=1 then output leaf1; else output restoftree; run; %let maxlevel = 2; /*Extract additional levels if they exist*/ %isdata(dataset=restoftree); %if %eval(&nobs.>0) %then %do; *loop through until no more children; %let rc = 1; %do %until (&rc.=0); proc sql noprint; create table leaf&maxlevel. as select * from restoftree where parent in (select child as parent from leaf%eval(&maxlevel.-1)); quit; *remove rows from restoftree; proc sql noprint undo_policy=none; create table restoftree as select * from restoftree where id not in (select id from leaf&maxlevel.); quit; %isdata(dataset=restoftree); %if %eval(&nobs.>0) %then %do; %let maxlevel = %eval(&maxlevel.+1); %end; %else %do; /*stop looping*/; %let rc = 0; %end; %end; %put maxlevel = &maxlevel; *Merge across all levels to build out full tree; proc sql noprint; create table fulltree as select /*last leaf*/ leaf&maxlevel..child as node , leaf&maxlevel..child as tree_l&maxlevel. %do leaf = %eval(&maxlevel.-1) %to 2 %by -1; , leaf&leaf..child as tree_l&leaf. %end; , leaf2.parent as tree_l1 from leaf&maxlevel. as leaf&maxlevel. %do leaf = %eval(&maxlevel.-1) %to 2 %by -1; left join leaf&leaf. as leaf&leaf. on leaf&leaf..child = leaf%eval(&leaf.+1).parent %end; ; quit; %end; /*additional levels*/ %else %do; *number of levels in tree; %let maxlevel = %eval(&maxlevel-1); /*Equals 1*/ %end; *clean up; proc datasets nowarn noprint lib=work; delete leaf: restoftree; quit; /*Merge ICD10ExcludeFile and horizontal tree*/ proc sql noprint; create table exclude_lookup as select a.*, b.node as exclude_node, b.level as exclude_level, cat("tree_l",b.Level) as exclude_tree from fulltree as a full join infolder.&ICD10ExcludeFile. as b on a.node = b.node %do le = 1 %to &maxlevel; or a.tree_l&le = b.node %end;; quit; /*error checking - check to see if there are excluded nodes/level that exist within another*/ %isdata(dataset=fulltree); %let fulltree_obs = &nobs.; %isdata(dataset=exclude_lookup); %let exclude_obs = &nobs.; %if %eval(&exclude_obs.> &fulltree_obs.) %then %do; %put ERROR: (Sentinel) there is a node that is a child to another node in the exclude file, check node and level for accuracy; %abort; %end; /*remove the nodes and levels from exclude file*/ data exclude_lookup (drop = exclude:); set exclude_lookup; %do level_sub = 1 %to &maxlevel; if exclude_tree = "tree_l&level_sub" and tree_l&level_sub = exclude_node then do; %do rem = &level_sub %to &maxlevel; tree_l&rem = ''; %end; end; else if exclude_tree = "tree_l&level_sub" and tree_l&level_sub ne exclude_node then do; /*look for nodes at exclude level not found in the table*/ put 'ERROR: (Sentinel) node at the level requested cannot be found, check node and level for accuracy'; abort; end; %end; run; /*recreate vertical child-parent tree*/ *; data _tmp; set exclude_lookup; tree_l0=" "; %do i=&maxlevel %to 1 %by -1; %let j=%eval(&i -1); child=tree_l&i.; parent=tree_l&j.; i=&i; if child ne '' and child ne parent then output; %end; keep child parent i; run; proc sort nodupkey data=_tmp out=icd10_child_parent(drop=i); by descending i child parent; quit; %end; /*End if ICDExcludefile is supplied.*/ /*final processing*/ %isdata(dataset=icd10_child_parent); %if %eval(&nobs.>0) %then %do; /*error checking - check to see there are duplicates in final tree*/ proc sql noprint; create table _tree_dups as select * from icd10_child_parent group by child, parent having count(*) >= 2; quit; %isdata(dataset=_tree_dups); %if %eval(&nobs.>0) %then %do; %put WARNING: (Sentinel) duplicates found on tree file. Check for accuracy; %end; /*save final tree to MSOC folder and export as csv*/ data msoc.icd10_child_parent(label="Modification Date &file_date"); set icd10_child_parent; run; proc export data = msoc.icd10_child_parent outfile= "&MSOC.icd10_child_parent.csv" dbms=csv replace; putnames=no; run; /*clean up*/ proc datasets nowarn noprint lib=work; delete _: icd10: tree: exclude_lookup fulltree; quit; %end; %put =====> END MACRO: icd10tree_lookup; %mend;