I am trying to analyse the hazard ratios (HRs) for different disease subtypes in a nested case-control study, using a dataset with a high proportion of missing values in two variables that I need for the adjusted risk model. For the non-adjusted model I used Donna Spiegelman's Subtype macro: https://www.hsph.harvard.edu/donna-spiegelman/software/subtype/.
Now I want to run an adjusted model using multiply imputed data for the categorical variables a and b. My idea was to first run PROC MI:
/* Multiple imputation by fully conditional specification (FCS).
   Reads Subtype and writes 100 imputed copies to ImputSP; the seed is
   fixed so the imputations are reproducible. */
proc mi data=Subtype nimpute=100 out=ImputSP seed=1305468;
   class a b disease_sub infection;
   /* All FCS settings are given in ONE fcs statement: trace plots of the
      mean and standard deviation, 100 burn-in iterations, and the
      discriminant-function method for a and b with the other CLASS
      variables allowed as covariates (classeffects=include).
      NOTE(review): the previous version split these options across two
      separate fcs statements; confirm in the PROC MI documentation how
      repeated fcs statements are handled before reverting to that form. */
   fcs plots=trace(mean std)
       nbiter=100
       discrim(a b / classeffects=include);
   var a b age disease_sub infection;
run;
And then run an edited version of the subtype macro, in which I have added a "by _imputation_" statement in 3 parts of the macro code:
/* Get the list of values of outcometype.
   Creates macro variables _eb_1 ... _eb_N (one per non-missing
   outcometype level, in ascending order) and _neb_ (the count N). */

/* Sort by _imputation_ first, then outcometype. Sorting by outcometype
   alone would interleave the imputed data sets, and every later
   "by _imputation_" step on newdatname would then stop with a
   "data set is not sorted" error. */
proc sort data=newdatname;
   by _imputation_ outcometype;
run;

/* CLASS + NWAY gives one output row per outcometype level without
   requiring newdatname to be globally sorted by outcometype. */
proc means noprint nway data=newdatname;
   class outcometype;
   var outcometype;
   output out=_expblist_ mean=m_outcometype;
   where outcometype ne . ;
run;

/* Row number _n_ indexes the level; the last row also stores the count. */
data _expblist_;
   set _expblist_ end=_end_;
   call symput('_eb_'||trim(left(_n_)), trim(left(m_outcometype)));
   if _end_ then call symput ('_neb_', trim(left(_n_)));
run;
/* For the CACO study design the reference outcome level is forced to 0. */
%if &studydesign eq CACO %then %do; %let reftype=0; %end;
/* Close the listing destination before the model fit below. */
ods listing close;
/* Generalized-logit model of outcometype (reference level = &reftype),
   fitted once per _imputation_ BY group; estimates and, because of COVB,
   their covariance matrix are written to LL. */
proc logistic data=newdatname outest=LL;
/* NOTE(review): BY-group processing requires newdatname to be sorted by
   _imputation_ at this point; verify that no earlier step in the macro
   has re-sorted the data by another variable. */
by _imputation_;
model outcometype(ref="&reftype")=&exposureND &exposureD_ &unconstrvar /link=glogit covb;
/* NOTE(review): no run; statement is visible for this step in the excerpt;
   confirm where the step ends in the full macro. */
/* Statements generated only for the CACA study design.
   NOTE(review): the assignments and RENAME statements below are DATA step
   statements, but no DATA/SET statement is visible before them in this
   excerpt; TODO confirm against the full macro. */
%if &studydesign eq CACA %then %do;
/* NOTE(review): this loop header is repeated a few lines further down and
   the visible excerpt contains one more %do than %end, so this line looks
   like a paste artifact; verify before running. */
%do j=1 %to &_nexpND;
*variables for pair-wise LRT of heterogeneity test;
/* For each of the &_nexpND exposures: when outcometype equals i, copy the
   variable named by macro variable _expND_<j>_<i> into _expND_<j>, then
   rename _expND_<j> to the name stored in macro variable _expND_<j>.
   NOTE(review): outcometype is compared with the loop index i here, while
   the pair-wise indicators further down compare it with the stored level
   in _eb_<i>; the two agree only if the levels are 1..N; confirm. */
%do j=1 %to &_nexpND;
%do i=1 %to &_neb_;
if outcometype=&i then _expND_&j=&&_expND_&j._&i..;
%end;
rename _expND_&j=&&_expND_&j;
%end;
/* Same construction for the &_nexpD exposures, each of which has
   _nexpD_<j> component variables indexed by k. */
%do j=1 %to &_nexpD;
%do k=1 %to &&_nexpD_&j;
%do i=1 %to &_neb_;
if outcometype=&i then _expD_&j._&k=&&_expD_&j._&k._&i..;
%end;
rename _expD_&j._&k=&&_expD_&j._&k..;
%end;
%end;
/* One variable per unordered pair (i,k) of outcome levels: the exposure
   value when outcometype is level i or level k, and 0 otherwise. */
%do j=1 %to &_nexpND;
%do i=1 %to %eval(&_neb_-1);
%do k=%eval(&i+1) %to &_neb_;
_expND_&j._&i._vs_&j._&k.= _expND_&j*(outcometype=&&_eb_&i|outcometype=&&_eb_&k);
%end;
%end;
%end;
/* Pair-wise variables for every component m of each _expD_ exposure. */
%do j=1 %to &_nexpD;
%do i=1 %to %eval(&_neb_-1);
%do k=%eval(&i+1) %to &_neb_;
%do m=1 %to &&_nexpD_&j;
_expD_&j._&m._&i._vs_&j._&m._&k.=_expD_&j._&m.*(outcometype=&&_eb_&i|outcometype=&&_eb_&k);
%end;
%end;
%end;
%end;
run;
%end;
/* Close the listing destination before the Cox model fit below. */
ods listing close;
/* Cox model in counting-process form (entrytime,time), with censoring=0
   marking censored rows. Estimates go to LL; the COVS option is added
   only when &covs is YES.
   NOTE(review): LL is also the outest= data set of the PROC LOGISTIC step
   earlier in the macro; confirm that only one of the two steps runs for a
   given study design. */
proc phreg data=newdatname outest=LL %if "&covs" eq "YES" %then %do; covs %end; nosummary;
model (entrytime,time)*censoring(0)=
/* Outcome-level-specific terms _expND_<j>_<i> for every exposure j and level i. */
%do j=1 %to &_nexpND;
%do i=1 %to &_neb_;
_expND_&j._&i
%end;
%end;
/* Outcome-level-specific terms _expD_<j>_<k>_<i> for every component k of exposure j. */
%do j=1 %to &_nexpD;
%do k=1 %to &&_nexpD_&j;
%do i=1 %to &_neb_;
_expD_&j._&k._&i
%end;
%end;
%end;
/* With augmented=NO the level-specific variables _ucv_<j>_<i> are listed;
   with augmented=YES the names in &unconstrvar are used as given. */
%if "&augmented" eq "NO" %then %do;
%do j=1 %to &_nunconstrvar;
%do i=1 %to &_neb_;
_ucv_&j._&i
%end;
%end;
%end;
%else %if "&augmented" eq "YES" %then %do;
&unconstrvar
%end;
/* &constrvar is always appended; ties=discrete is requested for the MCACO and CACO designs. */
&constrvar / covb %if "&studydesign" eq "MCACO" | "&studydesign" eq "CACO" %then %do; ties=discrete %end; ;
/* Stratify by outcometype (plus &stratavar) for COHORT/CACO, or by &matchid for MCACO. */
%if "&studydesign" eq "COHORT" | "&studydesign" eq "CACO" %then %do; strata outcometype &stratavar; %end;
%if "&studydesign" eq "MCACO" %then %do; strata &matchid; %end;
/* Fit the model once per _imputation_ BY group.
   NOTE(review): BY-group processing requires newdatname to be sorted by
   _imputation_ at this point; verify that no earlier step has re-sorted
   the data. No run; statement is visible for this step in the excerpt. */
by _imputation_;
After running the subtypeBY_macro, my intention is to run the following code to obtain the final pooled estimates:
/* Show the first eight rows of the per-imputation parameter estimates
   held in the data set named by &PamEst. */
proc print data=&PamEst(obs=8);
   title "Logistic Model Coefficients First 8 Obs ";
   var _Imputation_ Variable Estimate StdErr;
run;
/* Combine the per-imputation parameter estimates in &PamEst into pooled
   estimates. CLASSVAR=ClassVal tells PROC MIANALYZE that the levels of
   the classification effects listed in &classmi are identified by
   ClassVal-style columns in the PARMS= data set. The pooled
   ParameterEstimates table is saved as mianalyze_&PamEst. */
title "MULTIPLE IMPUTATION LOG REGRESSION-FCS";
proc mianalyze parms(classvar=ClassVal)=&PamEst;
   class &classmi;
   modeleffects &modelvar;
   ods output ParameterEstimates=mianalyze_&PamEst;
run;
/* Clear the title so it does not carry over to later output. */
title;
/* Exponentiate the pooled estimate and its confidence limits
   (Estimate, LCLMean, UCLMean from mianalyze_&PamEst). */
data OR_&PamEst;
   set mianalyze_&PamEst;
   OR     = exp(estimate);
   LCL_OR = exp(LCLMean);
   UCL_OR = exp(UCLMean);
run;

/* Print the exponentiated results for each parameter / class level. */
proc print data=OR_&PamEst;
   var parm &classmi OR LCL_OR UCL_OR;
run;
Sadly, the program crashes at the SubtypeBY step, and I do not know how best to proceed to integrate multiple imputation into the adjusted model. Any tips would be much appreciated!