Convert multilevel jags model from wide to long format

205 Views Asked by At

I have a multi-level jags model. I'm trying to convert it from wide to long format as described here: http://jeromyanglim.tumblr.com/post/37361593128/jags-converting-multilevel-model-from-wide-to However my model is more complex than the example so I'm having some trouble making this work. To illustrate the difficulties I've made a repeatable example. This first block creates data and sets jags parameters:

library(ecodist)
library(runjags)
set.seed(10)

##### population n
n <- 250
# num outputs
num.ys <- 10

# Vector binary to indicate which domains have correlation with independent variables
corr.vec <- c(0, 0, 0, 1, 1, 0, 0, 1, 1, 1)
correlation = 0.99

# Function to simulate correlated outcome
sim.fn <- function(i, var1, sw1) {
    if(sw1 ==1){
        temp <- corgen(n , var1, correlation )
        temp <- as.numeric(temp$y * attr(temp$y,'scaled:scale') + attr(temp$y,'scaled:center'))
    } else {
        temp <- rnorm(n, 0, 5)
    }
    return(temp)
}

##### Generate data
df0 <- data.frame(var1=rnorm(n, 15, 2))
df1 <- data.frame(df0, sapply(1:num.ys, function(i) sim.fn(i, df0$var1, corr.vec[i])))

out.names <- paste0("y_", 1:num.ys)
names(df1) <- c("var1", out.names)

### Jags parameters
parameters = c("B1O", "b1", "b1o", "nu", "sd")
adaptSteps = 1000             # Number of steps to "tune" the samplers.
burnInSteps = 10000            # Number of steps to "burn-in" the samplers.
nChains = 2                   # Number of chains to run.
numSavedSteps=1000          # Total number of steps in chains to save.
thinSteps=2                   # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.

Ok, so this next section is the 'wide format' jags model thats provides the correct estimates in the object mcmcChain:

modelstring = "
model {
for( i in 1 : nData ) {
    for(np in 1:nVars){
        y[i, np]  ~ dt( mu[i,np], tau, nu)
        mu[i, np] <- b0s[i] + (b1 + b1o[np]) * x1[i]
    }
}

#Random effects
for(i in 1:nData){
    b0s[i] ~ dnorm(0, b0stau)
}

#Outcome level
for (np in 1:nVars){
    b1o[np] ~ dnorm(0, b1otau)
}

##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)

#
b0stau <- pow(b0ssd, -2)
b0ssd  ~ dt(0, 1/625, 1)T(0,)

# tau & nu priors
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
tau <- pow(sd, -2)
sd ~ dunif(0, 10)

b1otau <- pow(b1osd, -2)
b1osd  ~ dt(0, 1/625, 1)T(0,)
b1dtau <- pow(b1dsd, -2)
b1dsd  ~ dt(0, 1/625, 1)T(0,)

#Transformations
for(np in 1:nVars){
    B1O[np] <- b1 + b1o[np]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.test.txt")

zy <- (df1[, out.names])
sc_ys <- data.frame(lapply(zy, function(x) scale(x)) )

dataList = list( y = as.matrix(sc_ys), x1 = as.numeric(scale(df1$var1,)), 
             nVars = num.ys, nData = nrow(df1))

# Run this model via run.jags
codaSamples <- run.jags(model="model.jags.no_dom.test.txt" , data=dataList , method ="parallel", n.chains=nChains, monitor=parameters,
                    adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)

mcmcChain <- data.frame(summary( codaSamples ))
mcmcChain

So the BO outputs are close to the correlations the data was generated from. Next is my attempt at the "long format" model analogous to the explantion in the link above.

modelstring = "
model {
for( i in 1 : nData ) {
    y[i]  ~ dt( mu[i] , tau, nu )
    mu[i] <- b0s[i] + (b1 + b1o[idx[i]]) * x1[i]
}

#Random effects
for(i in 1:nData){
    b0s[i] ~ dnorm(0, b0stau)
}

#Outcome level
for (y in 1:nVars){
    b1o[y] ~ dnorm(0, b1otau[y])
}

##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)

b0stau <- pow(b0ssd, -2)
b0ssd  ~ dt(0, 1/625, 1)T(0,)

for (y in 1:nVars){
    b1otau[y] <- pow(b1osd[y], -2)
    b1osd[y]  ~ dt(0, 1/625, 1)T(0,)
}

tau <- pow(sd, -2)
sd ~ dunif(0, 10)
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI

#Transformations
for(j in 1:nVars){
    B1O[j]  <- b1 + b1o[j]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.long.test.txt")

# Restructure data into long format
dataList2 = list( y = unlist(sc_ys), x1 = rep (as.numeric(scale(df1$var1,)),  length(out.names)),
             idx = rep(1:length(out.names), each=nrow(df1)),
             nVars = length(out.names), nData = nrow(df1))

codaSamples2 <- run.jags(model="model.jags.no_dom.long.test.txt" , data=dataList2 , method ="parallel", n.chains=nChains, monitor=parameters,
                     adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)

mcmcChain2 <- data.frame(summary( codaSamples2 ))
mcmcChain2

So the results in mcmcChain2 don't match those of mcmcChain, but I cannot see where I'm going wrong. Can anyone help please ? Thanks.

1

There are 1 best solutions below

1
On

Your matrix df1 has nData * nVars elements, but your long format model is only using the first nData elements (i.e. in effect you are just using the first column of the data). The maximum for the main data loop needs to be adjusted to be equal to nData*nVars and not just nData.

Also you need a vector representing the row number of the original df1 so that you can index your random effect b0s correctly as e.g. b0s[dfrow[i]]. Also, it is hard to follow the data specification (e.g. what is length(out.names)) so I'm not sure if you have already done this, but either x1 needs to be repeated nVars times or you should use the same x1[dfrow[i]] indexing as for the random effect (preferably the latter for the sake of readability of your model code).

Matt