I have the following code, which runs MCMC over the hyperparameters of a Gaussian process:
code:
walkers = 250
pos_min = np.concatenate((np.array([self.initial_scale, self.initial_noise]), np.zeros(self.ndim)))
pos_max = np.concatenate((np.array([self.initial_scale, self.initial_noise]), 2.0*np.ones(self.ndim)))
psize = pos_max - pos_min
pos = [pos_min + psize*np.random.rand(self.ndim+2) for i in range(nwalkers)]
with Pool(4) as pool:
sampler = emcee.EnsembleSampler(nwalkers, self.ndim+2, self.lnprob, pool=pool)
pos, prob, state = sampler.run_mcmc(pos, 200)
sampler.reset()
sampler.run_mcmc(pos, 300)
samples = sampler.flatchain[-500:,:]
return samples
The function self.lnprob is defined in the following code segment:
code
def lnprior(theta):
    """Return the log-prior of the hyperparameter vector ``theta``.

    ``theta[0]`` is the signal scale, ``theta[1]`` the noise level and
    ``theta[2:]`` the length scales (a NumPy array). The prior is only
    supported where scale and noise are positive and every length scale lies
    strictly inside (0, 2); outside that region ``-inf`` is returned.

    Inside the support the value is
    ``log(log(1 + (0.1 / noise)**2)) - 0.5 * log(sqrt(scale))**2``.
    """
    s2_f = theta[0]
    s2_n = theta[1]
    length_scales = theta[2:]
    in_support = (
        0 < s2_f
        and 0 < s2_n
        and (length_scales > 0).all()
        and (length_scales < 2.0).all()
    )
    if not in_support:
        return -np.inf
    # log1p keeps the noise term finite when (0.1 / s2_n)**2 is below machine
    # epsilon; log(1 + x) would round to log(1) = 0 and the outer log would
    # then yield -inf (with a RuntimeWarning) for a perfectly valid point.
    noise_term = np.log(np.log1p((0.1 / s2_n) ** 2))
    scale_term = -0.5 * (np.log(np.sqrt(s2_f)) / 1.0) ** 2
    return noise_term + scale_term
def lnlike(theta):
    """Return the GP log marginal likelihood for hyperparameters ``theta``.

    ``theta[0]`` is written to the kernel's constant value, ``theta[1]`` to
    its noise level and ``theta[2:]`` to its length scale; the kernel stored
    on ``self`` is modified in place. The kernel matrix is evaluated on
    ``self.Xi`` with ``self.alpha`` added to the diagonal, and the targets
    are ``self.Yi``.

    Returns ``-inf`` when the kernel matrix cannot be Cholesky-factorised, so
    that the sampler rejects the proposal instead of aborting the whole run.
    """
    sigma_f = theta[0]
    sigma_n = theta[1]
    length_scales = theta[2:]

    # Push the proposed hyperparameters into the composite kernel.
    self.kernel.k1.k1.constant_value = sigma_f
    self.kernel.k1.k2.length_scale = length_scales
    self.kernel.k2.noise_level = sigma_n

    K = self.kernel(self.Xi)
    K[np.diag_indices_from(K)] += self.alpha
    try:
        L = cholesky(K, lower=True)
    except np.linalg.LinAlgError:
        # K is not numerically positive definite for this proposal.
        return -np.inf

    # Support multi-dimensional targets: treat a 1-D vector as one column.
    y_train = self.Yi
    if y_train.ndim == 1:
        y_train = y_train[:, np.newaxis]

    alpha = cho_solve((L, True), y_train)

    # Per-output log-likelihood: data-fit term, log-determinant term
    # (sum of log diag(L)), and the Gaussian normalisation constant.
    log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
    log_likelihood_dims -= np.log(np.diag(L)).sum()
    log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)

    # Sum over output dimensions.
    return log_likelihood_dims.sum(-1)
lp = lnprior(theta)
if not np.isfinite(lp):
return -np.inf
return lp + lnlike(theta)
The code takes 30 s on my laptop (an Intel Core i5, 7th generation). The size of the theta array is 4 and the size of self.Xi is 2.
The same code takes over 400s on an intel xeon phi processor with 64 cores.
Also, I noticed that as I increase the number of processes in the multiprocessing pool, the running time seems to increase.
Is there any reason why it takes so much longer on the Intel Xeon Phi processor? What can I do to speed it up on the Intel Xeon Phi processor?