Does anyone see the problem with my code? I am trying to set up a rather easy example of fitting some mock data to a lognormal function using emcee but MCMC does not produce a feasible fit like curve-fit. I am writing this post because after looking through several examples on the internet, I still cannot see a mistake in my code and think that there might be another issue. Thank you in advance for helping me out.
This is my code:
import matplotlib.pyplot as plt
import numpy as np
import emcee
from scipy.optimize import curve_fit
import corner
def lognormal(x, A, mu, sigma):
return A * 1/(x * sigma * np.sqrt(2 * np.pi)) * np.exp(-(np.log(x) - mu)**2/(2 * sigma**2))
#MCMC
def mcmc(x_data, y_data, func, initial_guess = [1,1,1], burning_in = 200, thinner = 50, steps = 5000, walkers = 100, dimension = 3):
#Scipy curve fit
curve_fit_params, cov= curve_fit(func, x_data, y_data, p0 = initial_guess)
curve_fit_error = np.sqrt(np.diag(cov))
#MCMCM setup
# Define the log-likelihood function
def log_likelihood(theta, x, y, func):
A, mu, sigma = theta
y_model = func(x, A, mu, sigma)
residual = y - y_model
chi_squared = np.sum(residual**2)
log_like = -0.5 * chi_squared
return log_like
# Define the log-prior function
def log_prior(theta):
A, mu, sigma = theta
# Add any prior constraints here
# For example, uniform priors for A, mu and sigma between certain ranges
if 0 < A < 2.*curve_fit_params[0] and 0 < mu < 2.*curve_fit_params[1] and 0 < sigma < 2.*curve_fit_params[2]:
return 0.0 # log(1) = 0
return -np.inf # log(0) = -inf
# Define the log-posterior function
def log_posterior(theta, x, y, func):
log_prior_val = log_prior(theta)
if not np.isfinite(log_prior_val):
return -np.inf # log(0) = -inf
log_like_val = log_likelihood(theta, x, y, func)
return log_prior_val + log_like_val
#MCMC
# Set up the number of walkers and dimensions
nwalkers = walkers
ndim = dimension
# Initialize the walkers with random positions near the maximum likelihood solution
pos = [0.1 * np.random.randn(ndim) + curve_fit_params for _ in range(nwalkers)]
# Set up the emcee sampler
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(x_data, y_data, func))
# Run the MCMC sampling
nsteps = steps # Number of steps to run the sampler
sampler.run_mcmc(pos, nsteps, progress = True)
#tau = sampler.get_autocorr_time()
#print(tau)
# Get the chain of samples and flatten it
samples = sampler.get_chain(discard=burning_in, thin = thinner, flat=True)
# Get the best-fit parameters (maximum a posteriori or MAP estimate)
mcmc_fit_mean = np.mean(samples, axis=0)
# Get the uncertainties in the parameters (credible intervals)
mcmc_fit_std = np.std(samples, axis=0)
#corner plot
labels = ["A", "mu", "sigma"]
fig = corner.corner(samples, bins = 100, plot_datapoints=False, smooth = True, labels=labels, truths=curve_fit_params, truth_color = 'darkred', color = '#002448', quantiles=[0.16, 0.50, 0.84], hist_kwargs={"density": True, "alpha": 0.4, "histtype": "stepfilled"})
#plt.savefig('MCMC_test_convergence.png', bbox_inches='tight', dpi=800)
plt.show()
plt.clf()
return samples, mcmc_fit_mean, mcmc_fit_std, curve_fit_params, curve_fit_error
#test data
n_sample = 1000
x_test = np.linspace(0.001,250, n_sample)
y_test = lognormal(x_test, 0.8, 4.7, 0.4) + np.random.normal(0, 0.0003, n_sample)
plt.scatter(x_test, y_test, linewidth = 0.5, s = 1, color = 'darkred')
plt.show()
plt.clf()
result = mcmc(x_test, y_test, lognormal)
print('MCMC results: ', *result[1])
print('curvefit results: ', *result[3])
plt.plot(x_test, lognormal(x_test, *result[1]), linewidth = 0.5, color = 'navy', label = 'MCMC')
plt.plot(x_test, lognormal(x_test, *result[3]), linewidth = 0.5, color = 'orange', label = 'curvefit')
plt.scatter(x_test, y_test, linewidth = 0.5, s = 1, color = 'darkred')
plt.legend(loc = 'best')
plt.clf()