Does anyone see the mistake/problem with my code (MCMC using emcee)?

146 Views Asked by At

Does anyone see the problem with my code? I am trying to set up a rather easy example of fitting some mock data to a lognormal function using emcee but MCMC does not produce a feasible fit like curve-fit. I am writing this post because after looking through several examples on the internet, I still cannot see a mistake in my code and think that there might be another issue. Thank you in advance for helping me out.

This is my code:

import matplotlib.pyplot as plt
import numpy as np
import emcee
from scipy.optimize import curve_fit
import corner

def lognormal(x, A, mu, sigma):
     return A * 1/(x * sigma * np.sqrt(2 * np.pi)) * np.exp(-(np.log(x) - mu)**2/(2 * sigma**2))

#MCMC

def mcmc(x_data, y_data, func, initial_guess = [1,1,1], burning_in = 200, thinner = 50, steps = 5000, walkers = 100, dimension = 3):
#Scipy curve fit

    curve_fit_params, cov= curve_fit(func, x_data, y_data, p0 = initial_guess)

    curve_fit_error = np.sqrt(np.diag(cov))


    #MCMCM setup

    # Define the log-likelihood function

    def log_likelihood(theta, x, y, func):
        A, mu, sigma = theta
        y_model = func(x, A, mu, sigma)
        residual = y - y_model
        chi_squared = np.sum(residual**2)
        log_like = -0.5 * chi_squared
        return log_like


    # Define the log-prior function
    def log_prior(theta):
        A, mu, sigma = theta
        # Add any prior constraints here
        # For example, uniform priors for A, mu and sigma between certain ranges
        if 0 < A < 2.*curve_fit_params[0] and 0 < mu < 2.*curve_fit_params[1] and 0 < sigma < 2.*curve_fit_params[2]:
            return 0.0  # log(1) = 0
        return -np.inf  # log(0) = -inf

    # Define the log-posterior function
    def log_posterior(theta, x, y, func):
        log_prior_val = log_prior(theta)
        if not np.isfinite(log_prior_val):
            return -np.inf  # log(0) = -inf
        log_like_val = log_likelihood(theta, x, y, func)
        return log_prior_val + log_like_val

    #MCMC

    # Set up the number of walkers and dimensions
    nwalkers = walkers
    ndim = dimension

    # Initialize the walkers with random positions near the maximum likelihood solution
    pos = [0.1 * np.random.randn(ndim) + curve_fit_params for _ in range(nwalkers)]

    # Set up the emcee sampler
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(x_data, y_data, func))

    # Run the MCMC sampling
    nsteps = steps  # Number of steps to run the sampler
    sampler.run_mcmc(pos, nsteps, progress = True)

    #tau = sampler.get_autocorr_time()
    #print(tau)

    # Get the chain of samples and flatten it
    samples = sampler.get_chain(discard=burning_in, thin = thinner, flat=True)

    # Get the best-fit parameters (maximum a posteriori or MAP estimate)
    mcmc_fit_mean = np.mean(samples, axis=0)

    # Get the uncertainties in the parameters (credible intervals)
    mcmc_fit_std = np.std(samples, axis=0)

    #corner plot

    labels = ["A", "mu", "sigma"]

    fig = corner.corner(samples, bins = 100, plot_datapoints=False, smooth = True, labels=labels, truths=curve_fit_params, truth_color = 'darkred', color = '#002448', quantiles=[0.16, 0.50, 0.84], hist_kwargs={"density": True, "alpha": 0.4, "histtype": "stepfilled"})
    #plt.savefig('MCMC_test_convergence.png', bbox_inches='tight', dpi=800)
    plt.show()
    plt.clf()
    
    return samples, mcmc_fit_mean, mcmc_fit_std, curve_fit_params, curve_fit_error



#test data

n_sample = 1000

x_test = np.linspace(0.001,250, n_sample)

y_test = lognormal(x_test, 0.8, 4.7, 0.4) + np.random.normal(0, 0.0003, n_sample)


plt.scatter(x_test, y_test, linewidth  = 0.5, s = 1, color = 'darkred')
plt.show()
plt.clf()



result = mcmc(x_test, y_test, lognormal)
print('MCMC results: ', *result[1])
print('curvefit results: ', *result[3])

plt.plot(x_test, lognormal(x_test, *result[1]), linewidth = 0.5, color = 'navy', label = 'MCMC')
plt.plot(x_test, lognormal(x_test, *result[3]), linewidth = 0.5, color = 'orange', label = 'curvefit')
plt.scatter(x_test, y_test, linewidth  = 0.5, s = 1, color = 'darkred')
plt.legend(loc = 'best')
plt.clf()
0

There are 0 best solutions below