I have a function that has many parameters/variables. For simplicity we can assume it is f(a,b,c)=a + b * x + c * x**2, where the parameters "a", "b" and "c" can either be fixed parameters or variables I want to fit. I can fit f(a,b,c) with the three parameters simultaneously without problem. The issue is when I want to fix "b" and fit "a" and "c", or fix "b" and "c" and just fit "a". The motivation is that in physical systems some of these parameters can be known and therefore I can focus on fitting just the other ones (or maybe I don't have enough data points to fit many parameters and I prefer to fix some and let free only the most relevant ones).
This is a version of a code that works, fixing to fit all parameters:
import numpy as np
import emcee
import corner
import matplotlib.pyplot as plt
# True parameters of the function
true_a = 1.0
true_b = 2.0
true_c = 0.5
# Generate synthetic data with noise
np.random.seed(42)
x_data = np.linspace(-8, 10, 50)
y_true = true_a + true_b * x_data + true_c * x_data**2
y_data = y_true + np.random.normal(scale=0.05, size=len(x_data))
# Define the quadratic function
def f(x, a, b, c):
return a + b * x + c * x**2
# Define the log-likelihood function
def log_likelihood(params, x, y, y_err):
a, b, c = params
model = f(x, a, b, c)
return -0.5 * np.sum((y - model)**2 / y_err**2)
# Define the log-prior function
def log_prior(params):
# Uniform priors for a, b, c
if all(-10.0 < p < 10.0 for p in params):
return 0.0
return -np.inf
# Define the log-posterior function
def log_posterior(params, x, y, y_err):
lp = log_prior(params)
if not np.isfinite(lp):
return -np.inf
return lp + log_likelihood(params, x, y, y_err)
# Perform MCMC fitting
ndim = 3 # Number of parameters (a, b, c)
nwalkers = 50
nsteps = 2000
burnin = nsteps // 3 # Burn-in steps (1/3 of the total steps)
# Initialize walkers with random values around the true parameters
initial_params = [true_a, true_b, true_c]
per = 0.1
initial_pos = [initial_params + per * np.random.randn(ndim) for _ in range(nwalkers)]
# Set up the MCMC sampler
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(x_data, y_data, 0.5))
# Run the sampler with burn-in
sampler.run_mcmc(initial_pos, nsteps, progress=True)
# Get the samples after burn-in
samples = sampler.get_chain(discard=burnin, flat=True)
Now this is my attempt to define which parameters to fit using as an argument a dictionary (though it might not be the right approach?)
import numpy as np
import emcee
import corner
import matplotlib.pyplot as plt
# True parameters of the function
true_a = 1.0
true_b = 2.0
true_c = 0.5
# Generate synthetic data with noise
np.random.seed(42)
x_data = np.linspace(-8, 10, 50)
y_true = true_a + true_b * x_data + true_c * x_data**2
y_data = y_true + np.random.normal(scale=0.05, size=len(x_data))
# Define the function
def f(x, params):
a = params.get('a', 0.0)
b = params.get('b', 0.0)
c = params.get('c', 0.0)
return a + b * x + c * x**2
# Define the log-likelihood function
def log_likelihood(params, x, y, y_err):
model = f(x, params)
return -0.5 * np.sum((y - model)**2 / y_err**2)
# Define the log-prior function
def log_prior(params):
# Uniform priors for a, b, c
a = params.get('a', 0.0)
b = params.get('b', 0.0)
c = params.get('c', 0.0)
if all(-10.0 < p < 10.0 for p in [a, b, c]):
return 0.0
return -np.inf
# Define the log-posterior function
def log_posterior(params, x, y, y_err):
lp = log_prior(params)
if not np.isfinite(lp):
return -np.inf
return lp + log_likelihood(params, x, y, y_err)
# Initialize walkers with random values around the true parameters
nwalkers = 50
initial_params = {'a': true_a, 'b': true_b, 'c': true_c} # here I would like to have any combination of parameters, not necessarily all three
per = 0.1
initial_pos = [{key: val + per * np.random.randn() for key, val in initial_params.items()} for _ in range(nwalkers)]
# Perform MCMC fitting
ndim = len(initial_params) # Number of parameters
nsteps = 2000
burnin = nsteps // 3 # Burn-in steps (1/3 of the total steps)
# Set up the MCMC sampler
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(x_data, y_data, 0.05))
# Run the sampler with burn-in
sampler.run_mcmc(initial_pos, nsteps, progress=True)
# Get the samples after burn-in
samples = sampler.get_chain(discard=burnin, flat=True)
But when it runs it says: ---> 63 sampler.run_mcmc(initial_pos, nsteps, progress=True) ValueError: incompatible input dimensions (1, 50)
I would really appreciate feedback on how to address this.
One way to do this is using the bilby package, which provides a wrapper to emcee. With bilby you can set any fixed parameters of the model to have
DeltaFunction
priors. For example:The output plot (by default saved into a directory called
outdir
) is: