I was trying to code up an EM algorithm in Python for clustering images of different types. My understanding of the EM algorithm is as follows:
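In short, given n data points x_1, …, x_n and m components, the algorithm alternates between these two steps:

E-step: compute the responsibility of component c for point x_i,

    r_ic = π_c · N(x_i | μ_c, Σ_c) / Σ_{k=1..m} π_k · N(x_i | μ_k, Σ_k)

M-step: with m_c = Σ_i r_ic, re-estimate the parameters,

    π_c = m_c / n
    μ_c = (1/m_c) · Σ_i r_ic · x_i
    Σ_c = (1/m_c) · Σ_i r_ic · (x_i − μ_c)(x_i − μ_c)^T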
Accordingly, I implemented it in Python. Here's my code:
import numpy as np
from scipy import stats

# μ: mean vectors ∈ R^(self.m x self.n)
# Σ: covariance matrices ∈ R^(self.m x self.n x self.n)
# π: mixing probabilities of each component
class gmm:
    def __init__(self, n_components):
        self.m = n_components
        self.π = np.random.random((self.m,))
        self.x = None
        self.Σ = None
        self.μ = None
        self.r = None
        self.n = None  # length of data

    @classmethod
    def N(cls, x, μ, Σ):
        # Gaussian density of component (μ, Σ) evaluated at x
        return stats.multivariate_normal(μ, Σ).pdf(x)

    def E_step(self):
        # responsibilities: r[i, c] ∝ π_c · N(x_i | μ_c, Σ_c)
        for i, x_i in enumerate(self.x):
            den = 0
            for c in range(self.m):
                den += self.π[c] * gmm.N(x_i, self.μ[c], self.Σ[c])
            for c in range(self.m):
                num = self.π[c] * gmm.N(x_i, self.μ[c], self.Σ[c])
                self.r[i, c] = num / den
        print(f'{self.r}\n')

    def M_step(self):
        m_c = np.sum(self.r, axis=0)  # effective number of points per component
        self.π = m_c / self.m
        for c in range(self.m):
            s1 = 0
            s2 = 0
            for i in range(self.n):
                s1 += self.r[i, c] * self.x[i]
                s2 += self.r[i, c] * (self.x[i] - self.μ[c]).dot(self.x[i] - self.μ[c])
            self.μ[c] = s1 / m_c[c]
            self.Σ[c] = s2 / m_c[c]

    def fit(self, x, iterations=10):
        self.x = x
        self.n = x.shape[0]
        self.r = np.empty((self.n, self.m))
        self.μ = np.random.random((self.m, self.n))
        Sigma = [np.random.random((self.n, self.n)) for i in range(self.m)]
        Sigma = [0.5 * (s + s.T) + 5 * np.eye(s.shape[0]) for s in Sigma]  # a symmetric diagonally dominant matrix is PD
        self.Σ = np.asarray(Sigma)
        for i in range(iterations):
            self.E_step()
            self.M_step()

    def params(self):
        return self.π, self.μ, self.Σ
if __name__ == '__main__':
    data_dim = 5  # no. of data dimensions
    data = [[]] * 6
    data[0] = np.random.normal(0.5, 2, size=(300,))
    data[1] = np.random.normal(3, 2, size=(300,))
    data[2] = np.random.normal(-1, 0.1, size=(300,))
    data[3] = np.random.normal(2, 3.14159, size=(300,))
    data[4] = np.random.normal(0, 1, size=(300,))
    data[5] = np.random.normal(3.5, 5, size=(300,))
    p = [0.1, 0.15, 0.22, 0.3, 0.2, 0.03]
    vals = [0, 1, 2, 3, 4, 5]
    combined = []
    for i in range(data_dim):
        choice = np.random.choice(vals, p=p)
        combined.append(np.random.choice(data[choice]))
    combined = np.array(combined)
    G = gmm(n_components=6)
    G.fit(combined)
    pi, mu, sigma = G.params()
    print(pi)
    print(mu)
    print(sigma)
Now comes the problem: while running the code, the covariance matrices Σ become singular after a few iterations. Specifically, all the entries of Σ suddenly become the same value in a particular iteration (and a matrix whose entries are all equal has rank 1, hence is singular).
I have tried adding some random noise to Σ when this happens, but that only seems to delay the inevitable. Any help/comments would be greatly appreciated. Thanks in advance :)
To prevent the covariance matrix from becoming singular, you could add a small constant value along its diagonal, i.e.

    Σ_c ← Σ_c + ε·I

as this ensures that the covariance matrix remains positive definite and therefore has an inverse. For instance, sklearn's GaussianMixture uses ε = 1e-6 as the default value of its reg_covar regularization parameter.
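A minimal sketch of how this could look with the code above (the helper name regularize is just for illustration; reg_covar mirrors sklearn's parameter name):

import numpy as np

def regularize(Σ, reg_covar=1e-6):
    # Σ is the stacked (m, n, n) array of component covariances.
    # np.eye(n) broadcasts across the first axis, so reg_covar is
    # added along the diagonal of every component at once.
    return Σ + reg_covar * np.eye(Σ.shape[-1])

Calling self.Σ = regularize(self.Σ) at the end of M_step keeps every component covariance invertible. Note that this treats the symptom rather than the cause: if a component keeps collapsing onto a single point, its covariance keeps shrinking and the ridge term merely keeps it numerically positive definite.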