I'm trying to perform content-based image retrieval (CBIR) using k-means clustering. I use the PCA function princomp()
with a feature vector length of 190.
I have 500 test images in color taken from here. There's 5 categories in total. When I run my code I only get 3 clusters and the images look very different. What am I doing wrong?
Here is my code:
% construction of feature vector
set = [hsvHist autoCorrelogram Moments_Couleur meanAmplitude msEnergy OndelettesMoments];
% add name of image
dataset(k, :) = [set str2double(name)];
handlse.CaracVt = dataset.';
dlmwrite('f:/hellonewday', handlse.CaracVt);
% ACP function
[COEFF, SCORE, latent] = princomp(handlse.CaracVt());
laten = cumsum(latent)./sum(latent)
list = []; o = 1; c = 0;
for kn = 1:length(laten)
if (isempty(list))
list(o, :) = laten(kn);
o = o + 1;
else
for i = 1:length(list)
kki = abs(laten(kn) - list(i));
if (kki > 0.006)
c = c + 1;
end;
end;
if (c == length(list))
list(o, :) = laten(kn);
o = o + 1;
end;
end;
c = 0;
end;
handlse.NmbreCluster = length(list);
disp('The amount of clusters is: ');
disp(handlse.NmbreCluster);
handlse.CaracVt = handlse.CaracVt.';
mat = handlse.CaracVt;
mat(:, end) = [];
[kMeansClusters, c] = kmeans(mat, handlse.NmbreCluster);
dlmwrite('f:/clusters', kMeansClusters);
dlmwrite('f:/centres', c);
disp('kMeansClusters for each image: ');
disp(kMeansClusters);
c = c.';
ko = 1;
for i = 1:handlse.NmbreCluster
array = zeros(1, 191);
c(i, 191) = 0;
array(:, 1) = c(i);
for kp = 1:length(kMeansClusters)
if (i == kMeansClusters(kp))
ko = ko + 1;
array(ko, :) = handlse.CaracVt(kp, :);
end
end;
myArray{i} = array;
ko = 1;
end;
disp(myArray);
uisave('myArray', 'dataset');