# import
# ------------------------------------------------------------------------------------------
import os
import matplotlib.pyplot as plt
import numpy as np
import copy
# multiprocessing and functools
# import multiprocessing as mp
from pathos.multiprocessing import ProcessingPool as Pool
from functools import partial
# miam import
import miam.utils
import miam.image.Image as MIMG
import miam.processing.ColorSpaceTransform as MCST
import miam.histogram.Histogram as MHIST
import miam.math as MMATH
# ------------------------------------------------------------------------------------------
# MIAM project 2020
# ------------------------------------------------------------------------------------------
# author: remi.cozot@univ-littoral.fr
# ------------------------------------------------------------------------------------------


class kmeans(object):
    """k-means clustering with a pluggable distance and normalisation.

    The class only relies on the following calls of its collaborators:
        distance.eval(sample, centroid): value compared with ``<`` to find
            the closest centroid of a sample.
        normalize.eval(x): applied to the mean of each non-empty cluster.
        normalize.evals(xs): applied to freshly drawn random centroids.
    """

    # set a random seed for reproducibility
    # NOTE: executed once, when the class body is evaluated; it seeds NumPy's
    # global random generator.
    np.random.seed(1968)

    def __init__(self, distance, normalize):
        """Store the distance and normalisation strategies.

        distance:  object used to compute the distance between a sample and a
                   centroid (its ``eval`` method is called).
        normalize: object used to normalise centroids (its ``eval`` and
                   ``evals`` methods are called).
        """
        self.distance = distance
        self.normalize = normalize

    def _assignSamples(self, mapper, centroids, samples, previousAssigmentIdx):
        """Assign every sample to its closest centroid using ``mapper``.

        NOTE(review): the aggregation part of the assignment code was lost in
        the extracted source; it is rebuilt here from the per-sample return
        tuple and from what ``kmeans()`` unpacks. Confirm against the
        project history.

        mapper:    callable with the signature of the builtin ``map``
                   (``mapper(function, sequence)``), serial or parallel.
        centroids: current centroids, iterable cluster by cluster.
        samples:   samples to assign, iterable sample by sample.
        previousAssigmentIdx: for each cluster, the sample indices assigned
                   to it at the previous iteration.

        return: (assigments, assigmentsIdx, (change, remain, meanDist))
            assigments:    per cluster, list of the samples assigned to it
            assigmentsIdx: per cluster, list of the indices of these samples
            change:        number of samples whose cluster changed
            remain:        number of samples that stayed in their cluster
            meanDist:      mean distance of the samples to their centroid
                           (0.0 when there is no sample)
        """
        # per-sample function: self-contained (no reference to self) so that
        # it can be shipped to worker processes
        def assignSampleToCentroid(previousAssigmentIdx, centroids, distance, isamp):
            """ return (jdist, samp, i, dist, change, remain) """
            # recovering data from parameters
            i, samp = isamp
            dist = 0.0    # minimal distance found so far
            jdist = 0     # index of the centroid at minimal distance
            for j, cent in enumerate(centroids):
                # compute distance between samp and centroids[j]
                d = distance.eval(samp, cent)
                # first centroid initialises the minimum; afterwards a strict
                # comparison keeps the first centroid in case of a tie
                if j == 0 or d < dist:
                    dist = d
                    jdist = j
            # did the sample stay in the cluster it had at previous iteration?
            if jdist < len(previousAssigmentIdx) and i in previousAssigmentIdx[jdist]:
                return (jdist, samp, i, dist, 0, 1)
            return (jdist, samp, i, dist, 1, 0)

        # sets make the "was already in this cluster" test O(1) per sample
        previousMembers = [set(indices) for indices in previousAssigmentIdx]
        worker = partial(assignSampleToCentroid, previousMembers, centroids, self.distance)
        results = mapper(worker, list(enumerate(samples)))

        # return assigments and assigments index: one list per centroid
        nbCentroids = len(centroids)
        assigments = [[] for _ in range(nbCentroids)]
        assigmentsIdx = [[] for _ in range(nbCentroids)]
        # convergence
        change, remain, sumDist = 0, 0, 0.0
        for jdist, samp, i, dist, sampChange, sampRemain in results:
            assigments[jdist].append(samp)
            assigmentsIdx[jdist].append(i)
            change += sampChange
            remain += sampRemain
            sumDist += dist

        nbSamples = len(samples)
        meanDist = sumDist / nbSamples if nbSamples else 0.0
        return (assigments, assigmentsIdx, (change, remain, meanDist))

    def MPassignSamplesToCentroids(self, centroids, samples, previousAssigmentIdx):
        """Multi-process version of ``assignSamplesToCentroids``.

        The per-sample work is distributed with the ``map`` method of a
        pathos ``ProcessingPool``; parameters and return value are those of
        ``_assignSamples``.

        NOTE(review): the pool handling was lost in the extracted source;
        a default ``Pool()`` is assumed here -- confirm.
        """
        return self._assignSamples(Pool().map, centroids, samples, previousAssigmentIdx)

    def assignSamplesToCentroids(self, centroids, samples, previousAssigmentIdx):
        """Assign each sample to its closest centroid (single process).

        Parameters and return value are those of ``_assignSamples``:
        (assigments, assigmentsIdx, (change, remain, meanDist)).
        """
        return self._assignSamples(map, centroids, samples, previousAssigmentIdx)

    def averageAssigments(self, assignements):
        """Compute the normalised mean of each cluster.

        assignements: per cluster, the list of samples assigned to it.

        return: list with one entry per cluster: ``self.normalize.eval`` of
            the mean (over axis 0) of the cluster samples, or an empty list
            when the cluster has no sample.
        """
        # debug
        # print("kmeans.averageAssigment>> start")
        # end debug

        # return list: an empty list marks a cluster without sample
        assigmentAverage = [[] for _ in assignements]
        for i, assigment_i in enumerate(assignements):
            members = np.asarray(assigment_i)
            # debug
            # print("kmeans.averageAssigment::sassigment_i.size>>",members.size)
            # end debug
            if members.size > 0:
                assigmentAverage[i] = self.normalize.eval(np.mean(members, axis=0))
        # debug
        # print("kmeans.averageAssigment>> end")
        # end debug
        return assigmentAverage

    def _randomCentroids(self, count, dimCentroid, initRange):
        """Draw ``count`` random centroids of shape ``dimCentroid``.

        Values are drawn uniformly in [0, 1) and, when ``initRange`` is given,
        linearly mapped to the (min, max) ranges it lists; the result is
        passed through ``self.normalize.evals``.
        """
        u = np.random.rand(count, *dimCentroid)
        if initRange:
            minRange = np.asarray([minr for minr, _ in initRange])
            maxRange = np.asarray([maxr for _, maxr in initRange])
            v = (1 - u) * minRange + u * maxRange
        else:
            v = u
        return self.normalize.evals(v)

    def kmeans(self, samples, nbClusters, nbIter, display=None, initRange=None, multiPro=False):
        """
        method kmeans: cluster samples with the k-means algorithm.

        attribute(s):
            self:       instance
            samples:    samples to cluster (np.ndarray)
                        samples.shape[0] : number of samples
                        samples.shape[1:]: shape of one sample (and of one
                        centroid); for example samples.shape[1:] == (5,3)
                        means each sample is a 5x3 matrix
            nbClusters: number of clusters to compute (int, >= 1)
            nbIter:     maximum number of iterations of k-means (int)
            display:    None or object with a plot method, called at each
                        iteration as
                        plot(centroids, assigmentsIdx, iteration,
                             (changes, remains, meanDistances), len(samples))
            initRange:  None or list of (min, max) tuples used to draw the
                        random centroids; the list must broadcast against the
                        trailing axis of a centroid, for example for 5x3
                        centroids (5 vectors of size 3, the case of color
                        palettes):
                        initRange = [(minRange0,maxRange0),
                                     (minRange1,maxRange1),
                                     (minRange2,maxRange2)]
                        initRange=None: values drawn in 0..1
            multiPro:   if True samples are assigned to centroids with
                        MPassignSamplesToCentroids, otherwise with
                        assignSamplesToCentroids

        return: (centroids, assigments, assigmentsIdx)
            centroids:     result of normalize.evals on an array of shape
                           (nbClusters, *samples.shape[1:]), updated in place
            assigments:    per cluster, list of samples of the last iteration
            assigmentsIdx: per cluster, list of the indices of these samples

        raise: ValueError if nbClusters < 1
        """
        if nbClusters < 1:
            raise ValueError("kmeans: nbClusters must be >= 1")

        # dimension of centroids
        dimCentroid = samples.shape[1:]

        # init centroids
        centroids = self._randomCentroids(nbClusters, dimCentroid, initRange)

        # return assigments and assigments index
        previousAssigmentsIdx = [[] for _ in range(nbClusters)]
        assigments = [[] for _ in range(nbClusters)]
        assigmentsIdx = [[] for _ in range(nbClusters)]

        # convergence
        changes = []
        remains = []
        meanDistances = []

        # serial or multi-process assignment
        assign = self.MPassignSamplesToCentroids if multiPro else self.assignSamplesToCentroids

        # MAIN LOOP
        # -----------------------------------------------------------------------------------------
        for iteration in range(nbIter):
            print("\r","kmeans(iteration): ",iteration,"/",nbIter,":",iteration*100//nbIter," % ",end = '\r')

            # assign sample to centroids
            (assigments, assigmentsIdx, conv) = assign(centroids, samples, previousAssigmentsIdx)

            # recover data from results
            change, remain, meanDist = conv
            changes.append(change)
            remains.append(remain)
            meanDistances.append(meanDist)

            # compute mean of (assigment) cluster
            assigmentsAverage = self.averageAssigments(assigments)

            # update centroids and stopping criteria
            canBreak = True
            for i, ass_av in enumerate(assigmentsAverage):
                if isinstance(ass_av, np.ndarray) and ass_av.size != 0:
                    centroids[i] = ass_av
                    continue
                # cluster without usable mean: draw a new random centroid and
                # forbid stopping at this iteration
                canBreak = False
                centroids[i] = self._randomCentroids(1, dimCentroid, initRange)
                print("")
                print("WARNING[miam.classification.kmeans(): (iteration:",iteration,"/centroid:",i,"): no assigment! >> compute new centroid]")

            # display
            if display:
                display.plot(centroids, assigmentsIdx, iteration, (changes, remains, meanDistances), len(samples))

            # memory
            previousAssigmentsIdx = copy.deepcopy(assigmentsIdx)

            # break iteration if no sample changed cluster and no centroid
            # had to be drawn again
            if (change == 0) and canBreak:
                break
        # -----------------------------------------------------------------------------------------
        # return centroids
        print(" ")
        return (centroids, assigments, assigmentsIdx)