Expectation Maximization Algorithm for Two-Component Gaussian Mixture
Given the histogram $h(x)$ of a gray-level image, we model it as a mixture of two Gaussian distributions, $g_1(x)$ and $g_2(x)$, and apply the expectation-maximization (EM) algorithm to find the decision boundary (threshold value) that separates the two components.
E-step: compute the responsibility $\gamma_i$ of class 2 for each bin (the responsibility of class 1 is $1-\gamma_i$).
M-step: compute the weighted means ($\mu_1$, $\mu_2$), variances ($\sigma_1^2$, $\sigma_2^2$), and mixing probability ($\pi$).
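Concretely, writing $h_i$ for the count of histogram bin $x_i = i$, these are the standard weighted-EM updates (and what the code below computes):
$$\gamma_i = \frac{\pi \, \phi_{\theta_2}(x_i)}{(1-\pi)\,\phi_{\theta_1}(x_i) + \pi\,\phi_{\theta_2}(x_i)}, \qquad \hat{\mu}_2 = \frac{\sum_i h_i \gamma_i x_i}{\sum_i h_i \gamma_i}, \qquad \hat{\sigma}_2^2 = \frac{\sum_i h_i \gamma_i (x_i - \hat{\mu}_2)^2}{\sum_i h_i \gamma_i}, \qquad \hat{\pi} = \frac{\sum_i h_i \gamma_i}{\sum_i h_i},$$
with the class-1 parameters obtained in the same way from the weights $1-\gamma_i$.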
log-likelihood:
$$\log L = \sum _{i} \log \left[ (1- \pi) \phi_{\theta_1 } (x_i) + \pi \phi_{\theta_2 }(x_i) \right] $$
The decision boundary (threshold) is the bin index at which the responsibility equals $0.5$.
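Equivalently, $\gamma(x) = 0.5$ is where the two weighted densities cross,
$$(1-\pi)\,\phi_{\theta_1}(x) = \pi\,\phi_{\theta_2}(x),$$
which after taking logarithms is a quadratic in $x$; since the histogram is discrete, the code below simply scans the bins for this crossing instead of solving it in closed form.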
In the figure below, the left panel shows the histogram; the right panel shows the maximum-likelihood Gaussian fit together with the responsibility of the left distribution, $1-\gamma_i$.
#include <vector>
#include <cmath>
#include <cstdio>
// Estimate the mean and variance of the histogram over bins [start, end];
void estimGaussParams(std::vector<double>& data, int start, int end, double *mean, double *var) {
    double s = 0, sx = 0, sxx = 0;          // zeroth, first, and second moments;
    for (int i = start; i <= end; i++) {
        s   += data[i];
        sx  += data[i] * i;
        sxx += data[i] * i * i;
    }
    *mean = sx / s;
    *var  = (sxx - sx * sx / s) / s;        // E[x^2] - E[x]^2;
}
// Initial guess: trim empty bins, split the support in half, and fit each half;
void initGuess(std::vector<double>& data, double mean[], double var[], double *mixprob) {
    int start = -1, end = data.size();
    // trim empty bins at both ends;
    while (data[++start] <= 0) ;
    while (data[--end] <= 0) ;
    // split the given range into two equal-sized sets;
    int mid = (end + start) / 2;
    // simple mean and variance of each half;
    estimGaussParams(data, start, mid, &mean[0], &var[0]);
    estimGaussParams(data, mid + 1, end, &mean[1], &var[1]);
    // initial guess for the mixing probability;
    *mixprob = 0.5;
}
#define PI (4.0 * atan(1.))
// Gaussian density N(mean, var) evaluated at x;
double gaussDist(double x, double mean, double var) {
    double arg = 0.5 * (x - mean) * (x - mean) / var;
    double factor = 1 / sqrt(2. * PI * var);
    return factor * exp(-arg);
}
// Posterior probability (responsibility) that bin x belongs to class 2;
double responsibility2(double x, double mean[], double var[], double mixprob) {
    double a = (1 - mixprob) * gaussDist(x, mean[0], var[0]);
    double b = mixprob * gaussDist(x, mean[1], var[1]);
    return b / (a + b);
}
// M-step: update means, variances, and mixing probability from the responsibilities;
double weightedMeanVar(std::vector<double>& data, std::vector<double>& gamma, double mean[], double var[]) {
    // estimate new means;
    double s = 0, sx0 = 0, sx1 = 0, sg = 0;
    for (int i = data.size(); i-- > 0; ) {
        s   += data[i];
        sg  += data[i] * gamma[i];
        sx0 += data[i] * i * (1 - gamma[i]);
        sx1 += data[i] * i * gamma[i];
    }
    mean[0] = sx0 / (s - sg);
    mean[1] = sx1 / sg;
    // variances with the new means;
    double sv0 = 0, sv1 = 0;
    for (int i = data.size(); i-- > 0; ) {
        sv0 += data[i] * (i - mean[0]) * (i - mean[0]) * (1 - gamma[i]);
        sv1 += data[i] * (i - mean[1]) * (i - mean[1]) * gamma[i];
    }
    var[0] = sv0 / (s - sg);
    var[1] = sv1 / sg;
    // return the new mixing probability = mixing ratio of class 2;
    return (sg / s);
}
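The main loop below tests convergence on successive values of $\pi$; as its comment notes, testing the log-likelihood is the more common criterion. A minimal sketch of that alternative, using the same histogram-weighted convention (the helper name logLikelihood is introduced here for illustration and is not part of the original post):

// Histogram-weighted log-likelihood: log L = sum_i h_i log[(1 - pi) phi_1(i) + pi phi_2(i)];
double logLikelihood(std::vector<double>& data, double mean[], double var[], double mixprob) {
    double logL = 0;
    for (int i = data.size(); i-- > 0; ) {
        if (data[i] <= 0) continue;           // empty bins contribute nothing;
        double p = (1 - mixprob) * gaussDist(i, mean[0], var[0])
                 + mixprob * gaussDist(i, mean[1], var[1]);
        if (p > 0) logL += data[i] * log(p);  // guard against underflow to 0;
    }
    return logL;
}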
#define EPSILON 1e-6
// Expectation-maximization algorithm applied to a two-component Gaussian mixture model;
double emTwoCompGMM(std::vector<double>& data) {
    double mean[2], var[2], mixprob;
    std::vector<double> gamma(data.size());   // responsibilities for class 2;
    initGuess(data, mean, var, &mixprob);
    // begin algorithm;
    while (1) {
        // E-step: recompute the responsibilities with the current parameters;
        for (int i = data.size(); i-- > 0; )
            gamma[i] = responsibility2(i, mean, var, mixprob);
        double old_mixprob = mixprob;
        // M-step: update means, variances, and mixing probability;
        mixprob = weightedMeanVar(data, gamma, mean, var);
        printf("mixing probability = %f\n", mixprob);
        // check convergence (usually the log-likelihood is tested);
        if (fabs(mixprob - old_mixprob) < EPSILON)
            break;
    }
    // estimate the decision boundary: scan down for the first bin with responsibility < 0.5;
    int k = data.size();
    while (gamma[--k] >= 0.5) ;
    return (2 * k + 1) / 2.;    // = average of bins k and k + 1;
}
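As a quick check, the routine can be driven with a synthetic bimodal histogram. A sketch appended to the listing above; the 256-bin size, lobe positions, and amplitudes are arbitrary choices for illustration:

int main() {
    // synthetic 256-bin histogram with two Gaussian lobes at bins 60 and 170;
    std::vector<double> hist(256, 0.0);
    for (int i = 0; i < 256; i++) {
        hist[i] += 1000. * exp(-0.5 * (i - 60.) * (i - 60.) / (15. * 15.));
        hist[i] +=  600. * exp(-0.5 * (i - 170.) * (i - 170.) / (20. * 20.));
    }
    double threshold = emTwoCompGMM(hist);
    printf("threshold = %f\n", threshold);    // expected to land between the two modes;
    return 0;
}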