728x90

영상의 히스토그램을 두 개의 가우시안 분포의 혼합으로 모델링하여 분리하려고 할 때 기준인 decision boundary 값 (threshold value)을 expectation maximization(EM) 알고리즘을 적용하여 구한다. 

 

 

E-step: compute the responsibility γ_i of class 2 for each bin i (the class-1 responsibility is then 1-γ_i)

 

 

M-step: compute the weighted means (μ1, μ2), variances (σ1, σ2) and mixing probability (π)

 

 

log-likelihood: 

decision boundary 값은 responsibility = 0.5인  bin 인덱스를 선택하면 된다.

 

아래 그림의 왼쪽은 히스토그램, 오른쪽은 최대우도 gaussian fitting 결과와 왼쪽 분포의 responsibility(1-γ_i)를 그린 것이다.

void estimGaussParams(double data[], int start, int end, double *mean, double *var) ;

더보기
/* Estimate the mean and variance of the histogram segment data[start..end),
 * treating bin index i as the sample value and data[i] as its weight.
 * Guard added: on an empty segment (total weight == 0) mean and var are set
 * to 0 instead of producing NaN from a division by zero. */
void estimGaussParams(double data[], int start, int end, double *mean, double *var) {
    double s = 0, sx = 0, sxx = 0;
    for (int i = start; i < end; i++) {
        s   += data[i];
        sx  += data[i] * i;
        sxx += data[i] * i * i;
    }
    if (s <= 0) {          /* empty or degenerate segment */
        *mean = 0;
        *var  = 0;
        return;
    }
    *mean = sx / s;
    /* weighted var = E[x^2] - E[x]^2 */
    *var = (sxx - sx * sx / s) / s;
}

void initGuess(double data[], int n, double mean[], double var[], double *mixprob);

더보기
/* Initial guess for the EM iteration: trim empty bins at both ends of the
 * histogram, split the populated range into two halves, and estimate each
 * half's mean/variance independently. Mixing probability starts at 0.5.
 * Fix: both trimming scans are now bounds-checked so an all-zero histogram
 * cannot walk past the ends of the array (previously undefined behavior). */
void initGuess(double data[], int n, double mean[], double var[], double *mixprob) {
    int start = 0, end = n;
    // trim null (empty) bins at both ends, without running out of bounds;
    while (start < end && data[start] <= 0) start++;
    while (end > start && data[end - 1] <= 0) end--;
    // split the populated range into two equal-size halves;
    int mid = (start + end) / 2;
    // simple (unweighted-by-class) mean and variance of each half;
    estimGaussParams(data, start, mid, &mean[0], &var[0]);
    estimGaussParams(data, mid, end, &mean[1], &var[1]);
    // initial guess for the mixing probability;
    *mixprob = 0.5;
}

#define PI (4.0 * atan(1.))

double gaussDist(double x, double mean, double var) ;  

더보기
double gaussDist(double x, double mean, double var) { 
    // N(mean, var);
    double arg = 0.5 * (x - mean) * (x - mean) / var;
    double factor = 1 / sqrt(2.* PI * var);
    return factor * exp(-arg); 
}

double responsibility2(double x, double mean[], double var[], double mixprob) ;   

더보기
// Posterior probability (responsibility) that value x belongs to class 2
// under the current two-component mixture: p2 / (p1 + p2), where each pk is
// the class-weighted Gaussian likelihood.
double responsibility2(double x, double mean[], double var[], double mixprob) {
    double p1 = gaussDist(x, mean[0], var[0]) * (1 - mixprob);  // class-1 weighted likelihood
    double p2 = gaussDist(x, mean[1], var[1]) * mixprob;        // class-2 weighted likelihood
    return p2 / (p1 + p2);
}

double weightedMeanVar(double data[], double gamma[], int n, double mean[], double var[]) ;

더보기
/* M-step of the EM iteration: given the class-2 responsibilities gamma[],
 * recompute the responsibility-weighted means and variances of both classes
 * and return the updated mixing probability (total class-2 mass / total mass).
 * data[i] is the histogram count of bin i; the bin index i is the sample value.
 * NOTE(review): if the responsibilities collapse entirely to one class,
 * (s - sg) or sg becomes 0 and the divisions produce NaN — callers should
 * ensure a sane initialization.
 * Fix: the second loop declared no loop variable ('i' was scoped to the first
 * loop), which is a compile error under standard C99/C++ scoping. */
double weightedMeanVar(double data[], double gamma[], int n, double mean[], double var[]) {
    // responsibility-weighted first moments;
    double s = 0, sx0 = 0, sx1 = 0, sg = 0;
    for (int i = 0; i < n; i++) {
        s   += data[i];
        sg  += data[i] * gamma[i];
        sx0 += data[i] * i * (1 - gamma[i]);
        sx1 += data[i] * i * gamma[i];
    }
    mean[0] = sx0 / (s - sg);
    mean[1] = sx1 / sg;
    // variances about the freshly updated means;
    double sv0 = 0, sv1 = 0;
    for (int i = 0; i < n; i++) {   // fixed: 'i' must be declared here
        sv0 += data[i] * (i - mean[0]) * (i - mean[0]) * (1 - gamma[i]);
        sv1 += data[i] * (i - mean[1]) * (i - mean[1]) * gamma[i];
    }
    var[0] = sv0 / (s - sg);
    var[1] = sv1 / sg;
    // mixing probability = responsibility mass of class 2;
    return (sg / s);
}
#define EPSILON  1e-6
// Expectation-Maximization for a two-component Gaussian mixture model fitted
// to a histogram data[0..n). Returns the decision boundary: the half-integer
// position between the first bin (scanning downward) whose class-2
// responsibility drops below 0.5 and its right neighbor.
// Fixes: the MFC-only TRACE debug macro is removed (it does not exist outside
// MFC and is a no-op in release builds anyway), and the previously unbounded
// while(1) loop now has an iteration cap so EM cannot spin forever on
// degenerate data.
double emTwoCompGMM(double data[], int n) {
    const int MAX_ITER = 1000;          // safety cap on EM iterations
    double mean[2], var[2], mixprob;
    double *gamma = new double[n];      // responsibilities for class 2;
    initGuess(data, n, mean, var, &mixprob);
    // begin algorithm;
    for (int iter = 0; iter < MAX_ITER; iter++) {
        // E-step: responsibilities under the current parameters;
        for (int i = 0; i < n; i++)
            gamma[i] = responsibility2(i, mean, var, mixprob);
        double old_mixprob = mixprob;
        // M-step: re-estimate means, variances, and mixing probability;
        mixprob = weightedMeanVar(data, gamma, n, mean, var);
        // check convergence (usually the log-likelihood is tested instead);
        if (fabs(mixprob - old_mixprob) < EPSILON)
            break;
    }
    // estimate the decision boundary: first bin from the top with
    // responsibility < 0.5; if none exists k ends at -1 (returns -0.5);
    int k = n - 1;
    for ( ; k >= 0; k--)
        if (gamma[k] < 0.5) break;
    delete [] gamma;
    return (2 * k + 1) / 2.;            // midpoint between bins k and k+1;
}

Posted by helloktk

댓글을 달아 주세요