주어진 히스토그램 데이터를 두 개의 가우시안 분포의 혼합으로 모델링하여 데이터를 분리하기 위한 decision boundary 값 (threshold value)을 expectation maximization 알고리즘을 적용하여 구한다. 

E-step: compute responsibility (of class 2; for class 1, 1-gamma_i)

M-step: compute the weighted means, variances and mixing probability

log-likelihood: 

void estimGaussParams(double data[], int start, int end, double *mean, double *var) {

코드보기

void initGuess(double data[], int n, double mean[], double var[], double *mixprob) {

코드보기

#define PI (4.0 * atan(1.))

double gaussDist(double x, double mean, double var) {   

코드보기

double responsibility2(double x, double mean[], double var[], double mixprob) {   

코드보기

double weightedMeanVar(double data[], double gamma[], int n, double mean[], double var[]) {

코드보기

#define EPSILON  1e-6

// Expectation Maximization algorithm applied to Two component Gaussian Mixture Model;

double emTwoCompGMM(double data[], int n) {

    double mean[2], var[2], mixprob;

    double *gamma = new double [n];     // responsibilities for class 2;

    initGuess(data, n, mean, var, &mixprob);

    

    // begin algorithm;

    while (1) {

        // E-step;

        for (int i = 0; i < n; i++) 

            gamma[i] = responsibility2(i, mean, var, mixprob);


        double old_mixprob mixprob;


        // M-step;

        mixprob = weightedMeanVar(data, gamma, n, mean, var);

        TRACE("mixing probability= %f\n", mixprob);


        // check convergence(usually loglikelihood is tested);

        if (fabs(mixprob - old_mixprob) < EPSILON)

            break;

    }

    // estimate decision boundary;

    for (int k = n - 1; k >= 0; k--) 

        if (gamma[k] < 0.5) break;


    delete [] gamma;

    return (2 * k + 1) / 2.; // = average of {k, k+1};

}



Posted by helloktk