Julius 4.2
libjulius/src/gmm.c
説明を見る。
00001 
00048 /*
00049  * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
00050  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00051  * All rights reserved
00052  */
00053 
00054 #include <julius/julius.h>
00055 
00056 #undef MES
00057 
00079 static int
00080 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
00081 {
00082   /* binary search on score */
00083   int left = 0;
00084   int right = len - 1;
00085   int mid;
00086 
00087   while (left < right) {
00088     mid = (left + right) / 2;
00089     if (gc->OP_calced_score[mid] > score) {
00090       left = mid + 1;
00091     } else {
00092       right = mid;
00093     }
00094   }
00095   return(left);
00096 }
00097 
00120 static int
00121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
00122 {
00123   int insertp;
00124 
00125   if (len == 0) {               /* first one */
00126     gc->OP_calced_score[0] = score;
00127     gc->OP_calced_id[0] = id;
00128     return(1);
00129   }
00130   if (gc->OP_calced_score[len-1] >= score) { /* bottom */
00131     if (len < gc->OP_gprune_num) {          /* append to bottom */
00132       gc->OP_calced_score[len] = score;
00133       gc->OP_calced_id[len] = id;
00134       len++;
00135     }
00136     return len;
00137   }
00138   if (gc->OP_calced_score[0] < score) {
00139     insertp = 0;
00140   } else {
00141     insertp = gmm_find_insert_point(gc, score, len);
00142   }
00143   if (len < gc->OP_gprune_num) {
00144     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
00145     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));    
00146   } else if (insertp < len - 1) {
00147     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
00148     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
00149   }
00150   gc->OP_calced_score[insertp] = score;
00151   gc->OP_calced_id[insertp] = id;
00152   if (len < gc->OP_gprune_num) len++;
00153   return(len);
00154 }
00155 
00176 static LOGPROB
00177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
00178 {
00179   VECT tmp, x;
00180   VECT *mean;
00181   VECT *var;
00182   VECT *vec = gc->OP_vec;
00183   short veclen = gc->OP_veclen;
00184 
00185   if (binfo == NULL) return(LOG_ZERO);
00186   mean = binfo->mean;
00187   var = binfo->var->vec;
00188   tmp = 0.0;
00189   for (; veclen > 0; veclen--) {
00190     x = *(vec++) - *(mean++);
00191     tmp += x * x * *(var++);
00192   }
00193   return((tmp + binfo->gconst) * -0.5);
00194 }
00195 
00218 static LOGPROB
00219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
00220 {
00221   VECT tmp, x;
00222   VECT *mean;
00223   VECT *var;
00224   VECT *vec = gc->OP_vec;
00225   short veclen = gc->OP_veclen;
00226   VECT fthres = thres * (-2.0);
00227 
00228   if (binfo == NULL) return(LOG_ZERO);
00229   mean = binfo->mean;
00230   var = binfo->var->vec;
00231   tmp = binfo->gconst;
00232   for (; veclen > 0; veclen--) {
00233     x = *(vec++) - *(mean++);
00234     tmp += x * x * *(var++);
00235     if (tmp > fthres)  return LOG_ZERO;
00236   }
00237   return(tmp * -0.5);
00238 }
00239 
00256 static void
00257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
00258 {
00259   /* store the pruning num to local area */
00260   gc->OP_gprune_num = prune_num;
00261   /* maximum Gaussian set size = maximum mixture size * nstream */
00262   gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream;
00263   /* allocate memory for storing list of currently computed Gaussian in a frame */
00264   gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum);
00265   gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum);
00266 }
00267 
00295 static void
00296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
00297 {
00298   int i, num = 0;
00299   LOGPROB score, thres;
00300 
00301   thres = LOG_ZERO;
00302   for (i = 0; i < gnum; i++) {
00303     if (num < gc->OP_gprune_num) {
00304       score = gmm_compute_g_base(gc, g[i]);
00305     } else {
00306       score = gmm_compute_g_safe(gc, g[i], thres);
00307       if (score <= thres) continue;
00308     }
00309     num = gmm_cache_push(gc, i, score, num);
00310     thres = gc->OP_calced_score[num-1];
00311   }
00312   gc->OP_calced_num = num;
00313 }
00314 
00333 static LOGPROB
00334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state)
00335 {
00336   int i;
00337   LOGPROB logprob, logprobsum;
00338   int s;
00339   PROB stream_weight;
00340 
00341 
00342   /* compute Gaussian set */
00343   logprobsum = 0.0;
00344   for(s=0;s<gc->OP_nstream;s++) {
00345     /* set stream weight */
00346     if (state->w) stream_weight = state->w->weight[s];
00347     else stream_weight = 1.0;
00348     /* setup storage pointer for this mixture pdf */
00349     gc->OP_vec = gc->OP_vec_stream[s];
00350     gc->OP_veclen = gc->OP_veclen_stream[s];
00351     /* compute output probabilities */
00352     gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num);
00353     /* computed Gaussians will be set in:
00354        score ... OP_calced_score[0..OP_calced_num]
00355        id    ... OP_calced_id[0..OP_calced_num] */
00356   /* sum */
00357     for(i=0;i<gc->OP_calced_num;i++) {
00358       gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]];
00359     }
00360     /* add log probs */
00361     logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
00362     /* if outprob of a stream is zero, skip this stream */
00363     if (logprob <= LOG_ZERO) continue;
00364     /* sum all the obtained mixture scores */
00365     logprobsum += logprob * stream_weight;
00366 
00367   }
00368   if (logprobsum == 0.0) return(LOG_ZERO); /* no valid stream */
00369   if (logprobsum <= LOG_ZERO) return(LOG_ZERO); /* lowest == LOG_ZERO */
00370   return (logprob * INV_LOG_TEN);
00371 }
00372 
00396 static LOGPROB
00397 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
00398 {
00399   int d, i;
00400   /* set global values for outprob functions to access them */
00401   for(d=0,i=0;i<gc->OP_nstream;i++) {
00402     gc->OP_vec_stream[i] = &(param->parvec[t][d]);
00403     d += gc->OP_veclen_stream[i];
00404   }
00405   return(gmm_calc_mix(gc, stateinfo));
00406 }
00407 
00408 /************************************************************************/
00409 /* global functions */
00410 
00428 boolean
00429 gmm_init(Recog *recog)
00430 {
00431   HTK_HMM_INFO *gmm;
00432   HTK_HMM_Data *d;
00433   GMMCalc *gc;
00434   int i;
00435 
00436   gmm = recog->gmm;
00437 
00438   /* check GMM format */
00439   /* tied-mixture GMM is not supported */
00440   if (gmm->is_tied_mixture) {
00441     jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
00442     return FALSE;
00443   }
00444   /* assume 3 state GMM (only one output state) */
00445   for(d=gmm->start;d;d=d->next) {
00446     if (d->state_num > 3) {
00447       jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
00448       return FALSE;
00449     }
00450   }
00451 
00452   /* check if CMN needed */
00453 
00454   /* allocate work area */
00455   if (recog->gc == NULL) {
00456     gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
00457     recog->gc = gc;
00458   } else {
00459     gc = recog->gc;
00460   }
00461   
00462   /* allocate buffers */
00463   gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
00464 
00465 #ifdef GMM_VAD
00466   gc->nframe = recog->jconf->detect.gmm_margin;
00467   gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
00468 #endif
00469 
00470   gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
00471   i = 0;
00472   if (recog->jconf->reject.gmm_reject_cmn_string) {
00473     for(d=recog->gmm->start;d;d=d->next) {
00474       if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
00475         gc->is_voice[i] = FALSE;
00476       } else {
00477         gc->is_voice[i] = TRUE;
00478       }
00479       i++;
00480     }
00481   } else {
00482     for(d=recog->gmm->start;d;d=d->next) {
00483       gc->is_voice[i] = TRUE;
00484       i++;
00485     }
00486   }
00487 
00488   /* initialize work area */
00489   gc->OP_nstream = gmm->opt.stream_info.num;
00490   for(i=0;i<gc->OP_nstream;i++) {
00491     gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i];
00492   }
00493   gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
00494 
00495   /* check if variances are inversed */
00496   if (!gmm->variance_inversed) {
00497     /* here, inverse all variance values for faster computation */
00498     htk_hmm_inverse_variances(gmm);
00499     gmm->variance_inversed = TRUE;
00500   }
00501 
00502   return TRUE;
00503 }
00504 
00521 void
00522 gmm_prepare(Recog *recog)
00523 {
00524   HTK_HMM_Data *d;
00525   int i;
00526 
00527   /* initialize score buffer and frame count */
00528   i = 0;
00529   for(d=recog->gmm->start;d;d=d->next) {
00530     recog->gc->gmm_score[i] = 0.0;
00531     i++;
00532   }
00533 #ifdef GMM_VAD
00534   for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
00535   recog->gc->framep = 0;
00536   recog->gc->filled = FALSE;
00537   recog->gc->in_voice = FALSE;
00538 #endif
00539 
00540   recog->gc->framecount = 0;
00541 
00542 #ifdef GMM_VAD_DEBUG
00543   printf("GMM_VAD: init\n");
00544 #endif
00545 }
00546 
00573 void
00574 gmm_proceed(Recog *recog)
00575 {
00576   HTK_HMM_Data *d;
00577   GMMCalc *gc;
00578   int i;
00579   MFCCCalc *mfcc;
00580   LOGPROB score;
00581 #ifdef GMM_VAD
00582   LOGPROB max_n;
00583   LOGPROB max_v;
00584 #endif
00585 
00586   mfcc = recog->gmmmfcc;
00587   gc = recog->gc;
00588 
00589   if (!mfcc->valid) return;
00590 
00591   gc->framecount++;
00592 
00593 #ifdef GMM_VAD
00594   max_n = max_v = LOG_ZERO;
00595 #endif
00596 
00597   i = 0;
00598   for(d=recog->gmm->start;d;d=d->next) {
00599     score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
00600     gc->gmm_score[i] += score;
00601 #ifdef GMM_VAD
00602     if (gc->is_voice[i]) {
00603       if (max_v < score) max_v = score;
00604     } else {
00605       if (max_n < score) max_n = score;
00606     }
00607 #endif
00608 #ifdef MES
00609     jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
00610 #endif
00611     i++;
00612   }
00613 #ifdef GMM_VAD
00614 #ifdef GMM_VAD_DEBUG
00615   //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep);
00616 #endif
00617   /* set rate of this frame */
00618   gc->rates[gc->framep] = max_v - max_n;
00619 #ifdef GMM_VAD_DEBUG
00620   printf("GMM_VAD: %f\n", max_v - max_n);
00621 #endif
00622   /* increment current frame pointer */
00623   gc->framep++;
00624   /* if reached end, go to start point */
00625   if (gc->framep >= gc->nframe) {
00626     gc->filled = TRUE;
00627     gc->framep = 0;
00628   }
00629 #endif
00630 }
00631 
00656 void
00657 gmm_end(Recog *recog)
00658 {
00659   HTK_HMM_INFO *gmm;
00660   LOGPROB *score;
00661   HTK_HMM_Data *d;
00662   LOGPROB maxprob;
00663   HTK_HMM_Data *dmax;
00664 #ifdef CONFIDENCE_MEASURE
00665   LOGPROB sum;
00666 #endif
00667   int i;
00668   int maxid;
00669 
00670   if (recog->gc->framecount == 0) return;
00671 
00672   gmm = recog->gmm;
00673   score = recog->gc->gmm_score;
00674 
00675   /* get max score */
00676   i = 0;
00677   maxprob = LOG_ZERO;
00678   dmax = NULL;
00679   maxid = 0;
00680   for(d=gmm->start;d;d=d->next) {
00681     if (maxprob < score[i]) {
00682       dmax = d;
00683       maxprob = score[i];
00684       maxid = i;
00685     }
00686     i++;
00687   }
00688   recog->gc->max_d = dmax;
00689   recog->gc->max_i = maxid;
00690 
00691 #ifdef CONFIDENCE_MEASURE
00692   /* compute CM */
00693   sum = 0.0;
00694   i = 0;
00695   for(d=gmm->start;d;d=d->next) {
00696     //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob));
00697     sum += pow(10, 0.05 * (score[i] - maxprob));
00698     i++;
00699   }
00700   recog->gc->gmm_max_cm = 1.0 / sum;
00701 #endif
00702   
00703   /* output result */
00704   callback_exec(CALLBACK_RESULT_GMM, recog);
00705 
00706 }
00707 
00708 
00733 boolean
00734 gmm_valid_input(Recog *recog)
00735 {
00736   if (recog->gc->max_d == NULL) return FALSE;
00737   if (recog->gc->is_voice[recog->gc->max_i]) {
00738     return TRUE;
00739   }
00740   return FALSE;
00741 }
00742 
00757 void
00758 gmm_free(Recog *recog)
00759 {
00760   if (recog->gc) {
00761     free(recog->gc->OP_calced_score);
00762     free(recog->gc->OP_calced_id);
00763     free(recog->gc->is_voice);
00764 #ifdef GMM_VAD
00765     free(recog->gc->rates);
00766 #endif
00767     free(recog->gc->gmm_score);
00768     free(recog->gc);
00769     recog->gc = NULL;
00770   }
00771 }
00772 
00773 #ifdef GMM_VAD
00774 
00791 static void
00792 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
00793 {
00794   int i, len;
00795   LOGPROB mean;
00796   LOGPROB var;
00797   LOGPROB x;
00798   int count;
00799 
00800   if (!gc->filled) {
00801     /* cycle buffer not filled yet */
00802     *mean_ret = 0.0;
00803     *var_ret = 0.0;
00804     *count_ret = 0;
00805     return;
00806   }
00807 
00808   if (gc->filled) {
00809     len = gc->nframe;
00810   } else {
00811     len = gc->framep;
00812   }
00813 
00814   mean = 0;
00815   count = 0;
00816   for(i=0;i<len;i++) {
00817     mean += gc->rates[i];
00818     if (gc->rates[i] > 0.0) count++;
00819   }
00820   mean /= (float)len;
00821   var = 0.0;
00822   for(i=0;i<len;i++) {
00823     x = mean - gc->rates[i];
00824     var += x * x;
00825   }
00826   var /= (float)len;
00827 
00828   *mean_ret = mean;
00829   *var_ret = var;
00830   *count_ret = count;
00831 }
00832 
00852 void
00853 gmm_check_trigger(Recog *recog)
00854 {
00855   GMMCalc *gc;
00856   gc = recog->gc;
00857   float mean;
00858   float var;
00859   int count;
00860 
00861   gc->up_trigger = gc->down_trigger = FALSE;
00862 
00863   voice_activity_score(gc, &mean, &var, &count);
00864 
00865   if (gc->in_voice) {
00866     if (mean <= recog->jconf->detect.gmm_downtrigger_thres) {
00867       gc->down_trigger = TRUE;
00868       gc->in_voice = FALSE;
00869     }
00870   } else {
00871     if (mean >= recog->jconf->detect.gmm_uptrigger_thres) {
00872       gc->up_trigger = TRUE;
00873       gc->in_voice = TRUE;
00874     }
00875   }
00876 
00877 #ifdef GMM_VAD_DEBUG
00878   printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
00879   if (gc->up_trigger) printf(": BEGIN");
00880   if (gc->down_trigger) printf(": END");
00881   printf("\n");
00882 #endif
00883 
00884 }
00885 #endif /* GMM_VAD */
00886 
00887 /* end of file */