Julius 4.2
|
00001 00048 /* 00049 * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology 00050 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00051 * All rights reserved 00052 */ 00053 00054 #include <julius/julius.h> 00055 00056 #undef MES 00057 00079 static int 00080 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len) 00081 { 00082 /* binary search on score */ 00083 int left = 0; 00084 int right = len - 1; 00085 int mid; 00086 00087 while (left < right) { 00088 mid = (left + right) / 2; 00089 if (gc->OP_calced_score[mid] > score) { 00090 left = mid + 1; 00091 } else { 00092 right = mid; 00093 } 00094 } 00095 return(left); 00096 } 00097 00120 static int 00121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len) 00122 { 00123 int insertp; 00124 00125 if (len == 0) { /* first one */ 00126 gc->OP_calced_score[0] = score; 00127 gc->OP_calced_id[0] = id; 00128 return(1); 00129 } 00130 if (gc->OP_calced_score[len-1] >= score) { /* bottom */ 00131 if (len < gc->OP_gprune_num) { /* append to bottom */ 00132 gc->OP_calced_score[len] = score; 00133 gc->OP_calced_id[len] = id; 00134 len++; 00135 } 00136 return len; 00137 } 00138 if (gc->OP_calced_score[0] < score) { 00139 insertp = 0; 00140 } else { 00141 insertp = gmm_find_insert_point(gc, score, len); 00142 } 00143 if (len < gc->OP_gprune_num) { 00144 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp)); 00145 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp)); 00146 } else if (insertp < len - 1) { 00147 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1)); 00148 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1)); 00149 } 00150 gc->OP_calced_score[insertp] = score; 00151 gc->OP_calced_id[insertp] = id; 00152 if (len < gc->OP_gprune_num) len++; 00153 return(len); 00154 } 00155 00176 static LOGPROB 00177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo) 00178 { 00179 VECT tmp, x; 00180 VECT *mean; 00181 VECT *var; 00182 VECT *vec = gc->OP_vec; 00183 short veclen = gc->OP_veclen; 00184 00185 if (binfo == NULL) return(LOG_ZERO); 00186 mean = binfo->mean; 00187 var = binfo->var->vec; 00188 tmp = 0.0; 00189 for (; veclen > 0; veclen--) { 00190 x = *(vec++) - *(mean++); 00191 tmp += x * x * *(var++); 00192 } 00193 return((tmp + binfo->gconst) * -0.5); 00194 } 00195 00218 static LOGPROB 00219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres) 00220 { 00221 VECT tmp, x; 00222 VECT *mean; 00223 VECT *var; 00224 VECT *vec = gc->OP_vec; 00225 short veclen = gc->OP_veclen; 00226 VECT fthres = thres * (-2.0); 00227 00228 if (binfo == NULL) return(LOG_ZERO); 00229 mean = binfo->mean; 00230 var = binfo->var->vec; 00231 tmp = binfo->gconst; 00232 for (; veclen > 0; veclen--) { 00233 x = *(vec++) - *(mean++); 00234 tmp += x * x * *(var++); 00235 if (tmp > fthres) return LOG_ZERO; 00236 } 00237 return(tmp * -0.5); 00238 } 00239 00256 static void 00257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num) 00258 { 00259 /* store the pruning num to local area */ 00260 gc->OP_gprune_num = prune_num; 00261 /* maximum Gaussian set size = maximum mixture size * nstream */ 00262 gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream; 00263 /* allocate memory for storing list of currently computed Gaussian in a frame */ 00264 gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum); 00265 gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum); 00266 } 00267 00295 static void 00296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum) 00297 { 00298 int i, num = 0; 00299 LOGPROB score, thres; 00300 00301 thres = LOG_ZERO; 00302 for (i = 0; i < gnum; i++) { 00303 if (num < gc->OP_gprune_num) { 00304 score = gmm_compute_g_base(gc, g[i]); 00305 } else { 00306 score = gmm_compute_g_safe(gc, g[i], thres); 00307 if (score <= thres) continue; 00308 } 00309 num = gmm_cache_push(gc, i, score, num); 00310 thres = gc->OP_calced_score[num-1]; 00311 } 00312 gc->OP_calced_num = num; 00313 } 00314 00333 static LOGPROB 00334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state) 00335 { 00336 int i; 00337 LOGPROB logprob, logprobsum; 00338 int s; 00339 PROB stream_weight; 00340 00341 00342 /* compute Gaussian set */ 00343 logprobsum = 0.0; 00344 for(s=0;s<gc->OP_nstream;s++) { 00345 /* set stream weight */ 00346 if (state->w) stream_weight = state->w->weight[s]; 00347 else stream_weight = 1.0; 00348 /* setup storage pointer for this mixture pdf */ 00349 gc->OP_vec = gc->OP_vec_stream[s]; 00350 gc->OP_veclen = gc->OP_veclen_stream[s]; 00351 /* compute output probabilities */ 00352 gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num); 00353 /* computed Gaussians will be set in: 00354 score ... OP_calced_score[0..OP_calced_num] 00355 id ... OP_calced_id[0..OP_calced_num] */ 00356 /* sum */ 00357 for(i=0;i<gc->OP_calced_num;i++) { 00358 gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]]; 00359 } 00360 /* add log probs */ 00361 logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num); 00362 /* if outprob of a stream is zero, skip this stream */ 00363 if (logprob <= LOG_ZERO) continue; 00364 /* sum all the obtained mixture scores */ 00365 logprobsum += logprob * stream_weight; 00366 00367 } 00368 if (logprobsum == 0.0) return(LOG_ZERO); /* no valid stream */ 00369 if (logprobsum <= LOG_ZERO) return(LOG_ZERO); /* lowest == LOG_ZERO */ 00370 return (logprob * INV_LOG_TEN); 00371 } 00372 00396 static LOGPROB 00397 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param) 00398 { 00399 int d, i; 00400 /* set global values for outprob functions to access them */ 00401 for(d=0,i=0;i<gc->OP_nstream;i++) { 00402 gc->OP_vec_stream[i] = &(param->parvec[t][d]); 00403 d += gc->OP_veclen_stream[i]; 00404 } 00405 return(gmm_calc_mix(gc, stateinfo)); 00406 } 00407 00408 /************************************************************************/ 00409 /* global functions */ 00410 00428 boolean 00429 gmm_init(Recog *recog) 00430 { 00431 HTK_HMM_INFO *gmm; 00432 HTK_HMM_Data *d; 00433 GMMCalc *gc; 00434 int i; 00435 00436 gmm = recog->gmm; 00437 00438 /* check GMM format */ 00439 /* tied-mixture GMM is not supported */ 00440 if (gmm->is_tied_mixture) { 00441 jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n"); 00442 return FALSE; 00443 } 00444 /* assume 3 state GMM (only one output state) */ 00445 for(d=gmm->start;d;d=d->next) { 00446 if (d->state_num > 3) { 00447 jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name); 00448 return FALSE; 00449 } 00450 } 00451 00452 /* check if CMN needed */ 00453 00454 /* allocate work area */ 00455 if (recog->gc == NULL) { 00456 gc = (GMMCalc *)mymalloc(sizeof(GMMCalc)); 00457 recog->gc = gc; 00458 } else { 00459 gc = recog->gc; 00460 } 00461 00462 /* allocate buffers */ 00463 gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum); 00464 00465 #ifdef GMM_VAD 00466 gc->nframe = recog->jconf->detect.gmm_margin; 00467 gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe); 00468 #endif 00469 00470 gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum); 00471 i = 0; 00472 if (recog->jconf->reject.gmm_reject_cmn_string) { 00473 for(d=recog->gmm->start;d;d=d->next) { 00474 if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) { 00475 gc->is_voice[i] = FALSE; 00476 } else { 00477 gc->is_voice[i] = TRUE; 00478 } 00479 i++; 00480 } 00481 } else { 00482 for(d=recog->gmm->start;d;d=d->next) { 00483 gc->is_voice[i] = TRUE; 00484 i++; 00485 } 00486 } 00487 00488 /* initialize work area */ 00489 gc->OP_nstream = gmm->opt.stream_info.num; 00490 for(i=0;i<gc->OP_nstream;i++) { 00491 gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i]; 00492 } 00493 gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num); 00494 00495 /* check if variances are inversed */ 00496 if (!gmm->variance_inversed) { 00497 /* here, inverse all variance values for faster computation */ 00498 htk_hmm_inverse_variances(gmm); 00499 gmm->variance_inversed = TRUE; 00500 } 00501 00502 return TRUE; 00503 } 00504 00521 void 00522 gmm_prepare(Recog *recog) 00523 { 00524 HTK_HMM_Data *d; 00525 int i; 00526 00527 /* initialize score buffer and frame count */ 00528 i = 0; 00529 for(d=recog->gmm->start;d;d=d->next) { 00530 recog->gc->gmm_score[i] = 0.0; 00531 i++; 00532 } 00533 #ifdef GMM_VAD 00534 for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0; 00535 recog->gc->framep = 0; 00536 recog->gc->filled = FALSE; 00537 recog->gc->in_voice = FALSE; 00538 #endif 00539 00540 recog->gc->framecount = 0; 00541 00542 #ifdef GMM_VAD_DEBUG 00543 printf("GMM_VAD: init\n"); 00544 #endif 00545 } 00546 00573 void 00574 gmm_proceed(Recog *recog) 00575 { 00576 HTK_HMM_Data *d; 00577 GMMCalc *gc; 00578 int i; 00579 MFCCCalc *mfcc; 00580 LOGPROB score; 00581 #ifdef GMM_VAD 00582 LOGPROB max_n; 00583 LOGPROB max_v; 00584 #endif 00585 00586 mfcc = recog->gmmmfcc; 00587 gc = recog->gc; 00588 00589 if (!mfcc->valid) return; 00590 00591 gc->framecount++; 00592 00593 #ifdef GMM_VAD 00594 max_n = max_v = LOG_ZERO; 00595 #endif 00596 00597 i = 0; 00598 for(d=recog->gmm->start;d;d=d->next) { 00599 score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param); 00600 gc->gmm_score[i] += score; 00601 #ifdef GMM_VAD 00602 if (gc->is_voice[i]) { 00603 if (max_v < score) max_v = score; 00604 } else { 00605 if (max_n < score) max_n = score; 00606 } 00607 #endif 00608 #ifdef MES 00609 jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount); 00610 #endif 00611 i++; 00612 } 00613 #ifdef GMM_VAD 00614 #ifdef GMM_VAD_DEBUG 00615 //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep); 00616 #endif 00617 /* set rate of this frame */ 00618 gc->rates[gc->framep] = max_v - max_n; 00619 #ifdef GMM_VAD_DEBUG 00620 printf("GMM_VAD: %f\n", max_v - max_n); 00621 #endif 00622 /* increment current frame pointer */ 00623 gc->framep++; 00624 /* if reached end, go to start point */ 00625 if (gc->framep >= gc->nframe) { 00626 gc->filled = TRUE; 00627 gc->framep = 0; 00628 } 00629 #endif 00630 } 00631 00656 void 00657 gmm_end(Recog *recog) 00658 { 00659 HTK_HMM_INFO *gmm; 00660 LOGPROB *score; 00661 HTK_HMM_Data *d; 00662 LOGPROB maxprob; 00663 HTK_HMM_Data *dmax; 00664 #ifdef CONFIDENCE_MEASURE 00665 LOGPROB sum; 00666 #endif 00667 int i; 00668 int maxid; 00669 00670 if (recog->gc->framecount == 0) return; 00671 00672 gmm = recog->gmm; 00673 score = recog->gc->gmm_score; 00674 00675 /* get max score */ 00676 i = 0; 00677 maxprob = LOG_ZERO; 00678 dmax = NULL; 00679 maxid = 0; 00680 for(d=gmm->start;d;d=d->next) { 00681 if (maxprob < score[i]) { 00682 dmax = d; 00683 maxprob = score[i]; 00684 maxid = i; 00685 } 00686 i++; 00687 } 00688 recog->gc->max_d = dmax; 00689 recog->gc->max_i = maxid; 00690 00691 #ifdef CONFIDENCE_MEASURE 00692 /* compute CM */ 00693 sum = 0.0; 00694 i = 0; 00695 for(d=gmm->start;d;d=d->next) { 00696 //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob)); 00697 sum += pow(10, 0.05 * (score[i] - maxprob)); 00698 i++; 00699 } 00700 recog->gc->gmm_max_cm = 1.0 / sum; 00701 #endif 00702 00703 /* output result */ 00704 callback_exec(CALLBACK_RESULT_GMM, recog); 00705 00706 } 00707 00708 00733 boolean 00734 gmm_valid_input(Recog *recog) 00735 { 00736 if (recog->gc->max_d == NULL) return FALSE; 00737 if (recog->gc->is_voice[recog->gc->max_i]) { 00738 return TRUE; 00739 } 00740 return FALSE; 00741 } 00742 00757 void 00758 gmm_free(Recog *recog) 00759 { 00760 if (recog->gc) { 00761 free(recog->gc->OP_calced_score); 00762 free(recog->gc->OP_calced_id); 00763 free(recog->gc->is_voice); 00764 #ifdef GMM_VAD 00765 free(recog->gc->rates); 00766 #endif 00767 free(recog->gc->gmm_score); 00768 free(recog->gc); 00769 recog->gc = NULL; 00770 } 00771 } 00772 00773 #ifdef GMM_VAD 00774 00791 static void 00792 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret) 00793 { 00794 int i, len; 00795 LOGPROB mean; 00796 LOGPROB var; 00797 LOGPROB x; 00798 int count; 00799 00800 if (!gc->filled) { 00801 /* cycle buffer not filled yet */ 00802 *mean_ret = 0.0; 00803 *var_ret = 0.0; 00804 *count_ret = 0; 00805 return; 00806 } 00807 00808 if (gc->filled) { 00809 len = gc->nframe; 00810 } else { 00811 len = gc->framep; 00812 } 00813 00814 mean = 0; 00815 count = 0; 00816 for(i=0;i<len;i++) { 00817 mean += gc->rates[i]; 00818 if (gc->rates[i] > 0.0) count++; 00819 } 00820 mean /= (float)len; 00821 var = 0.0; 00822 for(i=0;i<len;i++) { 00823 x = mean - gc->rates[i]; 00824 var += x * x; 00825 } 00826 var /= (float)len; 00827 00828 *mean_ret = mean; 00829 *var_ret = var; 00830 *count_ret = count; 00831 } 00832 00852 void 00853 gmm_check_trigger(Recog *recog) 00854 { 00855 GMMCalc *gc; 00856 gc = recog->gc; 00857 float mean; 00858 float var; 00859 int count; 00860 00861 gc->up_trigger = gc->down_trigger = FALSE; 00862 00863 voice_activity_score(gc, &mean, &var, &count); 00864 00865 if (gc->in_voice) { 00866 if (mean <= recog->jconf->detect.gmm_downtrigger_thres) { 00867 gc->down_trigger = TRUE; 00868 gc->in_voice = FALSE; 00869 } 00870 } else { 00871 if (mean >= recog->jconf->detect.gmm_uptrigger_thres) { 00872 gc->up_trigger = TRUE; 00873 gc->in_voice = TRUE; 00874 } 00875 } 00876 00877 #ifdef GMM_VAD_DEBUG 00878 printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count); 00879 if (gc->up_trigger) printf(": BEGIN"); 00880 if (gc->down_trigger) printf(": END"); 00881 printf("\n"); 00882 #endif 00883 00884 } 00885 #endif /* GMM_VAD */ 00886 00887 /* end of file */