Julius 4.1.5
|
00001 00026 /* 00027 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00028 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00029 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00030 * All rights reserved 00031 */ 00032 00033 #include <julius/julius.h> 00034 00072 static HTK_HMM_INFO * 00073 initialize_HMM(JCONF_AM *amconf, Jconf *jconf) 00074 { 00075 HTK_HMM_INFO *hmminfo; 00076 00077 /* at here, global variable "para" holds values specified by user or 00078 by user-specified HTK config file */ 00079 if (amconf->analysis.para_hmm.loaded == 1) { 00080 jlog("Warning: you seems to read more than one acoustic model for recognition, but\n"); 00081 jlog("Warning: previous one already has header-embedded acoustic parameters\n"); 00082 jlog("Warning: if you have different parameters, result may be wrong!\n"); 00083 } 00084 00085 /* allocate new hmminfo */ 00086 hmminfo = hmminfo_new(); 00087 /* load hmmdefs */ 00088 if (init_hmminfo(hmminfo, amconf->hmmfilename, amconf->mapfilename, &(amconf->analysis.para_hmm)) == FALSE) { 00089 hmminfo_free(hmminfo); 00090 return NULL; 00091 } 00092 00093 /* set multipath mode flag */ 00094 if (amconf->force_multipath) { 00095 jlog("STAT: m_fusion: force multipath HMM handling by user request\n"); 00096 hmminfo->multipath = TRUE; 00097 } else { 00098 hmminfo->multipath = hmminfo->need_multipath; 00099 } 00100 00101 /* only MFCC is supported for audio input */ 00102 /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */ 00103 /* check parameter type of this acoustic HMM */ 00104 if (jconf->input.type == INPUT_WAVEFORM) { 00105 /* Decode parameter extraction type according to the training 00106 parameter type in the header of the given acoustic HMM */ 00107 if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) { 00108 jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC is supported\n"); 00109 hmminfo_free(hmminfo); 00110 return NULL; 00111 } 00112 /* set acoustic analysis parameters from HMM header */ 00113 calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size); 00114 } 00115 /* check if tied_mixture */ 00116 if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) { 00117 jlog("ERROR: m_fusion: this tied-mixture model has no codebook!?\n"); 00118 hmminfo_free(hmminfo); 00119 return NULL; 00120 } 00121 00122 #ifdef PASS1_IWCD 00123 /* make state clusters of same context for inter-word triphone approx. */ 00124 if (hmminfo->is_triphone) { 00125 jlog("STAT: making pseudo bi/mono-phone for IW-triphone\n"); 00126 if (make_cdset(hmminfo) == FALSE) { 00127 jlog("ERROR: m_fusion: failed to make context-dependent state set\n"); 00128 hmminfo_free(hmminfo); 00129 return NULL; 00130 } 00131 /* add those `pseudo' biphone and monophone to the logical HMM names */ 00132 /* they points not to the defined HMM, but to the CD_Set structure */ 00133 hmm_add_pseudo_phones(hmminfo); 00134 } 00135 #endif 00136 00137 /* find short pause model and set to hmminfo->sp */ 00138 htk_hmm_set_pause_model(hmminfo, amconf->spmodel_name); 00139 00140 00141 hmminfo->cdset_method = amconf->iwcdmethod; 00142 hmminfo->cdmax_num = amconf->iwcdmaxn; 00143 00144 if (amconf->analysis.para_htk.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk)); 00145 if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm)); 00146 apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default)); 00147 00148 return(hmminfo); 00149 00150 } 00151 00165 static HTK_HMM_INFO * 00166 initialize_GSHMM(JCONF_AM *amconf) 00167 { 00168 HTK_HMM_INFO *hmm_gs; 00169 Value para_dummy; 00170 00171 jlog("STAT: Reading GS HMMs:\n"); 00172 hmm_gs = hmminfo_new(); 00173 undef_para(¶_dummy); 00174 if (init_hmminfo(hmm_gs, amconf->hmm_gs_filename, NULL, ¶_dummy) == FALSE) { 00175 hmminfo_free(hmm_gs); 00176 return NULL; 00177 } 00178 return(hmm_gs); 00179 } 00180 00197 static HTK_HMM_INFO * 00198 initialize_GMM(Jconf *jconf) 00199 { 00200 HTK_HMM_INFO *gmm; 00201 00202 jlog("STAT: reading GMM: %s\n", jconf->reject.gmm_filename); 00203 00204 if (jconf->gmm == NULL) { 00205 /* no acoustic parameter setting was given for GMM using -AM_GMM, 00206 copy the first AM setting */ 00207 jlog("STAT: -AM_GMM not used, use parameter of the first AM\n"); 00208 jconf->gmm = j_jconf_am_new(); 00209 memcpy(jconf->gmm, jconf->am_root, sizeof(JCONF_AM)); 00210 jconf->gmm->hmmfilename = NULL; 00211 jconf->gmm->mapfilename = NULL; 00212 jconf->gmm->spmodel_name = NULL; 00213 jconf->gmm->hmm_gs_filename = NULL; 00214 if (jconf->am_root->analysis.cmnload_filename) { 00215 jconf->gmm->analysis.cmnload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnload_filename)+ 1), jconf->am_root->analysis.cmnload_filename); 00216 } 00217 if (jconf->am_root->analysis.cmnsave_filename) { 00218 jconf->gmm->analysis.cmnsave_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnsave_filename)+ 1), jconf->am_root->analysis.cmnsave_filename); 00219 } 00220 if (jconf->am_root->frontend.ssload_filename) { 00221 jconf->gmm->frontend.ssload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->frontend.ssload_filename)+ 1), jconf->am_root->frontend.ssload_filename); 00222 } 00223 } 00224 00225 gmm = hmminfo_new(); 00226 if (init_hmminfo(gmm, jconf->reject.gmm_filename, NULL, &(jconf->gmm->analysis.para_hmm)) == FALSE) { 00227 hmminfo_free(gmm); 00228 return NULL; 00229 } 00230 /* check parameter type of this acoustic HMM */ 00231 if (jconf->input.type == INPUT_WAVEFORM) { 00232 /* Decode parameter extraction type according to the training 00233 parameter type in the header of the given acoustic HMM */ 00234 if ((gmm->opt.param_type & F_BASEMASK) != F_MFCC) { 00235 jlog("ERROR: m_fusion: for direct speech input, only GMM trained by MFCC is supported\n"); 00236 hmminfo_free(gmm); 00237 return NULL; 00238 } 00239 } 00240 00241 /* set acoustic analysis parameters from HMM header */ 00242 calc_para_from_header(&(jconf->gmm->analysis.para), gmm->opt.param_type, gmm->opt.vec_size); 00243 00244 if (jconf->gmm->analysis.para_htk.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_htk)); 00245 if (jconf->gmm->analysis.para_hmm.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_hmm)); 00246 apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_default)); 00247 00248 return(gmm); 00249 } 00250 00284 static WORD_INFO * 00285 initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo) 00286 { 00287 WORD_INFO *winfo; 00288 00289 /* allocate new word dictionary */ 00290 winfo = word_info_new(); 00291 /* read in dictinary from file */ 00292 if ( ! 00293 #ifdef MONOTREE 00294 /* leave winfo monophone for 1st pass lexicon tree */ 00295 init_voca(winfo, lmconf->dictfilename, hmminfo, TRUE, lmconf->forcedict_flag) 00296 #else 00297 init_voca(winfo, lmconf->dictfilename, hmminfo, FALSE, lmconf->forcedict_flag) 00298 #endif 00299 ) { 00300 jlog("ERROR: m_fusion: failed to read dictionary, terminated\n"); 00301 word_info_free(winfo); 00302 return NULL; 00303 } 00304 00305 if (lmconf->lmtype == LM_PROB) { 00306 /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */ 00307 if (lmconf->enable_iwspword) { 00308 if ( 00309 #ifdef MONOTREE 00310 voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, TRUE) 00311 #else 00312 voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, FALSE) 00313 #endif 00314 == FALSE) { 00315 jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry); 00316 word_info_free(winfo); 00317 return NULL; 00318 } else { 00319 jlog("STAT: 1 IW-sp word entry added\n"); 00320 } 00321 } 00322 /* set {head,tail}_silwid */ 00323 winfo->head_silwid = voca_lookup_wid(lmconf->head_silname, winfo); 00324 if (winfo->head_silwid == WORD_INVALID) { /* not exist */ 00325 jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname); 00326 word_info_free(winfo); 00327 return NULL; 00328 } 00329 winfo->tail_silwid = voca_lookup_wid(lmconf->tail_silname, winfo); 00330 if (winfo->tail_silwid == WORD_INVALID) { /* not exist */ 00331 jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname); 00332 word_info_free(winfo); 00333 return NULL; 00334 } 00335 } 00336 00337 return(winfo); 00338 00339 } 00340 00341 00374 static NGRAM_INFO * 00375 initialize_ngram(JCONF_LM *lmconf, WORD_INFO *winfo) 00376 { 00377 NGRAM_INFO *ngram; 00378 boolean ret; 00379 00380 /* allocate new */ 00381 ngram = ngram_info_new(); 00382 /* load LM */ 00383 if (lmconf->ngram_filename != NULL) { /* binary format */ 00384 ret = init_ngram_bin(ngram, lmconf->ngram_filename); 00385 } else { /* ARPA format */ 00386 /* if either forward or backward N-gram is specified, read it */ 00387 /* if both specified, use backward N-gram as main and 00388 use forward 2-gram only for 1st pass (this is an old behavior) */ 00389 if (lmconf->ngram_filename_rl_arpa) { 00390 ret = init_ngram_arpa(ngram, lmconf->ngram_filename_rl_arpa, DIR_RL); 00391 if (ret == FALSE) { 00392 ngram_info_free(ngram); 00393 return NULL; 00394 } 00395 if (lmconf->ngram_filename_lr_arpa) { 00396 ret = init_ngram_arpa_additional(ngram, lmconf->ngram_filename_lr_arpa); 00397 if (ret == FALSE) { 00398 ngram_info_free(ngram); 00399 return NULL; 00400 } 00401 } 00402 } else if (lmconf->ngram_filename_lr_arpa) { 00403 ret = init_ngram_arpa(ngram, lmconf->ngram_filename_lr_arpa, DIR_LR); 00404 } 00405 } 00406 if (ret == FALSE) { 00407 ngram_info_free(ngram); 00408 return NULL; 00409 } 00410 00411 /* set unknown (=OOV) word id */ 00412 set_unknown_id(ngram, lmconf->unknown_name); 00413 00414 /* map dict item to N-gram entry */ 00415 if (make_voca_ref(ngram, winfo) == FALSE) { 00416 ngram_info_free(ngram); 00417 return NULL; 00418 } 00419 00420 /* post-fix EOS / BOS uni prob for SRILM */ 00421 fix_uniprob_srilm(ngram, winfo); 00422 00423 return(ngram); 00424 } 00425 00458 boolean 00459 j_load_am(Recog *recog, JCONF_AM *amconf) 00460 { 00461 PROCESS_AM *am; 00462 00463 jlog("STAT: *** loading AM%02d %s\n", amconf->id, amconf->name); 00464 00465 /* create AM process instance */ 00466 am = j_process_am_new(recog, amconf); 00467 00468 /* HMM */ 00469 if ((am->hmminfo = initialize_HMM(amconf, recog->jconf)) == NULL) { 00470 jlog("ERROR: m_fusion: failed to initialize AM\n"); 00471 return FALSE; 00472 } 00473 if (amconf->hmm_gs_filename != NULL) { 00474 if ((am->hmm_gs = initialize_GSHMM(amconf)) == NULL) { 00475 jlog("ERROR: m_fusion: failed to initialize GS HMM\n"); 00476 return FALSE; 00477 } 00478 } 00479 00480 /* fixate model-specific params */ 00481 /* set params whose default will change by models and not specified in arg */ 00482 /* select Gaussian pruning function */ 00483 if (am->config->gprune_method == GPRUNE_SEL_UNDEF) {/* set default if not specified */ 00484 if (am->hmminfo->is_tied_mixture) { 00485 /* enabled by default for tied-mixture models */ 00486 #if defined(GPRUNE_DEFAULT_SAFE) 00487 am->config->gprune_method = GPRUNE_SEL_SAFE; 00488 #elif defined(GPRUNE_DEFAULT_HEURISTIC) 00489 am->config->gprune_method = GPRUNE_SEL_HEURISTIC; 00490 #elif defined(GPRUNE_DEFAULT_BEAM) 00491 am->config->gprune_method = GPRUNE_SEL_BEAM; 00492 #endif 00493 } else { 00494 /* disabled by default for non tied-mixture model */ 00495 am->config->gprune_method = GPRUNE_SEL_NONE; 00496 } 00497 } 00498 00499 /* fixated analysis.para not uses loaded flag any more, so 00500 reset it for binary matching */ 00501 amconf->analysis.para.loaded = 0; 00502 00503 jlog("STAT: *** AM%02d %s loaded\n", amconf->id, amconf->name); 00504 00505 return TRUE; 00506 } 00507 00547 boolean 00548 j_load_lm(Recog *recog, JCONF_LM *lmconf) 00549 { 00550 JCONF_SEARCH *sh; 00551 PROCESS_LM *lm; 00552 PROCESS_AM *am, *atmp; 00553 00554 jlog("STAT: *** loading LM%02d %s\n", lmconf->id, lmconf->name); 00555 00556 /* find which am process instance to assign to each LM */ 00557 am = NULL; 00558 for(sh=recog->jconf->search_root;sh;sh=sh->next) { 00559 if (sh->lmconf == lmconf) { 00560 for(atmp=recog->amlist;atmp;atmp=atmp->next) { 00561 if (sh->amconf == atmp->config) { 00562 am = atmp; 00563 } 00564 } 00565 } 00566 } 00567 if (am == NULL) { 00568 jlog("ERROR: cannot find corresponding AM for LM%02d %s\n", lmconf->id, lmconf->name); 00569 jlog("ERROR: you should write all AM/LM combinations to be used for recognition with \"-SR\"\n"); 00570 return FALSE; 00571 } 00572 00573 /* create LM process instance */ 00574 lm = j_process_lm_new(recog, lmconf); 00575 00576 /* assign AM process instance to the LM instance */ 00577 lm->am = am; 00578 00579 /* load language model */ 00580 if (lm->lmtype == LM_PROB) { 00581 /* LM (N-gram) */ 00582 if ((lm->winfo = initialize_dict(lm->config, lm->am->hmminfo)) == NULL) { 00583 jlog("ERROR: m_fusion: failed to initialize dictionary\n"); 00584 return FALSE; 00585 } 00586 if (lm->config->ngram_filename_lr_arpa || lm->config->ngram_filename_rl_arpa || lm->config->ngram_filename) { 00587 if ((lm->ngram = initialize_ngram(lm->config, lm->winfo)) == NULL) { 00588 jlog("ERROR: m_fusion: failed to initialize N-gram\n"); 00589 return FALSE; 00590 } 00591 } 00592 } 00593 if (lm->lmtype == LM_DFA) { 00594 /* DFA */ 00595 if (lm->config->dfa_filename != NULL && lm->config->dictfilename != NULL) { 00596 /* here add grammar specified by "-dfa" and "-v" to grammar list */ 00597 multigram_add_gramlist(lm->config->dfa_filename, lm->config->dictfilename, lm->config, LM_DFA_GRAMMAR); 00598 } 00599 /* load all the specified grammars */ 00600 if (multigram_load_all_gramlist(lm) == FALSE) { 00601 jlog("ERROR: m_fusion: some error occured in reading grammars\n"); 00602 return FALSE; 00603 } 00604 /* setup for later wchmm building */ 00605 multigram_update(lm); 00606 /* the whole lexicon will be forced to built in the boot sequence, 00607 so reset the global modification flag here */ 00608 lm->global_modified = FALSE; 00609 } 00610 00611 jlog("STAT: *** LM%02d %s loaded\n", lmconf->id, lmconf->name); 00612 00613 return TRUE; 00614 } 00615 00616 /**********************************************************************/ 00645 boolean 00646 j_load_all(Recog *recog, Jconf *jconf) 00647 { 00648 JCONF_AM *amconf; 00649 JCONF_LM *lmconf; 00650 00651 /* set global jconf */ 00652 recog->jconf = jconf; 00653 00654 /* load acoustic models */ 00655 for(amconf=jconf->am_root;amconf;amconf=amconf->next) { 00656 if (j_load_am(recog, amconf) == FALSE) return FALSE; 00657 } 00658 00659 /* load language models */ 00660 for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) { 00661 if (j_load_lm(recog, lmconf) == FALSE) return FALSE; 00662 } 00663 00664 /* GMM */ 00665 if (jconf->reject.gmm_filename != NULL) { 00666 jlog("STAT: loading GMM\n"); 00667 if ((recog->gmm = initialize_GMM(jconf)) == NULL) { 00668 jlog("ERROR: m_fusion: failed to initialize GMM\n"); 00669 return FALSE; 00670 } 00671 } 00672 00673 /* check sampling rate requirement on AMs and set it to global jconf */ 00674 { 00675 boolean ok_p; 00676 00677 /* set input sampling rate from an AM */ 00678 jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq; 00679 jconf->input.period = jconf->am_root->analysis.para.smp_period; 00680 jconf->input.frameshift = jconf->am_root->analysis.para.frameshift; 00681 jconf->input.framesize = jconf->am_root->analysis.para.framesize; 00682 /* check if the value is equal at all AMs */ 00683 ok_p = TRUE; 00684 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00685 if (jconf->input.sfreq != amconf->analysis.para.smp_freq) ok_p = FALSE; 00686 } 00687 if (!ok_p) { 00688 jlog("ERROR: required sampling rate differs in AMs!\n"); 00689 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00690 jlog("ERROR: AM%02d %s: %dHz\n", amconf->analysis.para.smp_freq); 00691 } 00692 return FALSE; 00693 } 00694 /* also check equality for GMM */ 00695 if (recog->gmm) { 00696 if (jconf->input.sfreq != jconf->gmm->analysis.para.smp_freq) { 00697 jlog("ERROR: required sampling rate differs between AM and GMM!\n"); 00698 jlog("ERROR: AM : %dHz\n", jconf->input.sfreq); 00699 jlog("ERROR: GMM: %dHz\n", jconf->gmm->analysis.para.smp_freq); 00700 return FALSE; 00701 } 00702 } 00703 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00704 if (jconf->input.frameshift != amconf->analysis.para.frameshift) ok_p = FALSE; 00705 } 00706 if (!ok_p) { 00707 jlog("ERROR: requested frame shift differs in AMs!\n"); 00708 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00709 jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.frameshift); 00710 } 00711 return FALSE; 00712 } 00713 /* also check equality for GMM */ 00714 if (recog->gmm) { 00715 if (jconf->input.frameshift != jconf->gmm->analysis.para.frameshift) { 00716 jlog("ERROR: required frameshift differs between AM and GMM!\n"); 00717 jlog("ERROR: AM : %d samples\n", jconf->input.frameshift); 00718 jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.frameshift); 00719 return FALSE; 00720 } 00721 } 00722 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00723 if (jconf->input.framesize != amconf->analysis.para.framesize) ok_p = FALSE; 00724 } 00725 if (!ok_p) { 00726 jlog("ERROR: requested frame size (window length) differs in AMs!\n"); 00727 for(amconf = jconf->am_root; amconf; amconf = amconf->next) { 00728 jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.framesize); 00729 } 00730 return FALSE; 00731 } 00732 /* also check equality for GMM */ 00733 if (recog->gmm) { 00734 if (jconf->input.framesize != jconf->gmm->analysis.para.framesize) { 00735 jlog("ERROR: requested frame size differs between AM and GMM!\n"); 00736 jlog("ERROR: AM : %d samples\n", jconf->input.framesize); 00737 jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.framesize); 00738 return FALSE; 00739 } 00740 } 00741 } 00742 00743 return TRUE; 00744 } 00745 00763 static boolean 00764 mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc) 00765 { 00766 char *s1, *s2; 00767 00768 /* parameter extraction conditions are the same */ 00769 /* check exact match in amconf->analysis.* */ 00770 if (&(amconf->analysis.para) == mfcc->para || memcmp(&(amconf->analysis.para), mfcc->para, sizeof(Value)) == 0) { 00771 s1 = amconf->analysis.cmnload_filename; 00772 s2 = mfcc->cmn.load_filename; 00773 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { 00774 s1 = amconf->analysis.cmnsave_filename; 00775 s2 = mfcc->cmn.save_filename; 00776 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { 00777 if (amconf->analysis.cmn_update == mfcc->cmn.update 00778 && amconf->analysis.cmn_map_weight == mfcc->cmn.map_weight) { 00779 if (amconf->frontend.ss_alpha == mfcc->frontend.ss_alpha 00780 && amconf->frontend.ss_floor == mfcc->frontend.ss_floor 00781 && amconf->frontend.sscalc == mfcc->frontend.sscalc 00782 && amconf->frontend.sscalc_len == mfcc->frontend.sscalc_len) { 00783 s1 = amconf->frontend.ssload_filename; 00784 s2 = mfcc->frontend.ssload_filename; 00785 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { 00786 return TRUE; 00787 } 00788 } 00789 } 00790 } 00791 } 00792 } 00793 00794 return FALSE; 00795 } 00796 00797 /***************************************************/ 00798 /* create MFCC calculation instance from AM config */ 00799 /* according to the fixated parameter information */ 00800 /***************************************************/ 00825 void 00826 create_mfcc_calc_instances(Recog *recog) 00827 { 00828 PROCESS_AM *am; 00829 MFCCCalc *mfcc; 00830 int count; 00831 00832 jlog("STAT: *** create MFCC calculation modules from AM\n"); 00833 count = 0; 00834 for(am=recog->amlist;am;am=am->next) { 00835 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00836 if (mfcc_config_is_same(am->config, mfcc)) { 00837 /* the same */ 00838 jlog("STAT: AM%02d %s: share MFCC%02d\n", am->config->id, am->config->name, mfcc->id); 00839 am->mfcc = mfcc; 00840 break; 00841 } 00842 } 00843 if (!mfcc) { /* the same not found */ 00844 /* initialize MFCC calculation work area */ 00845 count++; 00846 /* create new mfcc instance */ 00847 mfcc = j_mfcccalc_new(am->config); 00848 mfcc->id = count; 00849 /* assign to the am */ 00850 am->mfcc = mfcc; 00851 /* add to the list of all MFCCCalc */ 00852 mfcc->next = recog->mfcclist; 00853 recog->mfcclist = mfcc; 00854 jlog("STAT: AM%2d %s: create a new module MFCC%02d\n", am->config->id, am->config->name, mfcc->id); 00855 } 00856 } 00857 00858 /* for GMM */ 00859 if (recog->gmm) { 00860 /* if GMM calculation config found, make MFCC instance for that. */ 00861 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00862 if (mfcc_config_is_same(recog->jconf->gmm, mfcc)) { 00863 /* the same */ 00864 jlog("STAT: GMM: share MFCC%02d\n", mfcc->id); 00865 recog->gmmmfcc = mfcc; 00866 break; 00867 } 00868 } 00869 if (!mfcc) { /* the same not found */ 00870 /* initialize MFCC calculation work area */ 00871 count++; 00872 /* create new mfcc instance */ 00873 mfcc = j_mfcccalc_new(recog->jconf->gmm); 00874 mfcc->id = count; 00875 /* assign to gmm */ 00876 recog->gmmmfcc = mfcc; 00877 /* add to the list of all MFCCCalc */ 00878 mfcc->next = recog->mfcclist; 00879 recog->mfcclist = mfcc; 00880 jlog("STAT: GMM: create a new module MFCC%02d\n", mfcc->id); 00881 } 00882 } 00883 00884 jlog("STAT: %d MFCC modules created\n", count); 00885 } 00886 00919 boolean 00920 j_launch_recognition_instance(Recog *recog, JCONF_SEARCH *sconf) 00921 { 00922 RecogProcess *p; 00923 PROCESS_AM *am; 00924 PROCESS_LM *lm; 00925 00926 jlog("STAT: composing recognizer instance SR%02d %s (AM%02d %s, LM%02d %s)\n", sconf->id, sconf->name, sconf->amconf->id, sconf->amconf->name, sconf->lmconf->id, sconf->lmconf->name); 00927 00928 /* allocate recognition instance */ 00929 p = j_recogprocess_new(recog, sconf); 00930 00931 /* assign corresponding AM instance and LM instance to use */ 00932 for(lm=recog->lmlist;lm;lm=lm->next) { 00933 if (sconf->lmconf == lm->config) { 00934 for(am=recog->amlist;am;am=am->next) { 00935 if (sconf->amconf == am->config) { 00936 p->am = am; 00937 p->lm = lm; 00938 } 00939 } 00940 } 00941 } 00942 00943 if (p->config->sw.triphone_check_flag && p->am->hmminfo->is_triphone) { 00944 /* go into interactive triphone HMM check mode */ 00945 hmm_check(p); 00946 } 00947 00948 /******************************************/ 00949 /******** set work area and flags *********/ 00950 /******************************************/ 00951 00952 /* copy values of sub instances for handly access during recognition */ 00953 /* set lm type */ 00954 p->lmtype = p->lm->lmtype; 00955 p->lmvar = p->lm->lmvar; 00956 p->graphout = p->config->graph.enabled; 00957 00958 /* set flag for context dependent handling */ 00959 if (p->config->force_ccd_handling) { 00960 p->ccd_flag = p->config->ccd_handling; 00961 } else { 00962 if (p->am->hmminfo->is_triphone) { 00963 p->ccd_flag = TRUE; 00964 } else { 00965 p->ccd_flag = FALSE; 00966 } 00967 } 00968 00969 /* iwsp prepare */ 00970 if (p->lm->config->enable_iwsp) { 00971 if (p->am->hmminfo->multipath) { 00972 /* find short-pause model */ 00973 if (p->am->hmminfo->sp == NULL) { 00974 jlog("ERROR: iwsp enabled but no short pause model \"%s\" in hmmdefs\n", p->am->config->spmodel_name); 00975 return FALSE; 00976 } 00977 p->am->hmminfo->iwsp_penalty = p->am->config->iwsp_penalty; 00978 } else { 00979 jlog("Warning: \"-iwsp\" is supported on multi-path mode, ignored\n"); 00980 } 00981 } 00982 00983 /* for short-pause segmentation */ 00984 if (p->config->successive.enabled) { 00985 if (p->config->successive.pausemodelname) { 00986 /* pause model name string specified, divide it and store to p */ 00987 char *s; 00988 int n; 00989 p->pass1.pausemodelnames = (char*)mymalloc(strlen(p->config->successive.pausemodelname)+1); 00990 strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname); 00991 n = 0; 00992 for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) { 00993 n++; 00994 } 00995 p->pass1.pausemodelnum = n; 00996 p->pass1.pausemodel = (char **)mymalloc(sizeof(char *) * n); 00997 strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname); 00998 n = 0; 00999 for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) { 01000 p->pass1.pausemodel[n++] = s; 01001 } 01002 } else { 01003 p->pass1.pausemodel = NULL; 01004 } 01005 /* check if pause word exists on dictionary */ 01006 { 01007 WORD_ID w; 01008 boolean ok_p; 01009 ok_p = FALSE; 01010 for(w=0;w<p->lm->winfo->num;w++) { 01011 if (is_sil(w, p)) { 01012 ok_p = TRUE; 01013 break; 01014 } 01015 } 01016 if (!ok_p) { 01017 #ifdef SPSEGMENT_NAIST 01018 jlog("Error: no pause word in dictionary needed for decoder-based VAD\n"); 01019 #else 01020 jlog("Error: no pause word in dictionary needed for short-pause segmentation\n"); 01021 #endif 01022 jlog("Error: you should have at least one pause word in dictionary\n"); 01023 jlog("Error: you can specify pause model names by \"-pausemodels\"\n"); 01024 return FALSE; 01025 } 01026 } 01027 } 01028 01029 /**********************************************/ 01030 /******** set model-specific defaults *********/ 01031 /**********************************************/ 01032 if (p->lmtype == LM_PROB) { 01033 /* set default lm parameter if not specified */ 01034 if (!p->config->lmp.lmp_specified) { 01035 if (p->am->hmminfo->is_triphone) { 01036 p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_TRI_PASS1; 01037 p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_TRI_PASS1; 01038 } else { 01039 p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_MONO_PASS1; 01040 p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_MONO_PASS1; 01041 } 01042 } 01043 if (!p->config->lmp.lmp2_specified) { 01044 if (p->am->hmminfo->is_triphone) { 01045 p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_TRI_PASS2; 01046 p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_TRI_PASS2; 01047 } else { 01048 p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_MONO_PASS2; 01049 p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_MONO_PASS2; 01050 } 01051 } 01052 if (p->config->lmp.lmp_specified != p->config->lmp.lmp2_specified) { 01053 jlog("WARNING: m_fusion: only -lmp or -lmp2 specified, LM weights may be unbalanced\n"); 01054 } 01055 } 01056 01057 /****************************/ 01058 /******* build wchmm ********/ 01059 /****************************/ 01060 if (p->lmtype == LM_DFA) { 01061 /* execute generation of global grammar and build of wchmm */ 01062 multigram_build(p); /* some modification occured if return TRUE */ 01063 } 01064 01065 if (p->lmtype == LM_PROB) { 01066 /* build wchmm with N-gram */ 01067 p->wchmm = wchmm_new(); 01068 p->wchmm->lmtype = p->lmtype; 01069 p->wchmm->lmvar = p->lmvar; 01070 p->wchmm->ccd_flag = p->ccd_flag; 01071 p->wchmm->category_tree = FALSE; 01072 p->wchmm->hmmwrk = &(p->am->hmmwrk); 01073 /* assign models */ 01074 p->wchmm->ngram = p->lm->ngram; 01075 if (p->lmvar == LM_NGRAM_USER) { 01076 /* register LM functions for 1st pass here */ 01077 p->wchmm->uni_prob_user = p->lm->lmfunc.uniprob; 01078 p->wchmm->bi_prob_user = p->lm->lmfunc.biprob; 01079 } 01080 p->wchmm->winfo = p->lm->winfo; 01081 p->wchmm->hmminfo = p->am->hmminfo; 01082 if (p->wchmm->category_tree) { 01083 if (p->config->pass1.old_tree_function_flag) { 01084 if (build_wchmm(p->wchmm, p->lm->config) == FALSE) { 01085 jlog("ERROR: m_fusion: error in bulding wchmm\n"); 01086 return FALSE; 01087 } 01088 } else { 01089 if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) { 01090 jlog("ERROR: m_fusion: error in bulding wchmm\n"); 01091 return FALSE; 01092 } 01093 } 01094 } else { 01095 if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) { 01096 jlog("ERROR: m_fusion: error in bulding wchmm\n"); 01097 return FALSE; 01098 } 01099 } 01100 01101 /* 起動時 -check でチェックモードへ */ 01102 if (p->config->sw.wchmm_check_flag) { 01103 wchmm_check_interactive(p->wchmm); 01104 } 01105 01106 /* set beam width */ 01107 /* guess beam width from models, when not specified */ 01108 p->trellis_beam_width = set_beam_width(p->wchmm, p->config->pass1.specified_trellis_beam_width); 01109 01110 /* initialize cache for factoring */ 01111 max_successor_cache_init(p->wchmm); 01112 } 01113 01114 /* backtrellis initialization */ 01115 p->backtrellis = (BACKTRELLIS *)mymalloc(sizeof(BACKTRELLIS)); 01116 bt_init(p->backtrellis); 01117 01118 /* prepare work area for 2nd pass */ 01119 wchmm_fbs_prepare(p); 01120 01121 jlog("STAT: SR%02d %s composed\n", sconf->id, sconf->name); 01122 01123 if (sconf->sw.start_inactive) { 01124 /* start inactive */ 01125 p->active = -1; 01126 } else { 01127 /* book activation for the recognition */ 01128 p->active = 1; 01129 } 01130 if (p->lmtype == LM_DFA) { 01131 if (p->lm->winfo == NULL || 01132 (p->lmvar == LM_DFA_GRAMMAR && p->lm->dfa == NULL)) { 01133 /* make this instance inactive */ 01134 p->active = -1; 01135 } 01136 } 01137 01138 return TRUE; 01139 } 01140 01141 01195 boolean 01196 j_final_fusion(Recog *recog) 01197 { 01198 MFCCCalc *mfcc; 01199 JCONF_SEARCH *sconf; 01200 PROCESS_AM *am; 01201 01202 jlog("STAT: ------\n"); 01203 jlog("STAT: All models are ready, go for final fusion\n"); 01204 jlog("STAT: [1] create MFCC extraction instance(s)\n"); 01205 if (recog->jconf->input.type == INPUT_WAVEFORM) { 01206 /***************************************************/ 01207 /* create MFCC calculation instance from AM config */ 01208 /* according to the fixated parameter information */ 01209 /***************************************************/ 01210 create_mfcc_calc_instances(recog); 01211 } 01212 01213 /****************************************/ 01214 /* create recognition process instances */ 01215 /****************************************/ 01216 jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n"); 01217 for(sconf=recog->jconf->search_root;sconf;sconf=sconf->next) { 01218 if (j_launch_recognition_instance(recog, sconf) == FALSE) return FALSE; 01219 } 01220 01221 /****************************/ 01222 /****** initialize GMM ******/ 01223 /****************************/ 01224 if (recog->gmm != NULL) { 01225 jlog("STAT: [2.5] create GMM instance\n"); 01226 if (gmm_init(recog) == FALSE) { 01227 jlog("ERROR: m_fusion: error in initializing GMM\n"); 01228 return FALSE; 01229 } 01230 } 01231 01232 /* stage 4: setup output probability function for each AM */ 01233 jlog("STAT: [3] initialize for acoustic HMM calculation\n"); 01234 for(am=recog->amlist;am;am=am->next) { 01235 #ifdef ENABLE_PLUGIN 01236 /* set plugin function if specified */ 01237 if (am->config->gprune_method == GPRUNE_SEL_USER) { 01238 am->hmmwrk.compute_gaussset = (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int)) plugin_get_func(am->config->gprune_plugin_source, "calcmix"); 01239 if (am->hmmwrk.compute_gaussset == NULL) { 01240 jlog("ERROR: calcmix plugin has no function \"calcmix\"\n"); 01241 return FALSE; 01242 } 01243 am->hmmwrk.compute_gaussset_init = (boolean (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_init"); 01244 if (am->hmmwrk.compute_gaussset_init == NULL) { 01245 jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n"); 01246 return FALSE; 01247 } 01248 am->hmmwrk.compute_gaussset_free = (void (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_free"); 01249 if (am->hmmwrk.compute_gaussset_free == NULL) { 01250 jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n"); 01251 return FALSE; 01252 } 01253 } 01254 #endif 01255 if (am->config->hmm_gs_filename != NULL) {/* with GMS */ 01256 if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { 01257 return FALSE; 01258 } 01259 } else { 01260 if (outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { 01261 return FALSE; 01262 } 01263 } 01264 } 01265 01266 /* stage 5: initialize work area for input and realtime decoding */ 01267 01268 jlog("STAT: [4] prepare MFCC storage(s)\n"); 01269 if (recog->jconf->input.type == INPUT_VECTOR) { 01270 /* create an MFCC instance for MFCC input */ 01271 /* create new mfcc instance */ 01272 recog->mfcclist = j_mfcccalc_new(NULL); 01273 recog->mfcclist->id = 1; 01274 /* assign to the am */ 01275 for(am=recog->amlist;am;am=am->next) { 01276 am->mfcc = recog->mfcclist; 01277 } 01278 if (recog->gmm) recog->gmmmfcc = recog->mfcclist; 01279 } 01280 /* allocate parameter holders */ 01281 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01282 mfcc->param = new_param(); 01283 } 01284 01285 /* initialize SS calculation work area */ 01286 if (recog->jconf->input.type == INPUT_WAVEFORM) { 01287 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01288 if (mfcc->frontend.sscalc) { 01289 mfcc->frontend.mfccwrk_ss = WMP_work_new(mfcc->para); 01290 if (mfcc->frontend.mfccwrk_ss == NULL) { 01291 jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n"); 01292 return FALSE; 01293 } 01294 if (mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < mfcc->para->framesize) { 01295 jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n", mfcc->frontend.sscalc_len, mfcc->para->framesize * 1000 / recog->jconf->input.sfreq); 01296 return FALSE; 01297 } 01298 } 01299 } 01300 } 01301 01302 if (recog->jconf->decodeopt.realtime_flag) { 01303 jlog("STAT: [5] prepare for real-time decoding\n"); 01304 /* prepare for 1st pass pipeline processing */ 01305 if (recog->jconf->input.type == INPUT_WAVEFORM) { 01306 if (RealTimeInit(recog) == FALSE) { 01307 jlog("ERROR: m_fusion: failed to initialize recognition process\n"); 01308 return FALSE; 01309 } 01310 } 01311 } 01312 01313 /* finished! */ 01314 jlog("STAT: All init successfully done\n\n"); 01315 01316 /* set-up callback plugin if any */ 01317 #ifdef ENABLE_PLUGIN 01318 if (plugin_exec_engine_startup(recog) == FALSE) { 01319 jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n"); 01320 return FALSE; 01321 } 01322 #endif 01323 01324 return TRUE; 01325 } 01326 01327 /* end of file */