Julius 4.1.5
libjulius/src/m_fusion.c
説明を見る。
00001 
00026 /*
00027  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00028  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00029  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00030  * All rights reserved
00031  */
00032 
00033 #include <julius/julius.h>
00034 
00072 static HTK_HMM_INFO *
00073 initialize_HMM(JCONF_AM *amconf, Jconf *jconf)
00074 {
00075   HTK_HMM_INFO *hmminfo;
00076 
00077   /* at here, global variable "para" holds values specified by user or
00078      by user-specified HTK config file */
00079   if (amconf->analysis.para_hmm.loaded == 1) {
00080     jlog("Warning: you seems to read more than one acoustic model for recognition, but\n");
00081     jlog("Warning: previous one already has header-embedded acoustic parameters\n");
00082     jlog("Warning: if you have different parameters, result may be wrong!\n");
00083   }
00084   
00085   /* allocate new hmminfo */
00086   hmminfo = hmminfo_new();
00087   /* load hmmdefs */
00088   if (init_hmminfo(hmminfo, amconf->hmmfilename, amconf->mapfilename, &(amconf->analysis.para_hmm)) == FALSE) {
00089     hmminfo_free(hmminfo);
00090     return NULL;
00091   }
00092 
00093   /* set multipath mode flag */
00094   if (amconf->force_multipath) {
00095     jlog("STAT: m_fusion: force multipath HMM handling by user request\n");
00096     hmminfo->multipath = TRUE;
00097   } else {
00098     hmminfo->multipath = hmminfo->need_multipath;
00099   }
00100 
00101   /* only MFCC is supported for audio input */
00102   /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */
00103   /* check parameter type of this acoustic HMM */
00104   if (jconf->input.type == INPUT_WAVEFORM) {
00105     /* Decode parameter extraction type according to the training
00106        parameter type in the header of the given acoustic HMM */
00107     if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) {
00108       jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC is supported\n");
00109       hmminfo_free(hmminfo);
00110       return NULL;
00111     }
00112     /* set acoustic analysis parameters from HMM header */
00113     calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size);
00114   }
00115   /* check if tied_mixture */
00116   if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) {
00117     jlog("ERROR: m_fusion: this tied-mixture model has no codebook!?\n");
00118     hmminfo_free(hmminfo);
00119     return NULL;
00120   }
00121 
00122 #ifdef PASS1_IWCD
00123   /* make state clusters of same context for inter-word triphone approx. */
00124   if (hmminfo->is_triphone) {
00125     jlog("STAT: making pseudo bi/mono-phone for IW-triphone\n");
00126     if (make_cdset(hmminfo) == FALSE) {
00127       jlog("ERROR: m_fusion: failed to make context-dependent state set\n");
00128       hmminfo_free(hmminfo);
00129       return NULL;
00130     }
00131     /* add those `pseudo' biphone and monophone to the logical HMM names */
00132     /* they points not to the defined HMM, but to the CD_Set structure */
00133     hmm_add_pseudo_phones(hmminfo);
00134   }
00135 #endif
00136 
00137   /* find short pause model and set to hmminfo->sp */
00138   htk_hmm_set_pause_model(hmminfo, amconf->spmodel_name);
00139 
00140 
00141   hmminfo->cdset_method = amconf->iwcdmethod;
00142   hmminfo->cdmax_num = amconf->iwcdmaxn;
00143 
00144   if (amconf->analysis.para_htk.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk));
00145   if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm));
00146   apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default));
00147 
00148   return(hmminfo);
00149   
00150 }
00151 
00165 static HTK_HMM_INFO *
00166 initialize_GSHMM(JCONF_AM *amconf)
00167 {
00168   HTK_HMM_INFO *hmm_gs;
00169   Value para_dummy;
00170 
00171   jlog("STAT: Reading GS HMMs:\n");
00172   hmm_gs = hmminfo_new();
00173   undef_para(&para_dummy);
00174   if (init_hmminfo(hmm_gs, amconf->hmm_gs_filename, NULL, &para_dummy) == FALSE) {
00175     hmminfo_free(hmm_gs);
00176     return NULL;
00177   }
00178   return(hmm_gs);
00179 }
00180 
00197 static HTK_HMM_INFO *
00198 initialize_GMM(Jconf *jconf)
00199 {
00200   HTK_HMM_INFO *gmm;
00201   
00202   jlog("STAT: reading GMM: %s\n", jconf->reject.gmm_filename);
00203 
00204   if (jconf->gmm == NULL) {
00205     /* no acoustic parameter setting was given for GMM using -AM_GMM, 
00206        copy the first AM setting */
00207     jlog("STAT: -AM_GMM not used, use parameter of the first AM\n");
00208     jconf->gmm = j_jconf_am_new();
00209     memcpy(jconf->gmm, jconf->am_root, sizeof(JCONF_AM));
00210     jconf->gmm->hmmfilename = NULL;
00211     jconf->gmm->mapfilename = NULL;
00212     jconf->gmm->spmodel_name = NULL;
00213     jconf->gmm->hmm_gs_filename = NULL;
00214     if (jconf->am_root->analysis.cmnload_filename) {
00215       jconf->gmm->analysis.cmnload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnload_filename)+ 1), jconf->am_root->analysis.cmnload_filename);
00216     }
00217     if (jconf->am_root->analysis.cmnsave_filename) {
00218       jconf->gmm->analysis.cmnsave_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnsave_filename)+ 1), jconf->am_root->analysis.cmnsave_filename);
00219     }
00220     if (jconf->am_root->frontend.ssload_filename) {
00221       jconf->gmm->frontend.ssload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->frontend.ssload_filename)+ 1), jconf->am_root->frontend.ssload_filename);
00222     }
00223   }
00224 
00225   gmm = hmminfo_new();
00226   if (init_hmminfo(gmm, jconf->reject.gmm_filename, NULL, &(jconf->gmm->analysis.para_hmm)) == FALSE) {
00227     hmminfo_free(gmm);
00228     return NULL;
00229   }
00230   /* check parameter type of this acoustic HMM */
00231   if (jconf->input.type == INPUT_WAVEFORM) {
00232     /* Decode parameter extraction type according to the training
00233        parameter type in the header of the given acoustic HMM */
00234     if ((gmm->opt.param_type & F_BASEMASK) != F_MFCC) {
00235       jlog("ERROR: m_fusion: for direct speech input, only GMM trained by MFCC is supported\n");
00236       hmminfo_free(gmm);
00237       return NULL;
00238     }
00239   }
00240 
00241   /* set acoustic analysis parameters from HMM header */
00242   calc_para_from_header(&(jconf->gmm->analysis.para), gmm->opt.param_type, gmm->opt.vec_size);
00243 
00244   if (jconf->gmm->analysis.para_htk.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_htk));
00245   if (jconf->gmm->analysis.para_hmm.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_hmm));
00246   apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_default));
00247 
00248   return(gmm);
00249 }
00250 
00284 static WORD_INFO *
00285 initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo)
00286 {
00287   WORD_INFO *winfo;
00288 
00289   /* allocate new word dictionary */
00290   winfo = word_info_new();
00291   /* read in dictinary from file */
00292   if ( ! 
00293 #ifdef MONOTREE
00294       /* leave winfo monophone for 1st pass lexicon tree */
00295        init_voca(winfo, lmconf->dictfilename, hmminfo, TRUE, lmconf->forcedict_flag)
00296 #else 
00297        init_voca(winfo, lmconf->dictfilename, hmminfo, FALSE, lmconf->forcedict_flag)
00298 #endif
00299        ) {
00300     jlog("ERROR: m_fusion: failed to read dictionary, terminated\n");
00301     word_info_free(winfo);
00302     return NULL;
00303   }
00304 
00305   if (lmconf->lmtype == LM_PROB) {
00306     /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */
00307     if (lmconf->enable_iwspword) {
00308       if (
00309 #ifdef MONOTREE
00310           voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, TRUE)
00311 #else 
00312           voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, FALSE)
00313 #endif
00314           == FALSE) {
00315         jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry);
00316         word_info_free(winfo);
00317         return NULL;
00318       } else {
00319         jlog("STAT: 1 IW-sp word entry added\n");
00320       }
00321     }
00322     /* set {head,tail}_silwid */
00323     winfo->head_silwid = voca_lookup_wid(lmconf->head_silname, winfo);
00324     if (winfo->head_silwid == WORD_INVALID) { /* not exist */
00325       jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname);
00326       word_info_free(winfo);
00327       return NULL;
00328     }
00329     winfo->tail_silwid = voca_lookup_wid(lmconf->tail_silname, winfo);
00330     if (winfo->tail_silwid == WORD_INVALID) { /* not exist */
00331       jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname);
00332       word_info_free(winfo);
00333       return NULL;
00334     }
00335   }
00336   
00337   return(winfo);
00338   
00339 }
00340 
00341 
00374 static NGRAM_INFO *
00375 initialize_ngram(JCONF_LM *lmconf, WORD_INFO *winfo)
00376 {
00377   NGRAM_INFO *ngram;
00378   boolean ret;
00379 
00380   /* allocate new */
00381   ngram = ngram_info_new();
00382   /* load LM */
00383   if (lmconf->ngram_filename != NULL) { /* binary format */
00384     ret = init_ngram_bin(ngram, lmconf->ngram_filename);
00385   } else {                      /* ARPA format */
00386     /* if either forward or backward N-gram is specified, read it */
00387     /* if both specified, use backward N-gram as main and
00388        use forward 2-gram only for 1st pass (this is an old behavior) */
00389     if (lmconf->ngram_filename_rl_arpa) {
00390       ret = init_ngram_arpa(ngram, lmconf->ngram_filename_rl_arpa, DIR_RL);
00391       if (ret == FALSE) {
00392         ngram_info_free(ngram);
00393         return NULL;
00394       }
00395       if (lmconf->ngram_filename_lr_arpa) {
00396         ret = init_ngram_arpa_additional(ngram, lmconf->ngram_filename_lr_arpa);
00397         if (ret == FALSE) {
00398           ngram_info_free(ngram);
00399           return NULL;
00400         }
00401       }
00402     } else if (lmconf->ngram_filename_lr_arpa) {
00403       ret = init_ngram_arpa(ngram, lmconf->ngram_filename_lr_arpa, DIR_LR);
00404     }
00405   }
00406   if (ret == FALSE) {
00407     ngram_info_free(ngram);
00408     return NULL;
00409   }
00410 
00411   /* set unknown (=OOV) word id */
00412   set_unknown_id(ngram, lmconf->unknown_name);
00413 
00414   /* map dict item to N-gram entry */
00415   if (make_voca_ref(ngram, winfo) == FALSE) {
00416     ngram_info_free(ngram);
00417     return NULL;
00418   }
00419 
00420   /* post-fix EOS / BOS uni prob for SRILM */
00421   fix_uniprob_srilm(ngram, winfo);
00422 
00423   return(ngram);
00424 }
00425 
00458 boolean
00459 j_load_am(Recog *recog, JCONF_AM *amconf)
00460 {
00461   PROCESS_AM *am;
00462 
00463   jlog("STAT: *** loading AM%02d %s\n", amconf->id, amconf->name);
00464 
00465   /* create AM process instance */
00466   am = j_process_am_new(recog, amconf);
00467   
00468   /* HMM */
00469   if ((am->hmminfo = initialize_HMM(amconf, recog->jconf)) == NULL) {
00470     jlog("ERROR: m_fusion: failed to initialize AM\n");
00471     return FALSE;
00472   }
00473   if (amconf->hmm_gs_filename != NULL) {
00474     if ((am->hmm_gs = initialize_GSHMM(amconf)) == NULL) {
00475       jlog("ERROR: m_fusion: failed to initialize GS HMM\n");
00476       return FALSE;
00477     }
00478   }
00479 
00480   /* fixate model-specific params */
00481   /* set params whose default will change by models and not specified in arg */
00482   /* select Gaussian pruning function */
00483   if (am->config->gprune_method == GPRUNE_SEL_UNDEF) {/* set default if not specified */
00484     if (am->hmminfo->is_tied_mixture) {
00485       /* enabled by default for tied-mixture models */
00486 #if defined(GPRUNE_DEFAULT_SAFE)
00487       am->config->gprune_method = GPRUNE_SEL_SAFE;
00488 #elif defined(GPRUNE_DEFAULT_HEURISTIC)
00489       am->config->gprune_method = GPRUNE_SEL_HEURISTIC;
00490 #elif defined(GPRUNE_DEFAULT_BEAM)
00491       am->config->gprune_method = GPRUNE_SEL_BEAM;
00492 #endif
00493     } else {
00494       /* disabled by default for non tied-mixture model */
00495       am->config->gprune_method = GPRUNE_SEL_NONE;
00496     }
00497   }
00498   
00499   /* fixated analysis.para not uses loaded flag any more, so
00500      reset it for binary matching */
00501   amconf->analysis.para.loaded = 0;
00502 
00503   jlog("STAT: *** AM%02d %s loaded\n", amconf->id, amconf->name);
00504 
00505   return TRUE;
00506 }
00507 
00547 boolean
00548 j_load_lm(Recog *recog, JCONF_LM *lmconf)
00549 {
00550   JCONF_SEARCH *sh;
00551   PROCESS_LM *lm;
00552   PROCESS_AM *am, *atmp;
00553 
00554   jlog("STAT: *** loading LM%02d %s\n", lmconf->id, lmconf->name);
00555 
00556   /* find which am process instance to assign to each LM */
00557   am = NULL;
00558   for(sh=recog->jconf->search_root;sh;sh=sh->next) {
00559     if (sh->lmconf == lmconf) {
00560       for(atmp=recog->amlist;atmp;atmp=atmp->next) {
00561         if (sh->amconf == atmp->config) {
00562           am = atmp;
00563         }
00564       }
00565     }
00566   }
00567   if (am == NULL) {
00568     jlog("ERROR: cannot find corresponding AM for LM%02d %s\n", lmconf->id, lmconf->name);
00569     jlog("ERROR: you should write all AM/LM combinations to be used for recognition with \"-SR\"\n");
00570     return FALSE;
00571   }
00572 
00573   /* create LM process instance */
00574   lm = j_process_lm_new(recog, lmconf);
00575 
00576   /* assign AM process instance to the LM instance */
00577   lm->am = am;
00578 
00579   /* load language model */
00580   if (lm->lmtype == LM_PROB) {
00581     /* LM (N-gram) */
00582     if ((lm->winfo = initialize_dict(lm->config, lm->am->hmminfo)) == NULL) {
00583       jlog("ERROR: m_fusion: failed to initialize dictionary\n");
00584       return FALSE;
00585     }
00586     if (lm->config->ngram_filename_lr_arpa || lm->config->ngram_filename_rl_arpa || lm->config->ngram_filename) {
00587       if ((lm->ngram = initialize_ngram(lm->config, lm->winfo)) == NULL) {
00588         jlog("ERROR: m_fusion: failed to initialize N-gram\n");
00589         return FALSE;
00590       }
00591     }
00592   }
00593   if (lm->lmtype == LM_DFA) {
00594     /* DFA */
00595     if (lm->config->dfa_filename != NULL && lm->config->dictfilename != NULL) {
00596       /* here add grammar specified by "-dfa" and "-v" to grammar list */
00597       multigram_add_gramlist(lm->config->dfa_filename, lm->config->dictfilename, lm->config, LM_DFA_GRAMMAR);
00598     }
00599     /* load all the specified grammars */
00600     if (multigram_load_all_gramlist(lm) == FALSE) {
00601       jlog("ERROR: m_fusion: some error occured in reading grammars\n");
00602       return FALSE;
00603     }
00604     /* setup for later wchmm building */
00605     multigram_update(lm);
00606     /* the whole lexicon will be forced to built in the boot sequence,
00607        so reset the global modification flag here */
00608     lm->global_modified = FALSE;
00609   }
00610   
00611   jlog("STAT: *** LM%02d %s loaded\n", lmconf->id, lmconf->name);
00612 
00613   return TRUE;
00614 }
00615 
00616 /**********************************************************************/
00645 boolean
00646 j_load_all(Recog *recog, Jconf *jconf)
00647 {
00648   JCONF_AM *amconf;
00649   JCONF_LM *lmconf;
00650 
00651   /* set global jconf */
00652   recog->jconf = jconf;
00653 
00654   /* load acoustic models */
00655   for(amconf=jconf->am_root;amconf;amconf=amconf->next) {
00656     if (j_load_am(recog, amconf) == FALSE) return FALSE;
00657   }
00658 
00659   /* load language models */
00660   for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) {
00661     if (j_load_lm(recog, lmconf) == FALSE) return FALSE;
00662   }
00663 
00664   /* GMM */
00665   if (jconf->reject.gmm_filename != NULL) {
00666     jlog("STAT: loading GMM\n");
00667     if ((recog->gmm = initialize_GMM(jconf)) == NULL) {
00668       jlog("ERROR: m_fusion: failed to initialize GMM\n");
00669       return FALSE;
00670     }
00671   }
00672 
00673   /* check sampling rate requirement on AMs and set it to global jconf */
00674   {
00675     boolean ok_p;
00676 
00677     /* set input sampling rate from an AM */
00678     jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq;
00679     jconf->input.period = jconf->am_root->analysis.para.smp_period;
00680     jconf->input.frameshift = jconf->am_root->analysis.para.frameshift;
00681     jconf->input.framesize = jconf->am_root->analysis.para.framesize;
00682     /* check if the value is equal at all AMs */
00683     ok_p = TRUE;
00684     for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00685       if (jconf->input.sfreq != amconf->analysis.para.smp_freq) ok_p = FALSE;
00686     }
00687     if (!ok_p) {
00688       jlog("ERROR: required sampling rate differs in AMs!\n");
00689       for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00690         jlog("ERROR: AM%02d %s: %dHz\n", amconf->analysis.para.smp_freq);
00691       }
00692       return FALSE;
00693     }
00694     /* also check equality for GMM */
00695     if (recog->gmm) {
00696       if (jconf->input.sfreq != jconf->gmm->analysis.para.smp_freq) {
00697         jlog("ERROR: required sampling rate differs between AM and GMM!\n");
00698         jlog("ERROR: AM : %dHz\n", jconf->input.sfreq);
00699         jlog("ERROR: GMM: %dHz\n", jconf->gmm->analysis.para.smp_freq);
00700         return FALSE;
00701       }
00702     }
00703     for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00704       if (jconf->input.frameshift != amconf->analysis.para.frameshift) ok_p = FALSE;
00705     }
00706     if (!ok_p) {
00707       jlog("ERROR: requested frame shift differs in AMs!\n");
00708       for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00709         jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.frameshift);
00710       }
00711       return FALSE;
00712     }
00713     /* also check equality for GMM */
00714     if (recog->gmm) {
00715       if (jconf->input.frameshift != jconf->gmm->analysis.para.frameshift) {
00716         jlog("ERROR: required frameshift differs between AM and GMM!\n");
00717         jlog("ERROR: AM : %d samples\n", jconf->input.frameshift);
00718         jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.frameshift);
00719         return FALSE;
00720       }
00721     }
00722     for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00723       if (jconf->input.framesize != amconf->analysis.para.framesize) ok_p = FALSE;
00724     }
00725     if (!ok_p) {
00726       jlog("ERROR: requested frame size (window length) differs in AMs!\n");
00727       for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00728         jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.framesize);
00729       }
00730       return FALSE;
00731     }
00732     /* also check equality for GMM */
00733     if (recog->gmm) {
00734       if (jconf->input.framesize != jconf->gmm->analysis.para.framesize) {
00735         jlog("ERROR: requested frame size differs between AM and GMM!\n");
00736         jlog("ERROR: AM : %d samples\n", jconf->input.framesize);
00737         jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.framesize);
00738         return FALSE;
00739       }
00740     }
00741   }
00742 
00743   return TRUE;
00744 }
00745 
00763 static boolean
00764 mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc)
00765 {
00766   char *s1, *s2;
00767 
00768   /* parameter extraction conditions are the same */
00769   /* check exact match in amconf->analysis.* */
00770   if (&(amconf->analysis.para) == mfcc->para || memcmp(&(amconf->analysis.para), mfcc->para, sizeof(Value)) == 0) {
00771     s1 = amconf->analysis.cmnload_filename;
00772     s2 = mfcc->cmn.load_filename;
00773     if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00774       s1 = amconf->analysis.cmnsave_filename;
00775       s2 = mfcc->cmn.save_filename;
00776       if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00777         if (amconf->analysis.cmn_update == mfcc->cmn.update
00778             && amconf->analysis.cmn_map_weight == mfcc->cmn.map_weight) {
00779           if (amconf->frontend.ss_alpha == mfcc->frontend.ss_alpha
00780               && amconf->frontend.ss_floor == mfcc->frontend.ss_floor
00781               && amconf->frontend.sscalc == mfcc->frontend.sscalc
00782               && amconf->frontend.sscalc_len == mfcc->frontend.sscalc_len) {
00783             s1 = amconf->frontend.ssload_filename;
00784             s2 = mfcc->frontend.ssload_filename;
00785             if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00786               return TRUE;
00787             }
00788           }
00789         }
00790       }
00791     }
00792   }
00793 
00794   return FALSE;
00795 }
00796 
00797 /***************************************************/
00798 /* create MFCC calculation instance from AM config */
00799 /* according to the fixated parameter information  */
00800 /***************************************************/
00825 void
00826 create_mfcc_calc_instances(Recog *recog)
00827 {
00828   PROCESS_AM *am;
00829   MFCCCalc *mfcc;
00830   int count;
00831   
00832   jlog("STAT: *** create MFCC calculation modules from AM\n");
00833   count = 0;
00834   for(am=recog->amlist;am;am=am->next) {
00835     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00836       if (mfcc_config_is_same(am->config, mfcc)) {
00837         /* the same */
00838         jlog("STAT: AM%02d %s: share MFCC%02d\n", am->config->id, am->config->name, mfcc->id);
00839         am->mfcc = mfcc;
00840         break;
00841       }
00842     }
00843     if (!mfcc) {                /* the same not found */
00844       /* initialize MFCC calculation work area */
00845       count++;
00846       /* create new mfcc instance */
00847       mfcc = j_mfcccalc_new(am->config);
00848       mfcc->id = count;
00849       /* assign to the am */
00850       am->mfcc = mfcc;
00851       /* add to the list of all MFCCCalc */
00852       mfcc->next = recog->mfcclist;
00853       recog->mfcclist = mfcc;
00854       jlog("STAT: AM%2d %s: create a new module MFCC%02d\n", am->config->id, am->config->name, mfcc->id);
00855     }
00856   }
00857 
00858   /* for GMM */
00859   if (recog->gmm) {
00860     /* if GMM calculation config found, make MFCC instance for that. */
00861     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00862       if (mfcc_config_is_same(recog->jconf->gmm, mfcc)) {
00863         /* the same */
00864         jlog("STAT: GMM: share MFCC%02d\n", mfcc->id);
00865         recog->gmmmfcc = mfcc;
00866         break;
00867         }
00868     }
00869     if (!mfcc) {                /* the same not found */
00870       /* initialize MFCC calculation work area */
00871       count++;
00872       /* create new mfcc instance */
00873       mfcc = j_mfcccalc_new(recog->jconf->gmm);
00874       mfcc->id = count;
00875       /* assign to gmm */
00876       recog->gmmmfcc = mfcc;
00877       /* add to the list of all MFCCCalc */
00878       mfcc->next = recog->mfcclist;
00879       recog->mfcclist = mfcc;
00880       jlog("STAT: GMM: create a new module MFCC%02d\n", mfcc->id);
00881     }
00882   }
00883   
00884   jlog("STAT: %d MFCC modules created\n", count);
00885 }
00886 
00919 boolean
00920 j_launch_recognition_instance(Recog *recog, JCONF_SEARCH *sconf)
00921 {
00922   RecogProcess *p;
00923   PROCESS_AM *am;
00924   PROCESS_LM *lm;
00925 
00926   jlog("STAT: composing recognizer instance SR%02d %s (AM%02d %s, LM%02d %s)\n", sconf->id, sconf->name, sconf->amconf->id, sconf->amconf->name, sconf->lmconf->id, sconf->lmconf->name);
00927 
00928   /* allocate recognition instance */
00929   p = j_recogprocess_new(recog, sconf);
00930 
00931   /* assign corresponding AM instance and LM instance to use */
00932   for(lm=recog->lmlist;lm;lm=lm->next) {
00933     if (sconf->lmconf == lm->config) {
00934       for(am=recog->amlist;am;am=am->next) {
00935         if (sconf->amconf == am->config) {
00936           p->am = am;
00937           p->lm = lm;
00938         }
00939       }
00940     }
00941   }
00942 
00943   if (p->config->sw.triphone_check_flag && p->am->hmminfo->is_triphone) {
00944     /* go into interactive triphone HMM check mode */
00945     hmm_check(p);
00946   }
00947   
00948   /******************************************/
00949   /******** set work area and flags *********/
00950   /******************************************/
00951 
00952   /* copy values of sub instances for handly access during recognition */
00953   /* set lm type */
00954   p->lmtype = p->lm->lmtype;
00955   p->lmvar  = p->lm->lmvar;
00956   p->graphout = p->config->graph.enabled;
00957   
00958   /* set flag for context dependent handling */
00959   if (p->config->force_ccd_handling) {
00960     p->ccd_flag = p->config->ccd_handling;
00961   } else {
00962     if (p->am->hmminfo->is_triphone) {
00963       p->ccd_flag = TRUE;
00964     } else {
00965       p->ccd_flag = FALSE;
00966     }
00967   }
00968 
00969   /* iwsp prepare */
00970   if (p->lm->config->enable_iwsp) {
00971     if (p->am->hmminfo->multipath) {
00972       /* find short-pause model */
00973       if (p->am->hmminfo->sp == NULL) {
00974         jlog("ERROR: iwsp enabled but no short pause model \"%s\" in hmmdefs\n", p->am->config->spmodel_name);
00975         return FALSE;
00976       }
00977       p->am->hmminfo->iwsp_penalty = p->am->config->iwsp_penalty;
00978     } else {
00979       jlog("Warning: \"-iwsp\" is supported on multi-path mode, ignored\n");
00980     }
00981   }
00982 
00983   /* for short-pause segmentation  */
00984   if (p->config->successive.enabled) {
00985     if (p->config->successive.pausemodelname) {
00986       /* pause model name string specified, divide it and store to p */
00987       char *s;
00988       int n;
00989       p->pass1.pausemodelnames = (char*)mymalloc(strlen(p->config->successive.pausemodelname)+1);
00990       strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname);
00991       n = 0;
00992       for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) {
00993         n++;
00994       }
00995       p->pass1.pausemodelnum = n;
00996       p->pass1.pausemodel = (char **)mymalloc(sizeof(char *) * n);
00997       strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname);
00998       n = 0;
00999       for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) {
01000         p->pass1.pausemodel[n++] = s;
01001       }
01002     } else {
01003       p->pass1.pausemodel = NULL;
01004     }
01005     /* check if pause word exists on dictionary */
01006     {
01007       WORD_ID w;
01008       boolean ok_p;
01009       ok_p = FALSE;
01010       for(w=0;w<p->lm->winfo->num;w++) {
01011         if (is_sil(w, p)) {
01012           ok_p = TRUE;
01013           break;
01014         }
01015       }
01016       if (!ok_p) {
01017 #ifdef SPSEGMENT_NAIST
01018         jlog("Error: no pause word in dictionary needed for decoder-based VAD\n");
01019 #else
01020         jlog("Error: no pause word in dictionary needed for short-pause segmentation\n");
01021 #endif
01022         jlog("Error: you should have at least one pause word in dictionary\n");
01023         jlog("Error: you can specify pause model names by \"-pausemodels\"\n");
01024         return FALSE;
01025       }
01026     }
01027   }
01028 
01029   /**********************************************/
01030   /******** set model-specific defaults *********/
01031   /**********************************************/
01032   if (p->lmtype == LM_PROB) {
01033     /* set default lm parameter if not specified */
01034     if (!p->config->lmp.lmp_specified) {
01035       if (p->am->hmminfo->is_triphone) {
01036         p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_TRI_PASS1;
01037         p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_TRI_PASS1;
01038       } else {
01039         p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_MONO_PASS1;
01040         p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_MONO_PASS1;
01041       }
01042     }
01043     if (!p->config->lmp.lmp2_specified) {
01044       if (p->am->hmminfo->is_triphone) {
01045         p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_TRI_PASS2;
01046         p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_TRI_PASS2;
01047       } else {
01048         p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_MONO_PASS2;
01049         p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_MONO_PASS2;
01050       }
01051     }
01052     if (p->config->lmp.lmp_specified != p->config->lmp.lmp2_specified) {
01053       jlog("WARNING: m_fusion: only -lmp or -lmp2 specified, LM weights may be unbalanced\n");
01054     }
01055   }
01056 
01057   /****************************/
01058   /******* build wchmm ********/
01059   /****************************/
01060   if (p->lmtype == LM_DFA) {
01061     /* execute generation of global grammar and build of wchmm */
01062     multigram_build(p); /* some modification occured if return TRUE */
01063   }
01064 
01065   if (p->lmtype == LM_PROB) {
01066     /* build wchmm with N-gram */
01067     p->wchmm = wchmm_new();
01068     p->wchmm->lmtype = p->lmtype;
01069     p->wchmm->lmvar  = p->lmvar;
01070     p->wchmm->ccd_flag = p->ccd_flag;
01071     p->wchmm->category_tree = FALSE;
01072     p->wchmm->hmmwrk = &(p->am->hmmwrk);
01073     /* assign models */
01074     p->wchmm->ngram = p->lm->ngram;
01075     if (p->lmvar == LM_NGRAM_USER) {
01076       /* register LM functions for 1st pass here */
01077       p->wchmm->uni_prob_user = p->lm->lmfunc.uniprob;
01078       p->wchmm->bi_prob_user = p->lm->lmfunc.biprob;
01079     }
01080     p->wchmm->winfo = p->lm->winfo;
01081     p->wchmm->hmminfo = p->am->hmminfo;
01082     if (p->wchmm->category_tree) {
01083       if (p->config->pass1.old_tree_function_flag) {
01084         if (build_wchmm(p->wchmm, p->lm->config) == FALSE) {
01085           jlog("ERROR: m_fusion: error in bulding wchmm\n");
01086           return FALSE;
01087         }
01088       } else {
01089         if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) {
01090           jlog("ERROR: m_fusion: error in bulding wchmm\n");
01091           return FALSE;
01092         }
01093       }
01094     } else {
01095       if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) {
01096         jlog("ERROR: m_fusion: error in bulding wchmm\n");
01097         return FALSE;
01098       }
01099     }
01100 
01101     /* 起動時 -check でチェックモードへ */
01102     if (p->config->sw.wchmm_check_flag) {
01103       wchmm_check_interactive(p->wchmm);
01104     }
01105 
01106     /* set beam width */
01107     /* guess beam width from models, when not specified */
01108     p->trellis_beam_width = set_beam_width(p->wchmm, p->config->pass1.specified_trellis_beam_width);
01109 
01110     /* initialize cache for factoring */
01111     max_successor_cache_init(p->wchmm);
01112   }
01113 
01114   /* backtrellis initialization */
01115   p->backtrellis = (BACKTRELLIS *)mymalloc(sizeof(BACKTRELLIS));
01116   bt_init(p->backtrellis);
01117 
01118   /* prepare work area for 2nd pass */
01119   wchmm_fbs_prepare(p);
01120 
01121   jlog("STAT: SR%02d %s composed\n", sconf->id, sconf->name);
01122 
01123   if (sconf->sw.start_inactive) {
01124     /* start inactive */
01125     p->active = -1;
01126   } else {
01127     /* book activation for the recognition */
01128     p->active = 1;
01129   }
01130   if (p->lmtype == LM_DFA) {
01131     if (p->lm->winfo == NULL ||
01132         (p->lmvar == LM_DFA_GRAMMAR && p->lm->dfa == NULL)) {
01133       /* make this instance inactive */
01134       p->active = -1;
01135     }
01136   }
01137 
01138   return TRUE;
01139 }
01140 
01141 
01195 boolean
01196 j_final_fusion(Recog *recog)
01197 {
01198   MFCCCalc *mfcc;
01199   JCONF_SEARCH *sconf;
01200   PROCESS_AM *am;
01201 
01202   jlog("STAT: ------\n");
01203   jlog("STAT: All models are ready, go for final fusion\n");
01204   jlog("STAT: [1] create MFCC extraction instance(s)\n");
01205   if (recog->jconf->input.type == INPUT_WAVEFORM) {
01206     /***************************************************/
01207     /* create MFCC calculation instance from AM config */
01208     /* according to the fixated parameter information  */
01209     /***************************************************/
01210     create_mfcc_calc_instances(recog);
01211   }
01212 
01213   /****************************************/
01214   /* create recognition process instances */
01215   /****************************************/
01216   jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n");
01217   for(sconf=recog->jconf->search_root;sconf;sconf=sconf->next) {
01218     if (j_launch_recognition_instance(recog, sconf) == FALSE) return FALSE;
01219   }
01220 
01221   /****************************/
01222   /****** initialize GMM ******/
01223   /****************************/
01224   if (recog->gmm != NULL) {
01225     jlog("STAT: [2.5] create GMM instance\n");
01226     if (gmm_init(recog) == FALSE) {
01227       jlog("ERROR: m_fusion: error in initializing GMM\n");
01228       return FALSE;
01229     }
01230   }
01231 
01232   /* stage 4: setup output probability function for each AM */
01233   jlog("STAT: [3] initialize for acoustic HMM calculation\n");
01234   for(am=recog->amlist;am;am=am->next) {
01235 #ifdef ENABLE_PLUGIN
01236     /* set plugin function if specified */
01237     if (am->config->gprune_method == GPRUNE_SEL_USER) {
01238       am->hmmwrk.compute_gaussset = (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int)) plugin_get_func(am->config->gprune_plugin_source, "calcmix");
01239       if (am->hmmwrk.compute_gaussset == NULL) {
01240         jlog("ERROR: calcmix plugin has no function \"calcmix\"\n");
01241         return FALSE;
01242       }
01243       am->hmmwrk.compute_gaussset_init = (boolean (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_init");
01244       if (am->hmmwrk.compute_gaussset_init == NULL) {
01245         jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n");
01246         return FALSE;
01247       }
01248       am->hmmwrk.compute_gaussset_free = (void (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_free");
01249       if (am->hmmwrk.compute_gaussset_free == NULL) {
01250         jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n");
01251         return FALSE;
01252       }
01253     }
01254 #endif
01255     if (am->config->hmm_gs_filename != NULL) {/* with GMS */
01256       if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres) == FALSE) {
01257         return FALSE;
01258       }
01259     } else {
01260       if (outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres) == FALSE) {
01261         return FALSE;
01262       }
01263     }
01264   }
01265 
01266   /* stage 5: initialize work area for input and realtime decoding */
01267 
01268   jlog("STAT: [4] prepare MFCC storage(s)\n");
01269   if (recog->jconf->input.type == INPUT_VECTOR) {
01270     /* create an MFCC instance for MFCC input */
01271     /* create new mfcc instance */
01272     recog->mfcclist = j_mfcccalc_new(NULL);
01273     recog->mfcclist->id = 1;
01274     /* assign to the am */
01275     for(am=recog->amlist;am;am=am->next) {
01276       am->mfcc = recog->mfcclist;
01277     }
01278     if (recog->gmm) recog->gmmmfcc = recog->mfcclist;
01279   }
01280   /* allocate parameter holders */
01281   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01282     mfcc->param = new_param();
01283   }
01284   
01285   /* initialize SS calculation work area */
01286   if (recog->jconf->input.type == INPUT_WAVEFORM) {
01287     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01288       if (mfcc->frontend.sscalc) {
01289         mfcc->frontend.mfccwrk_ss = WMP_work_new(mfcc->para);
01290         if (mfcc->frontend.mfccwrk_ss == NULL) {
01291           jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n");
01292           return FALSE;
01293         }
01294         if (mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < mfcc->para->framesize) {
01295           jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n", mfcc->frontend.sscalc_len, mfcc->para->framesize * 1000 / recog->jconf->input.sfreq);
01296           return FALSE;
01297         }
01298       }
01299     }
01300   }
01301 
01302   if (recog->jconf->decodeopt.realtime_flag) {
01303     jlog("STAT: [5] prepare for real-time decoding\n");
01304     /* prepare for 1st pass pipeline processing */
01305     if (recog->jconf->input.type == INPUT_WAVEFORM) {
01306       if (RealTimeInit(recog) == FALSE) {
01307         jlog("ERROR: m_fusion: failed to initialize recognition process\n");
01308         return FALSE;
01309       }
01310     }
01311   }
01312 
01313   /* finished! */
01314   jlog("STAT: All init successfully done\n\n");
01315 
01316   /* set-up callback plugin if any */
01317 #ifdef ENABLE_PLUGIN
01318   if (plugin_exec_engine_startup(recog) == FALSE) {
01319     jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n");
01320     return FALSE;
01321   }
01322 #endif
01323 
01324   return TRUE;
01325 }
01326 
01327 /* end of file */