Julius 4.2
libjulius/src/m_chkparam.c
説明を見る。
00001 
00026 /*
00027  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00028  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00029  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00030  * All rights reserved
00031  */
00032 
00033 #include <julius/julius.h>
00034 
00048 boolean
00049 checkpath(char *filename)
00050 {
00051   if (access(filename, R_OK) == -1) {
00052     jlog("ERROR: m_chkparam: cannot access %s\n", filename);
00053     return FALSE;
00054   }
00055   return TRUE;
00056 }
00057 
00094 boolean
00095 j_jconf_finalize(Jconf *jconf)
00096 {
00097   boolean ok_p;
00098   JCONF_LM *lm;
00099   JCONF_AM *am;
00100   JCONF_SEARCH *s, *hs;
00101 
00102   ok_p = TRUE;
00103 
00104   /* update and tailor configuration */
00105   /* if a search config has progout_flag enabled, set it to all config */
00106   hs = NULL;
00107   for(s=jconf->search_root;s;s=s->next) {
00108     if (s->output.progout_flag) {
00109       hs = s;
00110       break;
00111     }
00112   }
00113   if (hs != NULL) {
00114     for(s=jconf->search_root;s;s=s->next) {
00115       s->output.progout_flag = hs->output.progout_flag;
00116       s->output.progout_interval = hs->output.progout_interval;
00117     }
00118   }
00119       
00120   /* if an instance has short-pause segmentation enabled,
00121      set it to global opt for parameter handling
00122      (only a recognizer with this option will decide the segmentation,
00123       but the segmentation should be synchronized for all the recognizer)
00124   */
00125   for(s=jconf->search_root;s;s=s->next) {
00126     if (s->successive.enabled) {
00127       jconf->decodeopt.segment = TRUE;
00128       break;
00129     }
00130   }
00131 #ifdef GMM_VAD
00132   /* if GMM VAD enabled, set it to global */
00133   if (jconf->reject.gmm_filename) {
00134     jconf->decodeopt.segment = TRUE;
00135   }
00136 #endif
00137 
00138   for(lm = jconf->lm_root; lm; lm = lm->next) {
00139     if (lm->lmtype == LM_UNDEF) {
00140       /* determine LM type from the specified LM files */
00141       if (lm->ngram_filename_lr_arpa || lm->ngram_filename_rl_arpa || lm->ngram_filename) {
00142         /* n-gram specified */
00143         lm->lmtype = LM_PROB;
00144         lm->lmvar  = LM_NGRAM;
00145       }
00146       if (lm->gramlist_root) {
00147         /* DFA grammar specified */
00148         if (lm->lmtype != LM_UNDEF) {
00149           jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n");
00150           return FALSE;
00151         }
00152         lm->lmtype = LM_DFA;
00153         lm->lmvar  = LM_DFA_GRAMMAR;
00154       }
00155       if (lm->dfa_filename) {
00156         /* DFA grammar specified by "-dfa" */
00157         if (lm->lmtype != LM_UNDEF && lm->lmvar != LM_DFA_GRAMMAR) {
00158           jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n");
00159           return FALSE;
00160         }
00161         lm->lmtype = LM_DFA;
00162         lm->lmvar  = LM_DFA_GRAMMAR;
00163       }
00164       if (lm->wordlist_root) {
00165         /* word list specified */
00166         if (lm->lmtype != LM_UNDEF) {
00167           jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n");
00168           return FALSE;
00169         }
00170         lm->lmtype = LM_DFA;
00171         lm->lmvar  = LM_DFA_WORD;
00172       }
00173     }
00174     if (lm->lmtype == LM_UNDEF) { /* an LM is not specified */
00175       jlog("ERROR: m_chkparam: you should specify at least one LM to run Julius!\n");
00176       return FALSE;
00177     }
00178     if (lm->lmtype == LM_PROB) {
00179       if (lm->dictfilename == NULL) {
00180         jlog("ERROR: m_chkparam: needs dictionary file (-v dict_file)\n");
00181         ok_p = FALSE;
00182       }
00183     }
00184     /* file existence check */
00185     if (lm->dictfilename != NULL) 
00186       if (!checkpath(lm->dictfilename)) ok_p = FALSE;
00187     if (lm->ngram_filename != NULL) 
00188       if (!checkpath(lm->ngram_filename)) ok_p = FALSE;
00189     if (lm->ngram_filename_lr_arpa != NULL)
00190       if (!checkpath(lm->ngram_filename_lr_arpa)) ok_p = FALSE;
00191     if (lm->ngram_filename_rl_arpa != NULL)
00192       if (!checkpath(lm->ngram_filename_rl_arpa)) ok_p = FALSE;
00193     if (lm->dfa_filename != NULL) 
00194       if (!checkpath(lm->dfa_filename)) ok_p = FALSE;
00195   }
00196 
00197   for(am = jconf->am_root; am; am = am->next) {
00198     /* check if needed files are specified */
00199     if (am->hmmfilename == NULL) {
00200       jlog("ERROR: m_chkparam: needs HMM definition file (-h hmmdef_file)\n");
00201       ok_p = FALSE;
00202     }
00203     /* file existence check */
00204     if (am->hmmfilename != NULL) 
00205       if (!checkpath(am->hmmfilename)) ok_p = FALSE;
00206     if (am->mapfilename != NULL) 
00207       if (!checkpath(am->mapfilename)) ok_p = FALSE;
00208     if (am->hmm_gs_filename != NULL) 
00209       if (!checkpath(am->hmm_gs_filename)) ok_p = FALSE;
00210     /* cmn{save,load}_filename allows missing file (skipped if missing) */
00211     if (am->frontend.ssload_filename != NULL) 
00212       if (!checkpath(am->frontend.ssload_filename)) ok_p = FALSE;
00213   }
00214   if (jconf->reject.gmm_filename != NULL) 
00215     if (!checkpath(jconf->reject.gmm_filename)) ok_p = FALSE;
00216   if (jconf->input.inputlist_filename != NULL) {
00217     if (jconf->input.speech_input != SP_RAWFILE && jconf->input.speech_input != SP_MFCFILE) {
00218       jlog("WARNING: m_chkparam: not file input, \"-filelist %s\" ignored\n", jconf->input.inputlist_filename);
00219     } else {
00220       if (!checkpath(jconf->input.inputlist_filename)) ok_p = FALSE;
00221     }
00222   }
00223 
00224   /* set default realtime flag according to input mode */
00225   if (jconf->decodeopt.force_realtime_flag) {
00226     if (jconf->input.type == INPUT_VECTOR) {
00227       jlog("WARNING: m_chkparam: real-time concurrent processing is not needed on feature vector input\n");
00228       jlog("WARNING: m_chkparam: real-time flag has turned off\n");
00229       jconf->decodeopt.realtime_flag = FALSE;
00230     } else {
00231       jconf->decodeopt.realtime_flag = jconf->decodeopt.forced_realtime;
00232     }
00233   }
00234 
00235   /* check for cmn */
00236   if (jconf->decodeopt.realtime_flag) {
00237     for(am = jconf->am_root; am; am = am->next) {
00238       if (am->analysis.cmn_update == FALSE && am->analysis.cmnload_filename == NULL) {
00239         jlog("ERROR: m_chkparam: when \"-cmnnoupdate\", initial cepstral normalisation data should be given by \"-cmnload\"\n");
00240         ok_p = FALSE;
00241       }
00242     }
00243   }
00244 
00245   /* set values for search config */
00246   for(s=jconf->search_root;s;s=s->next) {
00247     lm = s->lmconf;
00248     am = s->amconf;
00249 
00250     /* force context dependency handling flag for word-recognition mode */
00251     if (lm->lmtype == LM_DFA && lm->lmvar == LM_DFA_WORD) {
00252       /* disable inter-word context dependent handling ("-no_ccd") */
00253       s->ccd_handling = FALSE;
00254       s->force_ccd_handling = TRUE;
00255       /* force 1pass ("-1pass") */
00256       s->compute_only_1pass = TRUE;
00257     }
00258 
00259     /* set default iwcd1 method from lm */
00260     /* WARNING: THIS WILL BEHAVE WRONG IF MULTIPLE LM TYPE SPECIFIED */
00261     /* RECOMMEND USING EXPLICIT OPTION */
00262     if (am->iwcdmethod == IWCD_UNDEF) {
00263       switch(lm->lmtype) {
00264       case LM_PROB:
00265         am->iwcdmethod = IWCD_NBEST; break;
00266       case LM_DFA:
00267         am->iwcdmethod = IWCD_AVG; break;
00268       }
00269     }
00270 
00271   }
00272 
00273   /* check option validity with the current lm type */
00274   /* just a warning message for user */
00275   for(s=jconf->search_root;s;s=s->next) {
00276     lm = s->lmconf;
00277     am = s->amconf;
00278     if (lm->lmtype != LM_PROB) {
00279       /* in case not a probabilistic model */
00280       if (s->lmp.lmp_specified) {
00281         jlog("WARNING: m_chkparam: \"-lmp\" only for N-gram, ignored\n");
00282       }
00283       if (s->lmp.lmp2_specified) {
00284         jlog("WARNING: m_chkparam: \"-lmp2\" only for N-gram, ignored\n");
00285       }
00286       if (s->lmp.lm_penalty_trans != 0.0) {
00287         jlog("WARNING: m_chkparam: \"-transp\" only for N-gram, ignored\n");
00288       }
00289       if (lm->head_silname && !strmatch(lm->head_silname, BEGIN_WORD_DEFAULT)) {
00290         jlog("WARNING: m_chkparam: \"-silhead\" only for N-gram, ignored\n");
00291       }
00292       if (lm->tail_silname && !strmatch(lm->tail_silname, END_WORD_DEFAULT)) {
00293         jlog("WARNING: m_chkparam: \"-siltail\" only for N-gram, ignored\n");
00294       }
00295       if (lm->enable_iwspword) {
00296         jlog("WARNING: m_chkparam: \"-iwspword\" only for N-gram, ignored\n");
00297       }
00298       if (lm->iwspentry && !strmatch(lm->iwspentry, IWSPENTRY_DEFAULT)) {
00299         jlog("WARNING: m_chkparam: \"-iwspentry\" only for N-gram, ignored\n");
00300       }
00301 #ifdef HASH_CACHE_IW
00302       if (s->pass1.iw_cache_rate != 10) {
00303         jlog("WARNING: m_chkparam: \"-iwcache\" only for N-gram, ignored\n");
00304       }
00305 #endif
00306 #ifdef SEPARATE_BY_UNIGRAM
00307       if (lm->separate_wnum != 150) {
00308         jlog("WARNING: m_chkparam: \"-sepnum\" only for N-gram, ignored\n");
00309       }
00310 #endif
00311     }  
00312     if (lm->lmtype != LM_DFA) {
00313       /* in case not a deterministic model */
00314       if (s->pass2.looktrellis_flag) {
00315         jlog("WARNING: m_chkparam: \"-looktrellis\" only for grammar, ignored\n");
00316       }
00317       if (s->output.multigramout_flag) {
00318         jlog("WARNING: m_chkparam: \"-multigramout\" only for grammar, ignored\n");
00319       }
00320       if (s->lmp.penalty1 != 0.0) {
00321         jlog("WARNING: m_chkparam: \"-penalty1\" only for grammar, ignored\n");
00322       }
00323       if (s->lmp.penalty2 != 0.0) {
00324         jlog("WARNING: m_chkparam: \"-penalty2\" only for grammar, ignored\n");
00325       }
00326     }
00327   }
00328 
00329 
00330 
00331   if (!ok_p) {
00332     jlog("ERROR: m_chkparam: could not pass parameter check\n");
00333   } else {
00334     jlog("STAT: jconf successfully finalized\n");
00335   }
00336 
00337   if (debug2_flag) {
00338     print_jconf_overview(jconf);
00339   }
00340 
00341   return ok_p;
00342 }
00343 
00368 static int
00369 default_width(HTK_HMM_INFO *hmminfo)
00370 {
00371   if (strmatch(JULIUS_SETUP, "fast")) { /* for fast setup */
00372     if (hmminfo->is_triphone) {
00373       if (hmminfo->is_tied_mixture) {
00374         /* tied-mixture triphones (PTM etc.) */
00375         return(600);
00376       } else {
00377         /* shared-state triphone */
00378 #ifdef PASS1_IWCD
00379         return(800);
00380 #else
00381         /* v2.1 compliant (no IWCD on 1st pass) */
00382         return(1000);           
00383 #endif
00384       }
00385     } else {
00386       /* monophone */
00387       return(400);
00388     }
00389   } else {                      /* for standard / v2.1 setup */
00390     if (hmminfo->is_triphone) {
00391       if (hmminfo->is_tied_mixture) {
00392         /* tied-mixture triphones (PTM etc.) */
00393         return(800);
00394       } else {
00395         /* shared-state triphone */
00396 #ifdef PASS1_IWCD
00397         return(1500);
00398 #else
00399         return(1500);           /* v2.1 compliant (no IWCD on 1st pass) */
00400 #endif
00401       }
00402     } else {
00403       /* monophone */
00404       return(700);
00405     }
00406   }
00407 }
00408 
00436 int
00437 set_beam_width(WCHMM_INFO *wchmm, int specified)
00438 {
00439   int width;
00440   int standard_width;
00441   
00442   if (specified == 0) { /* full search */
00443     jlog("WARNING: doing full search (can be extremely slow)\n");
00444     width = wchmm->n;
00445   } else if (specified == -1) { /* not specified */
00446     standard_width = default_width(wchmm->hmminfo); /* system default */
00447     width = (int)(sqrt(wchmm->winfo->num) * 15.0); /* heuristic value!! */
00448     if (width > standard_width) width = standard_width;
00449     /* 2007/1/20 bgn */
00450     if (width < MINIMAL_BEAM_WIDTH) {
00451       width = MINIMAL_BEAM_WIDTH;
00452     }
00453     /* 2007/1/20 end */
00454   } else {                      /* actual value has been specified */
00455     width = specified;
00456   }
00457   if (width > wchmm->n) width = wchmm->n;
00458 
00459   return(width);
00460 }
00461 
00462 /* end of file */