Julius 4.1.5
|
00001 00074 /* 00075 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00076 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00077 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00078 * All rights reserved 00079 */ 00080 00081 #include <julius/julius.h> 00082 00083 #ifdef PASS1_IWCD 00084 00099 void 00100 outprob_style_cache_init(WCHMM_INFO *wchmm) 00101 { 00102 int n; 00103 for(n=0;n<wchmm->n;n++) { 00104 if (wchmm->state[n].out.state == NULL) continue; 00105 if (wchmm->outstyle[n] == AS_RSET) { 00106 (wchmm->state[n].out.rset)->cache.state = NULL; 00107 } else if (wchmm->outstyle[n] == AS_LRSET) { 00108 (wchmm->state[n].out.lrset)->cache.state = NULL; 00109 } 00110 } 00111 } 00112 00113 /**********************************************************************/ 00114 00143 CD_Set * 00144 lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category) 00145 { 00146 CD_Set *cd; 00147 00148 leftcenter_name(hmm->name, wchmm->lccbuf); 00149 sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category); 00150 if (wchmm->lcdset_category_root != NULL) { 00151 cd = aptree_search_data(wchmm->lccbuf2, wchmm->lcdset_category_root); 00152 if (cd == NULL) return NULL; 00153 if (strmatch(wchmm->lccbuf2, cd->name)) { 00154 return cd; 00155 } 00156 } 00157 return NULL; 00158 } 00159 00205 static void 00206 lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category) 00207 { 00208 WORD_ID c2, i, w; 00209 HMM_Logical *ltmp; 00210 00211 int cnt_c, cnt_w, cnt_p; 00212 00213 if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) { 00214 leftcenter_name(hmm->name, wchmm->lccbuf); 00215 sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category); 00216 if (debug2_flag) { 00217 jlog("DEBUG: category-aware lcdset {%s}...", wchmm->lccbuf2); 00218 } 00219 cnt_c = cnt_w = cnt_p = 0; 00220 /* search for category that can connect after this category */ 00221 for(c2=0;c2<wchmm->dfa->term_num;c2++) { 00222 if (! dfa_cp(wchmm->dfa, category, c2)) continue; 00223 /* for each word in the category, register triphone whose right context 00224 is the beginning phones */ 00225 for(i=0;i<wchmm->dfa->term.wnum[c2];i++) { 00226 w = wchmm->dfa->term.tw[c2][i]; 00227 ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo); 00228 if (ltmp == NULL) { 00229 ltmp = hmm; 00230 if (ltmp->is_pseudo) { 00231 error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name); 00232 } 00233 } 00234 if (! ltmp->is_pseudo) { 00235 if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, wchmm->lccbuf2, &(wchmm->lcdset_mroot))) { 00236 cnt_p++; 00237 } 00238 } 00239 } 00240 cnt_c++; 00241 cnt_w += wchmm->dfa->term.wnum[c2]; 00242 } 00243 if (debug2_flag) { 00244 jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p); 00245 } 00246 } 00247 } 00248 00266 void 00267 lcdset_register_with_category_all(WCHMM_INFO *wchmm) 00268 { 00269 WORD_INFO *winfo; 00270 WORD_ID c1, w, w_prev; 00271 int i; 00272 HMM_Logical *ltmp; 00273 00274 winfo = wchmm->winfo; 00275 00276 /* (1) 単語終端の音素について */ 00277 /* word end phone */ 00278 for(w=0;w<winfo->num;w++) { 00279 ltmp = winfo->wseq[w][winfo->wlen[w]-1]; 00280 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]); 00281 } 00282 /* (2)1音素単語の場合, 先行しうる単語の終端音素を考慮 */ 00283 /* for one-phoneme word, possible left context should be also considered */ 00284 for(w=0;w<winfo->num;w++) { 00285 if (winfo->wlen[w] > 1) continue; 00286 for(c1=0;c1<wchmm->dfa->term_num;c1++) { 00287 if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue; 00288 for(i=0;i<wchmm->dfa->term.wnum[c1];i++) { 00289 w_prev = wchmm->dfa->term.tw[c1][i]; 00290 ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo); 00291 if (ltmp == NULL) continue; /* 1音素自身のlcd_setは(1)で作成済 */ 00292 if (ltmp->is_pseudo) continue; /* pseudo phone ならlcd_setはいらない */ 00293 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]); 00294 } 00295 } 00296 } 00297 } 00298 00316 void 00317 lcdset_remove_with_category_all(WCHMM_INFO *wchmm) 00318 { 00319 free_cdset(&(wchmm->lcdset_category_root), &(wchmm->lcdset_mroot)); 00320 } 00321 00322 #endif /* PASS1_IWCD */ 00323 00353 LOGPROB 00354 outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param) 00355 { 00356 char rbuf[MAX_HMMNAME_LEN]; 00357 00358 #ifndef PASS1_IWCD 00359 00360 /* if cross-word triphone handling is disabled, we simply compute the 00361 output prob of the state */ 00362 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param)); 00363 00364 #else /* PASS1_IWCD */ 00365 00366 /* state type and context cache is considered */ 00367 HMM_Logical *ohmm, *rhmm; 00368 RC_INFO *rset; 00369 LRC_INFO *lrset; 00370 CD_Set *lcd; 00371 WORD_INFO *winfo = wchmm->winfo; 00372 HTK_HMM_INFO *hmminfo = wchmm->hmminfo; 00373 00374 /* the actual computation is different according to their context dependency 00375 handling */ 00376 switch(wchmm->outstyle[node]) { 00377 case AS_STATE: 00378 /* normal state (word-internal or context-independent )*/ 00379 /* compute as usual */ 00380 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param)); 00381 case AS_LSET: 00382 /* node in word end phone */ 00383 /* compute approximated value using the state set in pseudo phone */ 00384 return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param)); 00385 case AS_RSET: 00386 /* note in the beginning phone of word */ 00387 /* depends on the last word hypothesis to compute the actual triphone */ 00388 rset = wchmm->state[node].out.rset; 00389 /* consult cache */ 00390 if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) { 00391 /* cache miss...calculate */ 00392 /* rset contains either defined biphone or pseudo biphone */ 00393 if (last_wid != WORD_INVALID) { 00394 /* lookup triphone with left-context (= last phoneme) */ 00395 if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) { 00396 rhmm = ohmm; 00397 } else { 00398 /* if triphone not found, try to use the bi-phone itself */ 00399 rhmm = rset->hmm; 00400 /* If the bi-phone is explicitly specified in hmmdefs/HMMList, 00401 use it. if both triphone and biphone not found in user-given 00402 hmmdefs/HMMList, use "pseudo" phone, as same as the end of word */ 00403 if (debug2_flag) { 00404 if (rhmm->is_pseudo) { 00405 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); 00406 } 00407 } 00408 } 00409 } else { 00410 /* if last word is WORD_INVALID try to use the bi-phone itself */ 00411 rhmm = rset->hmm; 00412 /* If the bi-phone is explicitly specified in hmmdefs/HMMList, 00413 use it. if not, use "pseudo" phone, as same as the end of word */ 00414 if (debug2_flag) { 00415 if (rhmm->is_pseudo) { 00416 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); 00417 } 00418 } 00419 } 00420 /* rhmm may be a pseudo phone */ 00421 /* store to cache */ 00422 if (rhmm->is_pseudo) { 00423 rset->last_is_lset = TRUE; 00424 rset->cache.lset = &(rhmm->body.pseudo->stateset[rset->state_loc]); 00425 } else { 00426 rset->last_is_lset = FALSE; 00427 rset->cache.state = rhmm->body.defined->s[rset->state_loc]; 00428 } 00429 rset->lastwid_cache = last_wid; 00430 } 00431 /* calculate outprob and return */ 00432 if (rset->last_is_lset) { 00433 return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param)); 00434 } else { 00435 return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param)); 00436 } 00437 case AS_LRSET: 00438 /* node in word with only one phoneme --- both beginning and end */ 00439 lrset = wchmm->state[node].out.lrset; 00440 if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) { 00441 /* cache miss...calculate */ 00442 rhmm = lrset->hmm; 00443 /* lookup cdset for given left context (= last phoneme) */ 00444 strcpy(rbuf, rhmm->name); 00445 if (last_wid != WORD_INVALID) { 00446 add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); 00447 } 00448 if (wchmm->category_tree) { 00449 #ifdef USE_OLD_IWCD 00450 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf); 00451 #else 00452 /* use category-indexed cdset */ 00453 if (last_wid != WORD_INVALID && 00454 (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) { 00455 lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category); 00456 } else { 00457 lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category); 00458 } 00459 #endif 00460 } else { 00461 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf); 00462 } 00463 if (lcd != NULL) { /* found, set to cache */ 00464 lrset->last_is_lset = TRUE; 00465 lrset->cache.lset = &(lcd->stateset[lrset->state_loc]); 00466 lrset->lastwid_cache = last_wid; 00467 } else { 00468 /* no relating lcdset found, falling to normal state */ 00469 if (rhmm->is_pseudo) { 00470 lrset->last_is_lset = TRUE; 00471 lrset->cache.lset = &(rhmm->body.pseudo->stateset[lrset->state_loc]); 00472 lrset->lastwid_cache = last_wid; 00473 } else { 00474 lrset->last_is_lset = FALSE; 00475 lrset->cache.state = rhmm->body.defined->s[lrset->state_loc]; 00476 lrset->lastwid_cache = last_wid; 00477 } 00478 } 00479 /*printf("[%s->%s]\n", lrset->hmm->name, rhmm->name);*/ 00480 } 00481 /* calculate outprob and return */ 00482 if (lrset->last_is_lset) { 00483 return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param)); 00484 } else { 00485 return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param)); 00486 } 00487 default: 00488 /* should not happen */ 00489 j_internal_error("outprob_style: no outprob style??\n"); 00490 return(LOG_ZERO); 00491 } 00492 00493 #endif /* PASS1_IWCD */ 00494 00495 } 00496 00519 void 00520 error_missing_right_triphone(HMM_Logical *base, char *rc_name) 00521 { 00522 char rbuf[MAX_HMMNAME_LEN]; 00523 /* only output message */ 00524 strcpy(rbuf, base->name); 00525 add_right_context(rbuf, rc_name); 00526 jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name); 00527 } 00528 00551 void 00552 error_missing_left_triphone(HMM_Logical *base, char *lc_name) 00553 { 00554 char rbuf[MAX_HMMNAME_LEN]; 00555 /* only output message */ 00556 strcpy(rbuf, base->name); 00557 add_left_context(rbuf, lc_name); 00558 jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name); 00559 } 00560 00561 /* end of file */