Julius 4.2
libjulius/src/outprob_style.c
説明を見る。
00001 
00074 /*
00075  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00076  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00077  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00078  * All rights reserved
00079  */
00080 
00081 #include <julius/julius.h>
00082 
00083 #ifdef PASS1_IWCD
00084 
00099 void
00100 outprob_style_cache_init(WCHMM_INFO *wchmm)
00101 {
00102   int n;
00103   for(n=0;n<wchmm->n;n++) {
00104     if (wchmm->state[n].out.state == NULL) continue;
00105     if (wchmm->outstyle[n] == AS_RSET) {
00106       (wchmm->state[n].out.rset)->cache.state = NULL;
00107     } else if (wchmm->outstyle[n] == AS_LRSET) {
00108       (wchmm->state[n].out.lrset)->cache.state = NULL;
00109     }
00110   }
00111 }
00112 
00113 /**********************************************************************/
00114 
00143 CD_Set *
00144 lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00145 {
00146   CD_Set *cd;
00147 
00148   leftcenter_name(hmm->name, wchmm->lccbuf);
00149   sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category);
00150   if (wchmm->lcdset_category_root != NULL) {
00151     cd = aptree_search_data(wchmm->lccbuf2, wchmm->lcdset_category_root);
00152     if (cd == NULL) return NULL;
00153     if (strmatch(wchmm->lccbuf2, cd->name)) {
00154       return cd;
00155     }
00156   }
00157   return NULL;
00158 }
00159 
00205 static void
00206 lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00207 {
00208   WORD_ID c2, i, w;
00209   HMM_Logical *ltmp;
00210 
00211   int cnt_c, cnt_w, cnt_p;
00212 
00213   if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) {
00214     leftcenter_name(hmm->name, wchmm->lccbuf);
00215     sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category);
00216     if (debug2_flag) {
00217       jlog("DEBUG: category-aware lcdset {%s}...", wchmm->lccbuf2);
00218     }
00219     cnt_c = cnt_w = cnt_p = 0;
00220     /* search for category that can connect after this category */
00221     for(c2=0;c2<wchmm->dfa->term_num;c2++) {
00222       if (! dfa_cp(wchmm->dfa, category, c2)) continue;
00223       /* for each word in the category, register triphone whose right context
00224          is the beginning phones  */
00225       for(i=0;i<wchmm->dfa->term.wnum[c2];i++) {
00226         w = wchmm->dfa->term.tw[c2][i];
00227         ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo);
00228         if (ltmp == NULL) {
00229           ltmp = hmm;
00230           if (ltmp->is_pseudo) {
00231             error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name);
00232           }
00233         }
00234         if (! ltmp->is_pseudo) {
00235           if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, wchmm->lccbuf2, &(wchmm->lcdset_mroot))) {
00236             cnt_p++;
00237           }
00238         }
00239       }
00240       cnt_c++;
00241       cnt_w += wchmm->dfa->term.wnum[c2];
00242     }
00243     if (debug2_flag) {
00244       jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p);
00245     }
00246   }
00247 }
00248 
00266 void
00267 lcdset_register_with_category_all(WCHMM_INFO *wchmm)
00268 {
00269   WORD_INFO *winfo;
00270   WORD_ID c1, w, w_prev;
00271   int i;
00272   HMM_Logical *ltmp;
00273 
00274   winfo = wchmm->winfo;
00275 
00276   /* (1) 単語終端の音素について */
00277   /*     word end phone */
00278   for(w=0;w<winfo->num;w++) {
00279     ltmp = winfo->wseq[w][winfo->wlen[w]-1];
00280     lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00281   }
00282   /* (2)1音素単語の場合, 先行しうる単語の終端音素を考慮 */
00283   /*    for one-phoneme word, possible left context should be also considered */
00284   for(w=0;w<winfo->num;w++) {
00285     if (winfo->wlen[w] > 1) continue;
00286     for(c1=0;c1<wchmm->dfa->term_num;c1++) {
00287       if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue;
00288       for(i=0;i<wchmm->dfa->term.wnum[c1];i++) {
00289         w_prev = wchmm->dfa->term.tw[c1][i];
00290         ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo);
00291         if (ltmp == NULL) continue; /* 1音素自身のlcd_setは(1)で作成済 */
00292         if (ltmp->is_pseudo) continue; /* pseudo phone ならlcd_setはいらない */
00293         lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00294       }
00295     }
00296   }
00297 }
00298 
00316 void
00317 lcdset_remove_with_category_all(WCHMM_INFO *wchmm)
00318 {
00319   free_cdset(&(wchmm->lcdset_category_root), &(wchmm->lcdset_mroot));
00320 }
00321 
00322 #endif /* PASS1_IWCD */
00323 
00353 LOGPROB
00354 outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
00355 {
00356   char rbuf[MAX_HMMNAME_LEN]; 
00357 
00358 #ifndef PASS1_IWCD
00359   
00360   /* if cross-word triphone handling is disabled, we simply compute the
00361      output prob of the state */
00362   return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));
00363   
00364 #else  /* PASS1_IWCD */
00365 
00366   /* state type and context cache is considered */
00367   HMM_Logical *ohmm, *rhmm;
00368   RC_INFO *rset;
00369   LRC_INFO *lrset;
00370   CD_Set *lcd;
00371   WORD_INFO *winfo = wchmm->winfo;
00372   HTK_HMM_INFO *hmminfo = wchmm->hmminfo;
00373 
00374   /* the actual computation is different according to their context dependency
00375      handling */
00376   switch(wchmm->outstyle[node]) {
00377   case AS_STATE:
00378     /* normal state (word-internal or context-independent )*/
00379     /* compute as usual */
00380     return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
00381   case AS_LSET:
00382     /* node in word end phone */
00383     /* compute approximated value using the state set in pseudo phone */
00384     return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
00385   case AS_RSET:
00386     /* note in the beginning phone of word */
00387     /* depends on the last word hypothesis to compute the actual triphone */
00388     rset = wchmm->state[node].out.rset;
00389     /* consult cache */
00390     if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
00391       /* cache miss...calculate */
00392       /* rset contains either defined biphone or pseudo biphone */
00393       if (last_wid != WORD_INVALID) {
00394         /* lookup triphone with left-context (= last phoneme) */
00395         if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00396           rhmm = ohmm;
00397         } else {
00398           /* if triphone not found, try to use the bi-phone itself */
00399           rhmm = rset->hmm;
00400           /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
00401              use it.  if both triphone and biphone not found in user-given
00402              hmmdefs/HMMList, use "pseudo" phone, as same as the end of word */
00403           if (debug2_flag) {
00404             if (rhmm->is_pseudo) {
00405             error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00406             }
00407           }
00408         }
00409       } else {
00410         /* if last word is WORD_INVALID try to use the bi-phone itself */
00411         rhmm = rset->hmm;
00412         /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
00413            use it.  if not, use "pseudo" phone, as same as the end of word */
00414         if (debug2_flag) {
00415           if (rhmm->is_pseudo) {
00416             error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00417           }
00418         }
00419       }
00420       /* rhmm may be a pseudo phone */
00421       /* store to cache */
00422       if (rhmm->is_pseudo) {
00423         rset->last_is_lset  = TRUE;
00424         rset->cache.lset    = &(rhmm->body.pseudo->stateset[rset->state_loc]);
00425       } else {
00426         rset->last_is_lset  = FALSE;
00427         rset->cache.state   = rhmm->body.defined->s[rset->state_loc];
00428       }
00429       rset->lastwid_cache = last_wid;
00430     }
00431     /* calculate outprob and return */
00432     if (rset->last_is_lset) {
00433       return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
00434     } else {
00435       return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
00436     }
00437   case AS_LRSET:
00438     /* node in word with only one phoneme --- both beginning and end */
00439     lrset = wchmm->state[node].out.lrset;
00440     if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
00441       /* cache miss...calculate */
00442       rhmm = lrset->hmm;
00443       /* lookup cdset for given left context (= last phoneme) */
00444       strcpy(rbuf, rhmm->name);
00445       if (last_wid != WORD_INVALID) {
00446         add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00447       }
00448       if (wchmm->category_tree) {
00449 #ifdef USE_OLD_IWCD
00450         lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00451 #else
00452         /* use category-indexed cdset */
00453         if (last_wid != WORD_INVALID &&
00454             (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00455           lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
00456         } else {
00457           lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
00458         }
00459 #endif
00460       } else {
00461         lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00462       }
00463       if (lcd != NULL) {        /* found, set to cache */
00464         lrset->last_is_lset  = TRUE;
00465         lrset->cache.lset    = &(lcd->stateset[lrset->state_loc]);
00466         lrset->lastwid_cache = last_wid;
00467       } else {
00468         /* no relating lcdset found, falling to normal state */
00469         if (rhmm->is_pseudo) {
00470           lrset->last_is_lset  = TRUE;
00471           lrset->cache.lset    = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
00472           lrset->lastwid_cache = last_wid;
00473         } else {
00474           lrset->last_is_lset  = FALSE;
00475           lrset->cache.state   = rhmm->body.defined->s[lrset->state_loc];
00476           lrset->lastwid_cache = last_wid;
00477         }
00478       }
00479       /*printf("[%s->%s]\n", lrset->hmm->name, rhmm->name);*/
00480     }
00481     /* calculate outprob and return */
00482     if (lrset->last_is_lset) {
00483       return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
00484     } else {
00485       return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
00486     }
00487   default:
00488     /* should not happen */
00489     j_internal_error("outprob_style: no outprob style??\n");
00490     return(LOG_ZERO);
00491   }
00492 
00493 #endif  /* PASS1_IWCD */
00494 
00495 }
00496 
00519 void
00520 error_missing_right_triphone(HMM_Logical *base, char *rc_name)
00521 {
00522   char rbuf[MAX_HMMNAME_LEN]; 
00523   /* only output message */
00524   strcpy(rbuf, base->name);
00525   add_right_context(rbuf, rc_name);
00526   jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00527 }
00528 
00551 void
00552 error_missing_left_triphone(HMM_Logical *base, char *lc_name)
00553 {
00554   char rbuf[MAX_HMMNAME_LEN]; 
00555   /* only output message */
00556   strcpy(rbuf, base->name);
00557   add_left_context(rbuf, lc_name);
00558   jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00559 }
00560 
00561 /* end of file */