Julius 4.2
|
00001 00098 /* 00099 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00100 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00101 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00102 * All rights reserved 00103 */ 00104 00105 #include <julius/julius.h> 00106 00107 #ifndef PASS2_STRICT_IWCD 00108 00109 #undef TCD ///< Define if want triphone debug messages 00110 00111 /**********************************************************************/ 00112 /************ 仮説ノードの基本操作 ************/ 00113 /************ Basic functions for hypothesis node handling ************/ 00114 /**********************************************************************/ 00115 00116 #undef STOCKER_DEBUG 00117 00118 #ifdef STOCKER_DEBUG 00119 static int stocked_num = 0; 00120 static int reused_num = 0; 00121 static int new_num = 0; 00122 static int request_num = 0; 00123 #endif 00124 00137 static void 00138 free_node_exec(NODE *node) 00139 { 00140 if (node == NULL) return; 00141 00142 free(node->g); 00143 if (node->g_prev != NULL) free(node->g_prev); 00144 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00145 if (node->region->graphout) { 00146 free(node->wordend_frame); 00147 free(node->wordend_gscore); 00148 } 00149 #endif 00150 00151 free(node); 00152 } 00153 00169 void 00170 free_node(NODE *node) 00171 { 00172 if (node == NULL) return; 00173 00174 if (node->region->graphout) { 00175 if (node->prevgraph != NULL && node->prevgraph->saved == FALSE) { 00176 wordgraph_free(node->prevgraph); 00177 } 00178 } 00179 00180 /* save to stocker */ 00181 node->next = node->region->pass2.stocker_root; 00182 node->region->pass2.stocker_root = node; 00183 00184 #ifdef STOCKER_DEBUG 00185 stocked_num++; 00186 #endif 00187 } 00188 00205 void 00206 clear_stocker(StackDecode *s) 00207 { 00208 NODE *node, *tmp; 00209 node = s->stocker_root; 00210 while(node) { 00211 tmp = node->next; 00212 free_node_exec(node); 00213 node = tmp; 00214 } 00215 s->stocker_root = 
NULL; 00216 00217 #ifdef STOCKER_DEBUG 00218 jlog("DEBUG: %d times requested, %d times newly allocated, %d times reused\n", request_num, new_num, reused_num); 00219 stocked_num = 0; 00220 reused_num = 0; 00221 new_num = 0; 00222 request_num = 0; 00223 #endif 00224 } 00225 00246 NODE * 00247 cpy_node(NODE *dst, NODE *src) 00248 { 00249 int peseqlen; 00250 00251 peseqlen = src->region->peseqlen; 00252 00253 dst->next = src->next; 00254 dst->prev = src->prev; 00255 memcpy(dst->g, src->g, sizeof(LOGPROB) * peseqlen); 00256 memcpy(dst->seq, src->seq, sizeof(WORD_ID) * MAXSEQNUM); 00257 #ifdef CM_SEARCH 00258 #ifdef CM_MULTIPLE_ALPHA 00259 { 00260 int w; 00261 for(w=0;w<src->seqnum;w++) { 00262 memcpy(dst->cmscore[w], src->cmscore[w], sizeof(LOGPROB) * src->region->config->annotate.cm_alpha_num); 00263 } 00264 } 00265 #else 00266 memcpy(dst->cmscore, src->cmscore, sizeof(LOGPROB) * MAXSEQNUM); 00267 #endif 00268 #endif /* CM_SEARCH */ 00269 dst->seqnum = src->seqnum; 00270 dst->score = src->score; 00271 dst->bestt = src->bestt; 00272 dst->estimated_next_t = src->estimated_next_t; 00273 dst->endflag = src->endflag; 00274 dst->state = src->state; 00275 dst->tre = src->tre; 00276 if (src->g_prev != NULL) { // ccd_flag == TRUE 00277 memcpy(dst->g_prev, src->g_prev, sizeof(LOGPROB) * peseqlen); 00278 dst->last_ph = src->last_ph; 00279 dst->last_ph_sp_attached = src->last_ph_sp_attached; 00280 dst->lscore = src->lscore; 00281 } 00282 dst->totallscore = src->totallscore; 00283 dst->final_g = src->final_g; 00284 #ifdef VISUALIZE 00285 dst->popnode = src->popnode; 00286 #endif 00287 00288 if (src->region->graphout) { 00289 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00290 memcpy(dst->wordend_frame, src->wordend_frame, sizeof(short) * peseqlen); 00291 memcpy(dst->wordend_gscore, src->wordend_gscore, sizeof(LOGPROB) * peseqlen); 00292 #endif 00293 dst->prevgraph = src->prevgraph; 00294 dst->lastcontext = src->lastcontext; 00295 #ifndef GRAPHOUT_PRECISE_BOUNDARY 00296 dst->tail_g_score = 
src->tail_g_score; 00297 #endif 00298 } 00299 00300 return(dst); 00301 } 00302 00323 NODE * 00324 newnode(RecogProcess *r) 00325 { 00326 NODE *tmp; 00327 int i; 00328 int peseqlen; 00329 00330 peseqlen = r->peseqlen; 00331 00332 #ifdef STOCKER_DEBUG 00333 request_num++; 00334 #endif 00335 if ((tmp = r->pass2.stocker_root) != NULL) { 00336 /* re-use ones in the stocker */ 00337 r->pass2.stocker_root = tmp->next; 00338 #ifdef STOCKER_DEBUG 00339 stocked_num--; 00340 reused_num++; 00341 #endif 00342 } else { 00343 /* allocate new */ 00344 tmp =(NODE *)mymalloc(sizeof(NODE)); 00345 tmp->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); 00346 if (r->ccd_flag) { 00347 tmp->g_prev = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); 00348 } else { 00349 tmp->g_prev = NULL; 00350 } 00351 00352 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00353 if (r->graphout) { 00354 tmp->wordend_frame = (short *)mymalloc(sizeof(short) * peseqlen); 00355 tmp->wordend_gscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); 00356 } 00357 #endif 00358 #ifdef STOCKER_DEBUG 00359 new_num++; 00360 #endif 00361 } 00362 00363 /* clear the data */ 00364 /*bzero(tmp,sizeof(NODE));*/ 00365 tmp->next=NULL; 00366 tmp->prev=NULL; 00367 tmp->last_ph = NULL; 00368 tmp->last_ph_sp_attached = FALSE; 00369 if (r->ccd_flag) { 00370 if (r->lmtype == LM_PROB) { 00371 tmp->lscore = LOG_ZERO; 00372 tmp->totallscore = LOG_ZERO; 00373 } else if (r->lmtype == LM_DFA) { 00374 tmp->lscore = 0.0; 00375 tmp->totallscore = 0.0; 00376 } 00377 } 00378 tmp->endflag = FALSE; 00379 tmp->seqnum = 0; 00380 for(i=0;i<peseqlen;i++) { 00381 tmp->g[i] = LOG_ZERO; 00382 } 00383 if (r->ccd_flag) { 00384 for(i=0;i<peseqlen;i++) { 00385 tmp->g_prev[i] = LOG_ZERO; 00386 } 00387 } 00388 tmp->final_g = LOG_ZERO; 00389 #ifdef VISUALIZE 00390 tmp->popnode = NULL; 00391 #endif 00392 tmp->tre = NULL; 00393 00394 if (r->graphout) { 00395 tmp->prevgraph = NULL; 00396 tmp->lastcontext = NULL; 00397 } 00398 00399 tmp->region = r; 00400 00401 return(tmp); 
00402 } 00403 00404 00405 /**********************************************************************/ 00406 /************ 前向きトレリス展開と尤度計算 ***************/ 00407 /************ Expand trellis and update forward viterbi ***************/ 00408 /**********************************************************************/ 00409 00426 void 00427 malloc_wordtrellis(RecogProcess *r) 00428 { 00429 int maxwn; 00430 StackDecode *dwrk; 00431 00432 maxwn = r->lm->winfo->maxwn + 10; 00433 dwrk = &(r->pass2); 00434 00435 dwrk->wordtrellis[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); 00436 dwrk->wordtrellis[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); 00437 00438 dwrk->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen); 00439 00440 dwrk->phmmlen_max = r->lm->winfo->maxwlen + 2; 00441 dwrk->phmmseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * dwrk->phmmlen_max); 00442 if (r->lm->config->enable_iwsp && r->am->hmminfo->multipath) { 00443 dwrk->has_sp = (boolean *)mymalloc(sizeof(boolean) * dwrk->phmmlen_max); 00444 } else { 00445 dwrk->has_sp = NULL; 00446 } 00447 00448 dwrk->wend_token_frame[0] = NULL; 00449 dwrk->wend_token_frame[1] = NULL; 00450 dwrk->wend_token_gscore[0] = NULL; 00451 dwrk->wend_token_gscore[1] = NULL; 00452 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00453 if (r->graphout) { 00454 dwrk->wend_token_frame[0] = (short *)mymalloc(sizeof(short) * maxwn); 00455 dwrk->wend_token_frame[1] = (short *)mymalloc(sizeof(short) * maxwn); 00456 dwrk->wend_token_gscore[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); 00457 dwrk->wend_token_gscore[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); 00458 } 00459 #endif 00460 00461 } 00462 00475 void 00476 free_wordtrellis(StackDecode *dwrk) 00477 { 00478 int i; 00479 00480 free(dwrk->wordtrellis[0]); 00481 free(dwrk->wordtrellis[1]); 00482 free(dwrk->g); 00483 free(dwrk->phmmseq); 00484 if (dwrk->has_sp) { 00485 free(dwrk->has_sp); 00486 dwrk->has_sp = NULL; 00487 } 00488 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00489 for(i=0;i<2;i++) 
{ 00490 if (dwrk->wend_token_frame[i]) { 00491 free(dwrk->wend_token_frame[i]); 00492 dwrk->wend_token_frame[i] = NULL; 00493 } 00494 if (dwrk->wend_token_gscore[i]) { 00495 free(dwrk->wend_token_gscore[i]); 00496 dwrk->wend_token_gscore[i] = NULL; 00497 } 00498 } 00499 #endif 00500 } 00501 00502 00503 /**********************************************************************/ 00504 /************ 仮説の前向き尤度計算 *******************/ 00505 /************ Compute forward score of a hypothesis *******************/ 00506 /**********************************************************************/ 00507 00526 static LOGPROB 00527 get_max_out_arc(HTK_HMM_Trans *tr, int state_num) 00528 { 00529 LOGPROB max_a; 00530 int afrom; 00531 LOGPROB a; 00532 00533 max_a = LOG_ZERO; 00534 for (afrom = 0; afrom < state_num - 1; afrom++) { 00535 a = tr->a[afrom][state_num-1]; 00536 if (max_a < a) max_a = a; 00537 } 00538 return(max_a); 00539 } 00540 00557 static LOGPROB 00558 max_out_arc(HMM_Logical *l) 00559 { 00560 return(get_max_out_arc(hmm_logical_trans(l), hmm_logical_state_num(l))); 00561 } 00562 00584 void 00585 scan_word(NODE *now, HTK_Param *param, RecogProcess *r) 00586 { 00587 int i,t, j; 00588 HMM *whmm; 00589 A_CELL *ac; 00590 WORD_ID word; 00591 LOGPROB tmpmax, tmptmp, score1; 00592 int startt = 0, endt; 00593 int wordhmmnum; 00594 LOGPROB tmpmax_store, store_point_maxarc; /* multipath */ 00595 LOGPROB tmpmax2 = LOG_ZERO; 00596 int phmmlen; 00597 HMM_Logical *ret, *wend; 00598 int store_point; 00599 int crossword_point = 0; 00600 boolean back_rescan = FALSE; 00601 boolean node_exist_p; 00602 int tn; 00603 int tl; 00604 00605 /* store global values to local for rapid access */ 00606 WORD_INFO *winfo; 00607 HTK_HMM_INFO *hmminfo; 00608 LOGPROB *framemaxscore; 00609 int peseqlen; 00610 boolean ccd_flag; 00611 boolean enable_iwsp; 00612 #ifdef SCAN_BEAM 00613 LOGPROB scan_beam_thres; 00614 #endif 00615 StackDecode *dwrk; 00616 00617 winfo = r->lm->winfo; 00618 hmminfo = r->am->hmminfo; 
00619 dwrk = &(r->pass2); 00620 peseqlen = r->peseqlen; 00621 framemaxscore = r->pass2.framemaxscore; 00622 ccd_flag = r->ccd_flag; 00623 enable_iwsp = r->lm->config->enable_iwsp; /* multipath */ 00624 #ifdef SCAN_BEAM 00625 scan_beam_thres = r->config->pass2.scan_beam_thres; 00626 #endif 00627 00628 if (hmminfo->multipath) { 00629 store_point = -1; 00630 } else { 00631 store_point = 0; 00632 } 00633 00634 /* ----------------------- prepare HMM ----------------------- */ 00635 00636 if (ccd_flag) { 00637 /* 直前の音素があれば,そこまでさかのぼって scan する */ 00638 /* if there are any last phone, enable backscan */ 00639 if (now->last_ph == NULL) { 00640 /* initial score: now->g[] */ 00641 /* scan range: phones in now->seq[now->seqnum-1] */ 00642 back_rescan = FALSE; 00643 } else { 00644 /* initial score: now->g_prev[] (1-phone before)*/ 00645 /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */ 00646 back_rescan = TRUE; 00647 } 00648 } 00649 #ifdef TCD 00650 if (now->last_ph != NULL) { 00651 jlog("DEBUG: inherited last_ph: %s\n", (now->last_ph)->name); 00652 if (now->last_ph_sp_attached) jlog("DEBUG: (sp attached)\n"); /* multipath */ 00653 } else { 00654 jlog("DEBUG: no last_ph inherited\n"); 00655 } 00656 #endif 00657 00658 /* scan 範囲分のHMMを準備 */ 00659 /* prepare HMM of the scan range */ 00660 word = now->seq[now->seqnum-1]; 00661 00662 if (ccd_flag) { 00663 00664 if (back_rescan) { 00665 00666 /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */ 00667 00668 phmmlen = winfo->wlen[word] + 1; 00669 if (phmmlen > dwrk->phmmlen_max) { 00670 j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max); 00671 } 00672 for (i=0;i<phmmlen - 2;i++) dwrk->phmmseq[i] = winfo->wseq[word][i]; 00673 if (enable_iwsp && hmminfo->multipath) { 00674 for (i=0;i<phmmlen - 2;i++) dwrk->has_sp[i] = FALSE; 00675 } 00676 00677 /* 最終単語と last_ph 間の単語間triphoneを考慮 */ 00678 /* consider cross-word context dependency between the last word and 
now->last_ph */ 00679 wend = winfo->wseq[word][winfo->wlen[word]-1]; 00680 ret = get_right_context_HMM(wend, now->last_ph->name, hmminfo); 00681 if (ret == NULL) { /* triphone not found */ 00682 /* fallback to the original bi/mono-phone */ 00683 /* error if the original is pseudo phone (not explicitly defined 00684 in hmmdefs/hmmlist) */ 00685 /* exception: word with 1 phone (triphone may exist in the next expansion */ 00686 if (winfo->wlen[word] > 1 && wend->is_pseudo) { 00687 error_missing_right_triphone(wend, now->last_ph->name); 00688 } 00689 dwrk->phmmseq[phmmlen-2] = wend; 00690 } else { 00691 dwrk->phmmseq[phmmlen-2] = ret; 00692 } 00693 ret = get_left_context_HMM(now->last_ph, wend->name, hmminfo); 00694 if (ret == NULL) { 00695 /* fallback to the original bi/mono-phone */ 00696 /* error if the original is pseudo phone (not explicitly defined 00697 in hmmdefs/hmmlist) */ 00698 if (now->last_ph->is_pseudo) { 00699 error_missing_left_triphone(now->last_ph, wend->name); 00700 } 00701 dwrk->phmmseq[phmmlen-1] = now->last_ph; 00702 } else { 00703 dwrk->phmmseq[phmmlen-1] = ret; 00704 } 00705 00706 if (enable_iwsp && hmminfo->multipath) { 00707 dwrk->has_sp[phmmlen-2] = TRUE; 00708 dwrk->has_sp[phmmlen-1] = now->last_ph_sp_attached; 00709 } 00710 00711 #ifdef TCD 00712 jlog("DEBUG: w="); 00713 for(i=0;i<winfo->wlen[word];i++) { 00714 jlog(" %s",(winfo->wseq[word][i])->name); 00715 if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)"); 00716 } 00717 jlog(" | %s\n", (now->last_ph)->name); 00718 if (hmminfo->multipath && now->last_ph_sp_attached) jlog("DEBUG: (sp)\n"); 00719 jlog("DEBUG: scan for:"); 00720 00721 for (i=0;i<phmmlen;i++) { 00722 jlog(" %s", dwrk->phmmseq[i]->name); 00723 if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)"); 00724 } 00725 jlog("\n"); 00726 #endif 00727 00728 /* 単語HMMを作る */ 00729 /* make word HMM */ 00730 whmm = new_make_word_hmm(hmminfo, dwrk->phmmseq, phmmlen, (enable_iwsp && hmminfo->multipath) ? 
dwrk->has_sp : NULL); 00731 if (whmm == NULL) { 00732 j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); 00733 } 00734 00735 /* backscan なので,計算前の g[] 初期値は now->g_prev[] を使用 */ 00736 /* As backscan enabled, the initial forward score g[] is set by 00737 now->g_prev[] */ 00738 for (t=0;t<peseqlen;t++) { 00739 dwrk->g[t]=now->g_prev[t]; 00740 00741 } 00742 00743 /* 次段用のg_prevを格納するノード位置を設定 */ 00744 /* set where to store scores as new g_prev[] for the next backscan 00745 in the HMM */ 00746 if (hmminfo->multipath) { 00747 store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2; 00748 store_point_maxarc = max_out_arc(dwrk->phmmseq[0]); 00749 if (enable_iwsp && dwrk->has_sp[0]) { 00750 store_point += hmm_logical_state_num(hmminfo->sp) - 2; 00751 if (store_point_maxarc < max_out_arc(hmminfo->sp)) { 00752 store_point_maxarc = max_out_arc(hmminfo->sp); 00753 } 00754 } 00755 } else { 00756 store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2 - 1; 00757 } 00758 /* scan中に直前単語とこの単語をまたぐ場所を設定 */ 00759 /* set where is the connection point of the last word in the HMM */ 00760 if (hmminfo->multipath) { 00761 crossword_point = whmm->len - hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]); 00762 if (enable_iwsp && dwrk->has_sp[phmmlen-1]) { 00763 crossword_point -= hmm_logical_state_num(hmminfo->sp) - 2; 00764 } 00765 } else { 00766 crossword_point = whmm->len - (hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]) - 2) - 1; 00767 } 00768 00769 } else { /* not backscan mode */ 00770 00771 /* scan range: phones in now->seq[now->seqnum-1] */ 00772 00773 #ifdef TCD 00774 jlog("DEBUG: scan(org):"); 00775 for (i=0;i<winfo->wlen[word];i++) { 00776 jlog(" %s", (winfo->wseq[word][i])->name); 00777 } 00778 jlog("\n"); 00779 #endif 00780 00781 if (enable_iwsp && hmminfo->multipath) { 00782 /* 必要ならばショートポーズを挟み込む位置を指定する */ 00783 for(i=0;i<winfo->wlen[word];i++) { 00784 dwrk->has_sp[i] = FALSE; 00785 } 00786 
dwrk->has_sp[winfo->wlen[word]-1] = TRUE; 00787 } 00788 00789 /* 単語HMMを作る */ 00790 /* make word HMM */ 00791 whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL); 00792 if (whmm == NULL) { 00793 j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); 00794 } 00795 00796 /* 計算前の g[] 初期値は now->g[] を使用 */ 00797 /* the initial forward score g[] is set by now->g[] */ 00798 for (t=0;t<peseqlen;t++) { 00799 dwrk->g[t]=now->g[t]; 00800 } 00801 00802 /* 次段用のg_prevを格納するノード位置を設定 */ 00803 /* set where to store scores as new g_prev[] for the next backscan 00804 in the HMM */ 00805 if (hmminfo->multipath) { 00806 store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2; 00807 store_point_maxarc = max_out_arc(winfo->wseq[word][0]); 00808 if (enable_iwsp && dwrk->has_sp[0]) { 00809 store_point += hmm_logical_state_num(hmminfo->sp) - 2; 00810 if (store_point_maxarc < max_out_arc(hmminfo->sp)) { 00811 store_point_maxarc = max_out_arc(hmminfo->sp); 00812 } 00813 } 00814 } else { 00815 store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2 - 1; 00816 } 00817 00818 /* scan中に直前単語とこの単語をまたぐ場所は,なし */ 00819 /* the connection point of the last word is not exist in the HMM */ 00820 crossword_point = -1; 00821 } 00822 00823 } else { /* ccd_flag == FALSE */ 00824 00825 if (enable_iwsp && hmminfo->multipath) { 00826 /* 必要ならばショートポーズを挟み込む位置を指定する */ 00827 for(i=0;i<winfo->wlen[word];i++) { 00828 dwrk->has_sp[i] = FALSE; 00829 } 00830 dwrk->has_sp[winfo->wlen[word]-1] = TRUE; 00831 } 00832 00833 /* 音素環境非依存の場合は単純に最終単語分の HMM を作成 */ 00834 /* for monophone: simple make HMM for the last word */ 00835 whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? 
dwrk->has_sp : NULL); 00836 if (whmm == NULL) { 00837 j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); 00838 } 00839 00840 /* 計算前の g[] 初期値は now->g[] を使用 */ 00841 /* the initial forward score g[] is set by now->g[] */ 00842 for (t=0;t<peseqlen;t++) { 00843 dwrk->g[t]=now->g[t]; 00844 } 00845 00846 } 00847 00848 #ifdef TCD 00849 jlog("DEBUG: whmm len = %d\n",whmm->len); 00850 jlog("DEBUG: crossword_point = %d\n", crossword_point); 00851 jlog("DEBUG: g[] store point = %d\n", store_point); 00852 #endif 00853 00854 wordhmmnum = whmm->len; 00855 if (wordhmmnum >= winfo->maxwn + 10) { 00856 j_internal_error("scan_word: word too long (>%d)\n", winfo->maxwn + 10); 00857 } 00858 00859 #ifndef GRAPHOUT_PRECISE_BOUNDARY 00860 if (r->graphout) { 00861 if (ccd_flag) { 00862 now->tail_g_score = now->g[now->bestt]; 00863 } 00864 } 00865 #endif 00866 00867 /* ----------------------- do scan ----------------------- */ 00868 00869 /* scan開始点を検索 -> starttへ*/ 00870 /* search for the start frame -> set to startt */ 00871 for(t = peseqlen-1; t >=0 ; t--) { 00872 if ( 00873 #ifdef SCAN_BEAM 00874 dwrk->g[t] > framemaxscore[t] - scan_beam_thres && 00875 #endif 00876 dwrk->g[t] > LOG_ZERO) { 00877 break; 00878 } 00879 } 00880 if (t < 0) { /* no node has score > LOG_ZERO */ 00881 for(t=0;t<peseqlen;t++) { 00882 if (ccd_flag) now->g_prev[t] = LOG_ZERO; 00883 now->g[t] = LOG_ZERO; 00884 } 00885 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00886 if (r->graphout) { 00887 for(t=0;t<peseqlen;t++) { 00888 now->wordend_frame[t] = -1; 00889 now->wordend_gscore[t] = LOG_ZERO; 00890 } 00891 } 00892 #endif 00893 goto end_of_scan; 00894 } 00895 startt = t; 00896 00897 /* clear [startt+1..peseqlen-1] */ 00898 for(t=peseqlen-1;t>startt;t--) { 00899 if (ccd_flag) now->g_prev[t] = LOG_ZERO; 00900 now->g[t] = LOG_ZERO; 00901 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00902 if (r->graphout) { 00903 now->wordend_frame[t] = -1; 00904 now->wordend_gscore[t] 
= LOG_ZERO; 00905 } 00906 #endif 00907 } 00908 00909 /* バッファポインタ初期化 */ 00910 tn = 0; tl = 1; 00911 00912 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00913 if (r->graphout) { 00914 for(i=0;i<wordhmmnum;i++) { 00915 dwrk->wend_token_frame[tn][i] = -1; 00916 dwrk->wend_token_gscore[tn][i] = LOG_ZERO; 00917 } 00918 } 00919 #endif 00920 00921 if (! hmminfo->multipath) { 00922 /* Below initialization is not needed on multipath version, since 00923 the actual viterbi will begin at frame 0 in multipath mode in main loop */ 00924 00925 /* 時間 [startt] 上の値を初期化 */ 00926 /* initialize scores on frame [startt] */ 00927 for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO; 00928 dwrk->wordtrellis[tn][wordhmmnum-1] = dwrk->g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param); 00929 if (ccd_flag) { 00930 now->g_prev[startt] = dwrk->wordtrellis[tn][store_point]; 00931 } 00932 now->g[startt] = dwrk->wordtrellis[tn][0]; 00933 00934 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00935 if (r->graphout) { 00936 if (ccd_flag) { 00937 if (back_rescan) { 00938 if (wordhmmnum-1 == crossword_point) { 00939 dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; 00940 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; 00941 } else { 00942 dwrk->wend_token_frame[tn][wordhmmnum-1] = -1; 00943 dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO; 00944 } 00945 } else { 00946 dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; 00947 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; 00948 } 00949 } else { 00950 dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; 00951 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; 00952 } 00953 now->wordend_frame[startt] = dwrk->wend_token_frame[tn][0]; 00954 now->wordend_gscore[startt] = dwrk->wend_token_gscore[tn][0]; 00955 } 00956 #endif 00957 } /* end of hmminfo->multipath */ 00958 00959 endt = startt; 00960 00961 /* メインループ: startt から始まり 0 に向かって Viterbi 計算 */ 00962 /* main loop: start from [startt], and compute 
Viterbi toward [0] */ 00963 for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) { 00964 00965 /* wordtrellisのワークエリアをスワップ */ 00966 i = tn; tn = tl; tl = i; 00967 00968 node_exist_p = FALSE; /* TRUE if there is at least 1 survived node in this frame */ 00969 00970 if (hmminfo->multipath) { 00971 00972 /* 端のノード [t][wordhmmnum-1]は g[] を参照する */ 00973 /* the edge node [t][wordhmmnum-1] is equal to g[] */ 00974 00975 /* ノード [t][wordhmmnum-2..0] についてトレリスを計算 */ 00976 /* expand trellis for node [t][wordhmmnum-2..0] */ 00977 tmpmax_store = LOG_ZERO; 00978 00979 } else { 00980 00981 /* 端のノード [t][wordhmmnum-1]は,内部遷移 か g[]の高い方になる */ 00982 /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */ 00983 tmptmp = LOG_ZERO; 00984 for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) { 00985 score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; 00986 if (tmptmp < score1) { 00987 j = ac->arc; 00988 tmptmp = score1; 00989 } 00990 } 00991 if (dwrk->g[t] > tmptmp) { 00992 tmpmax = dwrk->g[t]; 00993 #ifdef GRAPHOUT_PRECISE_BOUNDARY 00994 if (r->graphout) { 00995 if (!back_rescan || wordhmmnum-1 == crossword_point) { 00996 dwrk->wend_token_frame[tn][wordhmmnum-1] = t; 00997 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[t]; 00998 } else { 00999 dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j]; 01000 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j]; 01001 } 01002 } 01003 #endif 01004 } else { 01005 tmpmax = tmptmp; 01006 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01007 if (r->graphout) { 01008 dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j]; 01009 dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j]; 01010 } 01011 #endif 01012 } 01013 01014 /* 端のノードのスコアエンベロープチェック: 一定幅外なら落とす */ 01015 /* check if the edge node is within score envelope */ 01016 if ( 01017 #ifdef SCAN_BEAM 01018 tmpmax <= framemaxscore[t] - scan_beam_thres || 01019 #endif 01020 tmpmax <= LOG_ZERO 01021 ) { 01022 
dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO; 01023 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01024 if (r->graphout) { 01025 dwrk->wend_token_frame[tn][wordhmmnum-1] = -1; 01026 dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO; 01027 } 01028 #endif 01029 } else { 01030 node_exist_p = TRUE; 01031 dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param); 01032 } 01033 01034 } /* end of ~multipath */ 01035 01036 /* ノード [t][wordhmmnum-2..0] についてトレリスを計算 */ 01037 /* expand trellis for node [t][wordhmmnum-2..0] */ 01038 for(i=wordhmmnum-2;i>=0;i--) { 01039 01040 if (ccd_flag) { 01041 01042 /* 最尤パスと最尤スコア tmpmax を見つける */ 01043 /* tmpmax2 は次回用 g_prev[] のための最大値(自己遷移を除いた最大値) */ 01044 /* find most likely path and the max score 'tmpmax' */ 01045 /* 'tmpmax2' is max score excluding self transition, for next g_prev[] */ 01046 if (! hmminfo->multipath) { 01047 if (i == store_point) { 01048 tmpmax2 = LOG_ZERO; 01049 } 01050 } 01051 tmpmax = LOG_ZERO; 01052 for (ac=whmm->state[i].ac;ac;ac=ac->next) { 01053 if (hmminfo->multipath) { 01054 if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t]; 01055 else if (t + 1 > startt) score1 = LOG_ZERO; 01056 else score1 = dwrk->wordtrellis[tl][ac->arc]; 01057 score1 += ac->a; 01058 } else { 01059 score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; 01060 } 01061 if (i <= crossword_point && ac->arc > crossword_point) { 01062 /* これは単語を越える遷移 (backscan 実行時) */ 01063 /* this is a transition across word (when backscan is enabled) */ 01064 score1 += now->lscore; /* add LM score */ 01065 } 01066 01067 if (hmminfo->multipath) { 01068 if (i <= store_point && ac->arc > store_point) { 01069 if (tmpmax_store < score1) tmpmax_store = score1; 01070 } 01071 } else { 01072 if (i == store_point && i != ac->arc) { 01073 if (tmpmax2 < score1) tmpmax2 = score1; 01074 } 01075 } 01076 01077 if (tmpmax < score1) { 01078 tmpmax = score1; 01079 j = ac->arc; 01080 } 01081 } 01082 01083 /* スコアエンベロープチェック: 一定幅外なら落とす */ 01084 /* 
check if score of this node is within the score envelope */ 01085 if ( 01086 #ifdef SCAN_BEAM 01087 tmpmax <= framemaxscore[t] - scan_beam_thres || 01088 #endif 01089 tmpmax <= LOG_ZERO 01090 ) { /* invalid node */ 01091 dwrk->wordtrellis[tn][i] = LOG_ZERO; 01092 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01093 if (r->graphout) { 01094 dwrk->wend_token_frame[tn][i] = -1; 01095 dwrk->wend_token_gscore[tn][i] = LOG_ZERO; 01096 } 01097 #endif 01098 if (! hmminfo->multipath) { 01099 if (i == store_point) now->g_prev[t] = LOG_ZERO; 01100 } 01101 } else { /* survived node */ 01102 if (! hmminfo->multipath) { 01103 if (i == store_point) now->g_prev[t] = tmpmax2; 01104 } 01105 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01106 if (r->graphout) { 01107 01108 if (hmminfo->multipath) { 01109 if ((back_rescan && i <= crossword_point && j > crossword_point) 01110 || j == wordhmmnum-1) { 01111 dwrk->wend_token_frame[tn][i] = t; 01112 dwrk->wend_token_gscore[tn][i] = tmpmax; 01113 } else { 01114 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; 01115 dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; 01116 } 01117 } else { 01118 if (i <= crossword_point && j > crossword_point) { 01119 dwrk->wend_token_frame[tn][i] = t; 01120 dwrk->wend_token_gscore[tn][i] = tmpmax; 01121 } else { 01122 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; 01123 dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; 01124 } 01125 } 01126 } 01127 #endif 01128 node_exist_p = TRUE; /* at least one node survive in this frame */ 01129 01130 dwrk->wordtrellis[tn][i] = tmpmax; 01131 if (! 
hmminfo->multipath || i > 0) { 01132 /* compute output probability */ 01133 dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param); 01134 } 01135 } 01136 01137 } else { /* not triphone */ 01138 01139 /* backscan 無し: store_point, crossword_point は無関係 */ 01140 /* no backscan: store_point, crossword_point ignored */ 01141 tmpmax = LOG_ZERO; 01142 if (hmminfo->multipath) { 01143 for (ac=whmm->state[i].ac;ac;ac=ac->next) { 01144 if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t]; 01145 else if (t + 1 > startt) score1 = LOG_ZERO; 01146 else score1 = dwrk->wordtrellis[tl][ac->arc]; 01147 score1 += ac->a; 01148 if (tmpmax < score1) { 01149 tmpmax = score1; 01150 j = ac->arc; 01151 } 01152 } 01153 } else { 01154 for (ac=whmm->state[i].ac;ac;ac=ac->next) { 01155 score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; 01156 if (tmpmax < score1) { 01157 tmpmax = score1; 01158 j = ac->arc; 01159 } 01160 } 01161 } 01162 01163 /* スコアエンベロープチェック: 一定幅外なら落とす */ 01164 /* check if score of this node is within the score envelope */ 01165 if ( 01166 #ifdef SCAN_BEAM 01167 tmpmax <= framemaxscore[t] - scan_beam_thres || 01168 #endif 01169 tmpmax <= LOG_ZERO 01170 ) { 01171 /* invalid node */ 01172 dwrk->wordtrellis[tn][i] = LOG_ZERO; 01173 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01174 if (r->graphout) { 01175 dwrk->wend_token_frame[tn][i] = -1; 01176 dwrk->wend_token_gscore[tn][i] = LOG_ZERO; 01177 } 01178 #endif 01179 } else { 01180 /* survived node */ 01181 node_exist_p = TRUE; 01182 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01183 if (r->graphout) { 01184 if (hmminfo->multipath) { 01185 if (j == wordhmmnum-1) { 01186 dwrk->wend_token_frame[tn][i] = t; 01187 dwrk->wend_token_gscore[tn][i] = tmpmax; 01188 } else { 01189 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; 01190 dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; 01191 } 01192 } else { 01193 dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; 01194 dwrk->wend_token_gscore[tn][i] = 
dwrk->wend_token_gscore[tl][j]; 01195 } 01196 } 01197 #endif 01198 /* score of node [t][i] has been determined here */ 01199 dwrk->wordtrellis[tn][i] = tmpmax; 01200 if (! hmminfo->multipath || i > 0) { 01201 dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param); 01202 } 01203 } 01204 01205 } 01206 } /* end of node loop */ 01207 01208 /* 時間 t のViterbi計算終了. 前向きスコアはscanした単語の始端 */ 01209 /* Viterbi end for frame [t]. the forward score is the score of word 01210 beginning scanned */ 01211 now->g[t] = dwrk->wordtrellis[tn][0]; 01212 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01213 if (r->graphout) { 01214 now->wordend_frame[t] = dwrk->wend_token_frame[tn][0]; 01215 now->wordend_gscore[t] = dwrk->wend_token_gscore[tn][0]; 01216 } 01217 #endif 01218 01219 if (hmminfo->multipath) { 01220 /* triphone 時, 次段のために store_point のデータをg_prevに保存 */ 01221 /* store the scores crossing the store_point to g_prev, for next scan */ 01222 if (ccd_flag) { 01223 /* the max arc crossing the store_point always selected as tmpmax_score */ 01224 tmpmax_store -= store_point_maxarc; 01225 if (tmpmax_store < LOG_ZERO) tmpmax_store = LOG_ZERO; 01226 now->g_prev[t] = tmpmax_store; 01227 } 01228 } 01229 01230 /* store the number of last computed frame */ 01231 if (node_exist_p) endt = t; 01232 01233 /* scanした単語の第1パスでの始端時刻より先まで t が進んでおり,かつ 01234 この t においてスコアエンベロープによって生き残ったノードが一つも 01235 無かったならば,このフレームで計算を打ち切りそれ以上先([0..t-1])は 01236 計算しない */ 01237 /* if frame 't' already reached the beginning frame of scanned word 01238 in 1st pass and no node was survived in this frame (all nodes pruned 01239 by score envelope), terminate computation at this frame and 01240 do not computer further frame ([0..t-1]). 
*/ 01241 if (t < now->estimated_next_t && (!node_exist_p)) { 01242 /* clear the rest scores */ 01243 for (i=t-1;i>=0;i--) { 01244 now->g[i] = LOG_ZERO; 01245 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01246 if (r->graphout) { 01247 now->wordend_frame[i] = -1; 01248 now->wordend_gscore[i] = LOG_ZERO; 01249 } 01250 #endif 01251 if (ccd_flag) now->g_prev[i] = LOG_ZERO; 01252 } 01253 /* terminate loop */ 01254 break; 01255 } 01256 01257 } /* end of time loop */ 01258 01259 if (debug2_flag) jlog("DEBUG: scanned: [%3d-%3d]\n", endt, startt); 01260 01261 end_of_scan: 01262 01263 if (hmminfo->multipath) { 01264 /* 前向きスコアの最終値を計算 (状態 0 から時間 0 への遷移) */ 01265 /* compute the total forward score (transition from state 0 to frame 0 */ 01266 if (endt == 0) { 01267 tmpmax = LOG_ZERO; 01268 for(ac=whmm->state[0].ac;ac;ac=ac->next) { 01269 score1 = dwrk->wordtrellis[tn][ac->arc] + ac->a; 01270 if (tmpmax < score1) tmpmax = score1; 01271 } 01272 now->final_g = score1; 01273 } else { 01274 now->final_g = LOG_ZERO; 01275 } 01276 } 01277 01278 /* 次回 backscan のための情報格納 */ 01279 /* store data for next backscan */ 01280 if (ccd_flag) { 01281 if (store_point == (hmminfo->multipath ? wordhmmnum - 2 : wordhmmnum - 1)) { 01282 /* last_ph無し,かつ単語の音素長=1の場合、次回の scan_word() で 01283 単語全体がもう一度再計算される. この場合, 01284 g_prev は,このscan_wordを開始する前のスコアを入れておく必要がある */ 01285 /* if there was no 'last_ph' and the scanned word consists of only 01286 1 phone, the whole word should be re-computed in the future scan_word(). 01287 So the next 'g_prev[]' should be the initial forward scores 01288 before we begin Viterbi (= g[t]). 
*/ 01289 for (t = startt; t>=0; t--) { 01290 now->g_prev[t] = dwrk->g[t]; 01291 } 01292 } 01293 #ifndef GRAPHOUT_PRECISE_BOUNDARY 01294 if (r->graphout) { 01295 if (now->tail_g_score != LOG_ZERO) { 01296 if (now->prevgraph != NULL) { 01297 (now->prevgraph)->leftscore = now->tail_g_score; 01298 } 01299 } 01300 } 01301 #endif 01302 /* 次回のために now->last_ph を更新 */ 01303 /* update 'now->last_ph' for future scan_word() */ 01304 if (back_rescan) { 01305 now->last_ph = dwrk->phmmseq[0]; 01306 } else { 01307 now->last_ph = winfo->wseq[word][0]; 01308 } 01309 if (enable_iwsp && hmminfo->multipath) { 01310 now->last_ph_sp_attached = dwrk->has_sp[0]; 01311 } 01312 } 01313 01314 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01315 if (! hmminfo->multipath) { 01316 if (r->graphout) { 01317 /* 次回の next_word 用に境界情報を調整 */ 01318 /* proceed word boundary for one step for next_word */ 01319 now->wordend_frame[peseqlen-1] = now->wordend_frame[0]; 01320 now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0]; 01321 for (t=0;t<peseqlen-1;t++) { 01322 now->wordend_frame[t] = now->wordend_frame[t+1]; 01323 now->wordend_gscore[t] = now->wordend_gscore[t+1]; 01324 } 01325 } 01326 } 01327 #endif 01328 01329 /* free work area */ 01330 free_hmm(whmm); 01331 #ifdef TCD 01332 if (hmminfo->multipath) { 01333 if (ccd_flag) { 01334 jlog("DEBUG: last_ph = %s", (now->last_ph)->name); 01335 if (now->last_ph_sp_attached) jlog(" (sp attached)"); 01336 jlog("\n"); 01337 } 01338 } else { 01339 jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name); 01340 } 01341 #endif 01342 }


/**************************************************************************/
/*** Expand new hypothesis and compute the total score (with heuristic) ***/
/**************************************************************************/

/**
 * Output log probability of the connection state of a phone at one frame.
 *
 * The state used is the one at index (state_num - 2) of the phone, taken
 * from the pseudo-phone state set when the phone is a pseudo phone and from
 * the defined HMM otherwise.  Callers in this file use it only when the
 * acoustic model is not in multipath mode.
 *
 * @param phone [in] logical phone at the connection point
 * @param t     [in] frame at which the output probability is computed
 * @param param [in] input parameter vectors, passed through to outprob_*()
 * @param r     [in] recognition process instance (supplies r->am->hmmwrk)
 *
 * @return the value returned by outprob_cd() / outprob_state().
 */
static LOGPROB
conn_state_outprob(HMM_Logical *phone, int t, HTK_Param *param, RecogProcess *r)
{
  if (phone->is_pseudo) {
    return outprob_cd(&(r->am->hmmwrk), t, &(phone->body.pseudo->stateset[phone->body.pseudo->state_num-2]), param);
  }
  return outprob_state(&(r->am->hmmwrk), t, phone->body.defined->s[phone->body.defined->state_num-2], param);
}

/**
 * Score the connection of hypothesis 'new' to trellis word 'tre' at frame
 * 't', and record it in 'new' when it beats the best connection so far.
 *
 * The candidate score is new->g[t] + tre->backscore, plus the output
 * probability of the connection state when the model is not multipath.
 * On improvement, new->score, new->bestt, new->estimated_next_t
 * (= tre->begintime - 1) and new->tre are updated.
 *
 * @param new      [i/o] hypothesis being built; new->g[t] must be set
 * @param tre      [in]  trellis word found at frame t (must not be NULL)
 * @param t        [in]  connection frame
 * @param newphone [in]  phone at the connection point of the expanded word
 * @param param    [in]  input parameter vectors
 * @param r        [in]  recognition process instance
 */
static void
update_best_connection(NODE *new, TRELLIS_ATOM *tre, int t, HMM_Logical *newphone, HTK_Param *param, RecogProcess *r)
{
  LOGPROB totalscore;
  LOGPROB tmpp;

  totalscore = new->g[t] + tre->backscore;
  if (! r->am->hmminfo->multipath) {
    tmpp = conn_state_outprob(newphone, t, param, r);
    totalscore += tmpp;
  }
  if (new->score < totalscore) {
    new->score = totalscore;
    new->bestt = t;
    new->estimated_next_t = tre->begintime - 1;
    new->tre = tre;
  }
}

/**
 * Expand hypothesis 'now' by the word described in 'nword', store the
 * result in 'new', and connect it to the backward trellis to obtain the
 * total score of the new hypothesis.
 *
 * The word sequence (and confidence scores when CM_SEARCH is defined),
 * grammar state and accumulated language score are inherited from 'now'
 * and extended with the new word.  When cross-word context dependency is
 * enabled (r->ccd_flag), the phone at the connection point is looked up
 * with the right context now->last_ph, and last_ph / g_prev[] are copied.
 * The forward scores new->g[] are then derived from now->g[] and every
 * candidate connection frame is evaluated against the backward trellis.
 * If no trellis word is found, new->score stays LOG_ZERO and new->tre
 * stays NULL.
 *
 * @param now   [in]  source hypothesis
 * @param new   [out] expanded hypothesis
 * @param nword [in]  next word to connect (id, next_state, lscore, tre)
 * @param param [in]  input parameter vectors
 * @param r     [in]  recognition process instance
 */
void
next_word(NODE *now, NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r)
{
  int t;
  int i;
  int startt;
  int word;
  int center;
  HMM_Logical *lastphone;
  HMM_Logical *newphone;
  TRELLIS_ATOM *tre;

  BACKTRELLIS *backtrellis;
  WORD_INFO *winfo;
  HTK_HMM_INFO *hmminfo;
  int peseqlen;

  backtrellis = r->backtrellis;
  winfo = r->lm->winfo;
  hmminfo = r->am->hmminfo;
  peseqlen = r->peseqlen;

  new->score = LOG_ZERO;

  word = nword->id;

  /* inherit and update word sequence, DFA state and total LM score */
  for (i = 0; i < now->seqnum; i++) {
    new->seq[i] = now->seq[i];
#ifdef CM_SEARCH
#ifdef CM_MULTIPLE_ALPHA
    memcpy(new->cmscore[i], now->cmscore[i], sizeof(LOGPROB) * r->config->annotate.cm_alpha_num);
#else
    new->cmscore[i] = now->cmscore[i];
#endif
#endif /* CM_SEARCH */
  }
  new->seq[i] = word;
  new->seqnum = now->seqnum + 1;
  new->state = nword->next_state;
  new->totallscore = now->totallscore + nword->lscore;
  if (hmminfo->multipath) new->final_g = now->final_g;

  /* word-internal phone at the end (= connection side) of the new word */
  lastphone = winfo->wseq[word][winfo->wlen[word]-1];

  if (r->ccd_flag) {

    /* set the triphone at the connection point to 'newphone', considering
       cross-word context dependency to 'now' */
    newphone = get_right_context_HMM(lastphone, now->last_ph->name, hmminfo);
    if (newphone == NULL) {	/* triphone not found */
      /* fallback to the original bi/mono-phone */
      /* error if the original is pseudo phone (not explicitly defined
	 in hmmdefs/hmmlist) */
      /* exception: word with 1 phone (triphone may exist in the next
	 expansion) */
      if (winfo->wlen[word] > 1 && lastphone->is_pseudo) {
	error_missing_right_triphone(lastphone, now->last_ph->name);
      }
      newphone = lastphone;
    }

    /* inherit last_ph: the edge phone of 'now' at its last scan becomes
       the preceding phone of the new hypothesis */
    new->last_ph = now->last_ph;
    if (hmminfo->multipath) {
      new->last_ph_sp_attached = now->last_ph_sp_attached;
    }

    /* copy g_prev[] that are scores at backscan connection point */
    for (t = 0; t < peseqlen; t++) {
      new->g_prev[t] = now->g_prev[t];
    }

  } else {			/* not triphone */

    /* set the phone at the connection point to 'newphone' */
    newphone = lastphone;
  }

  /* language score given for this connection */
  new->lscore = nword->lscore;

  /***************************************************************************/
  /* connect forward/backward trellis to look for the best connection time   */
  /***************************************************************************/

  /* update new->g[t] */
  if (hmminfo->multipath) {
    startt = peseqlen - 1;
    for (t = startt; t >= 0; t--) {
      new->g[t] = now->g[t] + nword->lscore;
    }
  } else {
    /* a_value: transition probability of connection point */
    LOGPROB a_value;

    i = hmm_logical_state_num(newphone);
    a_value = (hmm_logical_trans(newphone))->a[i-2][i-1];

    startt = peseqlen - 2;
    new->g[startt+1] = LOG_ZERO;
    for (t = startt; t >= 0; t--) {
      new->g[t] = now->g[t+1] + a_value + nword->lscore;
    }
  }

  new->tre = NULL;

  if (r->lmtype == LM_DFA && !r->config->pass2.looktrellis_flag) {
    /* search for best trellis word throughout all frame */
    for (t = startt; t >= 0; t--) {
      tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
      if (tre != NULL) {
	update_best_connection(new, tre, t, newphone, param, r);
      }
    }
    return;
  }

  /* search for best trellis word only around the estimated time: only
     the frames where the word exists contiguously around the end time
     of nword->tre are examined */
  center = (nword->tre)->endtime;

  /* 1. from the estimated end time toward frame 0 */
  for (t = center; t >= 0; t--) {
    tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
    if (tre == NULL) break;	/* go to 2 if the trellis word disappears */
    update_best_connection(new, tre, t, newphone, param, r);
  }
  /* 2. from the frame after the estimated end time toward startt */
  for (t = center + 1; t <= startt; t++) {
    tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
    if (tre == NULL) break;	/* end if the trellis word disappears */
    update_best_connection(new, tre, t, newphone, param, r);
  }

}

/**********************************************************************/
/********** Generate an initial hypothesis ****************************/
/**********************************************************************/

/**
 * Build an initial one-word hypothesis in 'new' from the word in 'nword'.
 *
 * The hypothesis gets the word as its only sequence element, the grammar
 * state and language score from 'nword', and new->g[peseqlen-1] set to the
 * language score.  Frames are then scanned from peseqlen-1 down to 0 and
 * the first frame at which the word exists in the backward trellis is
 * used as the connection: the score is new->g[peseqlen-1] plus the trellis
 * backscore (plus the connection-state output probability at frame
 * peseqlen-1 when the model is not multipath).  new->bestt is that frame,
 * or peseqlen-1 when r->graphout is set.  If the word is not found at any
 * frame, new->score is LOG_ZERO and new->tre is NULL.
 *
 * @param new   [out] hypothesis to be initialized
 * @param nword [in]  initial word (id, next_state, lscore)
 * @param param [in]  input parameter vectors
 * @param r     [in]  recognition process instance
 */
void
start_word(NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r)
{
  HMM_Logical *newphone;
  WORD_ID word;
  LOGPROB tmpp;
  int t;
  int lastframe;
  TRELLIS_ATOM *tre = NULL;

  BACKTRELLIS *backtrellis;
  WORD_INFO *winfo;

  backtrellis = r->backtrellis;
  winfo = r->lm->winfo;
  lastframe = r->peseqlen - 1;

  /* initialize data */
  word = nword->id;
  new->score = LOG_ZERO;
  new->seqnum = 1;
  new->seq[0] = word;

  new->state = nword->next_state;
  new->totallscore = nword->lscore;

  /* cross-word triphone handling is not needed on startup */
  newphone = winfo->wseq[word][winfo->wlen[word]-1];
  if (r->ccd_flag) {
    new->last_ph = NULL;
    new->last_ph_sp_attached = FALSE;
  }
  new->lscore = nword->lscore;

  new->g[lastframe] = nword->lscore;

  /* as in next_word(): make sure a failed lookup never leaves a stale
     trellis pointer in the node */
  new->tre = NULL;

  for (t = lastframe; t >= 0; t--) {
    tre = bt_binsearch_atom(backtrellis, t, word);
    if (tre == NULL) continue;

    new->bestt = r->graphout ? lastframe : t;
    new->score = new->g[lastframe] + tre->backscore;
    if (! r->am->hmminfo->multipath) {
      tmpp = conn_state_outprob(newphone, lastframe, param, r);
      new->score += tmpp;
    }
    new->estimated_next_t = tre->begintime - 1;
    new->tre = tre;
    break;
  }
  if (tre == NULL) {		/* no word in backtrellis */
    new->score = LOG_ZERO;
  }

}


/**
 * Finalize a hypothesis: copy 'now' into 'new' with cpy_node() and set the
 * score of 'new' to the final forward score of 'now', which is
 * now->final_g in multipath mode and now->g[0] otherwise.
 *
 * @param now   [in]  source hypothesis
 * @param new   [out] finalized hypothesis
 * @param param [in]  input parameter vectors (not referenced here)
 * @param r     [in]  recognition process instance
 */
void
last_next_word(NODE *now, NODE *new, HTK_Param *param, RecogProcess *r)
{
  cpy_node(new, now);
  /* update the final score */
  new->score = r->am->hmminfo->multipath ? now->final_g : now->g[0];
}


#endif /* PASS2_STRICT_IWCD */

/* end of file */