Julius 4.1.5
00001 00048 /* 00049 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00050 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00051 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00052 * All rights reserved 00053 */ 00054 00055 #include <julius/julius.h> 00056 00057 #undef DEBUG 00058 00059 00060 /* ---------------------------------------------------------- */ 00061 /* 第1パスの結果処理 */ 00062 /* end procedure to get result of 1st pass */ 00063 /* ---------------------------------------------------------- */ 00064 00065 #ifdef WORD_GRAPH 00066 00096 static void 00097 generate_lattice(int frame, RecogProcess *r) 00098 { 00099 BACKTRELLIS *bt; 00100 WORD_INFO *winfo; 00101 TRELLIS_ATOM *ta; 00102 int i, j; 00103 LOGPROB l; 00104 WordGraph *new; 00105 00106 bt = r->backtrellis; 00107 winfo = r->lm->winfo; 00108 00109 if (frame >= 0) { 00110 for (i=0;i<bt->num[frame];i++) { 00111 ta = bt->rw[frame][i]; 00112 /* words will be saved as a part of graph only if any of its 00113 following word has been survived in a beam */ 00114 if (! 
ta->within_context) continue; /* not a candidate */ 00115 if (ta->within_wordgraph) continue; /* already marked */ 00116 /* mark */ 00117 ta->within_wordgraph = TRUE; 00118 00119 new = (WordGraph *)mymalloc(sizeof(WordGraph)); 00120 new->wid = ta->wid; 00121 new->lefttime = ta->begintime; 00122 new->righttime = ta->endtime; 00123 new->fscore_head = ta->backscore; 00124 new->fscore_tail = 0.0; 00125 new->gscore_head = 0.0; 00126 new->gscore_tail = 0.0; 00127 new->lscore_tmp = ta->lscore; 00128 #ifdef CM_SEARCH 00129 new->cmscore = 0.0; 00130 #endif 00131 new->forward_score = new->backward_score = 0.0; 00132 new->headphone = winfo->wseq[ta->wid][0]; 00133 new->tailphone = winfo->wseq[ta->wid][winfo->wlen[ta->wid]-1]; 00134 00135 new->leftwordmaxnum = FANOUTSTEP; 00136 new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum); 00137 new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum); 00138 new->leftwordnum = 0; 00139 new->rightwordmaxnum = FANOUTSTEP; 00140 new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum); 00141 new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum); 00142 new->rightwordnum = 0; 00143 00144 l = ta->backscore; 00145 if (ta->last_tre->wid != WORD_INVALID) { 00146 l -= ta->last_tre->backscore; 00147 } 00148 l -= ta->lscore; 00149 new->amavg = l / (float)(ta->endtime - ta->begintime + 1); 00150 00151 #ifdef GRAPHOUT_DYNAMIC 00152 new->purged = FALSE; 00153 #endif 00154 new->saved = FALSE; 00155 new->graph_cm = 0.0; 00156 new->mark = FALSE; 00157 00158 new->next = r->result.wg1; 00159 r->result.wg1 = new; 00160 00161 /* recursive call */ 00162 generate_lattice(ta->last_tre->endtime, r); 00163 } 00164 } 00165 } 00166 00183 static void 00184 link_lattice_by_time(WordGraph *root) 00185 { 00186 WordGraph *wg; 00187 WordGraph *wtmp; 00188 int lefttime, righttime; 00189 00190 for(wg=root;wg;wg=wg->next) { 00191 00192 for(wtmp=root;wtmp;wtmp=wtmp->next) 
{ 00193 if (wg->righttime + 1 == wtmp->lefttime) { 00194 wordgraph_check_and_add_leftword(wtmp, wg, wtmp->lscore_tmp); 00195 wordgraph_check_and_add_rightword(wg, wtmp, wtmp->lscore_tmp); 00196 } 00197 if (wtmp->righttime + 1 == wg->lefttime) { 00198 wordgraph_check_and_add_leftword(wg, wtmp, wg->lscore_tmp); 00199 wordgraph_check_and_add_rightword(wtmp, wg, wg->lscore_tmp); 00200 } 00201 } 00202 } 00203 00204 } 00205 00219 static void 00220 re_compute_lattice_lm(WordGraph *root, WCHMM_INFO *wchmm) 00221 { 00222 int i; 00223 00224 for(wg=root;wg;wg=wg->next) { 00225 for(i=0;i<wg->leftwordnum;i++) { 00226 wg->left_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->leftword[i]->wid], wchmm->winfo->wton[wg->wid]); 00227 } 00228 for(i=0;i<wg->rightwordnum;i++) { 00229 wg->right_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->wid], wchmm->winfo->wton[wg->rightword[i]->wid]); 00230 } 00231 } 00232 } 00233 00234 #endif 00235 00250 static void 00251 put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo) 00252 { 00253 int i; 00254 jlog("DEBUG: %3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime, 00255 atom->backscore, winfo->wname[atom->wid], atom->wid); 00256 for (i=0;i<winfo->wlen[atom->wid]; i++) { 00257 jlog(" %s",winfo->wseq[atom->wid][i]->name); 00258 } 00259 jlog("\n"); 00260 } 00261 00292 static LOGPROB 00293 trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo) 00294 { 00295 int wordlen = 0; /* word length of best sentence hypothesis */ 00296 TRELLIS_ATOM *tretmp; 00297 LOGPROB langscore = 0.0; 00298 WORD_ID wordseq[MAXSEQNUM]; /* temporal: in reverse order */ 00299 int i; 00300 00301 /* initialize */ 00302 wordseq[0] = atom->wid; /* start from specified atom */ 00303 wordlen = 1; 00304 tretmp = atom; 00305 langscore += tretmp->lscore; 00306 if (debug2_flag) { 00307 put_atom(tretmp, winfo); 00308 } 00309 00310 /* trace the backtrellis */ 00311 while 
(tretmp->begintime > 0) {/* until beginning of input */ 00312 tretmp = tretmp->last_tre; 00313 /* t = tretmp->boundtime - 1; 00314 tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/ 00315 if (tretmp == NULL) { /* should not happen */ 00316 j_internal_error("trace_backptr: last trellis missing while backtracking"); 00317 } 00318 langscore += tretmp->lscore; 00319 wordseq[wordlen] = tretmp->wid; 00320 wordlen++; 00321 if (debug2_flag) { 00322 put_atom(tretmp, winfo); 00323 } 00324 if (wordlen >= MAXSEQNUM) { 00325 j_internal_error("trace_backptr: sentence length exceeded ( > %d)\n",MAXSEQNUM); 00326 } 00327 } 00328 *rt_wordlen = wordlen; 00329 /* reverse order -> normal order */ 00330 for(i=0;i<wordlen;i++) wordseq_rt[i] = wordseq[wordlen-i-1]; 00331 return(langscore); 00332 } 00333 00370 static void 00371 find_1pass_result(int framelen, RecogProcess *r) 00372 { 00373 BACKTRELLIS *backtrellis; 00374 WORD_INFO *winfo; 00375 WORD_ID wordseq[MAXSEQNUM]; 00376 int wordlen; 00377 int i; 00378 TRELLIS_ATOM *best; 00379 int last_time; 00380 LOGPROB total_lscore; 00381 LOGPROB maxscore; 00382 TRELLIS_ATOM *tmp; 00383 #ifdef SPSEGMENT_NAIST 00384 boolean ok_p; 00385 #endif 00386 00387 backtrellis = r->backtrellis; 00388 winfo = r->lm->winfo; 00389 00390 /* look for the last trellis word */ 00391 00392 if (r->lmtype == LM_PROB) { 00393 00394 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00395 00396 maxscore = LOG_ZERO; 00397 for (i=0;i<backtrellis->num[last_time];i++) { 00398 tmp = backtrellis->rw[last_time][i]; 00399 #ifdef WORD_GRAPH 00400 /* treat only words on a graph path */ 00401 if (!tmp->within_context) continue; 00402 #endif 00403 if (r->config->successive.enabled) { 00404 /* short-pause segmentation mode */ 00405 /* 最終フレームに残った最大スコアの単語 */ 00406 /* it should be the best trellis word on the last frame */ 00407 if (maxscore < tmp->backscore) { 00408 maxscore = tmp->backscore; 00409 best = tmp; 00410 } 00411 } else { 00412 /* 
not segmentation mode */ 00413 /* 最終単語は winfo->tail_silwid に固定 */ 00414 /* it is fixed to the tail silence model (winfo->tail_silwid) */ 00415 if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) { 00416 maxscore = tmp->backscore; 00417 best = tmp; 00418 break; 00419 } 00420 } 00421 } 00422 if (maxscore != LOG_ZERO) break; 00423 } 00424 00425 if (last_time < 0) { /* not found */ 00426 jlog("WARNING: %02d %s: no tail silence word survived on the last frame, search failed\n", r->config->id, r->config->name); 00427 r->result.status = J_RESULT_STATUS_FAIL; 00428 //callback_exec(CALLBACK_RESULT, r); 00429 return; 00430 } 00431 00432 } 00433 00434 if (r->lmtype == LM_DFA) { 00435 00436 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00437 00438 /* 末尾に残った単語の中で最大スコアの単語(cp_endは使用しない) */ 00439 /* the best trellis word on the last frame (not use cp_end[]) */ 00440 maxscore = LOG_ZERO; 00441 for (i=0;i<backtrellis->num[last_time];i++) { 00442 tmp = backtrellis->rw[last_time][i]; 00443 #ifdef WORD_GRAPH 00444 /* treat only words on a graph path */ 00445 if (!tmp->within_context) continue; 00446 #endif 00447 /* if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/ 00448 if (maxscore < tmp->backscore) { 00449 maxscore = tmp->backscore; 00450 best = tmp; 00451 } 00452 /* }*/ 00453 } 00454 if (maxscore != LOG_ZERO) break; 00455 } 00456 00457 if (last_time < 0) { /* not found */ 00458 jlog("WARNING: %02d %s: no sentence-end word survived on last beam\n", r->config->id, r->config->name); 00459 r->result.status = J_RESULT_STATUS_FAIL; 00460 //callback_exec(CALLBACK_RESULT, r); 00461 return; 00462 } 00463 00464 } 00465 00466 /* traceback word trellis from the best word */ 00467 total_lscore = trace_backptr(wordseq, &wordlen, best, r->lm->winfo); 00468 #ifdef SPSEGMENT_NAIST 00469 if (r->config->successive.enabled) { 00470 /* on segmentation mode, recognition result that only consists of 00471 short-pause words will be treated as recognition rejection */ 00472 
ok_p = FALSE; 00473 for(i=0;i<wordlen;i++) { 00474 if (! is_sil(wordseq[i], r)) ok_p = TRUE; 00475 } 00476 if (ok_p == FALSE) { 00477 r->result.status = J_RESULT_STATUS_ONLY_SILENCE; 00478 return; 00479 } 00480 } 00481 #endif 00482 00483 /* just flush last progress output */ 00484 /* 00485 if (recog->jconf->output.progout_flag) { 00486 recog->result.status = 1; 00487 recog->result.num_frame = last_time; 00488 recog->result.pass1.word = wordseq; 00489 recog->result.pass1.word_num = wordlen; 00490 recog->result.pass1.score = best->backscore; 00491 recog->result.pass1.score_lm = total_lscore; 00492 recog->result.pass1.score_am = best->backscore - total_lscore; 00493 //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog); 00494 }*/ 00495 00496 /* output 1st pass result */ 00497 if (verbose_flag || ! r->config->output.progout_flag) { 00498 r->result.status = J_RESULT_STATUS_SUCCESS; 00499 r->result.num_frame = framelen; 00500 for(i=0;i<wordlen;i++) r->result.pass1.word[i] = wordseq[i]; 00501 r->result.pass1.word_num = wordlen; 00502 r->result.pass1.score = best->backscore; 00503 r->result.pass1.score_lm = total_lscore; 00504 r->result.pass1.score_am = best->backscore - total_lscore; 00505 //callback_exec(CALLBACK_RESULT_PASS1, r); 00506 } 00507 00508 /* store the result to global val (notice: in reverse order) */ 00509 for(i=0;i<wordlen;i++) r->pass1_wseq[i] = wordseq[i]; 00510 r->pass1_wnum = wordlen; 00511 r->pass1_score = best->backscore; 00512 00513 #ifdef WORD_GRAPH 00514 /* 単語トレリスから,ラティスを生成する */ 00515 /* generate word graph from the word trellis */ 00516 r->peseqlen = backtrellis->framelen; 00517 r->result.wg1 = NULL; 00518 generate_lattice(last_time, r); 00519 link_lattice_by_time(r->result.wg1); 00520 if (r->lmtype == LM_PROB) re_compute_lattice_lm(r->result.wg1, r->wchmm); 00521 r->result.wg1_num = wordgraph_sort_and_annotate_id(&(r->result.wg1), r); 00522 /* compute graph CM by forward-backward processing */ 00523 graph_forward_backward(r->result.wg1, r); 00524 
//callback_exec(CALLBACK_RESULT_PASS1_GRAPH, r); 00525 //wordgraph_clean(&(r->result.wg1)); 00526 #endif 00527 00528 } 00529 00548 static int 00549 compare_backscore(TRELLIS_ATOM **x1, TRELLIS_ATOM **x2) 00550 { 00551 return((*x2)->backscore - (*x1)->backscore); 00552 } 00553 00573 static void 00574 find_1pass_result_word(int framelen, RecogProcess *r) 00575 { 00576 BACKTRELLIS *bt; 00577 TRELLIS_ATOM *best, *tmp; 00578 int last_time; 00579 Sentence *s; 00580 #ifdef CONFIDENCE_MEASURE 00581 LOGPROB sum; 00582 #endif 00583 LOGPROB maxscore; 00584 int i; 00585 TRELLIS_ATOM **idx; 00586 int num; 00587 00588 if (r->lmvar != LM_DFA_WORD) return; 00589 00590 bt = r->backtrellis; 00591 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00592 maxscore = LOG_ZERO; 00593 for (i=0;i<bt->num[last_time];i++) { 00594 tmp = bt->rw[last_time][i]; 00595 #ifdef WORD_GRAPH 00596 /* treat only words on a graph path */ 00597 if (!tmp->within_context) continue; 00598 #endif 00599 if (maxscore < tmp->backscore) { 00600 maxscore = tmp->backscore; 00601 best = tmp; 00602 } 00603 } 00604 if (maxscore != LOG_ZERO) break; 00605 } 00606 00607 if (last_time < 0) { /* not found */ 00608 jlog("WARNING: %02d %s: no word survived on the last frame, search failed\n", r->config->id, r->config->name); 00609 r->result.status = J_RESULT_STATUS_FAIL; 00610 //callback_exec(CALLBACK_RESULT, r); 00611 return; 00612 } 00613 00614 #ifdef CONFIDENCE_MEASURE 00615 sum = 0.0; 00616 for (i=0;i<bt->num[last_time];i++) { 00617 tmp = bt->rw[last_time][i]; 00618 #ifdef WORD_GRAPH 00619 /* treat only words on a graph path */ 00620 if (!tmp->within_context) continue; 00621 #endif 00622 sum += pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)); 00623 } 00624 #endif 00625 00626 /* set recognition result status to normal */ 00627 r->result.status = J_RESULT_STATUS_SUCCESS; 00628 00629 if (r->config->output.output_hypo_maxnum > 1) { 00630 /* more than one candidate is requested */ 00631 00632 /* 
get actual number of candidates to output */ 00633 num = r->config->output.output_hypo_maxnum; 00634 if (num > bt->num[last_time]) { 00635 num = bt->num[last_time]; 00636 } 00637 00638 /* prepare result storage */ 00639 result_sentence_malloc(r, num); 00640 r->result.sentnum = num; 00641 00642 /* sort by score */ 00643 idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]); 00644 for (i=0;i<bt->num[last_time];i++) { 00645 idx[i] = bt->rw[last_time][i]; 00646 } 00647 qsort(idx, bt->num[last_time], sizeof(TRELLIS_ATOM *), 00648 (int (*)(const void *,const void *))compare_backscore); 00649 00650 /* store to result storage */ 00651 for(i=0;i<r->result.sentnum;i++) { 00652 s = &(r->result.sent[i]); 00653 tmp = idx[i]; 00654 s->word_num = 1; 00655 s->word[0] = tmp->wid; 00656 #ifdef CONFIDENCE_MEASURE 00657 s->confidence[0] = pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)) / sum; 00658 #endif 00659 s->score = tmp->backscore; 00660 s->score_lm = 0.0; 00661 s->score_am = tmp->backscore; 00662 if (multigram_get_all_num(r->lm) > 0) { 00663 s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); 00664 } else { 00665 s->gram_id = 0; 00666 } 00667 } 00668 /* free work area for sort */ 00669 free(idx); 00670 00671 } else { /* only max is needed */ 00672 00673 /* prepare result storage */ 00674 result_sentence_malloc(r, 1); 00675 r->result.sentnum = 1; 00676 s = &(r->result.sent[0]); 00677 s->word_num = 1; 00678 s->word[0] = best->wid; 00679 #ifdef CONFIDENCE_MEASURE 00680 s->confidence[0] = 1.0 / sum; 00681 #endif 00682 s->score = best->backscore; 00683 s->score_lm = 0.0; 00684 s->score_am = best->backscore; 00685 if (multigram_get_all_num(r->lm) > 0) { 00686 s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); 00687 } else { 00688 s->gram_id = 0; 00689 } 00690 } 00691 00692 /* copy as 1st pass result */ 00693 memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence)); 00694 r->result.pass1.align = NULL; 00695 00696 
//callback_exec(CALLBACK_RESULT, r); 00697 //free(r->result.sent); 00698 } 00699 00700 00701 #ifdef DETERMINE 00702 00730 static TRELLIS_ATOM * 00731 determine_word(RecogProcess *r, int t, TRELLIS_ATOM *tremax, LOGPROB thres, int countthres) 00732 { 00733 TRELLIS_ATOM *ret; 00734 WORD_ID w; 00735 00736 //LOGPROB sum; 00737 //LOGPROB cm; 00738 00739 int j; 00740 FSBeam *d; 00741 TOKEN2 *tk; 00742 00743 if (tremax == NULL) { 00744 /* initialize */ 00745 r->determine_count = 0; 00746 r->determine_maxnodescore = LOG_ZERO; 00747 r->determined = FALSE; 00748 r->determine_last_wid = WORD_INVALID; 00749 r->have_determine = FALSE; 00750 return NULL; 00751 } 00752 00753 ret = NULL; 00754 00755 /* get confidence score of the maximum word hypothesis */ 00756 /* 00757 * sum = 0.0; 00758 * tre = recog->backtrellis->list; 00759 * while (tre != NULL && tre->endtime == t) { 00760 * sum += pow(10, recog->jconf->annotate.cm_alpha * (tre->backscore - tremax->backscore)); 00761 * tre = tre->next; 00762 * } 00763 * cm = 1.0 / sum; 00764 */ 00765 00766 /* determinization decision */ 00767 w = tremax->wid; 00768 00769 r->have_determine = FALSE; 00770 00771 /* determine by score threshold from maximum node score to maximum word end node score */ 00772 if (r->determine_last_wid == w && r->determine_maxnodescore - tremax->backscore <= thres) { 00773 r->determine_count++; 00774 if (r->determine_count > countthres) { 00775 if (r->determined == FALSE) { 00776 ret = tremax; 00777 r->determined = TRUE; 00778 r->have_determine = TRUE; 00779 } 00780 } 00781 } else { 00782 r->determine_count = 0; 00783 } 00784 00785 //printf("determine: %d: %s: cm=%f, relscore=%f, count=%d, phase=%d\n", t, recog->model->winfo->woutput[w], cm, determine_maxnodescore - tremax->backscore, count, phase); 00786 r->determine_last_wid = w; 00787 00788 /* update maximum node score here for next call, since 00789 the word path determination is always one frame later */ 00790 d = &(r->pass1); 00791 r->determine_maxnodescore = 
LOG_ZERO; 00792 for (j = d->n_start; j <= d->n_end; j++) { 00793 tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); 00794 if (r->determine_maxnodescore < tk->score) r->determine_maxnodescore = tk->score; 00795 } 00796 00797 return(ret); 00798 } 00799 00819 static void 00820 check_determine_word(RecogProcess *r, int t) 00821 { 00822 TRELLIS_ATOM *tre; 00823 TRELLIS_ATOM *tremax; 00824 LOGPROB maxscore; 00825 00826 /* bt->list is ordered by time frame */ 00827 maxscore = LOG_ZERO; 00828 tremax = NULL; 00829 tre = r->backtrellis->list; 00830 while (tre != NULL && tre->endtime == t) { 00831 if (maxscore < tre->backscore) { 00832 maxscore = tre->backscore; 00833 tremax = tre; 00834 } 00835 tre = tre->next; 00836 } 00837 00838 r->result.status = J_RESULT_STATUS_SUCCESS; 00839 r->result.num_frame = t; 00840 00841 if (maxscore != LOG_ZERO) { 00842 // if ((tre = determine_word(recog, t, tremax, 0.9, 17)) != NULL) { 00843 if ((tre = determine_word(r, t, tremax, r->config->pass1.determine_score_thres, r->config->pass1.determine_duration_thres)) != NULL) { 00844 r->result.pass1.word[0] = tremax->wid; 00845 r->result.pass1.word_num = 1; 00846 r->result.pass1.score = tremax->backscore; 00847 r->result.pass1.score_lm = 0.0; 00848 r->result.pass1.score_am = tremax->backscore; 00849 r->result.num_frame = t; 00850 //callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, r); 00851 } 00852 } 00853 00854 00855 } 00856 00857 #endif /* DETERMINE */ 00858 00874 static void 00875 bt_current_max(RecogProcess *r, int t) 00876 { 00877 int wordlen; 00878 TRELLIS_ATOM *tre; 00879 TRELLIS_ATOM *tremax; 00880 LOGPROB maxscore; 00881 LOGPROB lscore; 00882 00883 /* bt->list is ordered by time frame */ 00884 maxscore = LOG_ZERO; 00885 tremax = NULL; 00886 tre = r->backtrellis->list; 00887 while (tre != NULL && tre->endtime == t) { 00888 if (maxscore < tre->backscore) { 00889 maxscore = tre->backscore; 00890 tremax = tre; 00891 } 00892 tre = tre->next; 00893 } 00894 00895 r->result.status = 
J_RESULT_STATUS_SUCCESS; 00896 r->result.num_frame = t; 00897 00898 if (maxscore == LOG_ZERO) { 00899 r->result.pass1.word_num = 0; 00900 } else { 00901 if (r->lmvar == LM_DFA_WORD) { 00902 r->result.pass1.word[0] = tremax->wid; 00903 r->result.pass1.word_num = 1; 00904 r->result.pass1.score = tremax->backscore; 00905 r->result.pass1.score_lm = 0.0; 00906 r->result.pass1.score_am = tremax->backscore; 00907 } else { 00908 lscore = trace_backptr(r->result.pass1.word, &wordlen, tremax, r->lm->winfo); 00909 r->result.pass1.word_num = wordlen; 00910 r->result.pass1.score = tremax->backscore; 00911 r->result.pass1.score_lm = lscore; 00912 r->result.pass1.score_am = tremax->backscore; 00913 } 00914 } 00915 //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, r); 00916 } 00917 00933 static void 00934 bt_current_max_word(RecogProcess *r, int t) 00935 { 00936 00937 TRELLIS_ATOM *tre; 00938 TRELLIS_ATOM *tremax; 00939 LOGPROB maxscore; 00940 WORD_ID w; 00941 00942 /* bt->list は時間順に格納されている */ 00943 /* bt->list is order by time */ 00944 maxscore = LOG_ZERO; 00945 tremax = NULL; 00946 tre = r->backtrellis->list; 00947 while (tre != NULL && tre->endtime == t) { 00948 if (maxscore < tre->backscore) { 00949 maxscore = tre->backscore; 00950 tremax = tre; 00951 } 00952 tre = tre->next; 00953 } 00954 00955 if (maxscore != LOG_ZERO) { 00956 jlog("DEBUG: %3d: ",t); 00957 w = tremax->wid; 00958 jlog("\"%s [%s]\"(id=%d)", 00959 r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); 00960 jlog(" [%d-%d] %f", tremax->begintime, t, tremax->backscore); 00961 w = tremax->last_tre->wid; 00962 if (w != WORD_INVALID) { 00963 jlog(" <- \"%s [%s]\"(id=%d)\n", 00964 r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); 00965 } else { 00966 jlog(" <- bgn\n"); 00967 } 00968 } 00969 } 00970 00971 00972 /* -------------------------------------------------------------------- */ 00973 /* ビーム探索中のトークンを扱うサブ関数 */ 00974 /* functions to handle hypothesis tokens */ 00975 /* 
-------------------------------------------------------------------- */ 00976 00995 static void 00996 malloc_nodes(FSBeam *d, int n, int ntoken_init) 00997 { 00998 d->totalnodenum = n; 00999 d->token = (TOKENID *)mymalloc(sizeof(TOKENID) * d->totalnodenum); 01000 //d->maxtnum = ntoken_init; 01001 if (d->maxtnum < ntoken_init) d->maxtnum = ntoken_init; 01002 d->tlist[0] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); 01003 d->tlist[1] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); 01004 d->tindex[0] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); 01005 d->tindex[1] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); 01006 //d->expand_step = ntoken_step; 01007 d->nodes_malloced = TRUE; 01008 d->expanded = FALSE; 01009 } 01010 01023 static void 01024 expand_tlist(FSBeam *d) 01025 { 01026 d->maxtnum += d->expand_step; 01027 d->tlist[0] = (TOKEN2 *)myrealloc(d->tlist[0],sizeof(TOKEN2) * d->maxtnum); 01028 d->tlist[1] = (TOKEN2 *)myrealloc(d->tlist[1],sizeof(TOKEN2) * d->maxtnum); 01029 d->tindex[0] = (TOKENID *)myrealloc(d->tindex[0],sizeof(TOKENID) * d->maxtnum); 01030 d->tindex[1] = (TOKENID *)myrealloc(d->tindex[1],sizeof(TOKENID) * d->maxtnum); 01031 if (debug2_flag) jlog("STAT: token space expanded to %d\n", d->maxtnum); 01032 d->expanded = TRUE; 01033 } 01034 01052 static void 01053 prepare_nodes(FSBeam *d, int ntoken_step) 01054 { 01055 d->tnum[0] = d->tnum[1] = 0; 01056 if (d->expand_step < ntoken_step) d->expand_step = ntoken_step; 01057 } 01058 01073 static void 01074 free_nodes(FSBeam *d) 01075 { 01076 if (d->nodes_malloced) { 01077 free(d->token); 01078 free(d->tlist[0]); 01079 free(d->tlist[1]); 01080 free(d->tindex[0]); 01081 free(d->tindex[1]); 01082 d->nodes_malloced = FALSE; 01083 } 01084 } 01085 01100 static void 01101 clear_tlist(FSBeam *d, int tt) 01102 { 01103 d->tnum[tt] = 0; 01104 } 01105 01120 static void 01121 clear_tokens(FSBeam *d, int tt) 01122 { 01123 int j; 01124 /* initialize active token list: only clear ones used in the last 
call */ 01125 for (j=0; j<d->tnum[tt]; j++) { 01126 d->token[d->tlist[tt][j].node] = TOKENID_UNDEFINED; 01127 } 01128 } 01129 01146 static TOKENID 01147 create_token(FSBeam *d) 01148 { 01149 TOKENID newid; 01150 int tn; 01151 tn = d->tn; 01152 newid = d->tnum[tn]; 01153 d->tnum[tn]++; 01154 while (d->tnum[tn]>=d->maxtnum) expand_tlist(d); 01155 d->tindex[tn][newid] = newid; 01156 #ifdef WPAIR 01157 /* initialize link */ 01158 d->tlist[tn][newid].next = TOKENID_UNDEFINED; 01159 #endif 01160 return(newid); 01161 } 01162 01192 static void 01193 node_assign_token(FSBeam *d, int node, TOKENID tkid) 01194 { 01195 #ifdef WPAIR 01196 /* add to link list */ 01197 d->tlist[d->tn][tkid].next = d->token[node]; 01198 #endif 01199 d->token[node] = tkid; 01200 d->tlist[d->tn][tkid].node = node; 01201 } 01202 01239 static TOKENID 01240 node_exist_token(FSBeam *d, int tt, int node, WORD_ID wid) 01241 { 01242 #ifdef WPAIR 01243 /* In word-pair mode, multiple tokens are assigned to a node as a list. 01244 so we have to search for tokens with same last word ID */ 01245 #ifdef WPAIR_KEEP_NLIMIT 01246 /* 1ノードごとに保持するtoken数の上限を設定 */ 01247 /* tokenが無いが上限に達しているときは一番スコアの低いtokenを上書きする */ 01248 /* N-best: limit number of assigned tokens to a node */ 01249 int i = 0; 01250 TOKENID lowest_token = TOKENID_UNDEFINED; 01251 #endif 01252 TOKENID tmp; 01253 for(tmp=d->token[node]; tmp != TOKENID_UNDEFINED; tmp=d->tlist[tt][tmp].next) { 01254 if (d->tlist[tt][tmp].last_tre->wid == wid) { 01255 return(tmp); 01256 } 01257 #ifdef WPAIR_KEEP_NLIMIT 01258 if (lowest_token == TOKENID_UNDEFINED || 01259 d->tlist[tt][lowest_token].score > d->tlist[tt][tmp].score) 01260 lowest_token = tmp; 01261 if (++i >= d->wpair_keep_nlimit) break; 01262 #endif 01263 } 01264 #ifdef WPAIR_KEEP_NLIMIT 01265 if (i >= d->wpair_keep_nlimit) { /* overflow, overwrite lowest score */ 01266 return(lowest_token); 01267 } else { 01268 return(TOKENID_UNDEFINED); 01269 } 01270 #else 01271 return(TOKENID_UNDEFINED); 01272 #endif 01273 
01274 #else /* not WPAIR */ 01275 /* 1つだけ保持,これを常に上書き */ 01276 /* Only one token is kept in 1-best mode (default), so 01277 simply return the ID */ 01278 return(d->token[node]); 01279 #endif 01280 } 01281 01282 #ifdef DEBUG 01283 /* tlist と token の対応をチェックする(debug) */ 01284 /* for debug: check tlist <-> token correspondence 01285 where tlist[tt][tokenID].node = nodeID and 01286 token[nodeID] = tokenID 01287 */ 01288 static void 01289 node_check_token(FSBeam *d, int tt) 01290 { 01291 int i; 01292 for(i=0;i<d->tnum[tt];i++) { 01293 if (node_exist_token(d, tt, d->tlist[tt][i].node, d->tlist[tt][i].last_tre->wid) != i) { 01294 jlog("ERROR: token %d not found on node %d\n", i, d->tlist[tt][i].node); 01295 } 01296 } 01297 } 01298 #endif 01299 01300 01301 01302 /* -------------------------------------------------------------------- */ 01303 /* トークンをソートし 上位 N トークンを判別する (heap sort) */ 01304 /* Sort generated tokens and get N-best (use heap sort) */ 01305 /* -------------------------------------------------------------------- */ 01306 /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート 01307 される必要はない */ 01308 /* we only want to know the N-th score for determining beam threshold, so 01309 order is not considered here */ 01310 01311 #define SD(A) tindex_local[A-1] ///< Index locater for sort_token_*() 01312 #define SCOPY(D,S) D = S ///< Content copier for sort_token_*() 01313 #define SVAL(A) (tlist_local[tindex_local[A-1]].score) ///< Score locater for sort_token_*() 01314 #define STVAL (tlist_local[s].score) ///< Indexed score locater for sort_token_*() 01315 01340 static void 01341 sort_token_upward(FSBeam *d, int neednum, int totalnum) 01342 { 01343 int n,root,child,parent; 01344 TOKENID s; 01345 TOKEN2 *tlist_local; 01346 TOKENID *tindex_local; 01347 01348 tlist_local = d->tlist[d->tn]; 01349 tindex_local = d->tindex[d->tn]; 01350 01351 for (root = totalnum/2; root >= 1; root--) { 01352 SCOPY(s, SD(root)); 01353 parent = root; 01354 while ((child = parent * 2) <= totalnum) { 01355 if 
(child < totalnum && SVAL(child) < SVAL(child+1)) { 01356 child++; 01357 } 01358 if (STVAL >= SVAL(child)) { 01359 break; 01360 } 01361 SCOPY(SD(parent), SD(child)); 01362 parent = child; 01363 } 01364 SCOPY(SD(parent), s); 01365 } 01366 n = totalnum; 01367 while ( n > totalnum - neednum) { 01368 SCOPY(s, SD(n)); 01369 SCOPY(SD(n), SD(1)); 01370 n--; 01371 parent = 1; 01372 while ((child = parent * 2) <= n) { 01373 if (child < n && SVAL(child) < SVAL(child+1)) { 01374 child++; 01375 } 01376 if (STVAL >= SVAL(child)) { 01377 break; 01378 } 01379 SCOPY(SD(parent), SD(child)); 01380 parent = child; 01381 } 01382 SCOPY(SD(parent), s); 01383 } 01384 } 01385 01412 static void 01413 sort_token_downward(FSBeam *d, int neednum, int totalnum) 01414 { 01415 int n,root,child,parent; 01416 TOKENID s; 01417 TOKEN2 *tlist_local; 01418 TOKENID *tindex_local; 01419 01420 tlist_local = d->tlist[d->tn]; 01421 tindex_local = d->tindex[d->tn]; 01422 01423 for (root = totalnum/2; root >= 1; root--) { 01424 SCOPY(s, SD(root)); 01425 parent = root; 01426 while ((child = parent * 2) <= totalnum) { 01427 if (child < totalnum && SVAL(child) > SVAL(child+1)) { 01428 child++; 01429 } 01430 if (STVAL <= SVAL(child)) { 01431 break; 01432 } 01433 SCOPY(SD(parent), SD(child)); 01434 parent = child; 01435 } 01436 SCOPY(SD(parent), s); 01437 } 01438 n = totalnum; 01439 while ( n > totalnum - neednum) { 01440 SCOPY(s, SD(n)); 01441 SCOPY(SD(n), SD(1)); 01442 n--; 01443 parent = 1; 01444 while ((child = parent * 2) <= n) { 01445 if (child < n && SVAL(child) > SVAL(child+1)) { 01446 child++; 01447 } 01448 if (STVAL <= SVAL(child)) { 01449 break; 01450 } 01451 SCOPY(SD(parent), SD(child)); 01452 parent = child; 01453 } 01454 SCOPY(SD(parent), s); 01455 } 01456 } 01457 01490 static void 01491 sort_token_no_order(FSBeam *d, int neednum, int *start, int *end) 01492 { 01493 int totalnum; 01494 int restnum; 01495 01496 totalnum = d->tnum[d->tn]; 01497 01498 restnum = totalnum - neednum; 01499 01500 if 
(neednum >= totalnum) {
    /* no need to sort: all tokens are in the beam */
    *start = 0;
    *end = totalnum - 1;
  } else if (neednum < restnum) {
    /* needed num is smaller than rest, so sort for the needed tokens */
    sort_token_upward(d, neednum,totalnum);
    *start = totalnum - neednum;
    *end = totalnum - 1;
  } else {
    /* needed num is bigger than rest, so sort for the rest token */
    sort_token_downward(d, restnum,totalnum);
    *start = 0;
    *end = neednum - 1;
  }
}

/* -------------------------------------------------------------------- */
/*         main routines of 1st pass (frame-synchronous beam search)    */
/* -------------------------------------------------------------------- */

/** 
 * Set up the initial hypotheses for frame 0: determine the initial word
 * context, clear all lexicon-tree nodes and token lists, and create the
 * initial token(s) at the head node(s) of the possible first word(s),
 * depending on LM type (N-gram / DFA grammar / isolated word).
 *
 * @param param [in] input acoustic parameter vectors (used for the
 *                   output probability of the first state in non-multipath mode)
 * @param r [i/o] recognition process instance
 *
 * @return TRUE on success, FALSE on failure (e.g. no initial state in
 *         any active DFA grammar).
 */
static boolean
init_nodescore(HTK_Param *param, RecogProcess *r)
{
  WCHMM_INFO *wchmm;
  FSBeam *d;
  TOKENID newid;
  TOKEN2 *new;
  WORD_ID beginword;
  int node;
  int i;

  wchmm = r->wchmm;
  d = &(r->pass1);

  /* setup initial word context */
  if (r->config->successive.enabled) { /* sp segment mode */
    /* initial word context = last non-sp word of previous 2nd pass at last segment */
    if (r->lmtype == LM_PROB) {
      if (r->sp_break_last_nword == wchmm->winfo->tail_silwid) {
	/* if end with silE, initialize as normal start of sentence */
	d->bos.wid = WORD_INVALID;
      } else {
	d->bos.wid = r->sp_break_last_nword;
      }
    } else {
      d->bos.wid = WORD_INVALID;
    }
  } else {			/* not sp segment mode */
    d->bos.wid = WORD_INVALID;	/* no context */
  }

  d->bos.begintime = d->bos.endtime = -1;

  /* clear tree lexicon nodes and tokens */
  for(node = 0; node < d->totalnodenum; node++) {
    d->token[node] = TOKENID_UNDEFINED;
  }
  d->tnum[0] = d->tnum[1] = 0;

#ifdef PASS1_IWCD
  /* initialize output probability computation cache */
  outprob_style_cache_init(wchmm);
#endif

  /* create initial hypotheses: determine initial word(s) and generate
     the initial token(s) */

  if (r->lmtype == LM_PROB) {

    if (r->config->successive.enabled) { /* sp segment mode */
      if (r->sp_break_last_word != WORD_INVALID) { /* last segment exist */
	/* initial word = best last word hypothesis on the last segment */
	/* if silE or sp, begin with silB */
	/*if (is_sil(recog.sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/
	if (r->sp_break_last_word == wchmm->winfo->tail_silwid) {
	  beginword = wchmm->winfo->head_silwid;
	  d->bos.wid = WORD_INVALID;	/* reset initial word context */
	} else {
	  beginword = r->sp_break_last_word;
	}
      } else {
	/* initial segment: initial word set to silB */
	beginword = wchmm->winfo->head_silwid;
      }
    } else {			/* not sp segment mode */
      /* initial word fixed to silB */
      beginword = wchmm->winfo->head_silwid;
    }

#ifdef SP_BREAK_DEBUG
    jlog("DEBUG: startword=[%s], last_nword=[%s]\n",
	 (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword],
	 (d->bos.wid == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[d->bos.wid]);
#endif
    /* create the first token at the first node of initial word */
    newid = create_token(d);
    new = &(d->tlist[d->tn][newid]);

    /* initial node = head node of the beginword */
    if (wchmm->hmminfo->multipath) {
      node = wchmm->wordbegin[beginword];
    } else {
      node = wchmm->offset[beginword][0];
    }

    /* set initial LM score */
    if (wchmm->state[node].scid != 0) {
      /* if initial node is on a factoring branch, use the factored score */
      new->last_lscore = max_successor_prob(wchmm, d->bos.wid, node);
    } else {
      /* else, set to 0.0 */
      new->last_lscore = 0.0;
    }
#ifdef FIX_PENALTY
    new->last_lscore = new->last_lscore * d->lm_weight;
#else
    new->last_lscore = new->last_lscore * d->lm_weight + d->lm_penalty;
#endif
    /* set initial word history */
    new->last_tre = &(d->bos);
    new->last_cword = d->bos.wid;
    if (wchmm->hmminfo->multipath) {
      /* set initial score using the initial LM score */
      new->score = new->last_lscore;
    } else {
      /* set initial score using the initial LM score and AM score of the first state */
      new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore;
    }
    /* assign the initial node to token list */
    node_assign_token(d, node, newid);

  }

  if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) {

    /* initial words: all words that can be begin of sentence grammatically */
    /* only words in active grammars are allowed to be an initial words */
    MULTIGRAM *m;
    int t,tb,te;
    WORD_ID iw;
    boolean flag;
    DFA_INFO *gdfa;

    gdfa = r->lm->dfa;

    flag = FALSE;
    /* for all active grammar */
    for(m = r->lm->grammars; m; m = m->next) {
      if (m->active) {
	tb = m->cate_begin;
	te = tb + m->dfa->term_num;
	for(t=tb;t<te;t++) {
	  /* for all word categories that belong to the grammar */
	  if (dfa_cp_begin(gdfa, t) == TRUE) {
	    /* if the category can appear at beginning of sentence, */
	    flag = TRUE;
	    for (iw=0;iw<gdfa->term.wnum[t];iw++) {
	      /* create the initial token at the first node of all words
		 that belongs to the category */
	      i = gdfa->term.tw[t][iw];
	      if (wchmm->hmminfo->multipath) {
		node = wchmm->wordbegin[i];
	      } else {
		node = wchmm->offset[i][0];
	      }
	      /* in tree lexicon, words in the same category may share the
		 same root node, so skip it if the node has already existed */
	      if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
	      newid = create_token(d);
	      new = &(d->tlist[d->tn][newid]);
	      new->last_tre = &(d->bos);
#ifdef FIX_PENALTY
	      new->last_lscore = 0.0;
#else
	      new->last_lscore = d->penalty1;
#endif
	      if (wchmm->hmminfo->multipath) {
		new->score = new->last_lscore;
	      } else {
		new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore;
	      }
	      node_assign_token(d, node, newid);
	    }
	  }
	}
      }
    }
    if (!flag) {
      jlog("ERROR: init_nodescore: no initial state found in active DFA grammar\n");
      return FALSE;
    }
  }

  if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_WORD) {
    /* only words in active grammars are allowed to be an initial words */
    MULTIGRAM *m;

    for(m = r->lm->grammars; m; m = m->next) {
      if (m->active) {
	for(i = m->word_begin; i < m->word_begin + m->winfo->num; i++) {
	  if (wchmm->hmminfo->multipath) {
	    node = wchmm->wordbegin[i];
	  } else {
	    node = wchmm->offset[i][0];
	  }
	  if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
	  newid = create_token(d);
	  new = &(d->tlist[d->tn][newid]);
	  new->last_tre = &(d->bos);
	  new->last_lscore = 0.0;
	  if (wchmm->hmminfo->multipath) {
	    new->score = 0.0;
	  } else {
	    new->score = outprob_style(wchmm, node, d->bos.wid, 0, param);
	  }
	  node_assign_token(d, node, newid);
	}
      }
    }
  }

  return TRUE;

}

/******************************************************/
/* frame synchronous beam search --- first frame only */
/******************************************************/

/** 
 * Prepare the first pass for a new input: initialize the work-area
 * switcher (tn/tl), the backtrellis, LM weights/penalties, the token
 * work area, and the initial node scores, and set up short-pause
 * segmentation state when enabled.
 *
 * @param param [in] input acoustic parameter vectors
 * @param r [i/o] recognition process instance
 *
 * @return TRUE on success, FALSE on failure of initial score setup.
 */
boolean
get_back_trellis_init(HTK_Param *param, RecogProcess *r)
{
  WCHMM_INFO *wchmm;
  BACKTRELLIS *backtrellis;
  FSBeam *d;

  wchmm = r->wchmm;
  backtrellis = r->backtrellis;
  d = &(r->pass1);

  /* initialize switch tl, tn for Viterbi computation */
  /* tn: this frame  tl: last frame */
  d->tn = 0;
  d->tl = 1;

  /* initialize backtrellis structure to store resulting word trellis */
  bt_prepare(backtrellis);

  /* initialize some data on work area */

  if (r->lmtype == LM_PROB) {
    d->lm_weight = r->config->lmp.lm_weight;
    d->lm_penalty = r->config->lmp.lm_penalty;
  }
  if (r->lmtype == LM_DFA) {
    d->penalty1 = r->config->lmp.penalty1;
  }
#if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
  d->wpair_keep_nlimit = r->config->pass1.wpair_keep_nlimit;
#endif

  /* malloc work area */
  /* assumed initial number of needed tokens: beam width x 2 (self + next trans.)
   * + root node on the tree lexicon (for inter-word trans.)
   */
  if (d->totalnodenum != wchmm->n) {
    /* lexicon size changed: rebuild the node work area */
    free_nodes(d);
  }
  if (d->nodes_malloced == FALSE) {
    malloc_nodes(d, wchmm->n, r->trellis_beam_width * 2 + wchmm->startnum);
  }
  prepare_nodes(d, r->trellis_beam_width);

  /* set initial score to nodescore[tn] */
  if (init_nodescore(param, r) == FALSE) {
    jlog("ERROR: get_back_trellis_init: failed to set initial node scores\n");
    return FALSE;
  }

  sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));

  /* set interval frame for progout (progressive output) */
  r->config->output.progout_interval_frame = (int)((float)r->config->output.progout_interval / ((float)param->header.wshift / 10000.0));

  if (r->config->successive.enabled) {
    /* initialize parameter for short pause segmentation */
    d->in_sparea = TRUE;		/* assume beginning is silence */
    r->am->mfcc->sparea_start = d->tmp_sparea_start = 0; /* set start frame to 0 */
#ifdef SP_BREAK_RESUME_WORD_BEGIN
    d->tmp_sp_break_last_word = WORD_INVALID;
#endif
    r->sp_break_last_word = WORD_INVALID;
    /* the first end of pause segment should be always silB, so
       skip the first segment */
    d->first_sparea = TRUE;
    r->sp_break_2_begin_word = WORD_INVALID;
  }

#ifdef DETERMINE
  if (r->lmvar == LM_DFA_WORD) {
    /* initialize early word determination */
    determine_word(r, 0, NULL, 0, 0);
  }
#endif

  return TRUE;
}

/*****************************************************/
/* frame synchronous beam search --- proceed 1 frame */
/*****************************************************/

static void
propagate_token(FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore) 01906 { 01907 TOKEN2 *tknext; 01908 TOKENID tknextid; 01909 01910 if ((tknextid = node_exist_token(d, d->tn, next_node, last_tre->wid)) != TOKENID_UNDEFINED) { 01911 /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */ 01912 /* the destination node already has a token: compare score */ 01913 tknext = &(d->tlist[d->tn][tknextid]); 01914 if (tknext->score < next_score) { 01915 /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */ 01916 /* overwrite the content of existing destination token: not create a new token */ 01917 tknext->last_tre = last_tre; /* propagate last word info */ 01918 tknext->last_cword = last_cword; /* propagate last context word info */ 01919 tknext->last_lscore = last_lscore; /* set new LM score */ 01920 tknext->score = next_score; /* set new score */ 01921 } 01922 } else { 01923 /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */ 01924 /* token unassigned: create new token and assign */ 01925 if (next_score > LOG_ZERO) { /* valid token */ 01926 tknextid = create_token(d); /* get new token */ 01927 } 01928 tknext = &(d->tlist[d->tn][tknextid]); 01929 tknext->last_tre = last_tre; /* propagate last word info */ 01930 tknext->last_cword = last_cword; /* propagate last context word info */ 01931 tknext->last_lscore = last_lscore; 01932 tknext->score = next_score; /* set new score */ 01933 node_assign_token(d, next_node, tknextid); /* assign this new token to the next node */ 01934 } 01935 } 01936 01960 static void 01961 beam_intra_word_core(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a) 01962 { 01963 int node; 01964 LOGPROB tmpsum; 01965 LOGPROB ngram_score_cache; 01966 TOKEN2 *tk; 01967 01968 tk = *tk_ret; 01969 01970 node = tk->node; 01971 01972 /* now, 'node' is the source node, 'next_node' is the destication node, 01973 and ac-> holds transition probability */ 01974 /* tk->score is the accumulated score at the 'node' on 
previous frame */ 01975 01976 /******************************************************************/ 01977 /* 2.1.1 遷移先へのスコア計算(遷移確率+言語スコア) */ 01978 /* compute score of destination node (transition prob + LM) */ 01979 /******************************************************************/ 01980 tmpsum = tk->score + next_a; 01981 ngram_score_cache = LOG_ZERO; 01982 /* the next score at 'next_node' will be computed on 'tmpsum', and 01983 the new LM probability (if updated) will be stored on 'ngram_score_cache' at below */ 01984 01985 if (!wchmm->category_tree) { 01986 /* 言語スコア factoring: 01987 arcが自己遷移でない単語内の遷移で,かつ遷移先にsuccessorリスト 01988 があれば,lexicon tree の分岐部分の遷移である */ 01989 /* LM factoring: 01990 If this is not a self transition and destination node has successor 01991 list, this is branching transition 01992 */ 01993 if (next_node != node) { 01994 if (wchmm->state[next_node].scid != 0 01995 #ifdef UNIGRAM_FACTORING 01996 /* 1-gram factoring 使用時は, 複数で共有される枝では 01997 wchmm->state[node].scid は負の値となり,その絶対値を 01998 添字として wchmm->fscore[] に単語集合の1-gramの最大値が格納 01999 されている. 末端の枝(複数単語で共有されない)では, 02000 wchmm->state[node].scid は正の値となり, 02001 1単語を sc として持つのでそこで正確な2-gramを計算する */ 02002 /* When uni-gram factoring, 02003 wchmm->state[node].scid is below 0 for shared branches. 02004 In this case the maximum uni-gram probability for sub-tree 02005 is stored in wchmm->fscore[- wchmm->state[node].scid]. 02006 Leaf branches (with only one successor word): the scid is 02007 larger than 0, and has 02008 the word ID in wchmm->sclist[wchmm->state[node].scid]. 
02009 So precise 2-gram is computed in this point */ 02010 #endif 02011 ){ 02012 02013 if (wchmm->lmtype == LM_PROB) { 02014 /* ここで言語モデル確率を更新する */ 02015 /* LM value should be update at this transition */ 02016 /* N-gram確率からfactoring 値を計算 */ 02017 /* compute new factoring value from N-gram probabilities */ 02018 #ifdef FIX_PENALTY 02019 /* if at the beginning of sentence, not add lm_penalty */ 02020 if (tk->last_cword == WORD_INVALID) { 02021 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight; 02022 } else { 02023 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; 02024 } 02025 #else 02026 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; 02027 #endif 02028 /* スコアの更新: tk->last_lscore に単語内での最後のfactoring値が 02029 入っているので, それをスコアから引いてリセットし, 新たなスコアを 02030 セットする */ 02031 /* update score: since 'tk->last_lscore' holds the last LM factoring 02032 value in this word, we first remove the score from the current 02033 score, and then add the new LM value computed above. */ 02034 tmpsum -= tk->last_lscore; 02035 tmpsum += ngram_score_cache; 02036 } 02037 02038 if (wchmm->lmtype == LM_DFA && wchmm->lmvar == LM_DFA_GRAMMAR) { 02039 /* 文法を用いる場合, カテゴリ単位の木構造化がなされていれば, 02040 接続制約は単語間遷移のみで扱われるので,factoring は必要ない. 02041 カテゴリ単位木構造化が行われない場合, 文法間の接続制約はここ 02042 で factoring で行われることになる. */ 02043 /* With DFA, we use category-pair constraint extracted from the DFA 02044 at this 1st pass. So if we compose a tree lexicon per word's 02045 category, the each category tree has the same connection 02046 constraint and thus we can apply the constraint on the cross-word 02047 transition. This per-category tree lexicon is enabled by default, 02048 and in the case the constraint will be applied at the word end. 
02049 If you disable per-category tree lexicon by undefining 02050 'CATEGORY_TREE', the word-pair contrained will be applied in a 02051 factoring style at here. 02052 */ 02053 02054 /* 決定的factoring: 直前単語に対して,sub-tree内にカテゴリ対制約上 02055 接続しうる単語が1つもなければ, この遷移は不可 */ 02056 /* deterministic factoring in grammar mode: 02057 transition disabled if there are totally no sub-tree word that can 02058 grammatically (in category-pair constraint) connect 02059 to the previous word. 02060 */ 02061 if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) { 02062 tmpsum = LOG_ZERO; 02063 } 02064 } 02065 02066 } 02067 } 02068 } 02069 /* factoring not needed when DFA mode and uses category-tree */ 02070 02071 /****************************************/ 02072 /* 2.1.2 遷移先ノードへトークン伝搬 */ 02073 /* pass token to destination node */ 02074 /****************************************/ 02075 02076 if (ngram_score_cache == LOG_ZERO) ngram_score_cache = tk->last_lscore; 02077 propagate_token(d, next_node, tmpsum, tk->last_tre, tk->last_cword, ngram_score_cache); 02078 02079 if (d->expanded) { 02080 /* if work area has been expanded at 'create_token()' above, 02081 the inside 'realloc()' will destroy the pointers. 
02082 so, reset local pointers from token index */ 02083 tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); 02084 d->expanded = FALSE; 02085 } 02086 02087 *tk_ret = tk; 02088 02089 } 02090 02110 static void 02111 beam_intra_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j) 02112 { 02113 A_CELL2 *ac; 02114 TOKEN2 *tk; 02115 int node; 02116 int k; 02117 02118 tk = *tk_ret; 02119 02120 node = tk->node; 02121 02122 if (wchmm->self_a[node] != LOG_ZERO) { 02123 beam_intra_word_core(wchmm, d, tk_ret, j, node, wchmm->self_a[node]); 02124 } 02125 02126 if (wchmm->next_a[node] != LOG_ZERO) { 02127 beam_intra_word_core(wchmm, d, tk_ret, j, node+1, wchmm->next_a[node]); 02128 } 02129 02130 for(ac=wchmm->ac[node];ac;ac=ac->next) { 02131 for(k=0;k<ac->n;k++) { 02132 beam_intra_word_core(wchmm, d, tk_ret, j, ac->arc[k], ac->a[k]); 02133 } 02134 } 02135 } 02136 02137 /**************************/ 02138 /* 2.2. トレリス単語保存 */ 02139 /* save trellis word */ 02140 /**************************/ 02165 static TRELLIS_ATOM * 02166 save_trellis(BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath) 02167 { 02168 TRELLIS_ATOM *tre; 02169 int sword; 02170 02171 sword = wchmm->stend[tk->node]; 02172 02173 /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード. 02174 (「このフレーム」でないことに注意!!) 02175 よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説 02176 (TRELLIS_ATOM)として,単語トレリス構造体に保存する. */ 02177 /* This source node (= word end node) has been survived in the 02178 "last" frame (notice: not "this" frame!!). So this word end 02179 is saved here to the word trellis structure (BACKTRELLIS) as a 02180 trellis word (TRELLIS_ATOM) with end frame (t-1). 
*/
  tre = bt_new(bt);
  tre->wid = sword;		/* word ID */
  tre->backscore = tk->score;	/* log score (AM + LM) */
  tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */
  tre->endtime = t-1;		/* word end frame */
  tre->last_tre = tk->last_tre;	/* link to previous trellis word */
  tre->lscore = tk->last_lscore; /* log LM score */
  bt_store(bt, tre);		/* save to backtrellis */
#ifdef WORD_GRAPH
  if (tre->last_tre != NULL) {
    /* mark to indicate that the following words was survived in beam */
    tre->last_tre->within_context = TRUE;
  }
  if (final_for_multipath) {
    /* last node */
    if (tre->wid == wchmm->winfo->tail_silwid) {
      tre->within_context = TRUE;
    }
  }
#endif /* WORD_GRAPH */

  return tre;
}


/** 
 * Perform cross-word transitions from the word-end node held by token
 * '*tk_ret' toward all word-beginning nodes, applying the inter-word LM
 * score (N-gram) or category-pair constraint / insertion penalty (DFA).
 * With UNIGRAM_FACTORING (and without WPAIR), shared word-head nodes are
 * skipped here and handled later in beam_inter_word_factoring(); this
 * function also records the best word-end hypothesis for that later step.
 *
 * @param wchmm [in] tree lexicon
 * @param d [i/o] work area for the first pass
 * @param tk_ret [i/o] source token at a word-end node (may be re-pointed
 *                     from the token index when the work area is expanded)
 * @param tre [in] the trellis word just saved for this word end
 * @param j [in] index of the source token in the token index list
 */
static void
beam_inter_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j)
{
  A_CELL2 *ac;
  TOKEN2 *tk;
  int sword;
  int node, next_node;
  LOGPROB *iwparray;
  int stid;
#ifdef UNIGRAM_FACTORING
  int isoid;
#endif
  LOGPROB tmpprob, tmpsum, ngram_score_cache;
  int k;
  WORD_ID last_word;

  tk = *tk_ret;

  node = tk->node;
  sword = wchmm->stend[node];
  /* transparent words do not update the LM context word */
  last_word = wchmm->winfo->is_transparent[sword] ? tk->last_cword : sword;

  if (wchmm->lmtype == LM_PROB) {

    /* do not allow transition if the source word is end-of-sentence word */
    if (sword == wchmm->winfo->tail_silwid) return;

#ifdef UNIGRAM_FACTORING
#ifndef WPAIR
    /* here we will record the best wordend node of maximum likelihood
       at this frame, to compute later the cross-word transitions toward
       shared factoring word-head node */
    tmpprob = tk->score;
    if (!wchmm->hmminfo->multipath) tmpprob += wchmm->wordend_a[sword];
    if (d->wordend_best_score < tmpprob) {
      d->wordend_best_score = tmpprob;
      d->wordend_best_node = node;
      d->wordend_best_tre = tre;
      d->wordend_best_last_cword = tk->last_cword;
    }
#endif
#endif

    /* As all words are possible to connect in N-gram, we first compute
       all the inter-word LM probability here.
       Cache is onsidered in max_successor_prob_iw(). */
    if (wchmm->winfo->is_transparent[sword]) {
      iwparray = max_successor_prob_iw(wchmm, tk->last_cword);
    } else {
      iwparray = max_successor_prob_iw(wchmm, sword);
    }
  }

  /* for all beginning-of-word nodes, */
  /* wchmm->startnode[0..stid-1] ... list of word start node (shared) */
  for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
    next_node = wchmm->startnode[stid];
    if (wchmm->hmminfo->multipath) {
      if (wchmm->lmtype == LM_PROB) {
	/* connection to the head silence word is not allowed */
	if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue;
      }
    }

    /*****************************************/
    /* 2.3.1. apply cross-word LM constraint */
    /*****************************************/

    if (wchmm->lmtype == LM_PROB) {
      /* compute N-gram probability */
#ifdef UNIGRAM_FACTORING
      /* wchmm->start2isolate[0..stid-1] ... isolate ID for
	 beginning-of-word state.  value: -1 for states that has
	 1-gram factoring value (share nodes with some other words),
	 and ID for unshared words
      */
      isoid = wchmm->start2isolate[stid];
#ifdef WPAIR
      /* Efficient cross-word LM handling should be disabled for
	 word-pair approximation */
      if (isoid == -1) {
	tmpprob = wchmm->fscore[- wchmm->state[next_node].scid];
      } else {
	tmpprob = iwparray[isoid];
      }
#else  /* ~WPAIR */
      /* Efficient cross-word LM cache:
	 As 1-gram factoring values are independent of word context,
	 they remain unchanged while search.  So, in cross-word LM
	 computation, beginning-of-word states which share nodes with
	 others and has factoring value in wchmm does not need cache.
	 So only the unshared beginning-of-word states are computed and
	 cached here in iwparray[].
      */
      /* the shared nodes will be computed afterward, so just skip them
	 here */
      if (isoid == -1) continue;
      tmpprob = iwparray[isoid];
#endif	/* ~WPAIR */
#else  /* ~UNIGRAM_FACTORING */
      tmpprob = iwparray[stid];
#endif
    }

    /* do not allow transition if the destination word is
       beginning-of-sentence word.  This limitation is realized by
       not assigning 'stid' for the word in wchmm.c, so we have nothing
       to do here.
    */

    if (wchmm->category_tree) {
      /* With DFA and per-category tree lexicon,
	 LM constraint is deterministic:
	 do not allow transition if the category connection is not allowed
	 (with category tree, constraint can be determined on top node) */
      if (dfa_cp(wchmm->dfa, wchmm->winfo->wton[sword], wchmm->winfo->wton[wchmm->start2wid[stid]]) == FALSE) continue;
    }

    /*******************************************************************/
    /* 2.3.2. compute score of destination node (transition prob + LM) */
    /*******************************************************************/
    tmpsum = tk->score;
    if (!wchmm->hmminfo->multipath) tmpsum += wchmm->wordend_a[sword];

    /* 'tmpsum' now holds outgoing score from the wordend node */
    if (wchmm->lmtype == LM_PROB) {
      /* add LM score */
      ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
      tmpsum += ngram_score_cache;
      if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) {

	tmpsum += d->lm_penalty_trans;
      }
    }
    if (wchmm->lmtype == LM_DFA) {
      /* grammar: add insertion penalty */
      ngram_score_cache = d->penalty1;
      tmpsum += ngram_score_cache;

      /* grammar: deterministic factoring (in case category-tree not enabled) */
      if (!wchmm->category_tree) {
	if (!can_succeed(wchmm, sword, next_node)) {
	  tmpsum = LOG_ZERO;
	}
      }
    }

    /*********************************************************************/
    /* 2.3.3. pass token to destination node (updating word-context info */
    /*********************************************************************/

    if (wchmm->hmminfo->multipath) {
      /* since top node has no ouput, we should go one more step further */
      if (wchmm->self_a[next_node] != LOG_ZERO) {
	propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], tre, last_word, ngram_score_cache);
	if (d->expanded) {
	  /* if work area has been expanded at 'create_token()' above,
	     the inside 'realloc()' will destroy the pointers.
	     so, reset local pointers from token index */
	  tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
	  d->expanded = FALSE;
	}
      }
      if (wchmm->next_a[next_node] != LOG_ZERO) {
	propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], tre, last_word, ngram_score_cache);
	if (d->expanded) {
	  /* if work area has been expanded at 'create_token()' above,
	     the inside 'realloc()' will destroy the pointers.
	     so, reset local pointers from token index */
	  tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
	  d->expanded = FALSE;
	}
      }
      for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
	for(k=0;k<ac->n;k++) {
	  propagate_token(d, ac->arc[k], tmpsum + ac->a[k], tre, last_word, ngram_score_cache);
	  if (d->expanded) {
	    /* if work area has been expanded at 'create_token()' above,
	       the inside 'realloc()' will destroy the pointers.
	       so, reset local pointers from token index */
	    tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
	    d->expanded = FALSE;
	  }
	}
      }
    } else {
      propagate_token(d, next_node, tmpsum, tre, last_word, ngram_score_cache);
      if (d->expanded) {
	/* if work area has been expanded at 'create_token()' above,
	   the inside 'realloc()' will destroy the pointers.
	   so, reset local pointers from token index */
	tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
	d->expanded = FALSE;
      }
    }

  } /* end of next word heads */

  *tk_ret = tk;


} /* end of cross-word processing */


#ifdef UNIGRAM_FACTORING

/** 
 * Perform the cross-word transitions that were skipped in
 * beam_inter_word(): from the single best word-end hypothesis recorded
 * in d->wordend_best_* toward all shared (1-gram factored) word-head
 * nodes, using the constant factoring scores in wchmm->fscore[].
 *
 * @param wchmm [in] tree lexicon
 * @param d [i/o] work area for the first pass
 */
static void
beam_inter_word_factoring(WCHMM_INFO *wchmm, FSBeam *d)
{
  int sword;
  int node, next_node;
  int stid;
  LOGPROB tmpprob, tmpsum, ngram_score_cache;
  A_CELL2 *ac;
  int j;
  WORD_ID last_word;

  node = d->wordend_best_node;
  sword = wchmm->stend[node];
  /* transparent words do not update the LM context word */
  last_word = wchmm->winfo->is_transparent[sword] ? d->wordend_best_last_cword : sword;

  for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
    next_node = wchmm->startnode[stid];
    /* compute transition from 'node' at end of word 'sword' to 'next_node' */
    /* skip isolated words already calculated in the above main loop */
    if (wchmm->start2isolate[stid] != -1) continue;
    /* rest should have 1-gram factoring score at wchmm->fscore */
    if (wchmm->state[next_node].scid >= 0) {
      j_internal_error("get_back_trellis_proceed: scid mismatch at 1-gram factoring of shared states\n");
    }
    tmpprob = wchmm->fscore[- wchmm->state[next_node].scid];
    ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
    tmpsum = d->wordend_best_score;
    tmpsum += ngram_score_cache;
    if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[d->wordend_best_last_cword]) {
      tmpsum += d->lm_penalty_trans;
    }

    if (wchmm->hmminfo->multipath) {
      /* since top node has no ouput, we should go one more step further */
      if (wchmm->self_a[next_node] != LOG_ZERO) {
	propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
	if (d->expanded) {
	  d->expanded = FALSE;
	}
      }
      if (wchmm->next_a[next_node] != LOG_ZERO) {
	propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
	if (d->expanded) {
	  d->expanded = FALSE;
	}
      }
      for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
	for(j=0;j<ac->n;j++) {
	  propagate_token(d, ac->arc[j], tmpsum + ac->a[j], d->wordend_best_tre, last_word, ngram_score_cache);
	  if (d->expanded) {
	    d->expanded = FALSE;
	  }
	}
      }

    } else {
      propagate_token(d, next_node, tmpsum, d->wordend_best_tre, last_word, ngram_score_cache);
      if (d->expanded) {
	d->expanded = FALSE;
      }
    }

  }
}

#endif /* UNIGRAM_FACTORING */


/** 
 * Proceed the frame-synchronous Viterbi beam search by one frame:
 * swap the token work buffers, perform intra-word transitions for all
 * survived tokens, save word-end tokens to the backtrellis, and perform
 * cross-word transitions.
 *
 * @param t [in] current frame index
 * @param param [in] input acoustic parameter vectors
 * @param r [i/o] recognition process instance
 * @param final_for_multipath [in] TRUE when called at the final frame
 *                                 in multipath mode (suppresses cross-word
 *                                 transitions)
 */
boolean
get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath)
{
  /* local static work area for get_back_trellis_proceed() */
  /* these are local work area and need not to be kept for another call */
  TRELLIS_ATOM *tre;
  int node;
  int lmtype, lmvar;

  WCHMM_INFO *wchmm;
  FSBeam *d;
  int j;
  TOKEN2 *tk;

  /* local copied variables */
  int tn, tl;

  /* store pointer to local for rapid access */
  wchmm = r->wchmm;
  d = &(r->pass1);


  lmtype = r->lmtype;
  lmvar = r->lmvar;

  /*********************/
  /* 1. initialization */
  /*********************/

  /* swap tl and tn to switch work buffer */
  /* tl (= last tn) holds result of the previous frame */
  d->tl = d->tn;
  if (d->tn == 0) d->tn = 1; else d->tn = 0;

  /* store locally for rapid access */
  tl = d->tl;
  tn = d->tn;

#ifdef UNIGRAM_FACTORING
#ifndef WPAIR
  /* In 1-gram factoring, the language score on the word-head node is constant
     and independent of the previous word.  So, the same word hypothesis will
     be selected as the best previous word at the inter-word Viterbi
     processing.  So, in inter-word processing, we can (1) select only
     the best word-end hypothesis, and then (2) process viterbi from the node
     to each word-head node.  On the other hand, the isolated words,
     i.e. words not sharing any node with other word, has unique word-head
     node and the true 2-gram language score is determined at the top node.
     In such case the best word hypothesis prior to each node will differ
     according to the language scores.  So we have to deal such words separately. */
  /* initialize max value to delect best word-end hypothesis */
  if (lmtype == LM_PROB) {
    d->wordend_best_score = LOG_ZERO;
  }
#endif
#endif

#ifdef DEBUG
  /* debug */
  /* node_check_token(d, tl); */
#endif

  /* initialize token buffer: for speedup, only ones used in the last call will be cleared */
  clear_tokens(d, tl);

  /**************************/
  /* 2. Viterbi computation */
  /**************************/
  /* do one viterbi computation from last frame to this frame */
  /* tindex[tl][n_start..n_end] holds IDs of survived nodes in last frame */

  if (wchmm->hmminfo->multipath) {
    /*********************************/
    /* MULTIPATH MODE */
    /*********************************/

    for (j = d->n_start; j <= d->n_end; j++) {
      /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
      tk = &(d->tlist[tl][d->tindex[tl][j]]);
      if (tk->score <= LOG_ZERO) continue; /* invalid node */
      node = tk->node;
      /*********************************/
      /* 2.1. word-internal transition */
      /*********************************/
      beam_intra_word(wchmm, d, &tk, j);
    }
    /*******************************************************/
    /* 2.2. sort tokens by score up to beam width          */
    /*******************************************************/
    sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));

    /***************************/
    /* 2.3. cross-word viterbi */
    /***************************/
    for(j = d->n_start; j <= d->n_end; j++) {
      tk = &(d->tlist[tn][d->tindex[tn][j]]);
      node = tk->node;

      /* if source node is end state of a word, */
      if (wchmm->stend[node] != WORD_INVALID) {

	/**************************/
	/* 2.4. save trellis word */
	/**************************/
#ifdef SPSEGMENT_NAIST
	if (r->config->successive.enabled && !d->after_trigger) {
	  tre = tk->last_tre;	/* dummy */
	} else {
	  tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
	}
#else
	tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
#endif
	/* If this is a final frame, does not do cross-word transition */
	if (final_for_multipath) continue;
	/* no cross-word transition is needed in isolated word recognition mode */
	if (lmvar == LM_DFA_WORD) continue;

	/******************************/
	/* 2.5. cross-word transition */
	/******************************/

#ifdef UNIGRAM_FACTORING
	/* Only the isolated words will be processed here.
	   The shared nodes with constant factoring values will be computed
	   after this loop */
#endif
	beam_inter_word(wchmm, d, &tk, tre, j);

      } /* end of cross-word processing */

    } /* end of main viterbi loop */



  } else {

    /*********************************/
    /* NORMAL MODE */
    /*********************************/

    for (j = d->n_start; j <= d->n_end; j++) {
      /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
      tk = &(d->tlist[tl][d->tindex[tl][j]]);
      if (tk->score <= LOG_ZERO) continue; /* invalid node */
      node = tk->node;

      /*********************************/
      /* 2.1. word-internal transition */
      /*********************************/
      beam_intra_word(wchmm, d, &tk, j);

      /* if source node is end state of a word, */
      if (wchmm->stend[node] != WORD_INVALID) {

	/**************************/
	/* 2.2. save trellis word */
	/**************************/
#ifdef SPSEGMENT_NAIST
	if (r->config->successive.enabled && !d->after_trigger) {
	  tre = tk->last_tre;	/* dummy */
	} else {
	  tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
	}
#else
	tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
#endif
	/* no cross-word transition is needed in isolated word recognition mode */
	if (lmvar == LM_DFA_WORD) continue;

	/******************************/
	/* 2.3. cross-word transition */
	/******************************/

#ifdef UNIGRAM_FACTORING
	/* Only the isolated words will be processed here.
02801 The shared nodes with constant factoring values will be computed 02802 after this loop */ 02803 #endif 02804 02805 beam_inter_word(wchmm, d, &tk, tre, j); 02806 02807 } /* end of cross-word processing */ 02808 02809 } /* end of main viterbi loop */ 02810 02811 02812 } 02813 02814 #ifdef UNIGRAM_FACTORING 02815 #ifndef WPAIR 02816 02817 if (lmtype == LM_PROB) { 02818 02819 /***********************************************************/ 02820 /* 2.x 単語終端からfactoring付き単語先頭への遷移 ***********/ 02821 /* transition from wordend to shared (factorized) nodes */ 02822 /***********************************************************/ 02823 /* d->wordend_best_* holds the best word ends at this frame. */ 02824 if (d->wordend_best_score > LOG_ZERO) { 02825 beam_inter_word_factoring(wchmm, d); 02826 } 02827 } 02828 #endif 02829 #endif /* UNIGRAM_FACTORING */ 02830 02831 /***************************************/ 02832 /* 3. 状態の出力確率計算 */ 02833 /* compute state output probability */ 02834 /***************************************/ 02835 02836 /* 次段の有効ノードについて出力確率を計算してスコアに加える */ 02837 /* compute outprob for new valid (token assigned) nodes and add to score */ 02838 /* 今扱っているのが入力の最終フレームの場合出力確率は計算しない */ 02839 /* don't calculate the last frame (transition only) */ 02840 02841 if (wchmm->hmminfo->multipath) { 02842 if (! final_for_multipath) { 02843 for (j = 0; j < d->tnum[tn]; j++) { 02844 tk = &(d->tlist[tn][d->tindex[tn][j]]); 02845 /* skip non-output state */ 02846 if (wchmm->state[tk->node].out.state == NULL) continue; 02847 tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); 02848 } 02849 } 02850 } else { 02851 for (j = 0; j < d->tnum[tn]; j++) { 02852 tk = &(d->tlist[tn][d->tindex[tn][j]]); 02853 tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); 02854 } 02855 } 02856 02857 /*******************************************************/ 02858 /* 4. 
スコアでトークンをソートしビーム幅分の上位を決定 */ 02859 /* sort tokens by score up to beam width */ 02860 /*******************************************************/ 02861 02862 /* tlist[tl]を次段のためにリセット */ 02863 clear_tlist(d, tl); 02864 02865 /* ヒープソートを用いてこの段のノード集合から上位(bwidth)個を得ておく */ 02866 /* (上位内の順列は必要ない) */ 02867 sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); 02868 /***************/ 02869 /* 5. 終了処理 */ 02870 /* finalize */ 02871 /***************/ 02872 02873 #ifdef SPSEGMENT_NAIST 02874 if (!r->config->successive.enabled || d->after_trigger) { 02875 #endif 02876 02877 /* call frame-wise callback */ 02878 r->have_interim = FALSE; 02879 if (t > 0) { 02880 if (r->config->output.progout_flag) { 02881 /* 漸次出力: 現フレームのベストパスを一定時間おきに上書き出力 */ 02882 /* progressive result output: output current best path in certain time interval */ 02883 if (((t-1) % r->config->output.progout_interval_frame) == 0) { 02884 r->have_interim = TRUE; 02885 bt_current_max(r, t-1); 02886 } 02887 } 02888 } 02889 02890 /* jlog("DEBUG: %d: %d\n",t,tnum[tn]); */ 02891 /* for debug: output current max word */ 02892 if (debug2_flag) { 02893 bt_current_max_word(r, t-1); 02894 } 02895 02896 #ifdef DETERMINE 02897 if (lmvar == LM_DFA_WORD) { 02898 check_determine_word(r, t-1); 02899 } 02900 #endif 02901 02902 #ifdef SPSEGMENT_NAIST 02903 } 02904 #endif 02905 02906 /* ビーム内ノード数が 0 になってしまったら,強制終了 */ 02907 if (d->tnum[tn] == 0) { 02908 jlog("ERROR: get_back_trellis_proceed: %02d %s: frame %d: no nodes left in beam, now terminates search\n", r->config->id, r->config->name, t); 02909 return(FALSE); 02910 } 02911 02912 return(TRUE); 02913 02914 } 02915 02916 /*************************************************/ 02917 /* frame synchronous beam search --- last frame */ 02918 /* フレーム同期ビーム探索の実行 --- 最終フレーム */ 02919 /*************************************************/ 02920 02946 void 02947 get_back_trellis_end(HTK_Param *param, RecogProcess *r) 02948 { 02949 WCHMM_INFO *wchmm; 02950 FSBeam *d; 02951 int j; 02952 
TOKEN2 *tk; 02953 02954 wchmm = r->wchmm; 02955 d = &(r->pass1); 02956 02957 /* 最後にビーム内に残った単語終端トークンを処理する */ 02958 /* process the last wordend tokens */ 02959 02960 02961 if (r->am->hmminfo->multipath) { 02962 /* MULTI-PATH VERSION */ 02963 02964 /* 単語末ノードへの遷移のみ計算 */ 02965 /* only arcs to word-end node is calculated */ 02966 get_back_trellis_proceed(param->samplenum, param, r, TRUE); 02967 02968 } else { 02969 /* NORMAL VERSION */ 02970 02971 /* 最後の遷移のあとの単語終端処理を行う */ 02972 /* process the word-ends at the last frame */ 02973 d->tl = d->tn; 02974 if (d->tn == 0) d->tn = 1; else d->tn = 0; 02975 for (j = d->n_start; j <= d->n_end; j++) { 02976 tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); 02977 if (wchmm->stend[tk->node] != WORD_INVALID) { 02978 save_trellis(r->backtrellis, wchmm, tk, param->samplenum, TRUE); 02979 } 02980 } 02981 02982 } 02983 02984 } 02985 02986 /*************************/ 02987 /* 探索終了 --- 終了処理 */ 02988 /* end of search */ 02989 /*************************/ 03024 void 03025 finalize_1st_pass(RecogProcess *r, int len) 03026 { 03027 BACKTRELLIS *backtrellis; 03028 03029 backtrellis = r->backtrellis; 03030 03031 backtrellis->framelen = len; 03032 03033 /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */ 03034 /* re-arrange backtrellis: index them by frame, and sort by word ID */ 03035 03036 bt_relocate_rw(backtrellis); 03037 bt_sort_rw(backtrellis); 03038 if (backtrellis->num == NULL) { 03039 if (backtrellis->framelen > 0) { 03040 jlog("WARNING: %02d %s: input processed, but no survived word found\n", r->config->id, r->config->name); 03041 } 03042 /* reognition failed */ 03043 r->result.status = J_RESULT_STATUS_FAIL; 03044 return; 03045 } 03046 03047 /* 第1パスのベストパスを結果に格納する */ 03048 /* store 1st pass result (best hypothesis) to result */ 03049 if (r->lmvar == LM_DFA_WORD) { 03050 find_1pass_result_word(len, r); 03051 } else { 03052 find_1pass_result(len, r); 03053 } 03054 } 03055 03070 void 03071 fsbeam_free(FSBeam *d) 03072 { 03073 free_nodes(d); 03074 if 
(d->pausemodelnames != NULL) { 03075 free(d->pausemodelnames); 03076 free(d->pausemodel); 03077 } 03078 } 03079 03080 /* end of file */