Julius 4.2
|
00001 00048 /* 00049 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00050 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00051 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00052 * All rights reserved 00053 */ 00054 00055 #include <julius/julius.h> 00056 00057 #undef DEBUG 00058 00059 00060 /* ---------------------------------------------------------- */ 00061 /* 第1パスの結果処理 */ 00062 /* end procedure to get result of 1st pass */ 00063 /* ---------------------------------------------------------- */ 00064 00065 #ifdef WORD_GRAPH 00066 00096 static void 00097 generate_lattice(int frame, RecogProcess *r) 00098 { 00099 BACKTRELLIS *bt; 00100 WORD_INFO *winfo; 00101 TRELLIS_ATOM *ta; 00102 int i, j; 00103 LOGPROB l; 00104 WordGraph *new; 00105 00106 bt = r->backtrellis; 00107 winfo = r->lm->winfo; 00108 00109 if (frame >= 0) { 00110 for (i=0;i<bt->num[frame];i++) { 00111 ta = bt->rw[frame][i]; 00112 /* words will be saved as a part of graph only if any of its 00113 following word has been survived in a beam */ 00114 if (! ta->within_context) continue; /* not a candidate */ 00115 if (ta->within_wordgraph) continue; /* already marked */ 00116 /* mark */ 00117 ta->within_wordgraph = TRUE; 00118 00119 new = (WordGraph *)mymalloc(sizeof(WordGraph)); 00120 new->wid = ta->wid; 00121 new->lefttime = ta->begintime; 00122 new->righttime = ta->endtime; 00123 new->fscore_head = ta->backscore; 00124 new->fscore_tail = 0.0; 00125 new->gscore_head = 0.0; 00126 new->gscore_tail = 0.0; 00127 new->lscore_tmp = ta->lscore; 00128 #ifdef CM_SEARCH 00129 new->cmscore = 0.0; 00130 #endif 00131 new->forward_score = new->backward_score = 0.0; 00132 new->headphone = winfo->wseq[ta->wid][0]; 00133 new->tailphone = winfo->wseq[ta->wid][winfo->wlen[ta->wid]-1]; 00134 00135 new->leftwordmaxnum = FANOUTSTEP; 00136 new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum); 00137 new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum); 00138 new->leftwordnum = 0; 00139 new->rightwordmaxnum = FANOUTSTEP; 00140 new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum); 00141 new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum); 00142 new->rightwordnum = 0; 00143 00144 l = ta->backscore; 00145 if (ta->last_tre->wid != WORD_INVALID) { 00146 l -= ta->last_tre->backscore; 00147 } 00148 l -= ta->lscore; 00149 new->amavg = l / (float)(ta->endtime - ta->begintime + 1); 00150 00151 #ifdef GRAPHOUT_DYNAMIC 00152 new->purged = FALSE; 00153 #endif 00154 new->saved = FALSE; 00155 new->graph_cm = 0.0; 00156 new->mark = FALSE; 00157 00158 new->next = r->result.wg1; 00159 r->result.wg1 = new; 00160 00161 /* recursive call */ 00162 generate_lattice(ta->last_tre->endtime, r); 00163 } 00164 } 00165 } 00166 00183 static void 00184 link_lattice_by_time(WordGraph *root) 00185 { 00186 WordGraph *wg; 00187 WordGraph *wtmp; 00188 int lefttime, righttime; 00189 00190 for(wg=root;wg;wg=wg->next) { 00191 00192 for(wtmp=root;wtmp;wtmp=wtmp->next) { 00193 if (wg->righttime + 1 == wtmp->lefttime) { 00194 wordgraph_check_and_add_leftword(wtmp, wg, wtmp->lscore_tmp); 00195 wordgraph_check_and_add_rightword(wg, wtmp, wtmp->lscore_tmp); 00196 } 00197 if (wtmp->righttime + 1 == wg->lefttime) { 00198 wordgraph_check_and_add_leftword(wg, wtmp, wg->lscore_tmp); 00199 wordgraph_check_and_add_rightword(wtmp, wg, wg->lscore_tmp); 00200 } 00201 } 00202 } 00203 00204 } 00205 00219 static void 00220 re_compute_lattice_lm(WordGraph *root, WCHMM_INFO *wchmm) 00221 { 00222 int i; 00223 00224 for(wg=root;wg;wg=wg->next) { 00225 for(i=0;i<wg->leftwordnum;i++) { 00226 wg->left_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->leftword[i]->wid], wchmm->winfo->wton[wg->wid]); 00227 } 00228 for(i=0;i<wg->rightwordnum;i++) { 00229 wg->right_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->wid], wchmm->winfo->wton[wg->rightword[i]->wid]); 00230 } 00231 } 00232 } 00233 00234 #endif 00235 00250 static void 00251 put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo) 00252 { 00253 int i; 00254 jlog("DEBUG: %3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime, 00255 atom->backscore, winfo->wname[atom->wid], atom->wid); 00256 for (i=0;i<winfo->wlen[atom->wid]; i++) { 00257 jlog(" %s",winfo->wseq[atom->wid][i]->name); 00258 } 00259 jlog("\n"); 00260 } 00261 00292 static LOGPROB 00293 trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo) 00294 { 00295 int wordlen = 0; /* word length of best sentence hypothesis */ 00296 TRELLIS_ATOM *tretmp; 00297 LOGPROB langscore = 0.0; 00298 WORD_ID wordseq[MAXSEQNUM]; /* temporal: in reverse order */ 00299 int i; 00300 00301 /* initialize */ 00302 wordseq[0] = atom->wid; /* start from specified atom */ 00303 wordlen = 1; 00304 tretmp = atom; 00305 langscore += tretmp->lscore; 00306 if (debug2_flag) { 00307 put_atom(tretmp, winfo); 00308 } 00309 00310 /* trace the backtrellis */ 00311 while (tretmp->begintime > 0) {/* until beginning of input */ 00312 tretmp = tretmp->last_tre; 00313 /* t = tretmp->boundtime - 1; 00314 tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/ 00315 if (tretmp == NULL) { /* should not happen */ 00316 j_internal_error("trace_backptr: last trellis missing while backtracking"); 00317 } 00318 langscore += tretmp->lscore; 00319 wordseq[wordlen] = tretmp->wid; 00320 wordlen++; 00321 if (debug2_flag) { 00322 put_atom(tretmp, winfo); 00323 } 00324 if (wordlen >= MAXSEQNUM) { 00325 j_internal_error("trace_backptr: sentence length exceeded ( > %d)\n",MAXSEQNUM); 00326 } 00327 } 00328 *rt_wordlen = wordlen; 00329 /* reverse order -> normal order */ 00330 for(i=0;i<wordlen;i++) wordseq_rt[i] = wordseq[wordlen-i-1]; 00331 return(langscore); 00332 } 00333 00370 static void 00371 find_1pass_result(int framelen, RecogProcess *r) 00372 { 00373 BACKTRELLIS *backtrellis; 00374 WORD_INFO *winfo; 00375 WORD_ID wordseq[MAXSEQNUM]; 00376 int wordlen; 00377 int i; 00378 TRELLIS_ATOM *best; 00379 int last_time; 00380 LOGPROB total_lscore; 00381 LOGPROB maxscore; 00382 TRELLIS_ATOM *tmp; 00383 #ifdef SPSEGMENT_NAIST 00384 boolean ok_p; 00385 #endif 00386 00387 backtrellis = r->backtrellis; 00388 winfo = r->lm->winfo; 00389 00390 /* look for the last trellis word */ 00391 00392 if (r->lmtype == LM_PROB) { 00393 00394 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00395 00396 maxscore = LOG_ZERO; 00397 for (i=0;i<backtrellis->num[last_time];i++) { 00398 tmp = backtrellis->rw[last_time][i]; 00399 #ifdef WORD_GRAPH 00400 /* treat only words on a graph path */ 00401 if (!tmp->within_context) continue; 00402 #endif 00403 if (r->config->successive.enabled) { 00404 /* short-pause segmentation mode */ 00405 /* 最終フレームに残った最大スコアの単語 */ 00406 /* it should be the best trellis word on the last frame */ 00407 if (maxscore < tmp->backscore) { 00408 maxscore = tmp->backscore; 00409 best = tmp; 00410 } 00411 } else { 00412 /* not segmentation mode */ 00413 /* 最終単語は winfo->tail_silwid に固定 */ 00414 /* it is fixed to the tail silence model (winfo->tail_silwid) */ 00415 if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) { 00416 maxscore = tmp->backscore; 00417 best = tmp; 00418 break; 00419 } 00420 } 00421 } 00422 if (maxscore != LOG_ZERO) break; 00423 } 00424 00425 if (last_time < 0) { /* not found */ 00426 jlog("WARNING: %02d %s: no tail silence word survived on the last frame, search failed\n", r->config->id, r->config->name); 00427 r->result.status = J_RESULT_STATUS_FAIL; 00428 //callback_exec(CALLBACK_RESULT, r); 00429 return; 00430 } 00431 00432 } 00433 00434 if (r->lmtype == LM_DFA) { 00435 00436 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00437 00438 /* 末尾に残った単語の中で最大スコアの単語(cp_endは使用しない) */ 00439 /* the best trellis word on the last frame (not use cp_end[]) */ 00440 maxscore = LOG_ZERO; 00441 for (i=0;i<backtrellis->num[last_time];i++) { 00442 tmp = backtrellis->rw[last_time][i]; 00443 #ifdef WORD_GRAPH 00444 /* treat only words on a graph path */ 00445 if (!tmp->within_context) continue; 00446 #endif 00447 /* if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/ 00448 if (maxscore < tmp->backscore) { 00449 maxscore = tmp->backscore; 00450 best = tmp; 00451 } 00452 /* }*/ 00453 } 00454 if (maxscore != LOG_ZERO) break; 00455 } 00456 00457 if (last_time < 0) { /* not found */ 00458 jlog("WARNING: %02d %s: no sentence-end word survived on last beam\n", r->config->id, r->config->name); 00459 r->result.status = J_RESULT_STATUS_FAIL; 00460 //callback_exec(CALLBACK_RESULT, r); 00461 return; 00462 } 00463 00464 } 00465 00466 /* traceback word trellis from the best word */ 00467 total_lscore = trace_backptr(wordseq, &wordlen, best, r->lm->winfo); 00468 #ifdef SPSEGMENT_NAIST 00469 if (r->config->successive.enabled) { 00470 /* on segmentation mode, recognition result that only consists of 00471 short-pause words will be treated as recognition rejection */ 00472 ok_p = FALSE; 00473 for(i=0;i<wordlen;i++) { 00474 if (! is_sil(wordseq[i], r)) ok_p = TRUE; 00475 } 00476 if (ok_p == FALSE) { 00477 r->result.status = J_RESULT_STATUS_ONLY_SILENCE; 00478 return; 00479 } 00480 } 00481 #endif 00482 00483 /* just flush last progress output */ 00484 /* 00485 if (recog->jconf->output.progout_flag) { 00486 recog->result.status = 1; 00487 recog->result.num_frame = last_time; 00488 recog->result.pass1.word = wordseq; 00489 recog->result.pass1.word_num = wordlen; 00490 recog->result.pass1.score = best->backscore; 00491 recog->result.pass1.score_lm = total_lscore; 00492 recog->result.pass1.score_am = best->backscore - total_lscore; 00493 //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog); 00494 }*/ 00495 00496 /* output 1st pass result */ 00497 if (verbose_flag || ! r->config->output.progout_flag) { 00498 r->result.status = J_RESULT_STATUS_SUCCESS; 00499 r->result.num_frame = framelen; 00500 for(i=0;i<wordlen;i++) r->result.pass1.word[i] = wordseq[i]; 00501 r->result.pass1.word_num = wordlen; 00502 r->result.pass1.score = best->backscore; 00503 r->result.pass1.score_lm = total_lscore; 00504 r->result.pass1.score_am = best->backscore - total_lscore; 00505 //callback_exec(CALLBACK_RESULT_PASS1, r); 00506 } 00507 00508 /* store the result to global val (notice: in reverse order) */ 00509 for(i=0;i<wordlen;i++) r->pass1_wseq[i] = wordseq[i]; 00510 r->pass1_wnum = wordlen; 00511 r->pass1_score = best->backscore; 00512 00513 #ifdef WORD_GRAPH 00514 /* 単語トレリスから,ラティスを生成する */ 00515 /* generate word graph from the word trellis */ 00516 r->peseqlen = backtrellis->framelen; 00517 r->result.wg1 = NULL; 00518 generate_lattice(last_time, r); 00519 link_lattice_by_time(r->result.wg1); 00520 if (r->lmtype == LM_PROB) re_compute_lattice_lm(r->result.wg1, r->wchmm); 00521 r->result.wg1_num = wordgraph_sort_and_annotate_id(&(r->result.wg1), r); 00522 /* compute graph CM by forward-backward processing */ 00523 graph_forward_backward(r->result.wg1, r); 00524 //callback_exec(CALLBACK_RESULT_PASS1_GRAPH, r); 00525 //wordgraph_clean(&(r->result.wg1)); 00526 #endif 00527 00528 } 00529 00548 static int 00549 compare_backscore(TRELLIS_ATOM **x1, TRELLIS_ATOM **x2) 00550 { 00551 return((*x2)->backscore - (*x1)->backscore); 00552 } 00553 00573 static void 00574 find_1pass_result_word(int framelen, RecogProcess *r) 00575 { 00576 BACKTRELLIS *bt; 00577 TRELLIS_ATOM *best, *tmp; 00578 int last_time; 00579 Sentence *s; 00580 #ifdef CONFIDENCE_MEASURE 00581 LOGPROB sum; 00582 #endif 00583 LOGPROB maxscore; 00584 int i; 00585 TRELLIS_ATOM **idx; 00586 int num; 00587 00588 if (r->lmvar != LM_DFA_WORD) return; 00589 00590 bt = r->backtrellis; 00591 for (last_time = framelen - 1; last_time >= 0; last_time--) { 00592 maxscore = LOG_ZERO; 00593 for (i=0;i<bt->num[last_time];i++) { 00594 tmp = bt->rw[last_time][i]; 00595 #ifdef WORD_GRAPH 00596 /* treat only words on a graph path */ 00597 if (!tmp->within_context) continue; 00598 #endif 00599 if (maxscore < tmp->backscore) { 00600 maxscore = tmp->backscore; 00601 best = tmp; 00602 } 00603 } 00604 if (maxscore != LOG_ZERO) break; 00605 } 00606 00607 if (last_time < 0) { /* not found */ 00608 jlog("WARNING: %02d %s: no word survived on the last frame, search failed\n", r->config->id, r->config->name); 00609 r->result.status = J_RESULT_STATUS_FAIL; 00610 //callback_exec(CALLBACK_RESULT, r); 00611 return; 00612 } 00613 00614 #ifdef CONFIDENCE_MEASURE 00615 sum = 0.0; 00616 for (i=0;i<bt->num[last_time];i++) { 00617 tmp = bt->rw[last_time][i]; 00618 #ifdef WORD_GRAPH 00619 /* treat only words on a graph path */ 00620 if (!tmp->within_context) continue; 00621 #endif 00622 sum += pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)); 00623 } 00624 #endif 00625 00626 /* set recognition result status to normal */ 00627 r->result.status = J_RESULT_STATUS_SUCCESS; 00628 00629 if (r->config->output.output_hypo_maxnum > 1) { 00630 /* more than one candidate is requested */ 00631 00632 /* get actual number of candidates to output */ 00633 num = r->config->output.output_hypo_maxnum; 00634 if (num > bt->num[last_time]) { 00635 num = bt->num[last_time]; 00636 } 00637 00638 /* prepare result storage */ 00639 result_sentence_malloc(r, num); 00640 r->result.sentnum = num; 00641 00642 /* sort by score */ 00643 idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]); 00644 for (i=0;i<bt->num[last_time];i++) { 00645 idx[i] = bt->rw[last_time][i]; 00646 } 00647 qsort(idx, bt->num[last_time], sizeof(TRELLIS_ATOM *), 00648 (int (*)(const void *,const void *))compare_backscore); 00649 00650 /* store to result storage */ 00651 for(i=0;i<r->result.sentnum;i++) { 00652 s = &(r->result.sent[i]); 00653 tmp = idx[i]; 00654 s->word_num = 1; 00655 s->word[0] = tmp->wid; 00656 #ifdef CONFIDENCE_MEASURE 00657 s->confidence[0] = pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)) / sum; 00658 #endif 00659 s->score = tmp->backscore; 00660 s->score_lm = 0.0; 00661 s->score_am = tmp->backscore; 00662 if (multigram_get_all_num(r->lm) > 0) { 00663 s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); 00664 } else { 00665 s->gram_id = 0; 00666 } 00667 } 00668 /* free work area for sort */ 00669 free(idx); 00670 00671 } else { /* only max is needed */ 00672 00673 /* prepare result storage */ 00674 result_sentence_malloc(r, 1); 00675 r->result.sentnum = 1; 00676 s = &(r->result.sent[0]); 00677 s->word_num = 1; 00678 s->word[0] = best->wid; 00679 #ifdef CONFIDENCE_MEASURE 00680 s->confidence[0] = 1.0 / sum; 00681 #endif 00682 s->score = best->backscore; 00683 s->score_lm = 0.0; 00684 s->score_am = best->backscore; 00685 if (multigram_get_all_num(r->lm) > 0) { 00686 s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); 00687 } else { 00688 s->gram_id = 0; 00689 } 00690 } 00691 00692 /* copy as 1st pass result */ 00693 memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence)); 00694 r->result.pass1.align = NULL; 00695 00696 //callback_exec(CALLBACK_RESULT, r); 00697 //free(r->result.sent); 00698 } 00699 00700 00701 #ifdef DETERMINE 00702 00730 static TRELLIS_ATOM * 00731 determine_word(RecogProcess *r, int t, TRELLIS_ATOM *tremax, LOGPROB thres, int countthres) 00732 { 00733 TRELLIS_ATOM *ret; 00734 WORD_ID w; 00735 00736 //LOGPROB sum; 00737 //LOGPROB cm; 00738 00739 int j; 00740 FSBeam *d; 00741 TOKEN2 *tk; 00742 00743 if (tremax == NULL) { 00744 /* initialize */ 00745 r->determine_count = 0; 00746 r->determine_maxnodescore = LOG_ZERO; 00747 r->determined = FALSE; 00748 r->determine_last_wid = WORD_INVALID; 00749 r->have_determine = FALSE; 00750 return NULL; 00751 } 00752 00753 ret = NULL; 00754 00755 /* get confidence score of the maximum word hypothesis */ 00756 /* 00757 * sum = 0.0; 00758 * tre = recog->backtrellis->list; 00759 * while (tre != NULL && tre->endtime == t) { 00760 * sum += pow(10, recog->jconf->annotate.cm_alpha * (tre->backscore - tremax->backscore)); 00761 * tre = tre->next; 00762 * } 00763 * cm = 1.0 / sum; 00764 */ 00765 00766 /* determinization decision */ 00767 w = tremax->wid; 00768 00769 r->have_determine = FALSE; 00770 00771 /* determine by score threshold from maximum node score to maximum word end node score */ 00772 if (r->determine_last_wid == w && r->determine_maxnodescore - tremax->backscore <= thres) { 00773 r->determine_count++; 00774 if (r->determine_count > countthres) { 00775 if (r->determined == FALSE) { 00776 ret = tremax; 00777 r->determined = TRUE; 00778 r->have_determine = TRUE; 00779 } 00780 } 00781 } else { 00782 r->determine_count = 0; 00783 } 00784 00785 //printf("determine: %d: %s: cm=%f, relscore=%f, count=%d, phase=%d\n", t, recog->model->winfo->woutput[w], cm, determine_maxnodescore - tremax->backscore, count, phase); 00786 r->determine_last_wid = w; 00787 00788 /* update maximum node score here for next call, since 00789 the word path determination is always one frame later */ 00790 d = &(r->pass1); 00791 r->determine_maxnodescore = LOG_ZERO; 00792 for (j = d->n_start; j <= d->n_end; j++) { 00793 tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); 00794 if (r->determine_maxnodescore < tk->score) r->determine_maxnodescore = tk->score; 00795 } 00796 00797 return(ret); 00798 } 00799 00819 static void 00820 check_determine_word(RecogProcess *r, int t) 00821 { 00822 TRELLIS_ATOM *tre; 00823 TRELLIS_ATOM *tremax; 00824 LOGPROB maxscore; 00825 00826 /* bt->list is ordered by time frame */ 00827 maxscore = LOG_ZERO; 00828 tremax = NULL; 00829 tre = r->backtrellis->list; 00830 while (tre != NULL && tre->endtime == t) { 00831 if (maxscore < tre->backscore) { 00832 maxscore = tre->backscore; 00833 tremax = tre; 00834 } 00835 tre = tre->next; 00836 } 00837 00838 r->result.status = J_RESULT_STATUS_SUCCESS; 00839 r->result.num_frame = t; 00840 00841 if (maxscore != LOG_ZERO) { 00842 // if ((tre = determine_word(recog, t, tremax, 0.9, 17)) != NULL) { 00843 if ((tre = determine_word(r, t, tremax, r->config->pass1.determine_score_thres, r->config->pass1.determine_duration_thres)) != NULL) { 00844 r->result.pass1.word[0] = tremax->wid; 00845 r->result.pass1.word_num = 1; 00846 r->result.pass1.score = tremax->backscore; 00847 r->result.pass1.score_lm = 0.0; 00848 r->result.pass1.score_am = tremax->backscore; 00849 r->result.num_frame = t; 00850 //callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, r); 00851 } 00852 } 00853 00854 00855 } 00856 00857 #endif /* DETERMINE */ 00858 00874 static void 00875 bt_current_max(RecogProcess *r, int t) 00876 { 00877 int wordlen; 00878 TRELLIS_ATOM *tre; 00879 TRELLIS_ATOM *tremax; 00880 LOGPROB maxscore; 00881 LOGPROB lscore; 00882 00883 /* bt->list is ordered by time frame */ 00884 maxscore = LOG_ZERO; 00885 tremax = NULL; 00886 tre = r->backtrellis->list; 00887 while (tre != NULL && tre->endtime == t) { 00888 if (maxscore < tre->backscore) { 00889 maxscore = tre->backscore; 00890 tremax = tre; 00891 } 00892 tre = tre->next; 00893 } 00894 00895 r->result.status = J_RESULT_STATUS_SUCCESS; 00896 r->result.num_frame = t; 00897 00898 if (maxscore == LOG_ZERO) { 00899 r->result.pass1.word_num = 0; 00900 } else { 00901 if (r->lmvar == LM_DFA_WORD) { 00902 r->result.pass1.word[0] = tremax->wid; 00903 r->result.pass1.word_num = 1; 00904 r->result.pass1.score = tremax->backscore; 00905 r->result.pass1.score_lm = 0.0; 00906 r->result.pass1.score_am = tremax->backscore; 00907 } else { 00908 lscore = trace_backptr(r->result.pass1.word, &wordlen, tremax, r->lm->winfo); 00909 r->result.pass1.word_num = wordlen; 00910 r->result.pass1.score = tremax->backscore; 00911 r->result.pass1.score_lm = lscore; 00912 r->result.pass1.score_am = tremax->backscore; 00913 } 00914 } 00915 //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, r); 00916 } 00917 00933 static void 00934 bt_current_max_word(RecogProcess *r, int t) 00935 { 00936 00937 TRELLIS_ATOM *tre; 00938 TRELLIS_ATOM *tremax; 00939 LOGPROB maxscore; 00940 WORD_ID w; 00941 00942 /* bt->list は時間順に格納されている */ 00943 /* bt->list is order by time */ 00944 maxscore = LOG_ZERO; 00945 tremax = NULL; 00946 tre = r->backtrellis->list; 00947 while (tre != NULL && tre->endtime == t) { 00948 if (maxscore < tre->backscore) { 00949 maxscore = tre->backscore; 00950 tremax = tre; 00951 } 00952 tre = tre->next; 00953 } 00954 00955 if (maxscore != LOG_ZERO) { 00956 jlog("DEBUG: %3d: ",t); 00957 w = tremax->wid; 00958 jlog("\"%s [%s]\"(id=%d)", 00959 r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); 00960 jlog(" [%d-%d] %f", tremax->begintime, t, tremax->backscore); 00961 w = tremax->last_tre->wid; 00962 if (w != WORD_INVALID) { 00963 jlog(" <- \"%s [%s]\"(id=%d)\n", 00964 r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); 00965 } else { 00966 jlog(" <- bgn\n"); 00967 } 00968 } 00969 } 00970 00971 00972 /* -------------------------------------------------------------------- */ 00973 /* ビーム探索中のトークンを扱うサブ関数 */ 00974 /* functions to handle hypothesis tokens */ 00975 /* -------------------------------------------------------------------- */ 00976 00995 static void 00996 malloc_nodes(FSBeam *d, int n, int ntoken_init) 00997 { 00998 d->totalnodenum = n; 00999 d->token = (TOKENID *)mymalloc(sizeof(TOKENID) * d->totalnodenum); 01000 //d->maxtnum = ntoken_init; 01001 if (d->maxtnum < ntoken_init) d->maxtnum = ntoken_init; 01002 d->tlist[0] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); 01003 d->tlist[1] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); 01004 d->tindex[0] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); 01005 d->tindex[1] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); 01006 //d->expand_step = ntoken_step; 01007 d->nodes_malloced = TRUE; 01008 d->expanded = FALSE; 01009 } 01010 01023 static void 01024 expand_tlist(FSBeam *d) 01025 { 01026 d->maxtnum += d->expand_step; 01027 d->tlist[0] = (TOKEN2 *)myrealloc(d->tlist[0],sizeof(TOKEN2) * d->maxtnum); 01028 d->tlist[1] = (TOKEN2 *)myrealloc(d->tlist[1],sizeof(TOKEN2) * d->maxtnum); 01029 d->tindex[0] = (TOKENID *)myrealloc(d->tindex[0],sizeof(TOKENID) * d->maxtnum); 01030 d->tindex[1] = (TOKENID *)myrealloc(d->tindex[1],sizeof(TOKENID) * d->maxtnum); 01031 if (debug2_flag) jlog("STAT: token space expanded to %d\n", d->maxtnum); 01032 d->expanded = TRUE; 01033 } 01034 01052 static void 01053 prepare_nodes(FSBeam *d, int ntoken_step) 01054 { 01055 d->tnum[0] = d->tnum[1] = 0; 01056 if (d->expand_step < ntoken_step) d->expand_step = ntoken_step; 01057 } 01058 01073 static void 01074 free_nodes(FSBeam *d) 01075 { 01076 if (d->nodes_malloced) { 01077 free(d->token); 01078 free(d->tlist[0]); 01079 free(d->tlist[1]); 01080 free(d->tindex[0]); 01081 free(d->tindex[1]); 01082 d->nodes_malloced = FALSE; 01083 } 01084 } 01085 01100 static void 01101 clear_tlist(FSBeam *d, int tt) 01102 { 01103 d->tnum[tt] = 0; 01104 } 01105 01120 static void 01121 clear_tokens(FSBeam *d, int tt) 01122 { 01123 int j; 01124 /* initialize active token list: only clear ones used in the last call */ 01125 for (j=0; j<d->tnum[tt]; j++) { 01126 d->token[d->tlist[tt][j].node] = TOKENID_UNDEFINED; 01127 } 01128 } 01129 01146 static TOKENID 01147 create_token(FSBeam *d) 01148 { 01149 TOKENID newid; 01150 int tn; 01151 tn = d->tn; 01152 newid = d->tnum[tn]; 01153 d->tnum[tn]++; 01154 while (d->tnum[tn]>=d->maxtnum) expand_tlist(d); 01155 d->tindex[tn][newid] = newid; 01156 #ifdef WPAIR 01157 /* initialize link */ 01158 d->tlist[tn][newid].next = TOKENID_UNDEFINED; 01159 #endif 01160 return(newid); 01161 } 01162 01192 static void 01193 node_assign_token(FSBeam *d, int node, TOKENID tkid) 01194 { 01195 #ifdef WPAIR 01196 /* add to link list */ 01197 d->tlist[d->tn][tkid].next = d->token[node]; 01198 #endif 01199 d->token[node] = tkid; 01200 d->tlist[d->tn][tkid].node = node; 01201 } 01202 01239 static TOKENID 01240 node_exist_token(FSBeam *d, int tt, int node, WORD_ID wid) 01241 { 01242 #ifdef WPAIR 01243 /* In word-pair mode, multiple tokens are assigned to a node as a list. 01244 so we have to search for tokens with same last word ID */ 01245 #ifdef WPAIR_KEEP_NLIMIT 01246 /* 1ノードごとに保持するtoken数の上限を設定 */ 01247 /* tokenが無いが上限に達しているときは一番スコアの低いtokenを上書きする */ 01248 /* N-best: limit number of assigned tokens to a node */ 01249 int i = 0; 01250 TOKENID lowest_token = TOKENID_UNDEFINED; 01251 #endif 01252 TOKENID tmp; 01253 for(tmp=d->token[node]; tmp != TOKENID_UNDEFINED; tmp=d->tlist[tt][tmp].next) { 01254 if (d->tlist[tt][tmp].last_tre->wid == wid) { 01255 return(tmp); 01256 } 01257 #ifdef WPAIR_KEEP_NLIMIT 01258 if (lowest_token == TOKENID_UNDEFINED || 01259 d->tlist[tt][lowest_token].score > d->tlist[tt][tmp].score) 01260 lowest_token = tmp; 01261 if (++i >= d->wpair_keep_nlimit) break; 01262 #endif 01263 } 01264 #ifdef WPAIR_KEEP_NLIMIT 01265 if (i >= d->wpair_keep_nlimit) { /* overflow, overwrite lowest score */ 01266 return(lowest_token); 01267 } else { 01268 return(TOKENID_UNDEFINED); 01269 } 01270 #else 01271 return(TOKENID_UNDEFINED); 01272 #endif 01273 01274 #else /* not WPAIR */ 01275 /* 1つだけ保持,これを常に上書き */ 01276 /* Only one token is kept in 1-best mode (default), so 01277 simply return the ID */ 01278 return(d->token[node]); 01279 #endif 01280 } 01281 01282 #ifdef DEBUG 01283 /* tlist と token の対応をチェックする(debug) */ 01284 /* for debug: check tlist <-> token correspondence 01285 where tlist[tt][tokenID].node = nodeID and 01286 token[nodeID] = tokenID 01287 */ 01288 static void 01289 node_check_token(FSBeam *d, int tt) 01290 { 01291 int i; 01292 for(i=0;i<d->tnum[tt];i++) { 01293 if (node_exist_token(d, tt, d->tlist[tt][i].node, d->tlist[tt][i].last_tre->wid) != i) { 01294 jlog("ERROR: token %d not found on node %d\n", i, d->tlist[tt][i].node); 01295 } 01296 } 01297 } 01298 #endif 01299 01300 01301 01302 /* -------------------------------------------------------------------- */ 01303 /* トークンをソートし 上位 N トークンを判別する (heap sort) */ 01304 /* Sort generated tokens and get N-best (use heap sort) */ 01305 /* -------------------------------------------------------------------- */ 01306 /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート 01307 される必要はない */ 01308 /* we only want to know the N-th score for determining beam threshold, so 01309 order is not considered here */ 01310 01311 #define SD(A) tindex_local[A-1] ///< Index locater for sort_token_*() 01312 #define SCOPY(D,S) D = S ///< Content copier for sort_token_*() 01313 #define SVAL(A) (tlist_local[tindex_local[A-1]].score) ///< Score locater for sort_token_*() 01314 #define STVAL (tlist_local[s].score) ///< Indexed score locater for sort_token_*() 01315 01340 static void 01341 sort_token_upward(FSBeam *d, int neednum, int totalnum) 01342 { 01343 int n,root,child,parent; 01344 TOKENID s; 01345 TOKEN2 *tlist_local; 01346 TOKENID *tindex_local; 01347 01348 tlist_local = d->tlist[d->tn]; 01349 tindex_local = d->tindex[d->tn]; 01350 01351 for (root = totalnum/2; root >= 1; root--) { 01352 SCOPY(s, SD(root)); 01353 parent = root; 01354 while ((child = parent * 2) <= totalnum) { 01355 if (child < totalnum && SVAL(child) < SVAL(child+1)) { 01356 child++; 01357 } 01358 if (STVAL >= SVAL(child)) { 01359 break; 01360 } 01361 SCOPY(SD(parent), SD(child)); 01362 parent = child; 01363 } 01364 SCOPY(SD(parent), s); 01365 } 01366 n = totalnum; 01367 while ( n > totalnum - neednum) { 01368 SCOPY(s, SD(n)); 01369 SCOPY(SD(n), SD(1)); 01370 n--; 01371 parent = 1; 01372 while ((child = parent * 2) <= n) { 01373 if (child < n && SVAL(child) < SVAL(child+1)) { 01374 child++; 01375 } 01376 if (STVAL >= SVAL(child)) { 01377 break; 01378 } 01379 SCOPY(SD(parent), SD(child)); 01380 parent = child; 01381 } 01382 SCOPY(SD(parent), s); 01383 } 01384 } 01385 01412 static void 01413 sort_token_downward(FSBeam *d, int neednum, int totalnum) 01414 { 01415 int n,root,child,parent; 01416 TOKENID s; 01417 TOKEN2 *tlist_local; 01418 TOKENID *tindex_local; 01419 01420 tlist_local = d->tlist[d->tn]; 01421 tindex_local = d->tindex[d->tn]; 01422 01423 for (root = totalnum/2; root >= 1; root--) { 01424 SCOPY(s, SD(root)); 01425 parent = root; 01426 while ((child = parent * 2) <= totalnum) { 01427 if (child < totalnum && SVAL(child) > SVAL(child+1)) { 01428 child++; 01429 } 01430 if (STVAL <= SVAL(child)) { 01431 break; 01432 } 01433 SCOPY(SD(parent), SD(child)); 01434 parent = child; 01435 } 01436 SCOPY(SD(parent), s); 01437 } 01438 n = totalnum; 01439 while ( n > totalnum - neednum) { 01440 SCOPY(s, SD(n)); 01441 SCOPY(SD(n), SD(1)); 01442 n--; 01443 parent = 1; 01444 while ((child = parent * 2) <= n) { 01445 if (child < n && SVAL(child) > SVAL(child+1)) { 01446 child++; 01447 } 01448 if (STVAL <= SVAL(child)) { 01449 break; 01450 } 01451 SCOPY(SD(parent), SD(child)); 01452 parent = child; 01453 } 01454 SCOPY(SD(parent), s); 01455 } 01456 } 01457 01490 static void 01491 sort_token_no_order(FSBeam *d, int neednum, int *start, int *end) 01492 { 01493 int totalnum; 01494 int restnum; 01495 01496 totalnum = d->tnum[d->tn]; 01497 01498 restnum = totalnum - neednum; 01499 01500 if (neednum >= totalnum) { 01501 /* no need to sort */ 01502 *start = 0; 01503 *end = totalnum - 1; 01504 } else if (neednum < restnum) { 01505 /* needed num is smaller than rest, so sort for the needed tokens */ 01506 sort_token_upward(d, neednum,totalnum); 01507 *start = totalnum - neednum; 01508 *end = totalnum - 1; 01509 } else { 01510 /* needed num is bigger than rest, so sort for the rest token */ 01511 sort_token_downward(d, restnum,totalnum); 01512 *start = 0; 01513 *end = neednum - 1; 01514 } 01515 } 01516 01517 /* -------------------------------------------------------------------- */ 01518 /* 第1パス(フレーム同期ビームサーチ) メイン */ 01519 /* main routines of 1st pass (frame-synchronous beam search) */ 01520 /* -------------------------------------------------------------------- */ 01521 01550 static boolean 01551 init_nodescore(HTK_Param *param, RecogProcess *r) 01552 { 01553 WCHMM_INFO *wchmm; 01554 FSBeam *d; 01555 TOKENID newid; 01556 TOKEN2 *new; 01557 WORD_ID beginword; 01558 int node; 01559 int i; 01560 01561 wchmm = r->wchmm; 01562 d = &(r->pass1); 01563 01564 /* 初期仮説用単語履歴 */ 01565 /* setup initial word context */ 01566 if (r->config->successive.enabled) { /* sp segment mode */ 01567 /* initial word context = last non-sp word of previous 2nd pass at last segment*/ 01568 if (r->lmtype == LM_PROB) { 01569 if (r->sp_break_last_nword == wchmm->winfo->tail_silwid) { 01570 /* if end with silE, initialize as normal start of sentence */ 01571 d->bos.wid = WORD_INVALID; 01572 } else { 01573 d->bos.wid = r->sp_break_last_nword; 01574 } 01575 } else { 01576 d->bos.wid = WORD_INVALID; 01577 } 01578 } else { /* not sp segment mode */ 01579 d->bos.wid = WORD_INVALID; /* no context */ 01580 } 01581 01582 d->bos.begintime = d->bos.endtime = -1; 01583 01584 /* ノード・トークンを初期化 */ 01585 /* clear tree lexicon nodes and tokens */ 01586 for(node = 0; node < d->totalnodenum; node++) { 01587 d->token[node] = TOKENID_UNDEFINED; 01588 } 01589 d->tnum[0] = d->tnum[1] = 0; 01590 01591 #ifdef PASS1_IWCD 01592 /* 出力確率計算キャッシュを初期化 */ 01593 /* initialize outprob cache */ 01594 outprob_style_cache_init(wchmm); 01595 #endif 01596 01597 /* 初期仮説の作成: 初期単語の決定と初期トークンの生成 */ 01598 /* initial word hypothesis */ 01599 01600 if (r->lmtype == LM_PROB) { 01601 01602 if (r->config->successive.enabled) { /* sp segment mode */ 01603 if (r->sp_break_last_word != WORD_INVALID) { /* last segment exist */ 01604 /* 開始単語=前のセグメント計算時の最後の最尤単語 */ 01605 /* 文終了単語(silE,句点(IPAモデル))なら,silB で開始 */ 01606 /* initial word = best last word hypothesis on the last segment */ 01607 /* if silE or sp, begin with silB */ 01608 /*if (is_sil(recog.sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/ 01609 if (r->sp_break_last_word == wchmm->winfo->tail_silwid) { 01610 beginword = wchmm->winfo->head_silwid; 01611 d->bos.wid = WORD_INVALID; /* reset initial word context */ 01612 } else { 01613 beginword = r->sp_break_last_word; 01614 } 01615 } else { 01616 /* initial segment: initial word set to silB */ 01617 beginword = wchmm->winfo->head_silwid; 01618 } 01619 } else { /* not sp segment mode */ 01620 /* initial word fixed to silB */ 01621 beginword = wchmm->winfo->head_silwid; 01622 } 01623 01624 #ifdef SP_BREAK_DEBUG 01625 jlog("DEBUG: startword=[%s], last_nword=[%s]\n", 01626 (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword], 01627 (d->bos.wid == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[d->bos.wid]); 01628 #endif 01629 /* create the first token at the first node of initial word */ 01630 newid = create_token(d); 01631 new = &(d->tlist[d->tn][newid]); 01632 01633 /* initial node = head node of the beginword */ 01634 if (wchmm->hmminfo->multipath) { 01635 node = wchmm->wordbegin[beginword]; 01636 } else { 01637 node = wchmm->offset[beginword][0]; 01638 } 01639 01640 /* set initial LM score */ 01641 if (wchmm->state[node].scid != 0) { 01642 /* if initial node is on a factoring branch, use the factored score */ 01643 new->last_lscore = max_successor_prob(wchmm, d->bos.wid, node); 01644 } else { 01645 /* else, set to 0.0 */ 01646 new->last_lscore = 0.0; 01647 } 01648 #ifdef FIX_PENALTY 01649 new->last_lscore = new->last_lscore * d->lm_weight; 01650 #else 01651 new->last_lscore = new->last_lscore * d->lm_weight + d->lm_penalty; 01652 #endif 01653 /* set initial word history */ 01654 new->last_tre = &(d->bos); 01655 new->last_cword = d->bos.wid; 01656 if (wchmm->hmminfo->multipath) { 01657 /* set initial score using the initial LM score */ 01658 new->score = new->last_lscore; 01659 } else { 01660 /* set initial score using the initial LM score and AM score of the first state */ 01661 new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore; 01662 } 01663 /* assign the initial node to token list */ 01664 node_assign_token(d, node, newid); 01665 01666 } 01667 01668 if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) { 01669 01670 /* 初期仮説: 文法上文頭に接続しうる単語集合 */ 01671 /* initial words: all words that can be begin of sentence grammatically */ 01672 /* アクティブな文法に属する単語のみ許す */ 01673 /* only words in active grammars are allowed to be an initial words */ 01674 MULTIGRAM *m; 01675 int t,tb,te; 01676 WORD_ID iw; 01677 boolean flag; 01678 DFA_INFO *gdfa; 01679 01680 gdfa = r->lm->dfa; 01681 01682 flag = FALSE; 01683 /* for all active grammar */ 01684 for(m = r->lm->grammars; m; m = m->next) { 01685 if (m->active) { 01686 tb = m->cate_begin; 01687 te = tb + m->dfa->term_num; 01688 for(t=tb;t<te;t++) { 01689 /* for all word categories that belong to the grammar */ 01690 if (dfa_cp_begin(gdfa, t) == TRUE) { 01691 /* if the category can appear at beginning of sentence, */ 01692 flag = TRUE; 01693 for (iw=0;iw<gdfa->term.wnum[t];iw++) { 01694 /* create the initial token at the first node of all words that belongs to the category */ 01695 i = gdfa->term.tw[t][iw]; 01696 if (wchmm->hmminfo->multipath) { 01697 node = wchmm->wordbegin[i]; 01698 } else { 01699 node = wchmm->offset[i][0]; 01700 } 01701 /* in tree lexicon, words in the same category may share the same root node, so skip it if the node has already existed */ 01702 if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue; 01703 newid = create_token(d); 01704 new = &(d->tlist[d->tn][newid]); 01705 new->last_tre = &(d->bos); 01706 #ifdef FIX_PENALTY 01707 new->last_lscore = 0.0; 01708 #else 01709 new->last_lscore = d->penalty1; 01710 #endif 01711 if (wchmm->hmminfo->multipath) { 01712 new->score = new->last_lscore; 01713 } else { 01714 new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore; 01715 } 01716 node_assign_token(d, node, newid); 01717 } 01718 } 01719 } 01720 } 01721 } 01722 if (!flag) { 01723 jlog("ERROR: init_nodescore: no initial state found in active DFA grammar\n"); 01724 return FALSE; 01725 } 01726 } 01727 01728 if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_WORD) { 01729 /* アクティブな文法に属する単語のみ許す */ 01730 /* only words in active grammars are allowed to be an initial words */ 01731 MULTIGRAM *m; 01732 01733 for(m = r->lm->grammars; m; m = m->next) { 01734 if (m->active) { 01735 for(i = m->word_begin; i < m->word_begin + m->winfo->num; i++) { 01736 if (wchmm->hmminfo->multipath) { 01737 node = wchmm->wordbegin[i]; 01738 } else { 01739 node = wchmm->offset[i][0]; 01740 } 01741 if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue; 01742 newid = create_token(d); 01743 new = &(d->tlist[d->tn][newid]); 01744 new->last_tre = &(d->bos); 01745 new->last_lscore = 0.0; 01746 if (wchmm->hmminfo->multipath) { 01747 new->score = 0.0; 01748 } else { 01749 new->score = outprob_style(wchmm, node, d->bos.wid, 0, param); 01750 } 01751 node_assign_token(d, node, newid); 01752 } 01753 } 01754 } 01755 } 01756 01757 return TRUE; 01758 01759 } 01760 01761 /******************************************************/ 01762 /* フレーム同期ビーム探索の実行 --- 最初のフレーム用 */ 01763 /* frame synchronous beam search --- first frame only */ 01764 /******************************************************/ 01765 01790 boolean 01791 get_back_trellis_init(HTK_Param *param, RecogProcess *r) 01792 { 01793 WCHMM_INFO *wchmm; 01794 BACKTRELLIS *backtrellis; 01795 FSBeam *d; 01796 01797 wchmm = r->wchmm; 01798 backtrellis = r->backtrellis; 01799 d = &(r->pass1); 01800 01801 /* Viterbi演算用ワークエリアのスイッチャー tl,tn の初期値設定 */ 01802 /* tn: このフレーム用ID tl: 1フレーム前のID */ 01803 /* initialize switch tl, tn for Viterbi computation */ 01804 /* tn: this frame tl: last frame */ 01805 d->tn = 0; 01806 d->tl = 1; 01807 01808 /* 結果の単語トレリスを格納するバックトレリス構造体を初期化 */ 01809 /* initialize backtrellis structure to store resulting word trellis */ 01810 bt_prepare(backtrellis); 01811 01812 /* 計算用ワークエリアを初期化 */ 01813 /* initialize some data on work area */ 01814 01815 if (r->lmtype == LM_PROB) { 01816 d->lm_weight = r->config->lmp.lm_weight; 01817 d->lm_penalty = r->config->lmp.lm_penalty; 01818 } 01819 if (r->lmtype == LM_DFA) { 01820 d->penalty1 = r->config->lmp.penalty1; 01821 } 01822 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) 01823 d->wpair_keep_nlimit = r->config->pass1.wpair_keep_nlimit; 01824 #endif 01825 01826 /* ワークエリアを確保 */ 01827 /* malloc work area */ 01828 /* 使用するトークン量 = viterbi時に遷移先となる状態候補の数 01829 * 予測: ビーム数 x 2 (自己遷移+次状態) + 木構造化辞書のルートノード数 01830 */ 01831 /* assumed initial number of needed tokens: beam width x 2 (self + next trans.) 01832 * + root node on the tree lexicon (for inter-word trans.) 01833 */ 01834 if (d->totalnodenum != wchmm->n) { 01835 free_nodes(d); 01836 } 01837 if (d->nodes_malloced == FALSE) { 01838 malloc_nodes(d, wchmm->n, r->trellis_beam_width * 2 + wchmm->startnum); 01839 } 01840 prepare_nodes(d, r->trellis_beam_width); 01841 01842 /* 初期スコアを nodescore[tn] にセット */ 01843 /* set initial score to nodescore[tn] */ 01844 if (init_nodescore(param, r) == FALSE) { 01845 jlog("ERROR: get_back_trellis_init: failed to set initial node scores\n"); 01846 return FALSE; 01847 } 01848 01849 sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); 01850 01851 /* 漸次出力を行なう場合のインターバルを計算 */ 01852 /* set interval frame for progout */ 01853 r->config->output.progout_interval_frame = (int)((float)r->config->output.progout_interval / ((float)param->header.wshift / 10000.0)); 01854 01855 if (r->config->successive.enabled) { 01856 /* ショートポーズセグメンテーション用パラメータの初期化 */ 01857 /* initialize parameter for short pause segmentation */ 01858 d->in_sparea = TRUE; /* assume beginning is silence */ 01859 r->am->mfcc->sparea_start = d->tmp_sparea_start = 0; /* set start frame to 0 */ 01860 #ifdef SP_BREAK_RESUME_WORD_BEGIN 01861 d->tmp_sp_break_last_word = WORD_INVALID; 01862 #endif 01863 r->sp_break_last_word = WORD_INVALID; 01864 /* 最初のセグメント: 次の非ポーズフレームで第2パスへ移行しない */ 01865 /* the first end of pause segment should be always silB, so 01866 skip the first segment */ 01867 d->first_sparea = TRUE; 01868 r->sp_break_2_begin_word = WORD_INVALID; 01869 } 01870 01871 #ifdef DETERMINE 01872 if (r->lmvar == LM_DFA_WORD) { 01873 /* initialize */ 01874 determine_word(r, 0, NULL, 0, 0); 01875 } 01876 #endif 01877 01878 #ifdef SCORE_PRUNING 01879 d->score_pruning_threshold = LOG_ZERO; 01880 d->score_pruning_count = 0; 01881 #endif 01882 01883 return TRUE; 01884 } 01885 01886 /*****************************************************/ 01887 /* frame synchronous beam search --- proceed 1 frame */ 01888 /* フレーム同期ビーム探索の実行 --- 1フレーム進める */ 01889 /*****************************************************/ 01890 01909 static void 01910 propagate_token(FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore) 01911 { 01912 TOKEN2 *tknext; 01913 TOKENID tknextid; 01914 01915 if ((tknextid = node_exist_token(d, d->tn, next_node, last_tre->wid)) != TOKENID_UNDEFINED) { 01916 /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */ 01917 /* the destination node already has a token: compare score */ 01918 tknext = &(d->tlist[d->tn][tknextid]); 01919 if (tknext->score < next_score) { 01920 /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */ 01921 /* overwrite the content of existing destination token: not create a new token */ 01922 tknext->last_tre = last_tre; /* propagate last word info */ 01923 tknext->last_cword = last_cword; /* propagate last context word info */ 01924 tknext->last_lscore = last_lscore; /* set new LM score */ 01925 tknext->score = next_score; /* set new score */ 01926 } 01927 } else { 01928 /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */ 01929 /* token unassigned: create new token and assign */ 01930 if (next_score > LOG_ZERO) { /* valid token */ 01931 tknextid = create_token(d); /* get new token */ 01932 } 01933 tknext = &(d->tlist[d->tn][tknextid]); 01934 tknext->last_tre = last_tre; /* propagate last word info */ 01935 tknext->last_cword = last_cword; /* propagate last context word info */ 01936 tknext->last_lscore = last_lscore; 01937 tknext->score = next_score; /* set new score */ 01938 node_assign_token(d, next_node, tknextid); /* assign this new token to the next node */ 01939 } 01940 } 01941 01965 static void 01966 beam_intra_word_core(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a) 01967 { 01968 int node; 01969 LOGPROB tmpsum; 01970 LOGPROB ngram_score_cache; 01971 TOKEN2 *tk; 01972 01973 tk = *tk_ret; 01974 01975 node = tk->node; 01976 01977 /* now, 'node' is the source node, 'next_node' is the destication node, 01978 and ac-> holds transition probability */ 01979 /* tk->score is the accumulated score at the 'node' on previous frame */ 01980 01981 /******************************************************************/ 01982 /* 2.1.1 遷移先へのスコア計算(遷移確率+言語スコア) */ 01983 /* compute score of destination node (transition prob + LM) */ 01984 /******************************************************************/ 01985 tmpsum = tk->score + next_a; 01986 ngram_score_cache = LOG_ZERO; 01987 /* the next score at 'next_node' will be computed on 'tmpsum', and 01988 the new LM probability (if updated) will be stored on 'ngram_score_cache' at below */ 01989 01990 if (!wchmm->category_tree) { 01991 /* 言語スコア factoring: 01992 arcが自己遷移でない単語内の遷移で,かつ遷移先にsuccessorリスト 01993 があれば,lexicon tree の分岐部分の遷移である */ 01994 /* LM factoring: 01995 If this is not a self transition and destination node has successor 01996 list, this is branching transition 01997 */ 01998 if (next_node != node) { 01999 if (wchmm->state[next_node].scid != 0 02000 #ifdef UNIGRAM_FACTORING 02001 /* 1-gram factoring 使用時は, 複数で共有される枝では 02002 wchmm->state[node].scid は負の値となり,その絶対値を 02003 添字として wchmm->fscore[] に単語集合の1-gramの最大値が格納 02004 されている. 末端の枝(複数単語で共有されない)では, 02005 wchmm->state[node].scid は正の値となり, 02006 1単語を sc として持つのでそこで正確な2-gramを計算する */ 02007 /* When uni-gram factoring, 02008 wchmm->state[node].scid is below 0 for shared branches. 02009 In this case the maximum uni-gram probability for sub-tree 02010 is stored in wchmm->fscore[- wchmm->state[node].scid]. 02011 Leaf branches (with only one successor word): the scid is 02012 larger than 0, and has 02013 the word ID in wchmm->sclist[wchmm->state[node].scid]. 02014 So precise 2-gram is computed in this point */ 02015 #endif 02016 ){ 02017 02018 if (wchmm->lmtype == LM_PROB) { 02019 /* ここで言語モデル確率を更新する */ 02020 /* LM value should be update at this transition */ 02021 /* N-gram確率からfactoring 値を計算 */ 02022 /* compute new factoring value from N-gram probabilities */ 02023 #ifdef FIX_PENALTY 02024 /* if at the beginning of sentence, not add lm_penalty */ 02025 if (tk->last_cword == WORD_INVALID) { 02026 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight; 02027 } else { 02028 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; 02029 } 02030 #else 02031 ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; 02032 #endif 02033 /* スコアの更新: tk->last_lscore に単語内での最後のfactoring値が 02034 入っているので, それをスコアから引いてリセットし, 新たなスコアを 02035 セットする */ 02036 /* update score: since 'tk->last_lscore' holds the last LM factoring 02037 value in this word, we first remove the score from the current 02038 score, and then add the new LM value computed above. */ 02039 tmpsum -= tk->last_lscore; 02040 tmpsum += ngram_score_cache; 02041 } 02042 02043 if (wchmm->lmtype == LM_DFA && wchmm->lmvar == LM_DFA_GRAMMAR) { 02044 /* 文法を用いる場合, カテゴリ単位の木構造化がなされていれば, 02045 接続制約は単語間遷移のみで扱われるので,factoring は必要ない. 02046 カテゴリ単位木構造化が行われない場合, 文法間の接続制約はここ 02047 で factoring で行われることになる. */ 02048 /* With DFA, we use category-pair constraint extracted from the DFA 02049 at this 1st pass. So if we compose a tree lexicon per word's 02050 category, the each category tree has the same connection 02051 constraint and thus we can apply the constraint on the cross-word 02052 transition. This per-category tree lexicon is enabled by default, 02053 and in the case the constraint will be applied at the word end. 02054 If you disable per-category tree lexicon by undefining 02055 'CATEGORY_TREE', the word-pair contrained will be applied in a 02056 factoring style at here. 02057 */ 02058 02059 /* 決定的factoring: 直前単語に対して,sub-tree内にカテゴリ対制約上 02060 接続しうる単語が1つもなければ, この遷移は不可 */ 02061 /* deterministic factoring in grammar mode: 02062 transition disabled if there are totally no sub-tree word that can 02063 grammatically (in category-pair constraint) connect 02064 to the previous word. 02065 */ 02066 if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) { 02067 tmpsum = LOG_ZERO; 02068 } 02069 } 02070 02071 } 02072 } 02073 } 02074 /* factoring not needed when DFA mode and uses category-tree */ 02075 02076 /****************************************/ 02077 /* 2.1.2 遷移先ノードへトークン伝搬 */ 02078 /* pass token to destination node */ 02079 /****************************************/ 02080 02081 if (ngram_score_cache == LOG_ZERO) ngram_score_cache = tk->last_lscore; 02082 propagate_token(d, next_node, tmpsum, tk->last_tre, tk->last_cword, ngram_score_cache); 02083 02084 if (d->expanded) { 02085 /* if work area has been expanded at 'create_token()' above, 02086 the inside 'realloc()' will destroy the pointers. 02087 so, reset local pointers from token index */ 02088 tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); 02089 d->expanded = FALSE; 02090 } 02091 02092 *tk_ret = tk; 02093 02094 } 02095 02115 static void 02116 beam_intra_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j) 02117 { 02118 A_CELL2 *ac; 02119 TOKEN2 *tk; 02120 int node; 02121 int k; 02122 02123 tk = *tk_ret; 02124 02125 node = tk->node; 02126 02127 if (wchmm->self_a[node] != LOG_ZERO) { 02128 beam_intra_word_core(wchmm, d, tk_ret, j, node, wchmm->self_a[node]); 02129 } 02130 02131 if (wchmm->next_a[node] != LOG_ZERO) { 02132 beam_intra_word_core(wchmm, d, tk_ret, j, node+1, wchmm->next_a[node]); 02133 } 02134 02135 for(ac=wchmm->ac[node];ac;ac=ac->next) { 02136 for(k=0;k<ac->n;k++) { 02137 beam_intra_word_core(wchmm, d, tk_ret, j, ac->arc[k], ac->a[k]); 02138 } 02139 } 02140 } 02141 02142 /**************************/ 02143 /* 2.2. トレリス単語保存 */ 02144 /* save trellis word */ 02145 /**************************/ 02170 static TRELLIS_ATOM * 02171 save_trellis(BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath) 02172 { 02173 TRELLIS_ATOM *tre; 02174 int sword; 02175 02176 sword = wchmm->stend[tk->node]; 02177 02178 /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード. 02179 (「このフレーム」でないことに注意!!) 02180 よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説 02181 (TRELLIS_ATOM)として,単語トレリス構造体に保存する. */ 02182 /* This source node (= word end node) has been survived in the 02183 "last" frame (notice: not "this" frame!!). So this word end 02184 is saved here to the word trellis structure (BACKTRELLIS) as a 02185 trellis word (TRELLIS_ATOM) with end frame (t-1). */ 02186 tre = bt_new(bt); 02187 tre->wid = sword; /* word ID */ 02188 tre->backscore = tk->score; /* log score (AM + LM) */ 02189 tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */ 02190 tre->endtime = t-1; /* word end frame */ 02191 tre->last_tre = tk->last_tre; /* link to previous trellis word */ 02192 tre->lscore = tk->last_lscore; /* log LM score */ 02193 bt_store(bt, tre); /* save to backtrellis */ 02194 #ifdef WORD_GRAPH 02195 if (tre->last_tre != NULL) { 02196 /* mark to indicate that the following words was survived in beam */ 02197 tre->last_tre->within_context = TRUE; 02198 } 02199 if (final_for_multipath) { 02200 /* last node */ 02201 if (tre->wid == wchmm->winfo->tail_silwid) { 02202 tre->within_context = TRUE; 02203 } 02204 } 02205 #endif /* WORD_GRAPH */ 02206 02207 return tre; 02208 } 02209 02210 02231 static void 02232 beam_inter_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j) 02233 { 02234 A_CELL2 *ac; 02235 TOKEN2 *tk; 02236 int sword; 02237 int node, next_node; 02238 LOGPROB *iwparray; 02239 int stid; 02240 #ifdef UNIGRAM_FACTORING 02241 int isoid; 02242 #endif 02243 LOGPROB tmpprob, tmpsum, ngram_score_cache; 02244 int k; 02245 WORD_ID last_word; 02246 02247 tk = *tk_ret; 02248 02249 node = tk->node; 02250 sword = wchmm->stend[node]; 02251 last_word = wchmm->winfo->is_transparent[sword] ? tk->last_cword : sword; 02252 02253 if (wchmm->lmtype == LM_PROB) { 02254 02255 /* 遷移元単語が末尾単語の終端なら,どこへも遷移させない */ 02256 /* do not allow transition if the source word is end-of-sentence word */ 02257 if (sword == wchmm->winfo->tail_silwid) return; 02258 02259 #ifdef UNIGRAM_FACTORING 02260 #ifndef WPAIR 02261 /* あとで共有単語先頭ノードに対して単語間遷移をまとめて計算するため,*/ 02262 /* このループ内では最大尤度を持つ単語終端ノードを記録しておく */ 02263 /* here we will record the best wordend node of maximum likelihood 02264 at this frame, to compute later the cross-word transitions toward 02265 shared factoring word-head node */ 02266 tmpprob = tk->score; 02267 if (!wchmm->hmminfo->multipath) tmpprob += wchmm->wordend_a[sword]; 02268 if (d->wordend_best_score < tmpprob) { 02269 d->wordend_best_score = tmpprob; 02270 d->wordend_best_node = node; 02271 d->wordend_best_tre = tre; 02272 d->wordend_best_last_cword = tk->last_cword; 02273 } 02274 #endif 02275 #endif 02276 02277 /* N-gramにおいては常に全単語の接続を考慮する必要があるため, 02278 ここで単語間の言語確率値をすべて計算しておく. 02279 キャッシュは max_successor_prob_iw() 内で考慮. */ 02280 /* As all words are possible to connect in N-gram, we first compute 02281 all the inter-word LM probability here. 02282 Cache is onsidered in max_successor_prob_iw(). */ 02283 if (wchmm->winfo->is_transparent[sword]) { 02284 iwparray = max_successor_prob_iw(wchmm, tk->last_cword); 02285 } else { 02286 iwparray = max_successor_prob_iw(wchmm, sword); 02287 } 02288 } 02289 02290 /* すべての単語始端ノードに対して以下を実行 */ 02291 /* for all beginning-of-word nodes, */ 02292 /* wchmm->startnode[0..stid-1] ... 単語始端ノードリスト */ 02293 /* wchmm->startnode[0..stid-1] ... list of word start node (shared) */ 02294 for (stid = wchmm->startnum - 1; stid >= 0; stid--) { 02295 next_node = wchmm->startnode[stid]; 02296 if (wchmm->hmminfo->multipath) { 02297 if (wchmm->lmtype == LM_PROB) { 02298 /* connection to the head silence word is not allowed */ 02299 if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue; 02300 } 02301 } 02302 02303 /*****************************************/ 02304 /* 2.3.1. 単語間言語制約を適用 */ 02305 /* apply cross-word LM constraint */ 02306 /*****************************************/ 02307 02308 if (wchmm->lmtype == LM_PROB) { 02309 /* N-gram確率を計算 */ 02310 /* compute N-gram probability */ 02311 #ifdef UNIGRAM_FACTORING 02312 /* wchmm,start2isolate[0..stid-1] ... ノードを共有しない単語は 02313 その通しID, 共有する(キャッシュの必要のない)単語は -1 */ 02314 /* wchmm->start2isolate[0..stid-1] ... isolate ID for 02315 beginning-of-word state. value: -1 for states that has 02316 1-gram factoring value (share nodes with some other words), 02317 and ID for unshared words 02318 */ 02319 isoid = wchmm->start2isolate[stid]; 02320 #ifdef WPAIR 02321 /* Efficient cross-word LM handling should be disabled for 02322 word-pair approximation */ 02323 if (isoid == -1) { 02324 tmpprob = wchmm->fscore[- wchmm->state[next_node].scid]; 02325 } else { 02326 tmpprob = iwparray[isoid]; 02327 } 02328 #else /* ~WPAIR */ 02329 /* 1-gram factoring における単語間言語確率キャッシュの効率化: 02330 1-gram factoring は単語履歴に依存しないので, 02331 ここで参照する factoring 値の多くは 02332 wchmm->fscore[] に既に格納され, 探索中も不変である. 02333 よって計算が必要な単語(どの単語ともノードを共有しない単語) 02334 についてのみ iwparray[] で計算・キャッシュする. */ 02335 /* Efficient cross-word LM cache: 02336 As 1-gram factoring values are independent of word context, 02337 they remain unchanged while search. So, in cross-word LM 02338 computation, beginning-of-word states which share nodes with 02339 others and has factoring value in wchmm does not need cache. 02340 So only the unshared beginning-of-word states are computed and 02341 cached here in iwparray[]. 02342 */ 02343 /* 計算が必要でない単語先頭ノードはパスをまとめて後に計算するので 02344 ここではスキップ */ 02345 /* the shared nodes will be computed afterward, so just skip them 02346 here */ 02347 if (isoid == -1) continue; 02348 tmpprob = iwparray[isoid]; 02349 #endif /* ~WPAIR */ 02350 #else /* ~UNIGRAM_FACTORING */ 02351 tmpprob = iwparray[stid]; 02352 #endif 02353 } 02354 02355 /* 遷移先の単語が先頭単語なら遷移させない. 02356 これは wchmm.c で該当単語に stid を割り振らないことで対応 02357 しているので,ここでは何もしなくてよい */ 02358 /* do not allow transition if the destination word is 02359 beginning-of-sentence word. This limitation is realized by 02360 not assigning 'stid' for the word in wchmm.c, so we have nothing 02361 to do here. 02362 */ 02363 02364 if (wchmm->category_tree) { 02365 /* 文法の場合, 制約は決定的: カテゴリ対制約上許されない場合は遷移させない */ 02366 /* With DFA and per-category tree lexicon, 02367 LM constraint is deterministic: 02368 do not allow transition if the category connection is not allowed 02369 (with category tree, constraint can be determined on top node) */ 02370 if (dfa_cp(wchmm->dfa, wchmm->winfo->wton[sword], wchmm->winfo->wton[wchmm->start2wid[stid]]) == FALSE) continue; 02371 } 02372 02373 /*******************************************************************/ 02374 /* 2.3.2. 遷移先の単語先頭へのスコア計算(遷移確率+言語スコア) */ 02375 /* compute score of destination node (transition prob + LM) */ 02376 /*******************************************************************/ 02377 tmpsum = tk->score; 02378 if (!wchmm->hmminfo->multipath) tmpsum += wchmm->wordend_a[sword]; 02379 02380 /* 'tmpsum' now holds outgoing score from the wordend node */ 02381 if (wchmm->lmtype == LM_PROB) { 02382 /* 言語スコアを追加 */ 02383 /* add LM score */ 02384 ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty; 02385 tmpsum += ngram_score_cache; 02386 if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) { 02387 02388 tmpsum += d->lm_penalty_trans; 02389 } 02390 } 02391 if (wchmm->lmtype == LM_DFA) { 02392 /* grammar: 単語挿入ペナルティを追加 */ 02393 /* grammar: add insertion penalty */ 02394 ngram_score_cache = d->penalty1; 02395 tmpsum += ngram_score_cache; 02396 02397 /* grammar: deterministic factoring (in case category-tree not enabled) */ 02398 if (!wchmm->category_tree) { 02399 if (!can_succeed(wchmm, sword, next_node)) { 02400 tmpsum = LOG_ZERO; 02401 } 02402 } 02403 } 02404 02405 /*********************************************************************/ 02406 /* 2.3.3. 遷移先ノードへトークン伝搬(単語履歴情報は更新) */ 02407 /* pass token to destination node (updating word-context info */ 02408 /*********************************************************************/ 02409 02410 if (wchmm->hmminfo->multipath) { 02411 /* since top node has no ouput, we should go one more step further */ 02412 if (wchmm->self_a[next_node] != LOG_ZERO) { 02413 propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], tre, last_word, ngram_score_cache); 02414 if (d->expanded) { 02415 /* if work area has been expanded at 'create_token()' above, 02416 the inside 'realloc()' will destroy the pointers. 02417 so, reset local pointers from token index */ 02418 tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); 02419 d->expanded = FALSE; 02420 } 02421 } 02422 if (wchmm->next_a[next_node] != LOG_ZERO) { 02423 propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], tre, last_word, ngram_score_cache); 02424 if (d->expanded) { 02425 /* if work area has been expanded at 'create_token()' above, 02426 the inside 'realloc()' will destroy the pointers. 02427 so, reset local pointers from token index */ 02428 tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); 02429 d->expanded = FALSE; 02430 } 02431 } 02432 for(ac=wchmm->ac[next_node];ac;ac=ac->next) { 02433 for(k=0;k<ac->n;k++) { 02434 propagate_token(d, ac->arc[k], tmpsum + ac->a[k], tre, last_word, ngram_score_cache); 02435 if (d->expanded) { 02436 /* if work area has been expanded at 'create_token()' above, 02437 the inside 'realloc()' will destroy the pointers. 02438 so, reset local pointers from token index */ 02439 tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); 02440 d->expanded = FALSE; 02441 } 02442 } 02443 } 02444 } else { 02445 propagate_token(d, next_node, tmpsum, tre, last_word, ngram_score_cache); 02446 if (d->expanded) { 02447 /* if work area has been expanded at 'create_token()' above, 02448 the inside 'realloc()' will destroy the pointers. 02449 so, reset local pointers from token index */ 02450 tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); 02451 d->expanded = FALSE; 02452 } 02453 } 02454 02455 } /* end of next word heads */ 02456 02457 *tk_ret = tk; 02458 02459 02460 } /* end of cross-word processing */ 02461 02462 02463 #ifdef UNIGRAM_FACTORING 02464 02491 static void 02492 beam_inter_word_factoring(WCHMM_INFO *wchmm, FSBeam *d) 02493 { 02494 int sword; 02495 int node, next_node; 02496 int stid; 02497 LOGPROB tmpprob, tmpsum, ngram_score_cache; 02498 A_CELL2 *ac; 02499 int j; 02500 WORD_ID last_word; 02501 02502 node = d->wordend_best_node; 02503 sword = wchmm->stend[node]; 02504 last_word = wchmm->winfo->is_transparent[sword] ? d->wordend_best_last_cword : sword; 02505 02506 for (stid = wchmm->startnum - 1; stid >= 0; stid--) { 02507 next_node = wchmm->startnode[stid]; 02508 /* compute transition from 'node' at end of word 'sword' to 'next_node' */ 02509 /* skip isolated words already calculated in the above main loop */ 02510 if (wchmm->start2isolate[stid] != -1) continue; 02511 /* rest should have 1-gram factoring score at wchmm->fscore */ 02512 if (wchmm->state[next_node].scid >= 0) { 02513 j_internal_error("get_back_trellis_proceed: scid mismatch at 1-gram factoring of shared states\n"); 02514 } 02515 tmpprob = wchmm->fscore[- wchmm->state[next_node].scid]; 02516 ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty; 02517 tmpsum = d->wordend_best_score; 02518 tmpsum += ngram_score_cache; 02519 if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[d->wordend_best_last_cword]) { 02520 tmpsum += d->lm_penalty_trans; 02521 } 02522 #ifdef SCORE_PRUNING 02523 if (tmpsum < d->score_pruning_threshold) { 02524 d->score_pruning_count++; 02525 continue; 02526 } 02527 #endif 02528 if (wchmm->hmminfo->multipath) { 02529 /* since top node has no ouput, we should go one more step further */ 02530 if (wchmm->self_a[next_node] != LOG_ZERO) { 02531 propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache); 02532 if (d->expanded) { 02533 d->expanded = FALSE; 02534 } 02535 } 02536 if (wchmm->next_a[next_node] != LOG_ZERO) { 02537 propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache); 02538 if (d->expanded) { 02539 d->expanded = FALSE; 02540 } 02541 } 02542 for(ac=wchmm->ac[next_node];ac;ac=ac->next) { 02543 for(j=0;j<ac->n;j++) { 02544 propagate_token(d, ac->arc[j], tmpsum + ac->a[j], d->wordend_best_tre, last_word, ngram_score_cache); 02545 if (d->expanded) { 02546 d->expanded = FALSE; 02547 } 02548 } 02549 } 02550 02551 } else { 02552 propagate_token(d, next_node, tmpsum, d->wordend_best_tre, last_word, ngram_score_cache); 02553 if (d->expanded) { 02554 d->expanded = FALSE; 02555 } 02556 } 02557 02558 } 02559 } 02560 02561 #endif /* UNIGRAM_FACTORING */ 02562 02563 02605 boolean 02606 get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath) 02607 { 02608 /* local static work area for get_back_trellis_proceed() */ 02609 /* these are local work area and need not to be kept for another call */ 02610 TRELLIS_ATOM *tre; 02611 int node; 02612 int lmtype, lmvar; 02613 02614 WCHMM_INFO *wchmm; 02615 FSBeam *d; 02616 int j; 02617 TOKEN2 *tk; 02618 LOGPROB minscore; 02619 02620 /* local copied variables */ 02621 int tn, tl; 02622 02623 /* store pointer to local for rapid access */ 02624 wchmm = r->wchmm; 02625 d = &(r->pass1); 02626 02627 02628 lmtype = r->lmtype; 02629 lmvar = r->lmvar; 02630 02631 /*********************/ 02632 /* 1. 初期化 */ 02633 /* initialization */ 02634 /*********************/ 02635 02636 /* tl と tn を入れ替えて作業領域を切り替え */ 02637 /* tl (= 直前の tn) は直前フレームの結果を持つ */ 02638 /* swap tl and tn to switch work buffer */ 02639 /* tl (= last tn) holds result of the previous frame */ 02640 d->tl = d->tn; 02641 if (d->tn == 0) d->tn = 1; else d->tn = 0; 02642 02643 /* store locally for rapid access */ 02644 tl = d->tl; 02645 tn = d->tn; 02646 02647 #ifdef UNIGRAM_FACTORING 02648 #ifndef WPAIR 02649 /* 1-gram factoring では単語先頭での言語確率が一定で直前単語に依存しない 02650 ため,単語間 Viterbi において選ばれる直前単語は,次単語によらず共通である. 02651 よって単語終端からfactoring値のある単語先頭への遷移は1つにまとめられる. 02652 ただし,木から独立した単語については, 単語先頭で履歴に依存した2-gramが 02653 与えられるため, 最尤の単語間 Viterbi パスは次単語ごとに異なる. 02654 よってそれらについてはまとめずに別に計算する */ 02655 /* In 1-gram factoring, the language score on the word-head node is constant 02656 and independent of the previous word. So, the same word hypothesis will 02657 be selected as the best previous word at the inter-word Viterbi 02658 processing. So, in inter-word processing, we can (1) select only 02659 the best word-end hypothesis, and then (2) process viterbi from the node 02660 to each word-head node. On the other hand, the isolated words, 02661 i.e. words not sharing any node with other word, has unique word-head 02662 node and the true 2-gram language score is determined at the top node. 02663 In such case the best word hypothesis prior to each node will differ 02664 according to the language scores. So we have to deal such words separately. */ 02665 /* initialize max value to delect best word-end hypothesis */ 02666 if (lmtype == LM_PROB) { 02667 d->wordend_best_score = LOG_ZERO; 02668 } 02669 #endif 02670 #endif 02671 02672 #ifdef DEBUG 02673 /* debug */ 02674 /* node_check_token(d, tl); */ 02675 #endif 02676 02677 /* トークンバッファを初期化: 直前フレームで使われた部分だけクリアすればよい */ 02678 /* initialize token buffer: for speedup, only ones used in the last call will be cleared */ 02679 clear_tokens(d, tl); 02680 02681 /**************************/ 02682 /* 2. Viterbi計算 */ 02683 /* Viterbi computation */ 02684 /**************************/ 02685 /* 直前フレームからこのフレームへの Viterbi 計算を行なう */ 02686 /* tindex[tl][n_start..n_end] に直前フレーム上位ノードのIDが格納されている */ 02687 /* do one viterbi computation from last frame to this frame */ 02688 /* tindex[tl][n_start..n_end] holds IDs of survived nodes in last frame */ 02689 02690 if (wchmm->hmminfo->multipath) { 02691 /*********************************/ 02692 /* MULTIPATH MODE */ 02693 /*********************************/ 02694 02695 for (j = d->n_start; j <= d->n_end; j++) { 02696 /* tk: 対象トークン node: そのトークンを持つ木構造化辞書ノードID */ 02697 /* tk: token data node: lexicon tree node ID that holds the 'tk' */ 02698 tk = &(d->tlist[tl][d->tindex[tl][j]]); 02699 if (tk->score <= LOG_ZERO) continue; /* invalid node */ 02700 #ifdef SCORE_PRUNING 02701 if (tk->score < d->score_pruning_threshold) { 02702 d->score_pruning_count++; 02703 continue; 02704 } 02705 #endif 02706 node = tk->node; 02707 /*********************************/ 02708 /* 2.1. 単語内遷移 */ 02709 /* word-internal transition */ 02710 /*********************************/ 02711 beam_intra_word(wchmm, d, &tk, j); 02712 } 02713 /*******************************************************/ 02714 /* 2.2. スコアでトークンをソートしビーム幅分の上位を決定 */ 02715 /* sort tokens by score up to beam width */ 02716 /*******************************************************/ 02717 sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); 02718 02719 /*************************/ 02720 /* 2.3. 単語間Viterbi計算 */ 02721 /* cross-word viterbi */ 02722 /*************************/ 02723 for(j = d->n_start; j <= d->n_end; j++) { 02724 tk = &(d->tlist[tn][d->tindex[tn][j]]); 02725 node = tk->node; 02726 #ifdef SCORE_PRUNING 02727 if (tk->score < d->score_pruning_threshold) { 02728 d->score_pruning_count++; 02729 continue; 02730 } 02731 #endif 02732 /* 遷移元ノードが単語終端ならば */ 02733 /* if source node is end state of a word, */ 02734 if (wchmm->stend[node] != WORD_INVALID) { 02735 02736 /**************************/ 02737 /* 2.4. トレリス単語保存 */ 02738 /* save trellis word */ 02739 /**************************/ 02740 #ifdef SPSEGMENT_NAIST 02741 if (r->config->successive.enabled && !d->after_trigger) { 02742 tre = tk->last_tre; /* dummy */ 02743 } else { 02744 tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); 02745 } 02746 #else 02747 tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); 02748 #endif 02749 /* 最終フレームであればここまで:遷移はさせない */ 02750 /* If this is a final frame, does not do cross-word transition */ 02751 if (final_for_multipath) continue; 02752 /* 単語認識モードでは単語間遷移は必要ない */ 02753 if (lmvar == LM_DFA_WORD) continue; 02754 02755 /******************************/ 02756 /* 2.5. 単語間遷移 */ 02757 /* cross-word transition */ 02758 /******************************/ 02759 02760 #ifdef UNIGRAM_FACTORING 02761 /* ここで処理されるのは isolated words のみ, 02762 shared nodes はまとめてこのループの外で計算する */ 02763 /* Only the isolated words will be processed here. 02764 The shared nodes with constant factoring values will be computed 02765 after this loop */ 02766 #endif 02767 beam_inter_word(wchmm, d, &tk, tre, j); 02768 02769 } /* end of cross-word processing */ 02770 02771 } /* end of main viterbi loop */ 02772 02773 02774 02775 } else { 02776 02777 /*********************************/ 02778 /* NORMAL MODE */ 02779 /*********************************/ 02780 02781 for (j = d->n_start; j <= d->n_end; j++) { 02782 /* tk: 対象トークン node: そのトークンを持つ木構造化辞書ノードID */ 02783 /* tk: token data node: lexicon tree node ID that holds the 'tk' */ 02784 tk = &(d->tlist[tl][d->tindex[tl][j]]); 02785 if (tk->score <= LOG_ZERO) continue; /* invalid node */ 02786 #ifdef SCORE_PRUNING 02787 if (tk->score < d->score_pruning_threshold) { 02788 d->score_pruning_count++; 02789 continue; 02790 } 02791 #endif 02792 node = tk->node; 02793 02794 /*********************************/ 02795 /* 2.1. 単語内遷移 */ 02796 /* word-internal transition */ 02797 /*********************************/ 02798 beam_intra_word(wchmm, d, &tk, j); 02799 02800 /* 遷移元ノードが単語終端ならば */ 02801 /* if source node is end state of a word, */ 02802 if (wchmm->stend[node] != WORD_INVALID) { 02803 02804 /**************************/ 02805 /* 2.2. トレリス単語保存 */ 02806 /* save trellis word */ 02807 /**************************/ 02808 #ifdef SPSEGMENT_NAIST 02809 if (r->config->successive.enabled && !d->after_trigger) { 02810 tre = tk->last_tre; /* dummy */ 02811 } else { 02812 tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); 02813 } 02814 #else 02815 tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); 02816 #endif 02817 /* 単語認識モードでは単語間遷移は必要ない */ 02818 if (lmvar == LM_DFA_WORD) continue; 02819 02820 /******************************/ 02821 /* 2.3. 単語間遷移 */ 02822 /* cross-word transition */ 02823 /******************************/ 02824 02825 #ifdef UNIGRAM_FACTORING 02826 /* ここで処理されるのは isolated words のみ, 02827 shared nodes はまとめてこのループの外で計算する */ 02828 /* Only the isolated words will be processed here. 02829 The shared nodes with constant factoring values will be computed 02830 after this loop */ 02831 #endif 02832 02833 beam_inter_word(wchmm, d, &tk, tre, j); 02834 02835 } /* end of cross-word processing */ 02836 02837 } /* end of main viterbi loop */ 02838 02839 02840 } 02841 02842 #ifdef UNIGRAM_FACTORING 02843 #ifndef WPAIR 02844 02845 if (lmtype == LM_PROB) { 02846 02847 /***********************************************************/ 02848 /* 2.x 単語終端からfactoring付き単語先頭への遷移 ***********/ 02849 /* transition from wordend to shared (factorized) nodes */ 02850 /***********************************************************/ 02851 /* d->wordend_best_* holds the best word ends at this frame. */ 02852 if (d->wordend_best_score > LOG_ZERO) { 02853 beam_inter_word_factoring(wchmm, d); 02854 } 02855 } 02856 #endif 02857 #endif /* UNIGRAM_FACTORING */ 02858 02859 /***************************************/ 02860 /* 3. 状態の出力確率計算 */ 02861 /* compute state output probability */ 02862 /***************************************/ 02863 02864 /* 次段の有効ノードについて出力確率を計算してスコアに加える */ 02865 /* compute outprob for new valid (token assigned) nodes and add to score */ 02866 /* 今扱っているのが入力の最終フレームの場合出力確率は計算しない */ 02867 /* don't calculate the last frame (transition only) */ 02868 02869 #ifdef SCORE_PRUNING 02870 d->score_pruning_max = LOG_ZERO; 02871 minscore = 0.0; 02872 #endif 02873 if (wchmm->hmminfo->multipath) { 02874 if (! final_for_multipath) { 02875 for (j = 0; j < d->tnum[tn]; j++) { 02876 tk = &(d->tlist[tn][d->tindex[tn][j]]); 02877 /* skip non-output state */ 02878 if (wchmm->state[tk->node].out.state == NULL) continue; 02879 tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); 02880 #ifdef SCORE_PRUNING 02881 if (d->score_pruning_max < tk->score) d->score_pruning_max = tk->score; 02882 if (minscore > tk->score) minscore = tk->score; 02883 #endif 02884 } 02885 } 02886 } else { 02887 for (j = 0; j < d->tnum[tn]; j++) { 02888 tk = &(d->tlist[tn][d->tindex[tn][j]]); 02889 tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); 02890 #ifdef SCORE_PRUNING 02891 if (d->score_pruning_max < tk->score) d->score_pruning_max = tk->score; 02892 if (minscore > tk->score) minscore = tk->score; 02893 #endif 02894 } 02895 } 02896 #ifdef SCORE_PRUNING 02897 if (r->config->pass1.score_pruning_width >= 0.0) { 02898 d->score_pruning_threshold = d->score_pruning_max - r->config->pass1.score_pruning_width; 02899 //printf("width=%f, tnum=%d\n", d->score_pruning_max - minscore, d->tnum[tn]); 02900 } else { 02901 // disable score pruning 02902 d->score_pruning_threshold = LOG_ZERO; 02903 } 02904 #endif 02905 /*******************************************************/ 02906 /* 4. スコアでトークンをソートしビーム幅分の上位を決定 */ 02907 /* sort tokens by score up to beam width */ 02908 /*******************************************************/ 02909 02910 /* tlist[tl]を次段のためにリセット */ 02911 clear_tlist(d, tl); 02912 02913 /* ヒープソートを用いてこの段のノード集合から上位(bwidth)個を得ておく */ 02914 /* (上位内の順列は必要ない) */ 02915 sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); 02916 /***************/ 02917 /* 5. 終了処理 */ 02918 /* finalize */ 02919 /***************/ 02920 02921 #ifdef SPSEGMENT_NAIST 02922 if (!r->config->successive.enabled || d->after_trigger) { 02923 #endif 02924 02925 /* call frame-wise callback */ 02926 r->have_interim = FALSE; 02927 if (t > 0) { 02928 if (r->config->output.progout_flag) { 02929 /* 漸次出力: 現フレームのベストパスを一定時間おきに上書き出力 */ 02930 /* progressive result output: output current best path in certain time interval */ 02931 if (((t-1) % r->config->output.progout_interval_frame) == 0) { 02932 r->have_interim = TRUE; 02933 bt_current_max(r, t-1); 02934 } 02935 } 02936 } 02937 02938 /* jlog("DEBUG: %d: %d\n",t,tnum[tn]); */ 02939 /* for debug: output current max word */ 02940 if (debug2_flag) { 02941 bt_current_max_word(r, t-1); 02942 } 02943 02944 #ifdef DETERMINE 02945 if (lmvar == LM_DFA_WORD) { 02946 check_determine_word(r, t-1); 02947 } 02948 #endif 02949 02950 #ifdef SPSEGMENT_NAIST 02951 } 02952 #endif 02953 02954 /* ビーム内ノード数が 0 になってしまったら,強制終了 */ 02955 if (d->tnum[tn] == 0) { 02956 jlog("ERROR: get_back_trellis_proceed: %02d %s: frame %d: no nodes left in beam, now terminates search\n", r->config->id, r->config->name, t); 02957 return(FALSE); 02958 } 02959 02960 return(TRUE); 02961 02962 } 02963 02964 /*************************************************/ 02965 /* frame synchronous beam search --- last frame */ 02966 /* フレーム同期ビーム探索の実行 --- 最終フレーム */ 02967 /*************************************************/ 02968 02994 void 02995 get_back_trellis_end(HTK_Param *param, RecogProcess *r) 02996 { 02997 WCHMM_INFO *wchmm; 02998 FSBeam *d; 02999 int j; 03000 TOKEN2 *tk; 03001 03002 wchmm = r->wchmm; 03003 d = &(r->pass1); 03004 03005 /* 最後にビーム内に残った単語終端トークンを処理する */ 03006 /* process the last wordend tokens */ 03007 03008 03009 if (r->am->hmminfo->multipath) { 03010 /* MULTI-PATH VERSION */ 03011 03012 /* 単語末ノードへの遷移のみ計算 */ 03013 /* only arcs to word-end node is calculated */ 03014 get_back_trellis_proceed(param->samplenum, param, r, TRUE); 03015 03016 } else { 03017 /* NORMAL VERSION */ 03018 03019 /* 最後の遷移のあとの単語終端処理を行う */ 03020 /* process the word-ends at the last frame */ 03021 d->tl = d->tn; 03022 if (d->tn == 0) d->tn = 1; else d->tn = 0; 03023 for (j = d->n_start; j <= d->n_end; j++) { 03024 tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); 03025 if (wchmm->stend[tk->node] != WORD_INVALID) { 03026 save_trellis(r->backtrellis, wchmm, tk, param->samplenum, TRUE); 03027 } 03028 } 03029 03030 } 03031 #ifdef SCORE_PRUNING 03032 if (debug2_flag) jlog("STAT: %d tokens pruned by score beam\n", d->score_pruning_count); 03033 #endif 03034 03035 } 03036 03037 /*************************/ 03038 /* 探索終了 --- 終了処理 */ 03039 /* end of search */ 03040 /*************************/ 03075 void 03076 finalize_1st_pass(RecogProcess *r, int len) 03077 { 03078 BACKTRELLIS *backtrellis; 03079 03080 backtrellis = r->backtrellis; 03081 03082 backtrellis->framelen = len; 03083 03084 /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */ 03085 /* re-arrange backtrellis: index them by frame, and sort by word ID */ 03086 03087 bt_relocate_rw(backtrellis); 03088 bt_sort_rw(backtrellis); 03089 if (backtrellis->num == NULL) { 03090 if (backtrellis->framelen > 0) { 03091 jlog("WARNING: %02d %s: input processed, but no survived word found\n", r->config->id, r->config->name); 03092 } 03093 /* reognition failed */ 03094 r->result.status = J_RESULT_STATUS_FAIL; 03095 return; 03096 } 03097 03098 /* 第1パスのベストパスを結果に格納する */ 03099 /* store 1st pass result (best hypothesis) to result */ 03100 if (r->lmvar == LM_DFA_WORD) { 03101 find_1pass_result_word(len, r); 03102 } else { 03103 find_1pass_result(len, r); 03104 } 03105 } 03106 03121 void 03122 fsbeam_free(FSBeam *d) 03123 { 03124 free_nodes(d); 03125 if (d->pausemodelnames != NULL) { 03126 free(d->pausemodelnames); 03127 free(d->pausemodel); 03128 } 03129 } 03130 03131 /* end of file */