Julius 4.1.5 — second-pass (best-first stack decoding) search module
/* 
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/* declaration of local functions */
static NODE *get_best_from_stack(NODE **start, int *stacknum);
static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo);
static void free_all_nodes(NODE *node);
static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo);
static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo);

/**********************************************************************/
/********** Allocate memory for next-word prediction data *************/
/**********************************************************************/

/**
 * Allocate a next-word list of 'max' entries.
 *
 * Two buffers are allocated: one flat array of NEXTWORD structures
 * (returned via 'root' so it can be freed/realloc'ed as a unit) and one
 * array of pointers into it (the return value, which callers index).
 *
 * @param maxlen [out] set to the allocated number of entries ('max')
 * @param root   [out] set to the flat NEXTWORD buffer (for nw_free/nw_expand)
 * @param max    [in]  number of entries to allocate
 * @return the newly allocated array of NEXTWORD pointers
 */
static NEXTWORD **
nw_malloc(int *maxlen, NEXTWORD **root, int max)
{
  NEXTWORD *nwtmp;
  NEXTWORD **nw;
  int i;

  nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
  nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
  /* point each pointer slot at its element in the flat buffer */
  for (i=0;i<max; i++) {
    nw[i] = &(nwtmp[i]);
  }
  *maxlen = max;
  *root = nwtmp;
  return nw;
}

/**
 * Free a next-word list allocated by nw_malloc()/nw_expand().
 *
 * @param nw   pointer array returned by nw_malloc()
 * @param root flat element buffer returned via nw_malloc()'s 'root'
 */
static void
nw_free(NEXTWORD **nw, NEXTWORD *root)
{
  free(root);
  free(nw);
}

/**
 * Grow a next-word list by 'num' entries.
 *
 * Both the flat element buffer and the pointer array are realloc'ed,
 * and every pointer slot is re-linked, since realloc may have moved
 * the element buffer.
 *
 * @param nwold  current pointer array
 * @param maxlen [in/out] current capacity; updated to the new capacity
 * @param root   [in/out] current flat buffer; updated to the new buffer
 * @param num    number of entries to add
 * @return the (possibly moved) pointer array
 */
static NEXTWORD **
nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root, int num)
{
  NEXTWORD *nwtmp;
  NEXTWORD **nw;
  int i;
  int nwmaxlen;

  nwmaxlen = *maxlen + num;

  nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
  nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
  /* re-link all slots: the flat buffer may have been relocated */
  nw[0] = nwtmp;
  for (i=1;i<nwmaxlen; i++) {
    nw[i] = &(nwtmp[i]);
  }
  *maxlen = nwmaxlen;
  *root = nwtmp;
  return nw;
}


/**********************************************************************/
/********** Hypothesis stack operation ********************************/
/**********************************************************************/

/**
 * Pop the best (highest-score) hypothesis from a stack.
 *
 * The stack is a doubly-linked list kept sorted by score with the best
 * node at *start.  Ownership of the returned node passes to the caller
 * (it must eventually be released with free_node()).
 *
 * @param start    [in/out] pointer to the top of the stack
 * @param stacknum [in/out] current number of nodes; decremented on success
 * @return the popped node, or NULL if the stack is empty
 */
static NODE *
get_best_from_stack(NODE **start, int *stacknum)
{
  NODE *tmp;

  /* return top */
  tmp=(*start);
  if ((*start)!=NULL) {		/* delete it from stack */
    (*start)=(*start)->next;
    if ((*start)!=NULL) (*start)->prev=NULL;
    (*stacknum)--;
    return(tmp);
  }
  else {
    return(NULL);
  }
}

/**
 * Check whether a hypothesis would be accepted by put_to_stack().
 *
 * Does not modify the stack.  Note: when the stack is full, *bottom is
 * dereferenced, so the stack must be non-empty in that case (guaranteed
 * when stacksize >= 1).
 *
 * @return 0 if the node could be pushed, -1 if it would be discarded
 *         (stack full and score not better than the current bottom)
 */
static int
can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
{
  /* stack size check */
  if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
    /* new node is below the bottom: discard it */
    return(-1);
  }
  return(0);
}

/**
 * Insert a hypothesis into a score-sorted stack.
 *
 * Ownership of 'new' passes to the stack.  On overflow either the
 * current bottom node or 'new' itself is freed, whichever scores lower.
 * The insertion point is searched from whichever end (top or bottom) is
 * heuristically closer to the new node's score.
 *
 * @param new       node to insert (freed here if rejected)
 * @param start     [in/out] top of the stack (best score)
 * @param bottom    [in/out] bottom of the stack (worst score)
 * @param stacknum  [in/out] current number of nodes
 * @param stacksize maximum number of nodes to keep
 * @return 0 if inserted, -1 if 'new' was discarded (already freed)
 */
static int
put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
{
  NODE *tmp;

  /* stack size is going to increase... */
  (*stacknum)++;

  /* stack size check */
  if ((*stacknum)>stacksize) {
    /* stack size overflow */
    (*stacknum)--;
    if ((*bottom)->score < new->score) {
      /* new node will be inserted in the stack: free the bottom */
      tmp=(*bottom);
      (*bottom)->prev->next=NULL;
      (*bottom)=(*bottom)->prev;
      free_node(tmp);
    } else {
      /* new node is below the bottom: discard it */
      free_node(new);
      return(-1);
    }
  }

  /* insert new node on edge */
  if ((*start)==NULL) {		/* no node in stack */
    /* new node is the only node */
    (*start)=new;
    (*bottom)=new;
    new->next=NULL;
    new->prev=NULL;
    return(0);
  }
  if ((*start)->score <= new->score) {
    /* insert on the top */
    new->next = (*start);
    new->next->prev = new;
    (*start)=new;
    new->prev=NULL;
    return(0);
  }
  if ((*bottom)->score >= new->score) {
    /* insert on the bottom */
    new->prev = (*bottom);
    new->prev->next = new;
    (*bottom)=new;
    new->next=NULL;
    return(0);
  }

  /* now the new node is strictly between (*start) and (*bottom):
     pick the nearer end to start the linear search from */
  if (((*start)->score + (*bottom)->score) / 2 > new->score) {
    /* search from bottom */
    tmp=(*bottom);
    while(tmp->score < new->score) tmp=tmp->prev;
    new->prev=tmp;
    new->next=tmp->next;
    tmp->next->prev=new;
    tmp->next=new;
  } else {
    /* search from start */
    tmp=(*start);
    while(tmp->score > new->score) tmp=tmp->next;
    new->next=tmp;
    new->prev=tmp->prev;
    tmp->prev->next=new;
    tmp->prev=new;
  }
  return(0);
}

/**
 * Debug helper: pop every hypothesis from a stack, log its score and
 * word sequence, and free it.  The stack is empty on return.
 *
 * @param start    [in/out] top of the stack; NULL on return
 * @param stacknum [in/out] node count; 0 on return
 * @param winfo    word dictionary used to print word strings
 */
static void
put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo)
{
  NODE *ntmp;

  jlog("DEBUG: hypotheses remained in global stack\n");
  while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
    jlog("DEBUG: %3d: s=%f",*stacknum, ntmp->score);
    put_hypo_woutput(ntmp, winfo);
    free_node(ntmp);
  }
}
s=%f",*stacknum, ntmp->score); 00387 put_hypo_woutput(ntmp, winfo); 00388 free_node(ntmp); 00389 } 00390 } 00391 00404 static void 00405 free_all_nodes(NODE *start) 00406 { 00407 NODE *tmp; 00408 NODE *next; 00409 00410 tmp=start; 00411 while(tmp) { 00412 next=tmp->next; 00413 free_node(tmp); 00414 tmp=next; 00415 } 00416 } 00417 00418 00419 #ifdef CONFIDENCE_MEASURE 00420 00421 /**********************************************************************/ 00422 /********** 単語信頼度の計算 ******************************************/ 00423 /********** Confidence scoring ****************************************/ 00424 /**********************************************************************/ 00425 00426 #ifdef CM_SEARCH 00427 /**************************************************************/ 00428 /**** CM computation method 1(default): ******/ 00429 /**** - Use local posterior probabilities while search ******/ 00430 /**** - Obtain CM at hypothesis expansion time ******/ 00431 /**************************************************************/ 00432 00451 static void 00452 cm_init(StackDecode *sd, int wnum, LOGPROB cm_alpha 00453 #ifdef CM_MULTIPLE_ALPHA 00454 , cm_alpha_num 00455 #endif 00456 ) 00457 { 00458 sd->l_stacksize = wnum; 00459 sd->l_start = sd->l_bottom = NULL; 00460 sd->l_stacknum = 0; 00461 sd->cm_alpha = cm_alpha; 00462 #ifdef CM_MULTIPLE_ALPHA 00463 if (sd->cmsumlist) { 00464 if (sd->cmsumlistlen < cm_alpha_num) { 00465 free(sd->cmsumlist); 00466 sd->cmsumlist = NULL; 00467 } 00468 } 00469 if (sd->cmsumlist == NULL) { 00470 sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num); 00471 sd->cmsumlistlen = cm_alpha_num; 00472 } 00473 #endif 00474 } 00475 00490 static void 00491 cm_store(StackDecode *sd, NODE *new) 00492 { 00493 /* store the generated hypo into local stack */ 00494 put_to_stack(new, &(sd->l_start), &(sd->l_bottom), &(sd->l_stacknum), sd->l_stacksize); 00495 } 00496 00512 static void 00513 cm_sum_score(StackDecode *sd 00514 #ifdef 
CM_MULTIPLE_ALPHA 00515 , bgn, end, step 00516 #endif 00517 ) 00518 { 00519 NODE *node; 00520 LOGPROB sum; 00521 #ifdef CM_MULTIPLE_ALPHA 00522 LOGPROB a; 00523 int j; 00524 #endif 00525 00526 if (sd->l_start == NULL) return; /* no hypo */ 00527 sd->cm_tmpbestscore = sd->l_start->score; /* best hypo is at the top of the stack */ 00528 00529 #ifdef CM_MULTIPLE_ALPHA 00530 for (j = 0, a = bgn; a <= end; a += step) { 00531 sum = 0.0; 00532 for(node = sd->l_start; node; node = node->next) { 00533 sum += pow(10, a * (node->score - sd->cm_tmpbestscore)); 00534 } 00535 sd->cmsumlist[j++] = sum; /* store sums for each alpha coef. */ 00536 } 00537 #else 00538 sum = 0.0; 00539 for(node = sd->l_start; node; node = node->next) { 00540 sum += pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)); 00541 } 00542 sd->cm_tmpsum = sum; /* store sum */ 00543 #endif 00544 00545 } 00546 00563 static void 00564 cm_set_score(StackDecode *sd, NODE *node 00565 #ifdef CM_MULTIPLE_ALPHA 00566 , bgn, end, step 00567 #endif 00568 ) 00569 { 00570 #ifdef CM_MULTIPLE_ALPHA 00571 int j; 00572 LOGPROB a; 00573 #endif 00574 00575 #ifdef CM_MULTIPLE_ALPHA 00576 for (j = 0, a = bgn; a <= end; a += step) { 00577 node->cmscore[node->seqnum-1][j] = pow(10, a * (node->score - sd->cm_tmpbestscore)) / sd->cmsumlist[j]; 00578 j++; 00579 } 00580 #else 00581 node->cmscore[node->seqnum-1] = pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)) / sd->cm_tmpsum; 00582 #endif 00583 } 00584 00601 static NODE * 00602 cm_get_node(StackDecode *sd) 00603 { 00604 return(get_best_from_stack(&(sd->l_start), &(sd->l_stacknum))); 00605 } 00606 00607 #endif /* CM_SEARCH */ 00608 00609 #ifdef CM_NBEST 00610 /*****************************************************************/ 00611 /**** CM computation method 2: conventional N-best scoring *******/ 00612 /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/ 00613 /*****************************************************************/ 00614 00634 static 
void 00635 cm_compute_from_nbest(StackDecode *sd, NODE *start, int stacknum, JCONF_SEARCH *jconf) 00636 { 00637 NODE *node; 00638 LOGPROB bestscore, sum, s; 00639 WORD_ID w; 00640 int i; 00641 LOGPROB cm_alpha; 00642 int j; 00643 00644 /* prepare buffer */ 00645 #ifdef CM_MULTIPLE_ALPHA 00646 if (sd->cmsumlist) { 00647 if (sd->cmsumlistlen < jconf->annotate.cm_alpha_num) { 00648 free(sd->cmsumlist); 00649 sd->cmsumlist = NULL; 00650 } 00651 } 00652 if (sd->cmsumlist == NULL) { 00653 sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * jconf->annotate.cm_alpha_num); 00654 sd->cmsumlistlen = cm_alpha_num; 00655 } 00656 #endif 00657 if (sd->sentcm == NULL) { /* not allocated yet */ 00658 sd->sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum); 00659 sd->sentnum = stacknum; 00660 } else if (sd->sentnum < stacknum) { /* need expanded */ 00661 sd->sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum); 00662 sd->sentnum = stacknum; 00663 } 00664 if (sd->wordcm == NULL) { 00665 sd->wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num); 00666 } 00667 00668 cm_alpha = jconf->annotate.cm_alpha; 00669 #ifdef CM_MULTIPLE_ALPHA 00670 for (j = 0, cm_alpha = jconf->annotate.cm_alpha_bgn; cm_alpha <= jconf->annotate.cm_alpha_end; cm_alpha += jconf->annotate.cm_alpha_step) { 00671 #endif 00672 /* clear whole word cm buffer */ 00673 for(w=0;w<winfo->num;w++) { 00674 sd->wordcm[w] = 0.0; 00675 } 00676 /* get best score */ 00677 bestscore = start->score; 00678 /* compute sum score of all hypothesis */ 00679 sum = 0.0; 00680 for (node = start; node != NULL; node = node->next) { 00681 sum += pow(10, cm_alpha * (node->score - bestscore)); 00682 } 00683 /* compute sentence posteriori probabilities */ 00684 i = 0; 00685 for (node = start; node != NULL; node = node->next) { 00686 sd->sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum; 00687 i++; 00688 } 00689 /* compute word posteriori probabilities */ 00690 i = 0; 00691 for (node = start; node != NULL; node = 
node->next) { 00692 for (w=0;w<node->seqnum;w++) { 00693 sd->wordcm[node->seq[w]] += sd->sentcm[i]; 00694 } 00695 i++; 00696 } 00697 /* store the probabilities to node */ 00698 for (node = start; node != NULL; node = node->next) { 00699 for (w=0;w<node->seqnum;w++) { 00700 #ifdef CM_MULTIPLE_ALPHA 00701 node->cmscore[w][j] = sd->wordcm[node->seq[w]]; 00702 #else 00703 node->cmscore[w] = sd->wordcm[node->seq[w]]; 00704 #endif 00705 } 00706 } 00707 #ifdef CM_MULTIPLE_ALPHA 00708 j++; 00709 } 00710 #endif 00711 } 00712 00713 #endif /* CM_NBEST */ 00714 00715 #endif /* CONFIDENCE_MEASURE */ 00716 00717 00718 /**********************************************************************/ 00719 /********** Enveloped best-first search *******************************/ 00720 /**********************************************************************/ 00721 00722 /* 00723 * 1. Word envelope 00724 * 00725 * 一種の仮説ビーム幅を設定: 展開元となった仮説の数をその仮説長(単語数) 00726 * ごとにカウントする. 一定数を越えたらそれより短い仮説は以後展開しない. 00727 * 00728 * Introduce a kind of beam width to search tree: count the number of 00729 * popped hypotheses per the depth of the hypotheses, and when a count 00730 * in a certain depth reaches the threshold, all hypotheses shorter than 00731 * the depth will be dropped from candidates. 00732 * 00733 */ 00734 00748 static void 00749 wb_init(StackDecode *s) 00750 { 00751 int i; 00752 for(i=0;i<=MAXSEQNUM;i++) s->hypo_len_count[i] = 0; 00753 s->maximum_filled_length = -1; 00754 } 00755 00782 static boolean 00783 wb_ok(StackDecode *s, NODE *now, int width) 00784 { 00785 if (now->seqnum <= s->maximum_filled_length) { 00786 /* word expansion is not allowed because a word expansion count 00787 at deeper level has already reached the limit */ 00788 return FALSE; 00789 } else { 00790 /* word expansion is possible. 
Increment the word expansion count 00791 of the given depth */ 00792 s->hypo_len_count[now->seqnum]++; 00793 if (s->hypo_len_count[now->seqnum] > width) { 00794 /* the word expansion count of this level has reached the limit, so 00795 set the beam-filled depth to this level to inhibit further 00796 expansion of shorter hypotheses. */ 00797 if (s->maximum_filled_length < now->seqnum) s->maximum_filled_length = now->seqnum; 00798 } 00799 return TRUE; 00800 } 00801 } 00802 00803 #ifdef SCAN_BEAM 00804 /* 00805 * 2. Score envelope 00806 * 00807 * Viterbi計算量の削減: 入力フレームごとの最大尤度 (score envelope) を 00808 * 全仮説にわたって記録しておく. 仮説の前向き尤度計算時に,その envelope 00809 * から一定幅以上スコアが下回るとき,Viterbi パスの演算を中断する. 00810 * 00811 * ここでは,取り出した仮説からフレームごとの score envelope を更新する 00812 * 部分が記述されている. Envelope を考慮した Viterbi 計算の実際は 00813 * scan_word() を参照のこと. 00814 * 00815 * Reduce computation cost of hypothesis Viterbi processing by setting a 00816 * "score envelope" that holds the maximum scores at every frames 00817 * throughout the expanded hypotheses. When calculating Viterbi path 00818 * on HMM trellis for updating score of popped hypothesis, Viterbi paths 00819 * that goes below a certain range from the score envelope are dropped. 00820 * 00821 * These functions are for updating the score envelope according to the 00822 * popped hypothesis. For actual Viterbi process with score envelope, 00823 * see scan_word(). 
00824 * 00825 */ 00826 00842 static void 00843 envl_init(StackDecode *s, int framenum) 00844 { 00845 int i; 00846 for(i=0;i<framenum;i++) s->framemaxscore[i] = LOG_ZERO; 00847 } 00848 00865 static void 00866 envl_update(StackDecode *s, NODE *n, int framenum) 00867 { 00868 int t; 00869 for(t=framenum-1;t>=0;t--) { 00870 if (s->framemaxscore[t] < n->g[t]) s->framemaxscore[t] = n->g[t]; 00871 } 00872 } 00873 #endif /* SCAN_BEAM */ 00874 00875 00876 /**********************************************************************/ 00877 /********** Short pause segmentation **********************************/ 00878 /**********************************************************************/ 00879 00903 void 00904 segment_set_last_nword(NODE *hypo, RecogProcess *r) 00905 { 00906 int i; 00907 WORD_ID w; 00908 00909 if (r->sp_break_last_nword_allow_override) { 00910 for(i=0;i<hypo->seqnum;i++) { 00911 w = hypo->seq[i]; 00912 if (w != r->sp_break_last_word 00913 && !is_sil(w, r) 00914 && !r->lm->winfo->is_transparent[w] 00915 ) { 00916 r->sp_break_last_nword = w; 00917 break; 00918 } 00919 } 00920 #ifdef SP_BREAK_DEBUG 00921 printf("sp_break_last_nword=%d[%s]\n", r->sp_break_last_nword, r->lm->winfo->woutput[r->sp_break_last_nword]); 00922 #endif 00923 } else { 00924 r->sp_break_last_nword = WORD_INVALID; 00925 } 00926 } 00927 00928 00929 /**********************************************************************/ 00930 /********* Debug output of hypothesis while search ********************/ 00931 /**********************************************************************/ 00932 00947 static void 00948 put_hypo_woutput(NODE *hypo, WORD_INFO *winfo) 00949 { 00950 int i,w; 00951 00952 if (hypo != NULL) { 00953 for (i=hypo->seqnum-1;i>=0;i--) { 00954 w = hypo->seq[i]; 00955 jlog(" %s", winfo->woutput[w]); 00956 } 00957 } 00958 jlog("\n"); 00959 } 00960 00975 static void 00976 put_hypo_wname(NODE *hypo, WORD_INFO *winfo) 00977 { 00978 int i,w; 00979 00980 if (hypo != NULL) { 00981 for 
(i=hypo->seqnum-1;i>=0;i--) { 00982 w = hypo->seq[i]; 00983 jlog(" %s", winfo->wname[w]); 00984 } 00985 } 00986 jlog("\n"); 00987 } 00988 01001 static void 01002 store_result_pass2(NODE *hypo, RecogProcess *r) 01003 { 01004 int i; 01005 Sentence *s; 01006 01007 s = &(r->result.sent[r->result.sentnum]); 01008 01009 s->word_num = hypo->seqnum; 01010 for (i = 0; i < hypo->seqnum; i++) { 01011 s->word[i] = hypo->seq[hypo->seqnum - 1 - i]; 01012 } 01013 #ifdef CONFIDENCE_MEASURE 01014 for (i = 0; i < hypo->seqnum; i++) { 01015 s->confidence[i] = hypo->cmscore[hypo->seqnum - 1 - i]; 01016 } 01017 #endif 01018 01019 s->score = hypo->score; 01020 s->score_lm = hypo->totallscore; 01021 s->score_am = hypo->score - hypo->totallscore; 01022 if (r->lmtype == LM_DFA) { 01023 /* output which grammar the hypothesis belongs to on multiple grammar */ 01024 /* determine only by the last word */ 01025 if (multigram_get_all_num(r->lm) > 0) { 01026 s->gram_id = multigram_get_gram_from_category(r->lm->winfo->wton[hypo->seq[0]], r->lm); 01027 } else { 01028 s->gram_id = 0; 01029 } 01030 } 01031 01032 r->result.sentnum++; 01033 /* add to tail */ 01034 } 01035 01036 01037 /**********************************************************************/ 01038 /******** Output top 'ncan' hypotheses in a stack and free all ********/ 01039 /**********************************************************************/ 01040 01083 static void 01084 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, RecogProcess *r, HTK_Param *param) 01085 { 01086 NODE *now; 01087 int num; 01088 01089 #ifdef CM_NBEST 01090 /* compute CM from the N-best sentence candidates */ 01091 cm_compute_from_nbest(&(r->pass2), *r_start, *r_stacknum, r->config); 01092 #endif 01093 num = 0; 01094 while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) { 01095 num++; 01096 /* output result */ 01097 store_result_pass2(now, r); 01098 01099 /* if in sp segmentation mode, */ 01100 /* set the 
last context-aware word for next recognition */ 01101 if (r->lmtype == LM_PROB && r->config->successive.enabled && num == 1) segment_set_last_nword(now, r); 01102 01103 free_node(now); 01104 } 01105 /* free the rest */ 01106 if (now != NULL) free_node(now); 01107 free_all_nodes(*r_start); 01108 } 01109 01145 void 01146 pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final) 01147 { 01148 NODE *now; 01149 int i, j; 01150 01151 /* 探索失敗 */ 01152 /* search failed */ 01153 01154 /* make temporal hypothesis data from the result of previous 1st pass */ 01155 now = newnode(r); 01156 for (i=0;i<r->pass1_wnum;i++) { 01157 now->seq[i] = r->pass1_wseq[r->pass1_wnum-1-i]; 01158 } 01159 now->seqnum = r->pass1_wnum; 01160 now->score = r->pass1_score; 01161 #ifdef CONFIDENCE_MEASURE 01162 /* fill in null values */ 01163 #ifdef CM_MULTIPLE_ALPHA 01164 for(j=0;j<jconf->annotate.cm_alpha_num;j++) { 01165 for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0; 01166 } 01167 #else 01168 for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0; 01169 #endif 01170 #endif /* CONFIDENCE_MEASURE */ 01171 01172 if (r->lmtype == LM_PROB && r->config->successive.enabled) { 01173 /* if in sp segment mode, */ 01174 /* find segment restart words from 1st pass result */ 01175 segment_set_last_nword(now, r); 01176 } 01177 01178 if (use_1pass_as_final) { 01179 /* 第1パスの結果をそのまま出力する */ 01180 /* output the result of the previous 1st pass as a final result. 
*/ 01181 store_result_pass2(now, r); 01182 r->result.status = J_RESULT_STATUS_SUCCESS; 01183 } else { 01184 /* store output as failure */ 01185 r->result.status = J_RESULT_STATUS_FAIL; 01186 //callback_exec(CALLBACK_RESULT, r); 01187 } 01188 01189 free_node(now); 01190 } 01191 01192 01193 /**********************************************************************/ 01194 /********* Main stack decoding function *******************************/ 01195 /**********************************************************************/ 01196 01224 void 01225 wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num) 01226 { 01227 /* 文仮説スタック */ 01228 /* hypothesis stack (double-linked list) */ 01229 int stacknum; /* current stack size */ 01230 NODE *start = NULL; /* top node */ 01231 NODE *bottom = NULL; /* bottom node */ 01232 01233 /* 認識結果格納スタック(結果はここへいったん集められる) */ 01234 /* result sentence stack (found results will be stored here and then re-ordered) */ 01235 int r_stacksize; 01236 int r_stacknum; 01237 NODE *r_start = NULL; 01238 NODE *r_bottom = NULL; 01239 01240 /* ワークエリア */ 01241 /* work area */ 01242 NEXTWORD fornoise; /* Dummy NEXTWORD data for short-pause insertion handling */ 01243 01244 NEXTWORD **nextword, *nwroot; /* buffer to store predicted words */ 01245 int maxnwnum; /* current allocated number of words in nextword */ 01246 int nwnum; /* current number of words in nextword */ 01247 NODE *now, *new; /* popped current hypo., expanded new hypo. 
*/ 01248 NODE *now_noise; /* for inserting/deleting noise word */ 01249 boolean now_noise_calced; 01250 boolean acc; 01251 int t; 01252 int w,i,j; 01253 LOGPROB last_score = LOG_ZERO; 01254 /* for graph generation */ 01255 LOGPROB prev_score; 01256 WordGraph *wordgraph_root = NULL; 01257 boolean merged_p; 01258 #ifdef GRAPHOUT_DYNAMIC 01259 int dynamic_merged_num = 0; 01260 WordGraph *wtmp; 01261 LOGPROB lscore_prev; 01262 #endif 01263 #ifdef GRAPHOUT_SEARCH 01264 int terminate_search_num = 0; 01265 #endif 01266 01267 /* local temporal parameter */ 01268 int stacksize, ncan, maxhypo, peseqlen; 01269 JCONF_SEARCH *jconf; 01270 WORD_INFO *winfo; 01271 NGRAM_INFO *ngram; 01272 DFA_INFO *gdfa; 01273 BACKTRELLIS *backtrellis; 01274 StackDecode *dwrk; 01275 01276 if (r->lmtype == LM_DFA) { 01277 if (debug2_flag) jlog("DEBUG: only words in these categories will be expanded: %d-%d\n", cate_bgn, cate_bgn + cate_num-1); 01278 } 01279 01280 /* 01281 * 初期化 01282 * Initialize 01283 */ 01284 01285 /* just for quick access */ 01286 jconf = r->config; 01287 winfo = r->lm->winfo; 01288 if (r->lmtype == LM_PROB) { 01289 ngram = r->lm->ngram; 01290 } else if (r->lmtype == LM_DFA) { 01291 gdfa = r->lm->dfa; 01292 } 01293 backtrellis = r->backtrellis; 01294 dwrk = &(r->pass2); 01295 01296 stacksize = jconf->pass2.stack_size; 01297 ncan = jconf->pass2.nbest; 01298 maxhypo = jconf->pass2.hypo_overflow; 01299 peseqlen = backtrellis->framelen; 01300 01301 /* store data for sub routines */ 01302 r->peseqlen = backtrellis->framelen; 01303 //recog->ccd_flag = recog->jconf->am.ccd_flag; 01304 /* 予測単語格納領域を確保 */ 01305 /* malloc area for word prediction */ 01306 /* the initial maximum number of nextwords is the size of vocabulary */ 01307 nextword = nw_malloc(&maxnwnum, &nwroot, winfo->num); 01308 /* 前向きスコア計算用の領域を確保 */ 01309 /* malloc are for forward viterbi (scan_word()) */ 01310 malloc_wordtrellis(r); /* scan_word用領域 */ 01311 /* 仮説スタック初期化 */ 01312 /* initialize hypothesis stack */ 01313 start = 
bottom = NULL; 01314 stacknum = 0; 01315 /* 結果格納スタック初期化 */ 01316 /* initialize result stack */ 01317 r_stacksize = ncan; 01318 r_start = r_bottom = NULL; 01319 r_stacknum = 0; 01320 01321 /* カウンタ初期化 */ 01322 /* initialize counter */ 01323 dwrk->popctr = 0; 01324 dwrk->genectr = 0; 01325 dwrk->pushctr = 0; 01326 dwrk->finishnum = 0; 01327 01328 #ifdef CM_SEARCH 01329 /* initialize local stack */ 01330 cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha 01331 #ifdef CM_MULTIPLE_ALPHA 01332 , jconf->annotate.cm_alpha_num 01333 #endif 01334 ); 01335 #endif 01336 #ifdef SCAN_BEAM 01337 /* prepare and initialize score envelope */ 01338 dwrk->framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen); 01339 envl_init(dwrk, peseqlen); 01340 #endif /* SCAN_BEAM */ 01341 01342 /* エンベロープ探索用の単語長別展開数カウンタを初期化 */ 01343 /* initialize counters for envelope search */ 01344 if (jconf->pass2.enveloped_bestfirst_width >= 0) wb_init(dwrk); 01345 01346 if (jconf->graph.enabled) { 01347 wordgraph_init(r->wchmm); 01348 } 01349 01350 /* 01351 * 初期仮説(1単語からなる)を得, 文仮説スタックにいれる 01352 * get a set of initial words from LM function and push them as initial 01353 * hypotheses 01354 */ 01355 /* the first words will be stored in nextword[] */ 01356 if (r->lmtype == LM_PROB) { 01357 nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, r); 01358 } else if (r->lmtype == LM_DFA) { 01359 nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); 01360 /* 溢れたら、バッファを増やして再チャレンジ */ 01361 /* If the number of nextwords can exceed the buffer size, expand the 01362 nextword data area */ 01363 while (nwnum < 0) { 01364 nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); 01365 nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); 01366 } 01367 } 01368 01369 if (debug2_flag) { 01370 jlog("DEBUG: %d words in wordtrellis as first hypothesis\n", nwnum); 01371 } 01372 01373 /* store them to stack */ 01374 for (w = 0; w < nwnum; w++) { 01375 if (r->lmtype == LM_DFA) { 01376 /* limit word hypothesis */ 
01377 if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { 01378 continue; 01379 } 01380 } 01381 /* generate new hypothesis */ 01382 new = newnode(r); 01383 start_word(new, nextword[w], param, r); 01384 if (r->lmtype == LM_DFA) { 01385 if (new->score <= LOG_ZERO) { /* not on trellis */ 01386 free_node(new); 01387 continue; 01388 } 01389 } 01390 dwrk->genectr++; 01391 #ifdef CM_SEARCH 01392 /* store the local hypothesis to temporal stack */ 01393 cm_store(dwrk, new); 01394 #else 01395 /* put to stack */ 01396 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01397 dwrk->current = new; 01398 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 01399 if (jconf->graph.enabled) { 01400 new->prevgraph = NULL; 01401 new->lastcontext = NULL; 01402 } 01403 dwrk->pushctr++; 01404 } 01405 #endif 01406 } 01407 #ifdef CM_SEARCH 01408 /* compute score sum */ 01409 cm_sum_score(dwrk 01410 #ifdef CM_MULTIPLE_ALPHA 01411 , jconf->annotate.cm_alpha_bgn 01412 , jconf->annotate.cm_alpha_end 01413 , jconf->annotate.cm_alpha_step 01414 #endif 01415 ); 01416 01417 /* compute CM and put the generated hypotheses to global stack */ 01418 while ((new = cm_get_node(dwrk)) != NULL) { 01419 cm_set_score(dwrk, new 01420 #ifdef CM_MULTIPLE_ALPHA 01421 , jconf->annotate.cm_alpha_bgn 01422 , jconf->annotate.cm_alpha_end 01423 , jconf->annotate.cm_alpha_step 01424 #endif 01425 ); 01426 #ifdef CM_SEARCH_LIMIT 01427 if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres 01428 #ifdef CM_SEARCH_LIMIT_AFTER 01429 && dwrk->finishnum > 0 01430 #endif 01431 ) { 01432 free_node(new); 01433 continue; 01434 } 01435 #endif /* CM_SEARCH_LIMIT */ 01436 01437 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01438 dwrk->current = new; 01439 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 01440 if (r->graphout) { 01441 new->prevgraph = NULL; 01442 new->lastcontext = NULL; 01443 } 01444 dwrk->pushctr++; 01445 } 01446 } 
01447 #endif 01448 01449 if (debug2_flag) { 01450 jlog("DEBUG: %d pushed\n", dwrk->pushctr); 01451 } 01452 01453 /********************/ 01454 /* main search loop */ 01455 /********************/ 01456 01457 for (;;) { 01458 01459 /* if terminate signal has been received, cancel this input */ 01460 /* 01461 * if (recog->process_want_terminate) { 01462 * jlog("DEBUG: process terminated by request\n"); 01463 * break; 01464 * } 01465 */ 01466 01467 /* 01468 * 仮説スタックから最もスコアの高い仮説を取り出す 01469 * pop the top hypothesis from stack 01470 */ 01471 #ifdef DEBUG 01472 jlog("DEBUG: get one hypothesis\n"); 01473 #endif 01474 now = get_best_from_stack(&start,&stacknum); 01475 if (now == NULL) { /* stack empty ---> 探索終了*/ 01476 jlog("WARNING: %02d %s: hypothesis stack exhausted, terminate search now\n", r->config->id, r->config->name); 01477 jlog("STAT: %02d %s: %d sentences have been found\n", r->config->id, r->config->name, dwrk->finishnum); 01478 break; 01479 } 01480 /* (bogus score check) */ 01481 if (now->score <= LOG_ZERO) { 01482 free_node(now); 01483 continue; 01484 } 01485 01486 01487 /* 単語グラフ用に pop 仮説の f スコアを一時保存 */ 01488 if (r->graphout) { 01489 prev_score = now->score; 01490 } 01491 01492 /* word envelope チェック */ 01493 /* consult word envelope */ 01494 if (jconf->pass2.enveloped_bestfirst_width >= 0) { 01495 if (!wb_ok(dwrk, now, jconf->pass2.enveloped_bestfirst_width)) { 01496 /* この仮説長における展開元仮説数の累計数は既に閾値を越えている. 01497 そのため,この仮説は捨てる. 
*/ 01498 /* the number of popped hypotheses at the length already 01499 reaches its limit, so the current popped hypothesis should 01500 be discarded here with no expansion */ 01501 if (debug2_flag) { 01502 jlog("DEBUG: popped but pruned by word envelope:"); 01503 put_hypo_woutput(now, r->lm->winfo); 01504 } 01505 free_node(now); 01506 continue; 01507 } 01508 } 01509 01510 #ifdef CM_SEARCH_LIMIT_POP 01511 if (now->cmscore[now->seqnum-1] < jconf->annotate.cm_cut_thres_pop) { 01512 free_node(now); 01513 continue; 01514 } 01515 #endif /* CM_SEARCH_LIMIT_POP */ 01516 01517 dwrk->popctr++; 01518 01519 /* (for debug) 取り出した仮説とそのスコアを出力 */ 01520 /* output information of the popped hypothesis to stdout */ 01521 if (debug2_flag) { 01522 jlog("DEBUG: --- pop %d:\n", dwrk->popctr); 01523 jlog("DEBUG: "); put_hypo_woutput(now, r->lm->winfo); 01524 jlog("DEBUG: "); put_hypo_wname(now, r->lm->winfo); 01525 jlog("DEBUG: %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]); 01526 jlog("DEBUG: last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt); 01527 } 01528 01529 dwrk->current = now; 01530 //callback_exec(CALLBACK_DEBUG_PASS2_POP, r); 01531 01532 if (r->graphout) { 01533 01534 #ifdef GRAPHOUT_DYNAMIC 01535 /* merge last word in popped hypo if possible */ 01536 wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p, jconf); 01537 if (wtmp != NULL) { /* wtmp holds merged word */ 01538 dynamic_merged_num++; 01539 01540 lscore_prev = (now->prevgraph) ? 
now->prevgraph->lscore_tmp : 0.0; 01541 01542 if (now->prevgraph != NULL) { 01543 if (now->prevgraph->saved) { 01544 j_internal_error("wchmm_fbs: already saved??\n"); 01545 } 01546 wordgraph_free(now->prevgraph); 01547 } 01548 01549 if (now->lastcontext != NULL 01550 && now->lastcontext != wtmp /* avoid self loop */ 01551 ) { 01552 wordgraph_check_and_add_leftword(now->lastcontext, wtmp, lscore_prev); 01553 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT 01554 if (merged_p) { 01555 if (wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev) == FALSE) { 01556 merged_p = TRUE; 01557 } else { 01558 merged_p = FALSE; 01559 } 01560 } else { 01561 wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); 01562 } 01563 #else 01564 wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); 01565 #endif 01566 } 01567 01568 now->prevgraph = wtmp; /* change word to the merged one */ 01569 01570 /*printf("last word merged\n");*/ 01571 /* previous still remains at memory here... (will be purged later) */ 01572 } else { 01573 wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); 01574 } 01575 #ifdef GRAPHOUT_SEARCH 01576 /* if recent hypotheses are included in the existing graph, terminate */ 01577 if (merged_p && now->endflag == FALSE 01578 #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION 01579 /* Do not apply search termination by graph merging 01580 until the first sentence candidate is found. 
*/ 01581 && (jconf->graph.graphout_search_delay == FALSE || dwrk->finishnum > 0) 01582 #endif 01583 ) { 01584 terminate_search_num++; 01585 free_node(now); 01586 continue; 01587 } 01588 #endif 01589 #else /* ~GRAPHOUT_DYNAMIC */ 01590 /* always save */ 01591 wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); 01592 #endif /* ~GRAPHOUT_DYNAMIC */ 01593 } 01594 01595 /* 取り出した仮説のスコアを元に score envelope を更新 */ 01596 /* update score envelope using the popped hypothesis */ 01597 envl_update(dwrk, now, peseqlen); 01598 01599 /* 01600 * 取り出した仮説の受理フラグが既に立っていれば, 01601 * その仮説は探索終了とみなし,結果として出力して次のループへ. 01602 * 01603 * If the popped hypothesis already reached to the end, 01604 * we can treat it as a recognition result. 01605 */ 01606 #ifdef DEBUG 01607 VERMES("endflag check\n"); 01608 #endif 01609 01610 if (now->endflag) { 01611 if (debug2_flag) { 01612 jlog("DEBUG: This is a full sentence candidate\n"); 01613 } 01614 /* quick, dirty hack */ 01615 if (now->score == last_score) { 01616 free_node(now); 01617 continue; 01618 } else { 01619 last_score = now->score; 01620 } 01621 01622 dwrk->finishnum++; 01623 if (debug2_flag) { 01624 jlog("DEBUG: %d-th sentence found\n", dwrk->finishnum); 01625 } 01626 01627 /* 一定数の仮説が得られたあとスコアでソートするため, 01628 一時的に別のスタックに格納しておく */ 01629 /* store the result to result stack 01630 after search is finished, they will be re-ordered and output */ 01631 put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize); 01632 /* 指定数の文仮説が得られたなら探索を終了する */ 01633 /* finish search if specified number of results are found */ 01634 if (dwrk->finishnum >= ncan) { 01635 break; 01636 } else { 01637 continue; 01638 } 01639 01640 } /* end of now->endflag */ 01641 01642 01643 /* 01644 * 探索失敗を検出する. 
01645 * 仮説数が maxhypo 以上展開されたら, もうこれ以上は探索しない 01646 * 01647 * detecting search failure: 01648 * if the number of expanded hypotheses reaches maxhypo, giveup further search 01649 */ 01650 #ifdef DEBUG 01651 jlog("DEBUG: loop end check\n"); 01652 #endif 01653 if (dwrk->popctr >= maxhypo) { 01654 jlog("WARNING: %02d %s: num of popped hypotheses reached the limit (%d)\n", r->config->id, r->config->name, maxhypo); 01655 /* (for debug) 探索失敗時に、スタックに残った情報を吐き出す */ 01656 /* (for debug) output all hypothesis remaining in the stack */ 01657 if (debug2_flag) put_all_in_stack(&start, &stacknum, r->lm->winfo); 01658 free_node(now); 01659 break; /* end of search */ 01660 } 01661 /* 仮説長が一定値を越えたとき,その仮説を破棄する */ 01662 /* check hypothesis word length overflow */ 01663 if (now->seqnum >= MAXSEQNUM) { 01664 jlog("ERROR: sentence length exceeded system limit ( > %d)\n", MAXSEQNUM); 01665 free_node(now); 01666 continue; 01667 } 01668 01669 #ifndef GRAPHOUT_PRECISE_BOUNDARY 01670 if (r->graphout) { 01671 /* if monophone (= no backscan), the tail g score should be kept here */ 01672 /* else, updated tail g score will be computed in scan_word() */ 01673 if(!jconf->am.ccd_flag) { 01674 now->tail_g_score = now->g[now->bestt]; 01675 } 01676 } 01677 #endif 01678 01679 /* 01680 * 前向きスコアを更新する: 最後の単語の部分の前向きスコアを計算する. 01681 * update forward score: compute forward trellis for the last word 01682 */ 01683 #ifdef DEBUG 01684 jlog("DEBUG: scan_word\n"); 01685 #endif 01686 scan_word(now, param, r); 01687 if (now->score < LOG_ZERO) { /* another end-of-search detecter */ 01688 jlog("WARNING: too low score, ignore: score=%f",now->score); 01689 put_hypo_woutput(now, r->lm->winfo); 01690 free_node(now); 01691 continue; 01692 } 01693 01694 /* 01695 * 取り出した仮説が文として受理可能であれば, 01696 * 受理フラグを立ててをスタックにいれ直しておく. 01697 * (次に取り出されたら解となる) 01698 * 01699 * if the current popped hypothesis is acceptable, set endflag 01700 * and return it to stack: it will become the recognition result 01701 * when popped again. 
01702 */ 01703 #ifdef DEBUG 01704 jlog("DEBUG: accept check\n"); 01705 #endif 01706 01707 if (r->lmtype == LM_PROB) { 01708 acc = ngram_acceptable(now, r); 01709 } else if (r->lmtype == LM_DFA) { 01710 acc = dfa_acceptable(now, r); 01711 } 01712 if (acc && now->estimated_next_t <= 5) { 01713 new = newnode(r); 01714 /* new に now の中身をコピーして,最終的なスコアを計算 */ 01715 /* copy content of 'now' to 'new', and compute the final score */ 01716 last_next_word(now, new, param, r); 01717 if (debug2_flag) { 01718 jlog("DEBUG: This is acceptable as a sentence candidate\n"); 01719 } 01720 /* g[] が入力始端に達していなければ棄却 */ 01721 /* reject this sentence candidate if g[] does not reach the end */ 01722 if (new->score <= LOG_ZERO) { 01723 if (debug2_flag) { 01724 jlog("DEBUG: But invalid because Viterbi pass does not reach the 0th frame\n"); 01725 } 01726 free_node(new); 01727 free_node(now); 01728 continue; 01729 } 01730 /* 受理フラグを立てて入れ直す */ 01731 /* set endflag and push again */ 01732 if (debug2_flag) { 01733 jlog("DEBUG This hypo itself was pushed with final score=%f\n", new->score); 01734 } 01735 new->endflag = TRUE; 01736 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01737 if (r->graphout) { 01738 if (new->score > LOG_ZERO) { 01739 new->lastcontext = now->prevgraph; 01740 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1], 01741 WORD_INVALID, 01742 (new->seqnum >= 2) ? 
new->seq[new->seqnum-2] : WORD_INVALID, 01743 0, 01744 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01745 /* wordend are shifted to the last */ 01746 #ifdef PASS2_STRICT_IWCD 01747 new->wordend_frame[0], 01748 #else 01749 now->wordend_frame[0], 01750 #endif 01751 #else 01752 now->bestt, 01753 #endif 01754 new->score, 01755 prev_score, 01756 now->g[0], 01757 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01758 #ifdef PASS2_STRICT_IWCD 01759 new->wordend_gscore[0], 01760 #else 01761 now->wordend_gscore[0], 01762 #endif 01763 #else 01764 now->tail_g_score, 01765 #endif 01766 now->lscore, 01767 #ifdef CM_SEARCH 01768 new->cmscore[new->seqnum-1], 01769 #else 01770 LOG_ZERO, 01771 #endif 01772 r 01773 ); 01774 } else { 01775 new->lastcontext = now->lastcontext; 01776 new->prevgraph = now->prevgraph; 01777 } 01778 } /* put_to_stack() != -1 */ 01779 } /* recog->graphout */ 01780 /* この仮説はここで終わらずに, ここからさらに単語展開する */ 01781 /* continue with the 'now' hypothesis, not terminate here */ 01782 } 01783 01784 /* 01785 * この仮説から,次単語集合を決定する. 01786 * 次単語集合は, この仮説の推定始端フレーム周辺に存在した 01787 * 第1パスのトレリス単語集合. 01788 * 01789 * N-gramの場合は各単語の n-gram 接続確率が含まれる. 01790 * DFA の場合は, その中でさらに DFA 上で接続可能なもののみが返ってくる 01791 */ 01792 /* 01793 * Determine next word set that can connect to this hypothesis. 01794 * They come from the trellis word that has been survived at near the 01795 * beginning of the last word. 01796 * 01797 * In N-gram mode, they also contain N-gram probabilities toward the 01798 * source hypothesis. 
In DFA mode, the word set is further reduced 01799 * by the grammatical constraint 01800 */ 01801 #ifdef DEBUG 01802 jlog("DEBUG: get next words\n"); 01803 #endif 01804 if (r->lmtype == LM_PROB) { 01805 nwnum = ngram_nextwords(now, nextword, maxnwnum, r); 01806 } else if (r->lmtype == LM_DFA) { 01807 nwnum = dfa_nextwords(now, nextword, maxnwnum, r); 01808 /* nextword が溢れたら、バッファを増やして再チャレンジ */ 01809 /* If the number of nextwords can exceed the buffer size, expand the 01810 nextword data area */ 01811 while (nwnum < 0) { 01812 nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); 01813 nwnum = dfa_nextwords(now, nextword, maxnwnum, r); 01814 } 01815 } 01816 if (debug2_flag) { 01817 jlog("DEBUG: %d words extracted from wordtrellis\n", nwnum); 01818 } 01819 01820 /* 01821 * 仮説と次単語集合から新たな文仮説を生成し,スタックにいれる. 01822 */ 01823 /* 01824 * generate new hypotheses from 'now' and 'nextword', 01825 * and push them to stack 01826 */ 01827 #ifdef DEBUG 01828 jlog("DEBUG: generate hypo\n"); 01829 #endif 01830 01831 if (r->lmtype == LM_DFA) { 01832 now_noise_calced = FALSE; /* TRUE is noise-inserted score has been calculated */ 01833 } 01834 i = dwrk->pushctr; /* store old value */ 01835 01836 #ifdef CM_SEARCH 01837 /* initialize local stack */ 01838 cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha 01839 #ifdef CM_MULTIPLE_ALPHA 01840 , jconf->annotate.cm_alpha_num 01841 #endif 01842 ); 01843 #endif 01844 01845 /* for each nextword, generate a new hypothesis */ 01846 for (w = 0; w < nwnum; w++) { 01847 if (r->lmtype == LM_DFA) { 01848 /* limit word hypothesis */ 01849 if (! 
(winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { 01850 continue; 01851 } 01852 } 01853 new = newnode(r); 01854 01855 if (r->lmtype == LM_DFA) { 01856 01857 if (nextword[w]->can_insert_sp == TRUE) { 01858 /* ノイズを挟んだトレリススコアを計算し,挟まない場合との最大値を取る */ 01859 /* compute hypothesis score with noise inserted */ 01860 01861 if (now_noise_calced == FALSE) { 01862 /* now に sp をつけた仮説 now_noise を作り,そのスコアを計算 */ 01863 /* generate temporal hypothesis 'now_noise' which has short-pause 01864 word after the original 'now' */ 01865 fornoise.id = gdfa->sp_id; 01866 now_noise = newnode(r); 01867 cpy_node(now_noise, now); 01868 #if 0 01869 now_noise_tmp = newnode(r); 01870 next_word(now, now_noise_tmp, &fornoise, param, r); 01871 scan_word(now_noise_tmp, param, r); 01872 for(t=0;t<peseqlen;t++) { 01873 now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]); 01874 } 01875 free_node(now_noise_tmp); 01876 #else 01877 /* expand NOISE only if it exists in backward trellis */ 01878 /* begin patch by kashima */ 01879 if (jconf->pass2.looktrellis_flag) { 01880 if(!dfa_look_around(&fornoise, now, r)){ 01881 free_node(now_noise); 01882 free_node(new); 01883 continue; 01884 } 01885 } 01886 /* end patch by kashima */ 01887 01888 /* now_nosie の スコア g[] を計算し,元の now の g[] と比較して 01889 高い方を採用 */ 01890 /* compute trellis score g[], and adopt the maximum score 01891 for each frame compared with now->g[] */ 01892 next_word(now, now_noise, &fornoise, param, r); 01893 scan_word(now_noise, param, r); 01894 for(t=0;t<peseqlen;t++) { 01895 now_noise->g[t] = max(now_noise->g[t], now->g[t]); 01896 } 01897 /* ノイズを挟んだ際を考慮したスコアを計算したので, 01898 ここで最後のノイズ単語を now_noise から消す */ 01899 /* now that score has been computed considering pause insertion, 01900 we can delete the last noise word from now_noise here */ 01901 now_noise->seqnum--; 01902 #endif 01903 now_noise_calced = TRUE; 01904 } 01905 01906 /* expand word only if it exists in backward trellis */ 01907 /* begin patch by 
kashima */ 01908 if (jconf->pass2.looktrellis_flag) { 01909 if(!dfa_look_around(nextword[w], now_noise, r)){ 01910 free_node(new); 01911 continue; 01912 } 01913 } 01914 /* end patch by kashima */ 01915 01916 /* 新しい仮説' new' を 'now_noise' から生成 */ 01917 /* generate a new hypothesis 'new' from 'now_noise' */ 01918 next_word(now_noise, new, nextword[w], param, r); 01919 01920 } else { 01921 01922 /* expand word only if it exists in backward trellis */ 01923 /* begin patch by kashima */ 01924 if (jconf->pass2.looktrellis_flag) { 01925 if(!dfa_look_around(nextword[w], now, r)){ 01926 free_node(new); 01927 continue; 01928 } 01929 } 01930 /* end patch by kashima */ 01931 01932 /* 新しい仮説' new' を 'now_noise' から生成 */ 01933 /* generate a new hypothesis 'new' from 'now_noise' */ 01934 next_word(now, new, nextword[w], param, r); 01935 01936 } 01937 } 01938 01939 if (r->lmtype == LM_PROB) { 01940 01941 /* 新しい仮説' new' を 'now_noise' から生成 01942 N-gram の場合はノイズを特別扱いしない */ 01943 /* generate a new hypothesis 'new' from 'now'. 01944 pause insertion is treated as same as normal words in N-gram mode. */ 01945 next_word(now, new, nextword[w], param, r); 01946 01947 } 01948 01949 if (new->score <= LOG_ZERO) { /* not on trellis */ 01950 free_node(new); 01951 continue; 01952 } 01953 01954 dwrk->genectr++; 01955 01956 #ifdef CM_SEARCH 01957 /* store the local hypothesis to temporal stack */ 01958 cm_store(dwrk, new); 01959 #else 01960 /* 生成した仮説 'new' をスタックに入れる */ 01961 /* push the generated hypothesis 'new' to stack */ 01962 01963 /* stack overflow */ 01964 if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { 01965 free_node(new); 01966 continue; 01967 } 01968 01969 if (r->graphout) { 01970 /* assign a word arc to the last fixed word */ 01971 new->lastcontext = now->prevgraph; 01972 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], 01973 new->seq[new->seqnum-1], 01974 (new->seqnum >= 3) ? 
new->seq[new->seqnum-3] : WORD_INVALID, 01975 new->bestt + 1, 01976 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01977 #ifdef PASS2_STRICT_IWCD 01978 /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ 01979 new->wordend_frame[new->bestt], 01980 #else 01981 now->wordend_frame[new->bestt], 01982 #endif 01983 #else 01984 now->bestt, 01985 #endif 01986 new->score, prev_score, 01987 #ifdef PASS2_STRICT_IWCD 01988 new->g[new->bestt] - new->lscore, 01989 #else 01990 now->g[new->bestt+1], 01991 #endif 01992 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01993 #ifdef PASS2_STRICT_IWCD 01994 /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ 01995 new->wordend_gscore[new->bestt], 01996 #else 01997 now->wordend_gscore[new->bestt], 01998 #endif 01999 #else 02000 now->tail_g_score, 02001 #endif 02002 now->lscore, 02003 #ifdef CM_SEARCH 02004 new->cmscore[new->seqnum-2], 02005 #else 02006 LOG_ZERO, 02007 #endif 02008 r 02009 ); 02010 } /* recog->graphout */ 02011 put_to_stack(new, &start, &bottom, &stacknum, stacksize); 02012 if (debug2_flag) { 02013 j = new->seq[new->seqnum-1]; 02014 jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt); 02015 } 02016 dwrk->current = new; 02017 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 02018 dwrk->pushctr++; 02019 #endif 02020 } /* end of nextword loop */ 02021 02022 #ifdef CM_SEARCH 02023 /* compute score sum */ 02024 cm_sum_score(dwrk 02025 #ifdef CM_MULTIPLE_ALPHA 02026 , jconf->annotate.cm_alpha_bgn 02027 , jconf->annotate.cm_alpha_end 02028 , jconf->annotate.cm_alpha_step 02029 #endif 02030 ); 02031 /* compute CM and put the generated hypotheses to global stack */ 02032 while ((new = cm_get_node(dwrk)) != NULL) { 02033 cm_set_score(dwrk, new 02034 #ifdef CM_MULTIPLE_ALPHA 02035 , jconf->annotate.cm_alpha_bgn 02036 , jconf->annotate.cm_alpha_end 02037 
, jconf->annotate.cm_alpha_step 02038 #endif 02039 ); 02040 #ifdef CM_SEARCH_LIMIT 02041 if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres 02042 #ifdef CM_SEARCH_LIMIT_AFTER 02043 && dwrk->finishnum > 0 02044 #endif 02045 ) { 02046 free_node(new); 02047 continue; 02048 } 02049 #endif /* CM_SEARCH_LIMIT */ 02050 /* j = new->seq[new->seqnum-1]; 02051 printf(" %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/ 02052 02053 /* stack overflow */ 02054 if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { 02055 free_node(new); 02056 continue; 02057 } 02058 02059 if (r->graphout) { 02060 02061 /* assign a word arc to the last fixed word */ 02062 new->lastcontext = now->prevgraph; 02063 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], 02064 new->seq[new->seqnum-1], 02065 (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID, 02066 new->bestt + 1, 02067 #ifdef GRAPHOUT_PRECISE_BOUNDARY 02068 #ifdef PASS2_STRICT_IWCD 02069 new->wordend_frame[new->bestt], 02070 #else 02071 now->wordend_frame[new->bestt], 02072 #endif 02073 #else 02074 now->bestt, 02075 #endif 02076 new->score, prev_score, 02077 #ifdef PASS2_STRICT_IWCD 02078 new->g[new->bestt] - new->lscore, 02079 #else 02080 now->g[new->bestt+1], 02081 #endif 02082 #ifdef GRAPHOUT_PRECISE_BOUNDARY 02083 #ifdef PASS2_STRICT_IWCD 02084 new->wordend_gscore[new->bestt], 02085 #else 02086 now->wordend_gscore[new->bestt], 02087 #endif 02088 #else 02089 now->tail_g_score, 02090 #endif 02091 now->lscore, 02092 #ifdef CM_SEARCH 02093 new->cmscore[new->seqnum-2], 02094 #else 02095 LOG_ZERO, 02096 #endif 02097 r 02098 ); 02099 } /* recog->graphout */ 02100 02101 put_to_stack(new, &start, &bottom, &stacknum, stacksize); 02102 if (debug2_flag) { 02103 j = new->seq[new->seqnum-1]; 02104 jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, 
new->score, new->estimated_next_t + 1, new->bestt); 02105 } 02106 dwrk->current = new; 02107 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 02108 dwrk->pushctr++; 02109 } 02110 #endif 02111 02112 if (debug2_flag) { 02113 jlog("DEBUG: %d pushed\n",dwrk->pushctr-i); 02114 } 02115 if (r->lmtype == LM_DFA) { 02116 if (now_noise_calced == TRUE) free_node(now_noise); 02117 } 02118 02119 /* 02120 * 取り出した仮説を捨てる 02121 * free the source hypothesis 02122 */ 02123 free_node(now); 02124 02125 } 02126 /***************/ 02127 /* End of Loop */ 02128 /***************/ 02129 02130 /* output */ 02131 if (dwrk->finishnum == 0) { /* if search failed */ 02132 02133 /* finalize result when no hypothesis was obtained */ 02134 if (verbose_flag) { 02135 if (r->config->sw.fallback_pass1_flag) { 02136 jlog("%02d %s: got no candidates, output 1st pass result as a final result\n", r->config->id, r->config->name); 02137 } else { 02138 jlog("WARNING: %02d %s: got no candidates, search failed\n", r->config->id, r->config->name); 02139 } 02140 } 02141 pass2_finalize_on_no_result(r, r->config->sw.fallback_pass1_flag); 02142 02143 } else { /* if at least 1 candidate found */ 02144 02145 if (debug2_flag) { 02146 jlog("STAT: %02d %s: got %d candidates\n", r->config->id, r->config->name, dwrk->finishnum); 02147 } 02148 /* 結果はまだ出力されていないので,文候補用スタック内をソートして 02149 ここで出力する */ 02150 /* As all of the found candidate are in result stack, we sort them 02151 and output them here */ 02152 if (debug2_flag) jlog("DEBUG: done\n"); 02153 result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, jconf->output.output_hypo_maxnum, r, param); 02154 02155 r->result.status = J_RESULT_STATUS_SUCCESS; 02156 //callback_exec(CALLBACK_RESULT, r); 02157 //callback_exec(CALLBACK_EVENT_PASS2_END, r); 02158 } 02159 02160 /* 各種カウンタを出力 */ 02161 /* output counters */ 02162 if (verbose_flag) { 02163 jlog("STAT: %02d %s: %d generated, %d pushed, %d nodes popped in %d\n", 02164 r->config->id, r->config->name, 02165 dwrk->genectr, 
dwrk->pushctr, dwrk->popctr, backtrellis->framelen); 02166 jlog_flush(); 02167 #ifdef GRAPHOUT_DYNAMIC 02168 if (r->graphout) { 02169 jlog("STAT: %02d %s: graph: %d merged", r->config->id, r->config->name, dynamic_merged_num); 02170 #ifdef GRAPHOUT_SEARCH 02171 jlog("S, %d terminated", terminate_search_num); 02172 #endif 02173 jlog(" in %d\n", dwrk->popctr); 02174 } 02175 #endif 02176 } 02177 02178 if (dwrk->finishnum > 0 && r->graphout) { 02179 if (verbose_flag) jlog("STAT: ------ wordgraph post-processing begin ------\n"); 02180 /* garbage collection and word merging */ 02181 /* words with no following word (except end of sentence) will be erased */ 02182 wordgraph_purge_leaf_nodes(&wordgraph_root, r); 02183 #ifdef GRAPHOUT_DEPTHCUT 02184 /* perform word graph depth cutting here */ 02185 wordgraph_depth_cut(&wordgraph_root, r); 02186 #endif 02187 02188 /* if GRAPHOUT_PRECISE_BOUNDARY defined, 02189 propagate exact time boundary to the right context. 02190 words of different boundary will be duplicated here. 
02191 */ 02192 wordgraph_adjust_boundary(&wordgraph_root, r); 02193 02194 if (jconf->graph.confnet) { 02195 /* CONFUSION NETWORK GENERATION */ 02196 02197 /* old merging functions should be skipped */ 02198 02199 /* finalize: sort and annotate ID */ 02200 r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); 02201 /* check coherence */ 02202 wordgraph_check_coherence(wordgraph_root, r); 02203 /* compute graph CM by forward-backward processing */ 02204 graph_forward_backward(wordgraph_root, r); 02205 if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); 02206 02207 r->result.wg = wordgraph_root; 02208 02209 /* 02210 * if (jconf->graph.lattice) { 02211 * callback_exec(CALLBACK_RESULT_GRAPH, r); 02212 * } 02213 */ 02214 02215 /* parse the graph to extract order relationship */ 02216 graph_make_order(wordgraph_root, r); 02217 /* create confusion network */ 02218 r->result.confnet = confnet_create(wordgraph_root, r); 02219 /* output confusion network */ 02220 //callback_exec(CALLBACK_RESULT_CONFNET, r); 02221 /* free area for order relationship */ 02222 graph_free_order(r); 02223 /* free confusion network clusters */ 02224 //cn_free_all(&(r->result.confnet)); 02225 02226 } else if (jconf->graph.lattice) { 02227 /* WORD LATTICE POSTPROCESSING */ 02228 02229 /* merge words with the same time and same score */ 02230 wordgraph_compaction_thesame(&wordgraph_root); 02231 /* merge words with the same time (skip if "-graphrange -1") */ 02232 wordgraph_compaction_exacttime(&wordgraph_root, r); 02233 /* merge words of near time (skip if "-graphrange" value <= 0 */ 02234 wordgraph_compaction_neighbor(&wordgraph_root, r); 02235 /* finalize: sort and annotate ID */ 02236 r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); 02237 /* check coherence */ 02238 wordgraph_check_coherence(wordgraph_root, r); 02239 /* compute graph CM by forward-backward processing */ 02240 graph_forward_backward(wordgraph_root, r); 02241 
if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); 02242 /* output graph */ 02243 r->result.wg = wordgraph_root; 02244 //callback_exec(CALLBACK_RESULT_GRAPH, r); 02245 02246 } else { 02247 j_internal_error("InternalError: graph generation specified but no output format specified?\n"); 02248 } 02249 02250 /* clear all wordgraph */ 02251 //wordgraph_clean(&(r->result.wg)); 02252 } /* r->graphout */ 02253 02254 02255 /* 終了処理 */ 02256 /* finalize */ 02257 nw_free(nextword, nwroot); 02258 free_all_nodes(start); 02259 free_wordtrellis(dwrk); 02260 #ifdef SCAN_BEAM 02261 free(dwrk->framemaxscore); 02262 #endif 02263 //result_sentence_free(r); 02264 clear_stocker(dwrk); 02265 } 02266 02283 void 02284 wchmm_fbs_prepare(RecogProcess *r) 02285 { 02286 StackDecode *dwrk; 02287 dwrk = &(r->pass2); 02288 02289 /* N-gram 用ワークエリアを確保 */ 02290 /* malloc work area for N-gram */ 02291 if (r->lmtype == LM_PROB && r->lm->ngram) { 02292 dwrk->cnword = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); 02293 dwrk->cnwordrev = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); 02294 } else { 02295 dwrk->cnword = dwrk->cnwordrev = NULL; 02296 } 02297 dwrk->stocker_root = NULL; 02298 #ifdef CONFIDENVE_MEASURE 02299 #ifdef CM_MULTIPLE_ALPHA 02300 dwrk->cmsumlist = NULL; 02301 #endif 02302 #ifdef CM_NBEST; 02303 dwrk->sentcm = NULL; 02304 dwrk->wordcm = NULL; 02305 #endif 02306 #endif 02307 } 02308 02325 void 02326 wchmm_fbs_free(RecogProcess *r) 02327 { 02328 StackDecode *dwrk; 02329 dwrk = &(r->pass2); 02330 02331 if (r->lmtype == LM_PROB && r->lm->ngram) { 02332 free(dwrk->cnword); 02333 free(dwrk->cnwordrev); 02334 dwrk->cnword = dwrk->cnwordrev = NULL; 02335 } 02336 02337 #ifdef CONFIDENVE_MEASURE 02338 #ifdef CM_MULTIPLE_ALPHA 02339 if (dwrk->cmsumlist) { 02340 free(dwrk->cmsumlist); 02341 dwrk->cmsumlist = NULL; 02342 } 02343 #endif 02344 #ifdef CM_NBEST; 02345 if (dwrk->sentcm) { 02346 free(dwrk->sentcm); 02347 dwrk->sentcm = NULL; 02348 } 02349 if 
(dwrk->wordcm) { 02350 free(dwrk->wordcm); 02351 dwrk->wordcm = NULL; 02352 } 02353 #endif 02354 #endif 02355 } 02356 02357 /* end of file */