/* Julius 4.2 -- 2nd-pass stack decoding (doxygen source listing, reconstructed) */
00001 00041 /* 00042 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00043 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00044 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00045 * All rights reserved 00046 */ 00047 00048 #include <julius/julius.h> 00049 00050 /* declaration of local functions */ 00051 static NODE *get_best_from_stack(NODE **start, int *stacknum); 00052 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize); 00053 static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo); 00054 static void free_all_nodes(NODE *node); 00055 static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo); 00056 static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo); 00057 00058 /**********************************************************************/ 00059 /********** 次単語格納領域の割り当て *************************/ 00060 /********** allocate memory for nextword data *************************/ 00061 /**********************************************************************/ 00062 00085 static NEXTWORD ** 00086 nw_malloc(int *maxlen, NEXTWORD **root, int max) 00087 { 00088 NEXTWORD *nwtmp; 00089 NEXTWORD **nw; 00090 int i; 00091 00092 nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *)); 00093 nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD)); 00094 for (i=0;i<max; i++) { 00095 nw[i] = &(nwtmp[i]); 00096 } 00097 *maxlen = max; 00098 *root = nwtmp; 00099 return nw; 00100 } 00101 00117 static void 00118 nw_free(NEXTWORD **nw, NEXTWORD *root) 00119 { 00120 free(root); 00121 free(nw); 00122 } 00123 00162 static NEXTWORD ** 00163 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root, int num) 00164 { 00165 NEXTWORD *nwtmp; 00166 NEXTWORD **nw; 00167 int i; 00168 int nwmaxlen; 00169 00170 nwmaxlen = *maxlen + num; 00171 00172 nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD)); 00173 nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * 
sizeof(NEXTWORD *)); 00174 nw[0] = nwtmp; 00175 for (i=1;i<nwmaxlen; i++) { 00176 nw[i] = &(nwtmp[i]); 00177 } 00178 *maxlen = nwmaxlen; 00179 *root = nwtmp; 00180 return nw; 00181 } 00182 00183 00184 /**********************************************************************/ 00185 /********** 仮説スタックの操作 ********************************/ 00186 /********** Hypothesis stack operation ********************************/ 00187 /**********************************************************************/ 00188 00208 static NODE * 00209 get_best_from_stack(NODE **start, int *stacknum) 00210 { 00211 NODE *tmp; 00212 00213 /* return top */ 00214 tmp=(*start); 00215 if ((*start)!=NULL) { /* delete it from stack */ 00216 (*start)=(*start)->next; 00217 if ((*start)!=NULL) (*start)->prev=NULL; 00218 (*stacknum)--; 00219 return(tmp); 00220 } 00221 else { 00222 return(NULL); 00223 } 00224 } 00225 00252 static int 00253 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize) 00254 { 00255 /* stack size check */ 00256 if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) { 00257 /* new node is below the bottom: discard it */ 00258 return(-1); 00259 } 00260 return(0); 00261 } 00262 00291 static int 00292 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize) 00293 { 00294 NODE *tmp; 00295 00296 /* stack size is going to increase... 
*/ 00297 (*stacknum)++; 00298 00299 /* stack size check */ 00300 if ((*stacknum)>stacksize) { 00301 /* stack size overflow */ 00302 (*stacknum)--; 00303 if ((*bottom)->score < new->score) { 00304 /* new node will be inserted in the stack: free the bottom */ 00305 tmp=(*bottom); 00306 (*bottom)->prev->next=NULL; 00307 (*bottom)=(*bottom)->prev; 00308 free_node(tmp); 00309 } else { 00310 /* new node is below the bottom: discard it */ 00311 free_node(new); 00312 return(-1); 00313 } 00314 } 00315 00316 /* insert new node on edge */ 00317 if ((*start)==NULL) { /* no node in stack */ 00318 /* new node is the only node */ 00319 (*start)=new; 00320 (*bottom)=new; 00321 new->next=NULL; 00322 new->prev=NULL; 00323 return(0); 00324 } 00325 if ((*start)->score <= new->score) { 00326 /* insert on the top */ 00327 new->next = (*start); 00328 new->next->prev = new; 00329 (*start)=new; 00330 new->prev=NULL; 00331 return(0); 00332 } 00333 if ((*bottom)->score >= new->score) { 00334 /* insert on the bottom */ 00335 new->prev = (*bottom); 00336 new->prev->next = new; 00337 (*bottom)=new; 00338 new->next=NULL; 00339 return(0); 00340 } 00341 00342 /* now the new node is between (*start) and (*bottom) */ 00343 if (((*start)->score + (*bottom)->score) / 2 > new->score) { 00344 /* search from bottom */ 00345 tmp=(*bottom); 00346 while(tmp->score < new->score) tmp=tmp->prev; 00347 new->prev=tmp; 00348 new->next=tmp->next; 00349 tmp->next->prev=new; 00350 tmp->next=new; 00351 } else { 00352 /* search from start */ 00353 tmp=(*start); 00354 while(tmp->score > new->score) tmp=tmp->next; 00355 new->next=tmp; 00356 new->prev=tmp->prev; 00357 tmp->prev->next=new; 00358 tmp->prev=new; 00359 } 00360 return(0); 00361 } 00362 00379 static void 00380 put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo) 00381 { 00382 NODE *ntmp; 00383 00384 jlog("DEBUG: hypotheses remained in global stack\n"); 00385 while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) { 00386 jlog("DEBUG: %3d: 
s=%f",*stacknum, ntmp->score); 00387 put_hypo_woutput(ntmp, winfo); 00388 free_node(ntmp); 00389 } 00390 } 00391 00404 static void 00405 free_all_nodes(NODE *start) 00406 { 00407 NODE *tmp; 00408 NODE *next; 00409 00410 tmp=start; 00411 while(tmp) { 00412 next=tmp->next; 00413 free_node(tmp); 00414 tmp=next; 00415 } 00416 } 00417 00418 00419 #ifdef CONFIDENCE_MEASURE 00420 00421 /**********************************************************************/ 00422 /********** 単語信頼度の計算 ******************************************/ 00423 /********** Confidence scoring ****************************************/ 00424 /**********************************************************************/ 00425 00426 #ifdef CM_SEARCH 00427 /**************************************************************/ 00428 /**** CM computation method 1(default): ******/ 00429 /**** - Use local posterior probabilities while search ******/ 00430 /**** - Obtain CM at hypothesis expansion time ******/ 00431 /**************************************************************/ 00432 00451 static void 00452 cm_init(StackDecode *sd, int wnum, LOGPROB cm_alpha 00453 #ifdef CM_MULTIPLE_ALPHA 00454 , cm_alpha_num 00455 #endif 00456 ) 00457 { 00458 sd->l_stacksize = wnum; 00459 sd->l_start = sd->l_bottom = NULL; 00460 sd->l_stacknum = 0; 00461 sd->cm_alpha = cm_alpha; 00462 #ifdef CM_MULTIPLE_ALPHA 00463 if (sd->cmsumlist) { 00464 if (sd->cmsumlistlen < cm_alpha_num) { 00465 free(sd->cmsumlist); 00466 sd->cmsumlist = NULL; 00467 } 00468 } 00469 if (sd->cmsumlist == NULL) { 00470 sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num); 00471 sd->cmsumlistlen = cm_alpha_num; 00472 } 00473 #endif 00474 } 00475 00490 static void 00491 cm_store(StackDecode *sd, NODE *new) 00492 { 00493 /* store the generated hypo into local stack */ 00494 put_to_stack(new, &(sd->l_start), &(sd->l_bottom), &(sd->l_stacknum), sd->l_stacksize); 00495 } 00496 00512 static void 00513 cm_sum_score(StackDecode *sd 00514 #ifdef 
CM_MULTIPLE_ALPHA 00515 , bgn, end, step 00516 #endif 00517 ) 00518 { 00519 NODE *node; 00520 LOGPROB sum; 00521 #ifdef CM_MULTIPLE_ALPHA 00522 LOGPROB a; 00523 int j; 00524 #endif 00525 00526 if (sd->l_start == NULL) return; /* no hypo */ 00527 sd->cm_tmpbestscore = sd->l_start->score; /* best hypo is at the top of the stack */ 00528 00529 #ifdef CM_MULTIPLE_ALPHA 00530 for (j = 0, a = bgn; a <= end; a += step) { 00531 sum = 0.0; 00532 for(node = sd->l_start; node; node = node->next) { 00533 sum += pow(10, a * (node->score - sd->cm_tmpbestscore)); 00534 } 00535 sd->cmsumlist[j++] = sum; /* store sums for each alpha coef. */ 00536 } 00537 #else 00538 sum = 0.0; 00539 for(node = sd->l_start; node; node = node->next) { 00540 sum += pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)); 00541 } 00542 sd->cm_tmpsum = sum; /* store sum */ 00543 #endif 00544 00545 } 00546 00563 static void 00564 cm_set_score(StackDecode *sd, NODE *node 00565 #ifdef CM_MULTIPLE_ALPHA 00566 , bgn, end, step 00567 #endif 00568 ) 00569 { 00570 #ifdef CM_MULTIPLE_ALPHA 00571 int j; 00572 LOGPROB a; 00573 #endif 00574 00575 #ifdef CM_MULTIPLE_ALPHA 00576 for (j = 0, a = bgn; a <= end; a += step) { 00577 node->cmscore[node->seqnum-1][j] = pow(10, a * (node->score - sd->cm_tmpbestscore)) / sd->cmsumlist[j]; 00578 j++; 00579 } 00580 #else 00581 node->cmscore[node->seqnum-1] = pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)) / sd->cm_tmpsum; 00582 #endif 00583 } 00584 00601 static NODE * 00602 cm_get_node(StackDecode *sd) 00603 { 00604 return(get_best_from_stack(&(sd->l_start), &(sd->l_stacknum))); 00605 } 00606 00607 #endif /* CM_SEARCH */ 00608 00609 #ifdef CM_NBEST 00610 /*****************************************************************/ 00611 /**** CM computation method 2: conventional N-best scoring *******/ 00612 /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/ 00613 /*****************************************************************/ 00614 00634 static 
void 00635 cm_compute_from_nbest(StackDecode *sd, NODE *start, int stacknum, JCONF_SEARCH *jconf) 00636 { 00637 NODE *node; 00638 LOGPROB bestscore, sum, s; 00639 WORD_ID w; 00640 int i; 00641 LOGPROB cm_alpha; 00642 int j; 00643 00644 /* prepare buffer */ 00645 #ifdef CM_MULTIPLE_ALPHA 00646 if (sd->cmsumlist) { 00647 if (sd->cmsumlistlen < jconf->annotate.cm_alpha_num) { 00648 free(sd->cmsumlist); 00649 sd->cmsumlist = NULL; 00650 } 00651 } 00652 if (sd->cmsumlist == NULL) { 00653 sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * jconf->annotate.cm_alpha_num); 00654 sd->cmsumlistlen = cm_alpha_num; 00655 } 00656 #endif 00657 if (sd->sentcm == NULL) { /* not allocated yet */ 00658 sd->sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum); 00659 sd->sentnum = stacknum; 00660 } else if (sd->sentnum < stacknum) { /* need expanded */ 00661 sd->sentcm = (LOGPROB *)myrealloc(sd->sentcm, sizeof(LOGPROB)*stacknum); 00662 sd->sentnum = stacknum; 00663 } 00664 if (sd->wordcm == NULL) { 00665 sd->wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num); 00666 sd->wordnum = winfo->num; 00667 } else if (sd->wordnum < winfo->num) { 00668 sd->wordcm = (LOGPROB *)myremalloc(sd->wordcm, sizeof(LOGPROB) * winfo->num); 00669 sd->wordnum = winfo->num; 00670 } 00671 00672 cm_alpha = jconf->annotate.cm_alpha; 00673 #ifdef CM_MULTIPLE_ALPHA 00674 for (j = 0, cm_alpha = jconf->annotate.cm_alpha_bgn; cm_alpha <= jconf->annotate.cm_alpha_end; cm_alpha += jconf->annotate.cm_alpha_step) { 00675 #endif 00676 /* clear whole word cm buffer */ 00677 for(w=0;w<sd->wordnum;w++) { 00678 sd->wordcm[w] = 0.0; 00679 } 00680 /* get best score */ 00681 bestscore = start->score; 00682 /* compute sum score of all hypothesis */ 00683 sum = 0.0; 00684 for (node = start; node != NULL; node = node->next) { 00685 sum += pow(10, cm_alpha * (node->score - bestscore)); 00686 } 00687 /* compute sentence posteriori probabilities */ 00688 i = 0; 00689 for (node = start; node != NULL; node = node->next) { 
00690 sd->sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum; 00691 i++; 00692 } 00693 /* compute word posteriori probabilities */ 00694 i = 0; 00695 for (node = start; node != NULL; node = node->next) { 00696 for (w=0;w<node->seqnum;w++) { 00697 sd->wordcm[node->seq[w]] += sd->sentcm[i]; 00698 } 00699 i++; 00700 } 00701 /* store the probabilities to node */ 00702 for (node = start; node != NULL; node = node->next) { 00703 for (w=0;w<node->seqnum;w++) { 00704 #ifdef CM_MULTIPLE_ALPHA 00705 node->cmscore[w][j] = sd->wordcm[node->seq[w]]; 00706 #else 00707 node->cmscore[w] = sd->wordcm[node->seq[w]]; 00708 #endif 00709 } 00710 } 00711 #ifdef CM_MULTIPLE_ALPHA 00712 j++; 00713 } 00714 #endif 00715 } 00716 00717 #endif /* CM_NBEST */ 00718 00719 #endif /* CONFIDENCE_MEASURE */ 00720 00721 00722 /**********************************************************************/ 00723 /********** Enveloped best-first search *******************************/ 00724 /**********************************************************************/ 00725 00726 /* 00727 * 1. Word envelope 00728 * 00729 * 一種の仮説ビーム幅を設定: 展開元となった仮説の数をその仮説長(単語数) 00730 * ごとにカウントする. 一定数を越えたらそれより短い仮説は以後展開しない. 00731 * 00732 * Introduce a kind of beam width to search tree: count the number of 00733 * popped hypotheses per the depth of the hypotheses, and when a count 00734 * in a certain depth reaches the threshold, all hypotheses shorter than 00735 * the depth will be dropped from candidates. 
00736 * 00737 */ 00738 00752 static void 00753 wb_init(StackDecode *s) 00754 { 00755 int i; 00756 for(i=0;i<=MAXSEQNUM;i++) s->hypo_len_count[i] = 0; 00757 s->maximum_filled_length = -1; 00758 } 00759 00786 static boolean 00787 wb_ok(StackDecode *s, NODE *now, int width) 00788 { 00789 if (now->seqnum <= s->maximum_filled_length) { 00790 /* word expansion is not allowed because a word expansion count 00791 at deeper level has already reached the limit */ 00792 return FALSE; 00793 } else { 00794 /* word expansion is possible. Increment the word expansion count 00795 of the given depth */ 00796 s->hypo_len_count[now->seqnum]++; 00797 if (s->hypo_len_count[now->seqnum] > width) { 00798 /* the word expansion count of this level has reached the limit, so 00799 set the beam-filled depth to this level to inhibit further 00800 expansion of shorter hypotheses. */ 00801 if (s->maximum_filled_length < now->seqnum) s->maximum_filled_length = now->seqnum; 00802 } 00803 return TRUE; 00804 } 00805 } 00806 00807 #ifdef SCAN_BEAM 00808 /* 00809 * 2. Score envelope 00810 * 00811 * Viterbi計算量の削減: 入力フレームごとの最大尤度 (score envelope) を 00812 * 全仮説にわたって記録しておく. 仮説の前向き尤度計算時に,その envelope 00813 * から一定幅以上スコアが下回るとき,Viterbi パスの演算を中断する. 00814 * 00815 * ここでは,取り出した仮説からフレームごとの score envelope を更新する 00816 * 部分が記述されている. Envelope を考慮した Viterbi 計算の実際は 00817 * scan_word() を参照のこと. 00818 * 00819 * Reduce computation cost of hypothesis Viterbi processing by setting a 00820 * "score envelope" that holds the maximum scores at every frames 00821 * throughout the expanded hypotheses. When calculating Viterbi path 00822 * on HMM trellis for updating score of popped hypothesis, Viterbi paths 00823 * that goes below a certain range from the score envelope are dropped. 00824 * 00825 * These functions are for updating the score envelope according to the 00826 * popped hypothesis. For actual Viterbi process with score envelope, 00827 * see scan_word(). 
00828 * 00829 */ 00830 00846 static void 00847 envl_init(StackDecode *s, int framenum) 00848 { 00849 int i; 00850 for(i=0;i<framenum;i++) s->framemaxscore[i] = LOG_ZERO; 00851 } 00852 00869 static void 00870 envl_update(StackDecode *s, NODE *n, int framenum) 00871 { 00872 int t; 00873 for(t=framenum-1;t>=0;t--) { 00874 if (s->framemaxscore[t] < n->g[t]) s->framemaxscore[t] = n->g[t]; 00875 } 00876 } 00877 #endif /* SCAN_BEAM */ 00878 00879 00880 /**********************************************************************/ 00881 /********** Short pause segmentation **********************************/ 00882 /**********************************************************************/ 00883 00907 void 00908 segment_set_last_nword(NODE *hypo, RecogProcess *r) 00909 { 00910 int i; 00911 WORD_ID w; 00912 00913 if (r->sp_break_last_nword_allow_override) { 00914 for(i=0;i<hypo->seqnum;i++) { 00915 w = hypo->seq[i]; 00916 if (w != r->sp_break_last_word 00917 && !is_sil(w, r) 00918 && !r->lm->winfo->is_transparent[w] 00919 ) { 00920 r->sp_break_last_nword = w; 00921 break; 00922 } 00923 } 00924 #ifdef SP_BREAK_DEBUG 00925 printf("sp_break_last_nword=%d[%s]\n", r->sp_break_last_nword, r->lm->winfo->woutput[r->sp_break_last_nword]); 00926 #endif 00927 } else { 00928 r->sp_break_last_nword = WORD_INVALID; 00929 } 00930 } 00931 00932 00933 /**********************************************************************/ 00934 /********* Debug output of hypothesis while search ********************/ 00935 /**********************************************************************/ 00936 00951 static void 00952 put_hypo_woutput(NODE *hypo, WORD_INFO *winfo) 00953 { 00954 int i,w; 00955 00956 if (hypo != NULL) { 00957 for (i=hypo->seqnum-1;i>=0;i--) { 00958 w = hypo->seq[i]; 00959 jlog(" %s", winfo->woutput[w]); 00960 } 00961 } 00962 jlog("\n"); 00963 } 00964 00979 static void 00980 put_hypo_wname(NODE *hypo, WORD_INFO *winfo) 00981 { 00982 int i,w; 00983 00984 if (hypo != NULL) { 00985 for 
(i=hypo->seqnum-1;i>=0;i--) { 00986 w = hypo->seq[i]; 00987 jlog(" %s", winfo->wname[w]); 00988 } 00989 } 00990 jlog("\n"); 00991 } 00992 01005 static void 01006 store_result_pass2(NODE *hypo, RecogProcess *r) 01007 { 01008 int i; 01009 Sentence *s; 01010 01011 s = &(r->result.sent[r->result.sentnum]); 01012 01013 s->word_num = hypo->seqnum; 01014 for (i = 0; i < hypo->seqnum; i++) { 01015 s->word[i] = hypo->seq[hypo->seqnum - 1 - i]; 01016 } 01017 #ifdef CONFIDENCE_MEASURE 01018 for (i = 0; i < hypo->seqnum; i++) { 01019 s->confidence[i] = hypo->cmscore[hypo->seqnum - 1 - i]; 01020 } 01021 #endif 01022 01023 s->score = hypo->score; 01024 s->score_lm = hypo->totallscore; 01025 s->score_am = hypo->score - hypo->totallscore; 01026 if (r->lmtype == LM_DFA) { 01027 /* output which grammar the hypothesis belongs to on multiple grammar */ 01028 /* determine only by the last word */ 01029 if (multigram_get_all_num(r->lm) > 0) { 01030 s->gram_id = multigram_get_gram_from_category(r->lm->winfo->wton[hypo->seq[0]], r->lm); 01031 } else { 01032 s->gram_id = 0; 01033 } 01034 } 01035 01036 r->result.sentnum++; 01037 /* add to tail */ 01038 } 01039 01040 01041 /**********************************************************************/ 01042 /******** Output top 'ncan' hypotheses in a stack and free all ********/ 01043 /**********************************************************************/ 01044 01087 static void 01088 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, RecogProcess *r, HTK_Param *param) 01089 { 01090 NODE *now; 01091 int num; 01092 01093 #ifdef CM_NBEST 01094 /* compute CM from the N-best sentence candidates */ 01095 cm_compute_from_nbest(&(r->pass2), *r_start, *r_stacknum, r->config); 01096 #endif 01097 num = 0; 01098 while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) { 01099 num++; 01100 /* output result */ 01101 store_result_pass2(now, r); 01102 01103 /* if in sp segmentation mode, */ 01104 /* set the 
last context-aware word for next recognition */ 01105 if (r->lmtype == LM_PROB && r->config->successive.enabled && num == 1) segment_set_last_nword(now, r); 01106 01107 free_node(now); 01108 } 01109 /* free the rest */ 01110 if (now != NULL) free_node(now); 01111 free_all_nodes(*r_start); 01112 } 01113 01149 void 01150 pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final) 01151 { 01152 NODE *now; 01153 int i, j; 01154 01155 /* 探索失敗 */ 01156 /* search failed */ 01157 01158 /* make temporal hypothesis data from the result of previous 1st pass */ 01159 now = newnode(r); 01160 for (i=0;i<r->pass1_wnum;i++) { 01161 now->seq[i] = r->pass1_wseq[r->pass1_wnum-1-i]; 01162 } 01163 now->seqnum = r->pass1_wnum; 01164 now->score = r->pass1_score; 01165 #ifdef CONFIDENCE_MEASURE 01166 /* fill in null values */ 01167 #ifdef CM_MULTIPLE_ALPHA 01168 for(j=0;j<jconf->annotate.cm_alpha_num;j++) { 01169 for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0; 01170 } 01171 #else 01172 for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0; 01173 #endif 01174 #endif /* CONFIDENCE_MEASURE */ 01175 01176 if (r->lmtype == LM_PROB && r->config->successive.enabled) { 01177 /* if in sp segment mode, */ 01178 /* find segment restart words from 1st pass result */ 01179 segment_set_last_nword(now, r); 01180 } 01181 01182 if (use_1pass_as_final) { 01183 /* 第1パスの結果をそのまま出力する */ 01184 /* output the result of the previous 1st pass as a final result. 
*/ 01185 store_result_pass2(now, r); 01186 r->result.status = J_RESULT_STATUS_SUCCESS; 01187 } else { 01188 /* store output as failure */ 01189 r->result.status = J_RESULT_STATUS_FAIL; 01190 //callback_exec(CALLBACK_RESULT, r); 01191 } 01192 01193 free_node(now); 01194 } 01195 01196 01197 /**********************************************************************/ 01198 /********* Main stack decoding function *******************************/ 01199 /**********************************************************************/ 01200 01228 void 01229 wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num) 01230 { 01231 /* 文仮説スタック */ 01232 /* hypothesis stack (double-linked list) */ 01233 int stacknum; /* current stack size */ 01234 NODE *start = NULL; /* top node */ 01235 NODE *bottom = NULL; /* bottom node */ 01236 01237 /* 認識結果格納スタック(結果はここへいったん集められる) */ 01238 /* result sentence stack (found results will be stored here and then re-ordered) */ 01239 int r_stacksize; 01240 int r_stacknum; 01241 NODE *r_start = NULL; 01242 NODE *r_bottom = NULL; 01243 01244 /* ワークエリア */ 01245 /* work area */ 01246 NEXTWORD fornoise; /* Dummy NEXTWORD data for short-pause insertion handling */ 01247 01248 NEXTWORD **nextword, *nwroot; /* buffer to store predicted words */ 01249 int maxnwnum; /* current allocated number of words in nextword */ 01250 int nwnum; /* current number of words in nextword */ 01251 NODE *now, *new; /* popped current hypo., expanded new hypo. 
*/ 01252 NODE *now_noise; /* for inserting/deleting noise word */ 01253 boolean now_noise_calced; 01254 boolean acc; 01255 int t; 01256 int w,i,j; 01257 LOGPROB last_score = LOG_ZERO; 01258 /* for graph generation */ 01259 LOGPROB prev_score; 01260 WordGraph *wordgraph_root = NULL; 01261 boolean merged_p; 01262 #ifdef GRAPHOUT_DYNAMIC 01263 int dynamic_merged_num = 0; 01264 WordGraph *wtmp; 01265 LOGPROB lscore_prev; 01266 #endif 01267 #ifdef GRAPHOUT_SEARCH 01268 int terminate_search_num = 0; 01269 #endif 01270 01271 /* local temporal parameter */ 01272 int stacksize, ncan, maxhypo, peseqlen; 01273 JCONF_SEARCH *jconf; 01274 WORD_INFO *winfo; 01275 NGRAM_INFO *ngram; 01276 DFA_INFO *gdfa; 01277 BACKTRELLIS *backtrellis; 01278 StackDecode *dwrk; 01279 01280 if (r->lmtype == LM_DFA) { 01281 if (debug2_flag) jlog("DEBUG: only words in these categories will be expanded: %d-%d\n", cate_bgn, cate_bgn + cate_num-1); 01282 } 01283 01284 /* 01285 * 初期化 01286 * Initialize 01287 */ 01288 01289 /* just for quick access */ 01290 jconf = r->config; 01291 winfo = r->lm->winfo; 01292 if (r->lmtype == LM_PROB) { 01293 ngram = r->lm->ngram; 01294 } else if (r->lmtype == LM_DFA) { 01295 gdfa = r->lm->dfa; 01296 } 01297 backtrellis = r->backtrellis; 01298 dwrk = &(r->pass2); 01299 01300 stacksize = jconf->pass2.stack_size; 01301 ncan = jconf->pass2.nbest; 01302 maxhypo = jconf->pass2.hypo_overflow; 01303 peseqlen = backtrellis->framelen; 01304 01305 /* store data for sub routines */ 01306 r->peseqlen = backtrellis->framelen; 01307 //recog->ccd_flag = recog->jconf->am.ccd_flag; 01308 /* 予測単語格納領域を確保 */ 01309 /* malloc area for word prediction */ 01310 /* the initial maximum number of nextwords is the size of vocabulary */ 01311 nextword = nw_malloc(&maxnwnum, &nwroot, winfo->num); 01312 /* 前向きスコア計算用の領域を確保 */ 01313 /* malloc are for forward viterbi (scan_word()) */ 01314 malloc_wordtrellis(r); /* scan_word用領域 */ 01315 /* 仮説スタック初期化 */ 01316 /* initialize hypothesis stack */ 01317 start = 
bottom = NULL; 01318 stacknum = 0; 01319 /* 結果格納スタック初期化 */ 01320 /* initialize result stack */ 01321 r_stacksize = ncan; 01322 r_start = r_bottom = NULL; 01323 r_stacknum = 0; 01324 01325 /* カウンタ初期化 */ 01326 /* initialize counter */ 01327 dwrk->popctr = 0; 01328 dwrk->genectr = 0; 01329 dwrk->pushctr = 0; 01330 dwrk->finishnum = 0; 01331 01332 #ifdef CM_SEARCH 01333 /* initialize local stack */ 01334 cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha 01335 #ifdef CM_MULTIPLE_ALPHA 01336 , jconf->annotate.cm_alpha_num 01337 #endif 01338 ); 01339 #endif 01340 #ifdef SCAN_BEAM 01341 /* prepare and initialize score envelope */ 01342 dwrk->framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen); 01343 envl_init(dwrk, peseqlen); 01344 #endif /* SCAN_BEAM */ 01345 01346 /* エンベロープ探索用の単語長別展開数カウンタを初期化 */ 01347 /* initialize counters for envelope search */ 01348 if (jconf->pass2.enveloped_bestfirst_width >= 0) wb_init(dwrk); 01349 01350 if (jconf->graph.enabled) { 01351 wordgraph_init(r->wchmm); 01352 } 01353 01354 /* 01355 * 初期仮説(1単語からなる)を得, 文仮説スタックにいれる 01356 * get a set of initial words from LM function and push them as initial 01357 * hypotheses 01358 */ 01359 /* the first words will be stored in nextword[] */ 01360 if (r->lmtype == LM_PROB) { 01361 nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, r); 01362 } else if (r->lmtype == LM_DFA) { 01363 nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); 01364 /* 溢れたら、バッファを増やして再チャレンジ */ 01365 /* If the number of nextwords can exceed the buffer size, expand the 01366 nextword data area */ 01367 while (nwnum < 0) { 01368 nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); 01369 nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); 01370 } 01371 } 01372 01373 if (debug2_flag) { 01374 jlog("DEBUG: %d words in wordtrellis as first hypothesis\n", nwnum); 01375 } 01376 01377 /* store them to stack */ 01378 for (w = 0; w < nwnum; w++) { 01379 if (r->lmtype == LM_DFA) { 01380 /* limit word hypothesis */ 
01381 if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { 01382 continue; 01383 } 01384 } 01385 /* generate new hypothesis */ 01386 new = newnode(r); 01387 start_word(new, nextword[w], param, r); 01388 if (r->lmtype == LM_DFA) { 01389 if (new->score <= LOG_ZERO) { /* not on trellis */ 01390 free_node(new); 01391 continue; 01392 } 01393 } 01394 dwrk->genectr++; 01395 #ifdef CM_SEARCH 01396 /* store the local hypothesis to temporal stack */ 01397 cm_store(dwrk, new); 01398 #else 01399 /* put to stack */ 01400 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01401 dwrk->current = new; 01402 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 01403 if (jconf->graph.enabled) { 01404 new->prevgraph = NULL; 01405 new->lastcontext = NULL; 01406 } 01407 dwrk->pushctr++; 01408 } 01409 #endif 01410 } 01411 #ifdef CM_SEARCH 01412 /* compute score sum */ 01413 cm_sum_score(dwrk 01414 #ifdef CM_MULTIPLE_ALPHA 01415 , jconf->annotate.cm_alpha_bgn 01416 , jconf->annotate.cm_alpha_end 01417 , jconf->annotate.cm_alpha_step 01418 #endif 01419 ); 01420 01421 /* compute CM and put the generated hypotheses to global stack */ 01422 while ((new = cm_get_node(dwrk)) != NULL) { 01423 cm_set_score(dwrk, new 01424 #ifdef CM_MULTIPLE_ALPHA 01425 , jconf->annotate.cm_alpha_bgn 01426 , jconf->annotate.cm_alpha_end 01427 , jconf->annotate.cm_alpha_step 01428 #endif 01429 ); 01430 #ifdef CM_SEARCH_LIMIT 01431 if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres 01432 #ifdef CM_SEARCH_LIMIT_AFTER 01433 && dwrk->finishnum > 0 01434 #endif 01435 ) { 01436 free_node(new); 01437 continue; 01438 } 01439 #endif /* CM_SEARCH_LIMIT */ 01440 01441 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01442 dwrk->current = new; 01443 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 01444 if (r->graphout) { 01445 new->prevgraph = NULL; 01446 new->lastcontext = NULL; 01447 } 01448 dwrk->pushctr++; 01449 } 01450 } 
01451 #endif 01452 01453 if (debug2_flag) { 01454 jlog("DEBUG: %d pushed\n", dwrk->pushctr); 01455 } 01456 01457 /********************/ 01458 /* main search loop */ 01459 /********************/ 01460 01461 for (;;) { 01462 01463 /* if terminate signal has been received, cancel this input */ 01464 /* 01465 * if (recog->process_want_terminate) { 01466 * jlog("DEBUG: process terminated by request\n"); 01467 * break; 01468 * } 01469 */ 01470 01471 /* 01472 * 仮説スタックから最もスコアの高い仮説を取り出す 01473 * pop the top hypothesis from stack 01474 */ 01475 #ifdef DEBUG 01476 jlog("DEBUG: get one hypothesis\n"); 01477 #endif 01478 now = get_best_from_stack(&start,&stacknum); 01479 if (now == NULL) { /* stack empty ---> 探索終了*/ 01480 jlog("WARNING: %02d %s: hypothesis stack exhausted, terminate search now\n", r->config->id, r->config->name); 01481 jlog("STAT: %02d %s: %d sentences have been found\n", r->config->id, r->config->name, dwrk->finishnum); 01482 break; 01483 } 01484 /* (bogus score check) */ 01485 if (now->score <= LOG_ZERO) { 01486 free_node(now); 01487 continue; 01488 } 01489 01490 01491 /* 単語グラフ用に pop 仮説の f スコアを一時保存 */ 01492 if (r->graphout) { 01493 prev_score = now->score; 01494 } 01495 01496 /* word envelope チェック */ 01497 /* consult word envelope */ 01498 if (jconf->pass2.enveloped_bestfirst_width >= 0) { 01499 if (!wb_ok(dwrk, now, jconf->pass2.enveloped_bestfirst_width)) { 01500 /* この仮説長における展開元仮説数の累計数は既に閾値を越えている. 01501 そのため,この仮説は捨てる. 
*/ 01502 /* the number of popped hypotheses at the length already 01503 reaches its limit, so the current popped hypothesis should 01504 be discarded here with no expansion */ 01505 if (debug2_flag) { 01506 jlog("DEBUG: popped but pruned by word envelope:"); 01507 put_hypo_woutput(now, r->lm->winfo); 01508 } 01509 free_node(now); 01510 continue; 01511 } 01512 } 01513 01514 #ifdef CM_SEARCH_LIMIT_POP 01515 if (now->cmscore[now->seqnum-1] < jconf->annotate.cm_cut_thres_pop) { 01516 free_node(now); 01517 continue; 01518 } 01519 #endif /* CM_SEARCH_LIMIT_POP */ 01520 01521 dwrk->popctr++; 01522 01523 /* (for debug) 取り出した仮説とそのスコアを出力 */ 01524 /* output information of the popped hypothesis to stdout */ 01525 if (debug2_flag) { 01526 jlog("DEBUG: --- pop %d:\n", dwrk->popctr); 01527 jlog("DEBUG: "); put_hypo_woutput(now, r->lm->winfo); 01528 jlog("DEBUG: "); put_hypo_wname(now, r->lm->winfo); 01529 jlog("DEBUG: %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]); 01530 jlog("DEBUG: last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt); 01531 } 01532 01533 dwrk->current = now; 01534 //callback_exec(CALLBACK_DEBUG_PASS2_POP, r); 01535 01536 if (r->graphout) { 01537 01538 #ifdef GRAPHOUT_DYNAMIC 01539 /* merge last word in popped hypo if possible */ 01540 wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p, jconf); 01541 if (wtmp != NULL) { /* wtmp holds merged word */ 01542 dynamic_merged_num++; 01543 01544 lscore_prev = (now->prevgraph) ? 
now->prevgraph->lscore_tmp : 0.0; 01545 01546 if (now->prevgraph != NULL) { 01547 if (now->prevgraph->saved) { 01548 j_internal_error("wchmm_fbs: already saved??\n"); 01549 } 01550 wordgraph_free(now->prevgraph); 01551 } 01552 01553 if (now->lastcontext != NULL 01554 && now->lastcontext != wtmp /* avoid self loop */ 01555 ) { 01556 wordgraph_check_and_add_leftword(now->lastcontext, wtmp, lscore_prev); 01557 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT 01558 if (merged_p) { 01559 if (wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev) == FALSE) { 01560 merged_p = TRUE; 01561 } else { 01562 merged_p = FALSE; 01563 } 01564 } else { 01565 wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); 01566 } 01567 #else 01568 wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); 01569 #endif 01570 } 01571 01572 now->prevgraph = wtmp; /* change word to the merged one */ 01573 01574 /*printf("last word merged\n");*/ 01575 /* previous still remains at memory here... (will be purged later) */ 01576 } else { 01577 wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); 01578 } 01579 #ifdef GRAPHOUT_SEARCH 01580 /* if recent hypotheses are included in the existing graph, terminate */ 01581 if (merged_p && now->endflag == FALSE 01582 #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION 01583 /* Do not apply search termination by graph merging 01584 until the first sentence candidate is found. 
*/ 01585 && (jconf->graph.graphout_search_delay == FALSE || dwrk->finishnum > 0) 01586 #endif 01587 ) { 01588 terminate_search_num++; 01589 free_node(now); 01590 continue; 01591 } 01592 #endif 01593 #else /* ~GRAPHOUT_DYNAMIC */ 01594 /* always save */ 01595 wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); 01596 #endif /* ~GRAPHOUT_DYNAMIC */ 01597 } 01598 01599 /* 取り出した仮説のスコアを元に score envelope を更新 */ 01600 /* update score envelope using the popped hypothesis */ 01601 envl_update(dwrk, now, peseqlen); 01602 01603 /* 01604 * 取り出した仮説の受理フラグが既に立っていれば, 01605 * その仮説は探索終了とみなし,結果として出力して次のループへ. 01606 * 01607 * If the popped hypothesis already reached to the end, 01608 * we can treat it as a recognition result. 01609 */ 01610 #ifdef DEBUG 01611 VERMES("endflag check\n"); 01612 #endif 01613 01614 if (now->endflag) { 01615 if (debug2_flag) { 01616 jlog("DEBUG: This is a full sentence candidate\n"); 01617 } 01618 /* quick, dirty hack */ 01619 if (now->score == last_score) { 01620 free_node(now); 01621 continue; 01622 } else { 01623 last_score = now->score; 01624 } 01625 01626 dwrk->finishnum++; 01627 if (debug2_flag) { 01628 jlog("DEBUG: %d-th sentence found\n", dwrk->finishnum); 01629 } 01630 01631 /* 一定数の仮説が得られたあとスコアでソートするため, 01632 一時的に別のスタックに格納しておく */ 01633 /* store the result to result stack 01634 after search is finished, they will be re-ordered and output */ 01635 put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize); 01636 /* 指定数の文仮説が得られたなら探索を終了する */ 01637 /* finish search if specified number of results are found */ 01638 if (dwrk->finishnum >= ncan) { 01639 break; 01640 } else { 01641 continue; 01642 } 01643 01644 } /* end of now->endflag */ 01645 01646 01647 /* 01648 * 探索失敗を検出する. 
01649 * 仮説数が maxhypo 以上展開されたら, もうこれ以上は探索しない 01650 * 01651 * detecting search failure: 01652 * if the number of expanded hypotheses reaches maxhypo, giveup further search 01653 */ 01654 #ifdef DEBUG 01655 jlog("DEBUG: loop end check\n"); 01656 #endif 01657 if (dwrk->popctr >= maxhypo) { 01658 jlog("WARNING: %02d %s: num of popped hypotheses reached the limit (%d)\n", r->config->id, r->config->name, maxhypo); 01659 /* (for debug) 探索失敗時に、スタックに残った情報を吐き出す */ 01660 /* (for debug) output all hypothesis remaining in the stack */ 01661 if (debug2_flag) put_all_in_stack(&start, &stacknum, r->lm->winfo); 01662 free_node(now); 01663 break; /* end of search */ 01664 } 01665 /* 仮説長が一定値を越えたとき,その仮説を破棄する */ 01666 /* check hypothesis word length overflow */ 01667 if (now->seqnum >= MAXSEQNUM) { 01668 jlog("ERROR: sentence length exceeded system limit ( > %d)\n", MAXSEQNUM); 01669 free_node(now); 01670 continue; 01671 } 01672 01673 #ifndef GRAPHOUT_PRECISE_BOUNDARY 01674 if (r->graphout) { 01675 /* if monophone (= no backscan), the tail g score should be kept here */ 01676 /* else, updated tail g score will be computed in scan_word() */ 01677 if(!jconf->am.ccd_flag) { 01678 now->tail_g_score = now->g[now->bestt]; 01679 } 01680 } 01681 #endif 01682 01683 /* 01684 * 前向きスコアを更新する: 最後の単語の部分の前向きスコアを計算する. 01685 * update forward score: compute forward trellis for the last word 01686 */ 01687 #ifdef DEBUG 01688 jlog("DEBUG: scan_word\n"); 01689 #endif 01690 scan_word(now, param, r); 01691 if (now->score < LOG_ZERO) { /* another end-of-search detecter */ 01692 jlog("WARNING: too low score, ignore: score=%f",now->score); 01693 put_hypo_woutput(now, r->lm->winfo); 01694 free_node(now); 01695 continue; 01696 } 01697 01698 /* 01699 * 取り出した仮説が文として受理可能であれば, 01700 * 受理フラグを立ててをスタックにいれ直しておく. 01701 * (次に取り出されたら解となる) 01702 * 01703 * if the current popped hypothesis is acceptable, set endflag 01704 * and return it to stack: it will become the recognition result 01705 * when popped again. 
01706 */ 01707 #ifdef DEBUG 01708 jlog("DEBUG: accept check\n"); 01709 #endif 01710 01711 if (r->lmtype == LM_PROB) { 01712 acc = ngram_acceptable(now, r); 01713 } else if (r->lmtype == LM_DFA) { 01714 acc = dfa_acceptable(now, r); 01715 } 01716 if (acc && now->estimated_next_t <= 5) { 01717 new = newnode(r); 01718 /* new に now の中身をコピーして,最終的なスコアを計算 */ 01719 /* copy content of 'now' to 'new', and compute the final score */ 01720 last_next_word(now, new, param, r); 01721 if (debug2_flag) { 01722 jlog("DEBUG: This is acceptable as a sentence candidate\n"); 01723 } 01724 /* g[] が入力始端に達していなければ棄却 */ 01725 /* reject this sentence candidate if g[] does not reach the end */ 01726 if (new->score <= LOG_ZERO) { 01727 if (debug2_flag) { 01728 jlog("DEBUG: But invalid because Viterbi pass does not reach the 0th frame\n"); 01729 } 01730 free_node(new); 01731 free_node(now); 01732 continue; 01733 } 01734 /* 受理フラグを立てて入れ直す */ 01735 /* set endflag and push again */ 01736 if (debug2_flag) { 01737 jlog("DEBUG This hypo itself was pushed with final score=%f\n", new->score); 01738 } 01739 new->endflag = TRUE; 01740 if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { 01741 if (r->graphout) { 01742 if (new->score > LOG_ZERO) { 01743 new->lastcontext = now->prevgraph; 01744 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1], 01745 WORD_INVALID, 01746 (new->seqnum >= 2) ? 
new->seq[new->seqnum-2] : WORD_INVALID, 01747 0, 01748 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01749 /* wordend are shifted to the last */ 01750 #ifdef PASS2_STRICT_IWCD 01751 new->wordend_frame[0], 01752 #else 01753 now->wordend_frame[0], 01754 #endif 01755 #else 01756 now->bestt, 01757 #endif 01758 new->score, 01759 prev_score, 01760 now->g[0], 01761 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01762 #ifdef PASS2_STRICT_IWCD 01763 new->wordend_gscore[0], 01764 #else 01765 now->wordend_gscore[0], 01766 #endif 01767 #else 01768 now->tail_g_score, 01769 #endif 01770 now->lscore, 01771 #ifdef CM_SEARCH 01772 new->cmscore[new->seqnum-1], 01773 #else 01774 LOG_ZERO, 01775 #endif 01776 r 01777 ); 01778 } else { 01779 new->lastcontext = now->lastcontext; 01780 new->prevgraph = now->prevgraph; 01781 } 01782 } /* put_to_stack() != -1 */ 01783 } /* recog->graphout */ 01784 /* この仮説はここで終わらずに, ここからさらに単語展開する */ 01785 /* continue with the 'now' hypothesis, not terminate here */ 01786 } 01787 01788 /* 01789 * この仮説から,次単語集合を決定する. 01790 * 次単語集合は, この仮説の推定始端フレーム周辺に存在した 01791 * 第1パスのトレリス単語集合. 01792 * 01793 * N-gramの場合は各単語の n-gram 接続確率が含まれる. 01794 * DFA の場合は, その中でさらに DFA 上で接続可能なもののみが返ってくる 01795 */ 01796 /* 01797 * Determine next word set that can connect to this hypothesis. 01798 * They come from the trellis word that has been survived at near the 01799 * beginning of the last word. 01800 * 01801 * In N-gram mode, they also contain N-gram probabilities toward the 01802 * source hypothesis. 
In DFA mode, the word set is further reduced 01803 * by the grammatical constraint 01804 */ 01805 #ifdef DEBUG 01806 jlog("DEBUG: get next words\n"); 01807 #endif 01808 if (r->lmtype == LM_PROB) { 01809 nwnum = ngram_nextwords(now, nextword, maxnwnum, r); 01810 } else if (r->lmtype == LM_DFA) { 01811 nwnum = dfa_nextwords(now, nextword, maxnwnum, r); 01812 /* nextword が溢れたら、バッファを増やして再チャレンジ */ 01813 /* If the number of nextwords can exceed the buffer size, expand the 01814 nextword data area */ 01815 while (nwnum < 0) { 01816 nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); 01817 nwnum = dfa_nextwords(now, nextword, maxnwnum, r); 01818 } 01819 } 01820 if (debug2_flag) { 01821 jlog("DEBUG: %d words extracted from wordtrellis\n", nwnum); 01822 } 01823 01824 /* 01825 * 仮説と次単語集合から新たな文仮説を生成し,スタックにいれる. 01826 */ 01827 /* 01828 * generate new hypotheses from 'now' and 'nextword', 01829 * and push them to stack 01830 */ 01831 #ifdef DEBUG 01832 jlog("DEBUG: generate hypo\n"); 01833 #endif 01834 01835 if (r->lmtype == LM_DFA) { 01836 now_noise_calced = FALSE; /* TRUE is noise-inserted score has been calculated */ 01837 } 01838 i = dwrk->pushctr; /* store old value */ 01839 01840 #ifdef CM_SEARCH 01841 /* initialize local stack */ 01842 cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha 01843 #ifdef CM_MULTIPLE_ALPHA 01844 , jconf->annotate.cm_alpha_num 01845 #endif 01846 ); 01847 #endif 01848 01849 /* for each nextword, generate a new hypothesis */ 01850 for (w = 0; w < nwnum; w++) { 01851 if (r->lmtype == LM_DFA) { 01852 /* limit word hypothesis */ 01853 if (! 
(winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { 01854 continue; 01855 } 01856 } 01857 new = newnode(r); 01858 01859 if (r->lmtype == LM_DFA) { 01860 01861 if (nextword[w]->can_insert_sp == TRUE) { 01862 /* ノイズを挟んだトレリススコアを計算し,挟まない場合との最大値を取る */ 01863 /* compute hypothesis score with noise inserted */ 01864 01865 if (now_noise_calced == FALSE) { 01866 /* now に sp をつけた仮説 now_noise を作り,そのスコアを計算 */ 01867 /* generate temporal hypothesis 'now_noise' which has short-pause 01868 word after the original 'now' */ 01869 fornoise.id = gdfa->sp_id; 01870 now_noise = newnode(r); 01871 cpy_node(now_noise, now); 01872 #if 0 01873 now_noise_tmp = newnode(r); 01874 next_word(now, now_noise_tmp, &fornoise, param, r); 01875 scan_word(now_noise_tmp, param, r); 01876 for(t=0;t<peseqlen;t++) { 01877 now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]); 01878 } 01879 free_node(now_noise_tmp); 01880 #else 01881 /* expand NOISE only if it exists in backward trellis */ 01882 /* begin patch by kashima */ 01883 if (jconf->pass2.looktrellis_flag) { 01884 if(!dfa_look_around(&fornoise, now, r)){ 01885 free_node(now_noise); 01886 free_node(new); 01887 continue; 01888 } 01889 } 01890 /* end patch by kashima */ 01891 01892 /* now_nosie の スコア g[] を計算し,元の now の g[] と比較して 01893 高い方を採用 */ 01894 /* compute trellis score g[], and adopt the maximum score 01895 for each frame compared with now->g[] */ 01896 next_word(now, now_noise, &fornoise, param, r); 01897 scan_word(now_noise, param, r); 01898 for(t=0;t<peseqlen;t++) { 01899 now_noise->g[t] = max(now_noise->g[t], now->g[t]); 01900 } 01901 /* ノイズを挟んだ際を考慮したスコアを計算したので, 01902 ここで最後のノイズ単語を now_noise から消す */ 01903 /* now that score has been computed considering pause insertion, 01904 we can delete the last noise word from now_noise here */ 01905 now_noise->seqnum--; 01906 #endif 01907 now_noise_calced = TRUE; 01908 } 01909 01910 /* expand word only if it exists in backward trellis */ 01911 /* begin patch by 
kashima */ 01912 if (jconf->pass2.looktrellis_flag) { 01913 if(!dfa_look_around(nextword[w], now_noise, r)){ 01914 free_node(new); 01915 continue; 01916 } 01917 } 01918 /* end patch by kashima */ 01919 01920 /* 新しい仮説' new' を 'now_noise' から生成 */ 01921 /* generate a new hypothesis 'new' from 'now_noise' */ 01922 next_word(now_noise, new, nextword[w], param, r); 01923 01924 } else { 01925 01926 /* expand word only if it exists in backward trellis */ 01927 /* begin patch by kashima */ 01928 if (jconf->pass2.looktrellis_flag) { 01929 if(!dfa_look_around(nextword[w], now, r)){ 01930 free_node(new); 01931 continue; 01932 } 01933 } 01934 /* end patch by kashima */ 01935 01936 /* 新しい仮説' new' を 'now_noise' から生成 */ 01937 /* generate a new hypothesis 'new' from 'now_noise' */ 01938 next_word(now, new, nextword[w], param, r); 01939 01940 } 01941 } 01942 01943 if (r->lmtype == LM_PROB) { 01944 01945 /* 新しい仮説' new' を 'now_noise' から生成 01946 N-gram の場合はノイズを特別扱いしない */ 01947 /* generate a new hypothesis 'new' from 'now'. 01948 pause insertion is treated as same as normal words in N-gram mode. */ 01949 next_word(now, new, nextword[w], param, r); 01950 01951 } 01952 01953 if (new->score <= LOG_ZERO) { /* not on trellis */ 01954 free_node(new); 01955 continue; 01956 } 01957 01958 dwrk->genectr++; 01959 01960 #ifdef CM_SEARCH 01961 /* store the local hypothesis to temporal stack */ 01962 cm_store(dwrk, new); 01963 #else 01964 /* 生成した仮説 'new' をスタックに入れる */ 01965 /* push the generated hypothesis 'new' to stack */ 01966 01967 /* stack overflow */ 01968 if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { 01969 free_node(new); 01970 continue; 01971 } 01972 01973 if (r->graphout) { 01974 /* assign a word arc to the last fixed word */ 01975 new->lastcontext = now->prevgraph; 01976 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], 01977 new->seq[new->seqnum-1], 01978 (new->seqnum >= 3) ? 
new->seq[new->seqnum-3] : WORD_INVALID, 01979 new->bestt + 1, 01980 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01981 #ifdef PASS2_STRICT_IWCD 01982 /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ 01983 new->wordend_frame[new->bestt], 01984 #else 01985 now->wordend_frame[new->bestt], 01986 #endif 01987 #else 01988 now->bestt, 01989 #endif 01990 new->score, prev_score, 01991 #ifdef PASS2_STRICT_IWCD 01992 new->g[new->bestt] - new->lscore, 01993 #else 01994 now->g[new->bestt+1], 01995 #endif 01996 #ifdef GRAPHOUT_PRECISE_BOUNDARY 01997 #ifdef PASS2_STRICT_IWCD 01998 /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ 01999 new->wordend_gscore[new->bestt], 02000 #else 02001 now->wordend_gscore[new->bestt], 02002 #endif 02003 #else 02004 now->tail_g_score, 02005 #endif 02006 now->lscore, 02007 #ifdef CM_SEARCH 02008 new->cmscore[new->seqnum-2], 02009 #else 02010 LOG_ZERO, 02011 #endif 02012 r 02013 ); 02014 } /* recog->graphout */ 02015 put_to_stack(new, &start, &bottom, &stacknum, stacksize); 02016 if (debug2_flag) { 02017 j = new->seq[new->seqnum-1]; 02018 jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt); 02019 } 02020 dwrk->current = new; 02021 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 02022 dwrk->pushctr++; 02023 #endif 02024 } /* end of nextword loop */ 02025 02026 #ifdef CM_SEARCH 02027 /* compute score sum */ 02028 cm_sum_score(dwrk 02029 #ifdef CM_MULTIPLE_ALPHA 02030 , jconf->annotate.cm_alpha_bgn 02031 , jconf->annotate.cm_alpha_end 02032 , jconf->annotate.cm_alpha_step 02033 #endif 02034 ); 02035 /* compute CM and put the generated hypotheses to global stack */ 02036 while ((new = cm_get_node(dwrk)) != NULL) { 02037 cm_set_score(dwrk, new 02038 #ifdef CM_MULTIPLE_ALPHA 02039 , jconf->annotate.cm_alpha_bgn 02040 , jconf->annotate.cm_alpha_end 02041 
, jconf->annotate.cm_alpha_step 02042 #endif 02043 ); 02044 #ifdef CM_SEARCH_LIMIT 02045 if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres 02046 #ifdef CM_SEARCH_LIMIT_AFTER 02047 && dwrk->finishnum > 0 02048 #endif 02049 ) { 02050 free_node(new); 02051 continue; 02052 } 02053 #endif /* CM_SEARCH_LIMIT */ 02054 /* j = new->seq[new->seqnum-1]; 02055 printf(" %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/ 02056 02057 /* stack overflow */ 02058 if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { 02059 free_node(new); 02060 continue; 02061 } 02062 02063 if (r->graphout) { 02064 02065 /* assign a word arc to the last fixed word */ 02066 new->lastcontext = now->prevgraph; 02067 new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], 02068 new->seq[new->seqnum-1], 02069 (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID, 02070 new->bestt + 1, 02071 #ifdef GRAPHOUT_PRECISE_BOUNDARY 02072 #ifdef PASS2_STRICT_IWCD 02073 new->wordend_frame[new->bestt], 02074 #else 02075 now->wordend_frame[new->bestt], 02076 #endif 02077 #else 02078 now->bestt, 02079 #endif 02080 new->score, prev_score, 02081 #ifdef PASS2_STRICT_IWCD 02082 new->g[new->bestt] - new->lscore, 02083 #else 02084 now->g[new->bestt+1], 02085 #endif 02086 #ifdef GRAPHOUT_PRECISE_BOUNDARY 02087 #ifdef PASS2_STRICT_IWCD 02088 new->wordend_gscore[new->bestt], 02089 #else 02090 now->wordend_gscore[new->bestt], 02091 #endif 02092 #else 02093 now->tail_g_score, 02094 #endif 02095 now->lscore, 02096 #ifdef CM_SEARCH 02097 new->cmscore[new->seqnum-2], 02098 #else 02099 LOG_ZERO, 02100 #endif 02101 r 02102 ); 02103 } /* recog->graphout */ 02104 02105 put_to_stack(new, &start, &bottom, &stacknum, stacksize); 02106 if (debug2_flag) { 02107 j = new->seq[new->seqnum-1]; 02108 jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, 
new->score, new->estimated_next_t + 1, new->bestt); 02109 } 02110 dwrk->current = new; 02111 //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); 02112 dwrk->pushctr++; 02113 } 02114 #endif 02115 02116 if (debug2_flag) { 02117 jlog("DEBUG: %d pushed\n",dwrk->pushctr-i); 02118 } 02119 if (r->lmtype == LM_DFA) { 02120 if (now_noise_calced == TRUE) free_node(now_noise); 02121 } 02122 02123 /* 02124 * 取り出した仮説を捨てる 02125 * free the source hypothesis 02126 */ 02127 free_node(now); 02128 02129 } 02130 /***************/ 02131 /* End of Loop */ 02132 /***************/ 02133 02134 /* output */ 02135 if (dwrk->finishnum == 0) { /* if search failed */ 02136 02137 /* finalize result when no hypothesis was obtained */ 02138 if (verbose_flag) { 02139 if (r->config->sw.fallback_pass1_flag) { 02140 jlog("%02d %s: got no candidates, output 1st pass result as a final result\n", r->config->id, r->config->name); 02141 } else { 02142 jlog("WARNING: %02d %s: got no candidates, search failed\n", r->config->id, r->config->name); 02143 } 02144 } 02145 pass2_finalize_on_no_result(r, r->config->sw.fallback_pass1_flag); 02146 02147 } else { /* if at least 1 candidate found */ 02148 02149 if (debug2_flag) { 02150 jlog("STAT: %02d %s: got %d candidates\n", r->config->id, r->config->name, dwrk->finishnum); 02151 } 02152 /* 結果はまだ出力されていないので,文候補用スタック内をソートして 02153 ここで出力する */ 02154 /* As all of the found candidate are in result stack, we sort them 02155 and output them here */ 02156 if (debug2_flag) jlog("DEBUG: done\n"); 02157 result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, jconf->output.output_hypo_maxnum, r, param); 02158 02159 r->result.status = J_RESULT_STATUS_SUCCESS; 02160 //callback_exec(CALLBACK_RESULT, r); 02161 //callback_exec(CALLBACK_EVENT_PASS2_END, r); 02162 } 02163 02164 /* 各種カウンタを出力 */ 02165 /* output counters */ 02166 if (verbose_flag) { 02167 jlog("STAT: %02d %s: %d generated, %d pushed, %d nodes popped in %d\n", 02168 r->config->id, r->config->name, 02169 dwrk->genectr, 
dwrk->pushctr, dwrk->popctr, backtrellis->framelen); 02170 jlog_flush(); 02171 #ifdef GRAPHOUT_DYNAMIC 02172 if (r->graphout) { 02173 jlog("STAT: %02d %s: graph: %d merged", r->config->id, r->config->name, dynamic_merged_num); 02174 #ifdef GRAPHOUT_SEARCH 02175 jlog("S, %d terminated", terminate_search_num); 02176 #endif 02177 jlog(" in %d\n", dwrk->popctr); 02178 } 02179 #endif 02180 } 02181 02182 if (dwrk->finishnum > 0 && r->graphout) { 02183 if (verbose_flag) jlog("STAT: ------ wordgraph post-processing begin ------\n"); 02184 /* garbage collection and word merging */ 02185 /* words with no following word (except end of sentence) will be erased */ 02186 wordgraph_purge_leaf_nodes(&wordgraph_root, r); 02187 #ifdef GRAPHOUT_DEPTHCUT 02188 /* perform word graph depth cutting here */ 02189 wordgraph_depth_cut(&wordgraph_root, r); 02190 #endif 02191 02192 /* if GRAPHOUT_PRECISE_BOUNDARY defined, 02193 propagate exact time boundary to the right context. 02194 words of different boundary will be duplicated here. 
02195 */ 02196 wordgraph_adjust_boundary(&wordgraph_root, r); 02197 02198 if (jconf->graph.confnet) { 02199 /* CONFUSION NETWORK GENERATION */ 02200 02201 /* old merging functions should be skipped */ 02202 02203 /* finalize: sort and annotate ID */ 02204 r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); 02205 /* check coherence */ 02206 wordgraph_check_coherence(wordgraph_root, r); 02207 /* compute graph CM by forward-backward processing */ 02208 graph_forward_backward(wordgraph_root, r); 02209 if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); 02210 02211 r->result.wg = wordgraph_root; 02212 02213 /* 02214 * if (jconf->graph.lattice) { 02215 * callback_exec(CALLBACK_RESULT_GRAPH, r); 02216 * } 02217 */ 02218 02219 /* parse the graph to extract order relationship */ 02220 graph_make_order(wordgraph_root, r); 02221 /* create confusion network */ 02222 r->result.confnet = confnet_create(wordgraph_root, r); 02223 /* output confusion network */ 02224 //callback_exec(CALLBACK_RESULT_CONFNET, r); 02225 /* free area for order relationship */ 02226 graph_free_order(r); 02227 /* free confusion network clusters */ 02228 //cn_free_all(&(r->result.confnet)); 02229 02230 } else if (jconf->graph.lattice) { 02231 /* WORD LATTICE POSTPROCESSING */ 02232 02233 /* merge words with the same time and same score */ 02234 wordgraph_compaction_thesame(&wordgraph_root); 02235 /* merge words with the same time (skip if "-graphrange -1") */ 02236 wordgraph_compaction_exacttime(&wordgraph_root, r); 02237 /* merge words of near time (skip if "-graphrange" value <= 0 */ 02238 wordgraph_compaction_neighbor(&wordgraph_root, r); 02239 /* finalize: sort and annotate ID */ 02240 r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); 02241 /* check coherence */ 02242 wordgraph_check_coherence(wordgraph_root, r); 02243 /* compute graph CM by forward-backward processing */ 02244 graph_forward_backward(wordgraph_root, r); 02245 
if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); 02246 /* output graph */ 02247 r->result.wg = wordgraph_root; 02248 //callback_exec(CALLBACK_RESULT_GRAPH, r); 02249 02250 } else { 02251 j_internal_error("InternalError: graph generation specified but no output format specified?\n"); 02252 } 02253 02254 /* clear all wordgraph */ 02255 //wordgraph_clean(&(r->result.wg)); 02256 } /* r->graphout */ 02257 02258 02259 /* 終了処理 */ 02260 /* finalize */ 02261 nw_free(nextword, nwroot); 02262 free_all_nodes(start); 02263 free_wordtrellis(dwrk); 02264 #ifdef SCAN_BEAM 02265 free(dwrk->framemaxscore); 02266 #endif 02267 //result_sentence_free(r); 02268 clear_stocker(dwrk); 02269 } 02270 02287 void 02288 wchmm_fbs_prepare(RecogProcess *r) 02289 { 02290 StackDecode *dwrk; 02291 dwrk = &(r->pass2); 02292 02293 /* N-gram 用ワークエリアを確保 */ 02294 /* malloc work area for N-gram */ 02295 if (r->lmtype == LM_PROB && r->lm->ngram) { 02296 dwrk->cnword = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); 02297 dwrk->cnwordrev = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); 02298 } else { 02299 dwrk->cnword = dwrk->cnwordrev = NULL; 02300 } 02301 dwrk->stocker_root = NULL; 02302 #ifdef CONFIDENVE_MEASURE 02303 #ifdef CM_MULTIPLE_ALPHA 02304 dwrk->cmsumlist = NULL; 02305 #endif 02306 #ifdef CM_NBEST; 02307 dwrk->sentcm = NULL; 02308 dwrk->wordcm = NULL; 02309 #endif 02310 #endif 02311 } 02312 02329 void 02330 wchmm_fbs_free(RecogProcess *r) 02331 { 02332 StackDecode *dwrk; 02333 dwrk = &(r->pass2); 02334 02335 if (r->lmtype == LM_PROB && r->lm->ngram) { 02336 free(dwrk->cnword); 02337 free(dwrk->cnwordrev); 02338 dwrk->cnword = dwrk->cnwordrev = NULL; 02339 } 02340 02341 #ifdef CONFIDENVE_MEASURE 02342 #ifdef CM_MULTIPLE_ALPHA 02343 if (dwrk->cmsumlist) { 02344 free(dwrk->cmsumlist); 02345 dwrk->cmsumlist = NULL; 02346 } 02347 #endif 02348 #ifdef CM_NBEST; 02349 if (dwrk->sentcm) { 02350 free(dwrk->sentcm); 02351 dwrk->sentcm = NULL; 02352 } 02353 if 
(dwrk->wordcm) { 02354 free(dwrk->wordcm); 02355 dwrk->wordcm = NULL; 02356 } 02357 #endif 02358 #endif 02359 } 02360 02361 /* end of file */