Julius: libjulius/src/search_bestfirst_main.c ソースファイル

Julius 4.1.5
00001 
00041 /*
00042  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00043  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00044  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00045  * All rights reserved
00046  */
00047 
00048 #include <julius/julius.h>
00049 
00050 /* declaration of local functions */
00051 static NODE *get_best_from_stack(NODE **start, int *stacknum);
00052 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
00053 static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo);
00054 static void free_all_nodes(NODE *node);
00055 static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo);
00056 static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo);
00057 
00058 /**********************************************************************/
00059 /********** 次単語格納領域の割り当て          *************************/
00060 /********** allocate memory for nextword data *************************/
00061 /**********************************************************************/
00062 
00085 static NEXTWORD **
00086 nw_malloc(int *maxlen, NEXTWORD **root, int max)
00087 {
00088   NEXTWORD *nwtmp;
00089   NEXTWORD **nw;
00090   int i;
00091 
00092   nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
00093   nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
00094   for (i=0;i<max; i++) {
00095     nw[i] = &(nwtmp[i]);
00096   }
00097   *maxlen = max;
00098   *root = nwtmp;
00099   return nw;
00100 }
00101 
00117 static void
00118 nw_free(NEXTWORD **nw, NEXTWORD *root)
00119 {
00120   free(root);
00121   free(nw);
00122 }
00123 
00162 static NEXTWORD **
00163 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root, int num)
00164 {
00165   NEXTWORD *nwtmp;
00166   NEXTWORD **nw;
00167   int i;
00168   int nwmaxlen;
00169 
00170   nwmaxlen = *maxlen + num;
00171 
00172   nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
00173   nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
00174   nw[0] = nwtmp;
00175   for (i=1;i<nwmaxlen; i++) {
00176     nw[i] = &(nwtmp[i]);
00177   }
00178   *maxlen = nwmaxlen;
00179   *root = nwtmp;
00180   return nw;
00181 }
00182 
00183 
00184 /**********************************************************************/
00185 /********** 仮説スタックの操作         ********************************/
00186 /********** Hypothesis stack operation ********************************/
00187 /**********************************************************************/
00188 
00208 static NODE *
00209 get_best_from_stack(NODE **start, int *stacknum)
00210 {
00211   NODE *tmp;
00212 
00213   /* return top */
00214   tmp=(*start);
00215   if ((*start)!=NULL) {         /* delete it from stack */
00216     (*start)=(*start)->next;
00217     if ((*start)!=NULL) (*start)->prev=NULL;
00218     (*stacknum)--;
00219     return(tmp);
00220   }
00221   else {
00222     return(NULL);
00223   }
00224 }
00225 
00252 static int
00253 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
00254 {
00255   /* stack size check */
00256   if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
00257     /* new node is below the bottom: discard it */
00258     return(-1);
00259   }
00260   return(0);
00261 }
00262 
00291 static int
00292 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
00293 {
00294   NODE *tmp;
00295 
00296   /* stack size is going to increase... */
00297   (*stacknum)++;
00298 
00299   /* stack size check */
00300   if ((*stacknum)>stacksize) {
00301     /* stack size overflow */
00302     (*stacknum)--;
00303     if ((*bottom)->score < new->score) {
00304       /* new node will be inserted in the stack: free the bottom */
00305       tmp=(*bottom);
00306       (*bottom)->prev->next=NULL;
00307       (*bottom)=(*bottom)->prev;
00308       free_node(tmp);
00309     } else {
00310       /* new node is below the bottom: discard it */
00311       free_node(new);
00312       return(-1);
00313     }
00314   }
00315 
00316   /* insert new node on edge */
00317   if ((*start)==NULL) {         /* no node in stack */
00318     /* new node is the only node */
00319     (*start)=new;
00320     (*bottom)=new;
00321     new->next=NULL;
00322     new->prev=NULL;
00323     return(0);
00324   }
00325   if ((*start)->score <= new->score) {
00326     /* insert on the top */
00327     new->next = (*start);
00328     new->next->prev = new;
00329     (*start)=new;
00330     new->prev=NULL;
00331     return(0);
00332   }
00333   if ((*bottom)->score >= new->score) {
00334     /* insert on the bottom */
00335     new->prev = (*bottom);
00336     new->prev->next = new;
00337     (*bottom)=new;
00338     new->next=NULL;
00339     return(0);
00340   }
00341     
00342   /* now the new node is between (*start) and (*bottom) */
00343   if (((*start)->score + (*bottom)->score) / 2 > new->score) {
00344     /* search from bottom */
00345     tmp=(*bottom);
00346     while(tmp->score < new->score) tmp=tmp->prev;
00347     new->prev=tmp;
00348     new->next=tmp->next;
00349     tmp->next->prev=new;
00350     tmp->next=new;
00351   } else {
00352     /* search from start */
00353     tmp=(*start);
00354     while(tmp->score > new->score) tmp=tmp->next;
00355     new->next=tmp;
00356     new->prev=tmp->prev;
00357     tmp->prev->next=new;
00358     tmp->prev=new;
00359   }
00360   return(0);
00361 }
00362 
00379 static void
00380 put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo)
00381 {
00382   NODE *ntmp;
00383   
00384   jlog("DEBUG: hypotheses remained in global stack\n");
00385   while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
00386     jlog("DEBUG: %3d: s=%f",*stacknum, ntmp->score);
00387     put_hypo_woutput(ntmp, winfo);
00388     free_node(ntmp);
00389   }
00390 }
00391 
00404 static void
00405 free_all_nodes(NODE *start)
00406 {
00407   NODE *tmp;
00408   NODE *next;
00409 
00410   tmp=start;
00411   while(tmp) {
00412     next=tmp->next;
00413     free_node(tmp);
00414     tmp=next;
00415   }
00416 }
00417 
00418 
00419 #ifdef CONFIDENCE_MEASURE
00420 
00421 /**********************************************************************/
00422 /********** 単語信頼度の計算 ******************************************/
00423 /********** Confidence scoring ****************************************/
00424 /**********************************************************************/
00425 
00426 #ifdef CM_SEARCH
00427 /**************************************************************/
00428 /**** CM computation method 1(default):                  ******/
00429 /****   - Use local posterior probabilities while search ******/
00430 /****   - Obtain CM at hypothesis expansion time         ******/
00431 /**************************************************************/
00432 
00451 static void
00452 cm_init(StackDecode *sd, int wnum, LOGPROB cm_alpha
00453 #ifdef CM_MULTIPLE_ALPHA
00454         , cm_alpha_num
00455 #endif
00456         )
00457 {
00458   sd->l_stacksize = wnum;
00459   sd->l_start = sd->l_bottom = NULL;
00460   sd->l_stacknum = 0;
00461   sd->cm_alpha = cm_alpha;
00462 #ifdef CM_MULTIPLE_ALPHA
00463   if (sd->cmsumlist) {
00464     if (sd->cmsumlistlen < cm_alpha_num) {
00465       free(sd->cmsumlist);
00466       sd->cmsumlist = NULL;
00467     }
00468   }
00469   if (sd->cmsumlist == NULL) {
00470     sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00471     sd->cmsumlistlen = cm_alpha_num;
00472   }
00473 #endif    
00474 }
00475 
00490 static void
00491 cm_store(StackDecode *sd, NODE *new)
00492 {
00493   /* store the generated hypo into local stack */
00494   put_to_stack(new, &(sd->l_start), &(sd->l_bottom), &(sd->l_stacknum), sd->l_stacksize);
00495 }
00496 
00512 static void
00513 cm_sum_score(StackDecode *sd
00514 #ifdef CM_MULTIPLE_ALPHA
00515              , bgn, end, step
00516 #endif
00517 )
00518 {
00519   NODE *node;
00520   LOGPROB sum;
00521 #ifdef CM_MULTIPLE_ALPHA
00522   LOGPROB a;
00523   int j;
00524 #endif
00525 
00526   if (sd->l_start == NULL) return;      /* no hypo */
00527   sd->cm_tmpbestscore = sd->l_start->score; /* best hypo is at the top of the stack */
00528 
00529 #ifdef CM_MULTIPLE_ALPHA
00530   for (j = 0, a = bgn; a <= end; a += step) {
00531     sum = 0.0;
00532     for(node = sd->l_start; node; node = node->next) {
00533       sum += pow(10, a * (node->score - sd->cm_tmpbestscore));
00534     }
00535     sd->cmsumlist[j++] = sum;   /* store sums for each alpha coef. */
00536   }
00537 #else
00538   sum = 0.0;
00539   for(node = sd->l_start; node; node = node->next) {
00540     sum += pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore));
00541   }
00542   sd->cm_tmpsum = sum;          /* store sum */
00543 #endif
00544 
00545 }
00546 
00563 static void
00564 cm_set_score(StackDecode *sd, NODE *node
00565 #ifdef CM_MULTIPLE_ALPHA
00566              , bgn, end, step
00567 #endif
00568              )
00569 {
00570 #ifdef CM_MULTIPLE_ALPHA
00571   int j;
00572   LOGPROB a;
00573 #endif
00574 
00575 #ifdef CM_MULTIPLE_ALPHA
00576   for (j = 0, a = bgn; a <= end; a += step) {
00577     node->cmscore[node->seqnum-1][j] = pow(10, a * (node->score - sd->cm_tmpbestscore)) / sd->cmsumlist[j];
00578     j++;
00579   }
00580 #else
00581   node->cmscore[node->seqnum-1] = pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)) / sd->cm_tmpsum;
00582 #endif
00583 }
00584 
00601 static NODE *
00602 cm_get_node(StackDecode *sd)
00603 {
00604   return(get_best_from_stack(&(sd->l_start), &(sd->l_stacknum)));
00605 }
00606 
00607 #endif /* CM_SEARCH */
00608 
00609 #ifdef CM_NBEST
00610 /*****************************************************************/
00611 /**** CM computation method 2: conventional N-best scoring *******/
00612 /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/
00613 /*****************************************************************/
00614 
00634 static void
00635 cm_compute_from_nbest(StackDecode *sd, NODE *start, int stacknum, JCONF_SEARCH *jconf)
00636 {
00637   NODE *node;
00638   LOGPROB bestscore, sum, s;
00639   WORD_ID w;
00640   int i;
00641   LOGPROB cm_alpha;
00642   int j;
00643 
00644   /* prepare buffer */
00645 #ifdef CM_MULTIPLE_ALPHA
00646   if (sd->cmsumlist) {
00647     if (sd->cmsumlistlen < jconf->annotate.cm_alpha_num) {
00648       free(sd->cmsumlist);
00649       sd->cmsumlist = NULL;
00650     }
00651   }
00652   if (sd->cmsumlist == NULL) {
00653     sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * jconf->annotate.cm_alpha_num);
00654     sd->cmsumlistlen = cm_alpha_num;
00655   }
00656 #endif    
00657   if (sd->sentcm == NULL) {             /* not allocated yet */
00658     sd->sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum);
00659     sd->sentnum = stacknum;
00660   } else if (sd->sentnum < stacknum) { /* need expanded */
00661     sd->sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum);
00662     sd->sentnum = stacknum;
00663   }
00664   if (sd->wordcm == NULL) {
00665     sd->wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
00666   }
00667   
00668   cm_alpha = jconf->annotate.cm_alpha;
00669 #ifdef CM_MULTIPLE_ALPHA
00670   for (j = 0, cm_alpha = jconf->annotate.cm_alpha_bgn; cm_alpha <= jconf->annotate.cm_alpha_end; cm_alpha += jconf->annotate.cm_alpha_step) {
00671 #endif
00672     /* clear whole word cm buffer */
00673     for(w=0;w<winfo->num;w++) {
00674       sd->wordcm[w] = 0.0;
00675     }
00676     /* get best score */
00677     bestscore = start->score;
00678     /* compute sum score of all hypothesis */
00679     sum = 0.0;
00680     for (node = start; node != NULL; node = node->next) {
00681       sum += pow(10, cm_alpha * (node->score - bestscore));
00682     }
00683     /* compute sentence posteriori probabilities */
00684     i = 0;
00685     for (node = start; node != NULL; node = node->next) {
00686       sd->sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum;
00687       i++;
00688     }
00689     /* compute word posteriori probabilities */
00690     i = 0;
00691     for (node = start; node != NULL; node = node->next) {
00692       for (w=0;w<node->seqnum;w++) {
00693         sd->wordcm[node->seq[w]] += sd->sentcm[i];
00694       }
00695       i++;
00696     }
00697     /* store the probabilities to node */
00698     for (node = start; node != NULL; node = node->next) {
00699       for (w=0;w<node->seqnum;w++) {
00700 #ifdef CM_MULTIPLE_ALPHA
00701         node->cmscore[w][j] = sd->wordcm[node->seq[w]];
00702 #else   
00703         node->cmscore[w] = sd->wordcm[node->seq[w]];
00704 #endif
00705       }
00706     }
00707 #ifdef CM_MULTIPLE_ALPHA
00708     j++;
00709   }
00710 #endif
00711 }
00712 
00713 #endif /* CM_NBEST */
00714 
00715 #endif /* CONFIDENCE_MEASURE */
00716 
00717 
00718 /**********************************************************************/
00719 /********** Enveloped best-first search *******************************/
00720 /**********************************************************************/
00721 
00722 /*
00723  * 1. Word envelope
00724  *
00725  * 一種の仮説ビーム幅を設定: 展開元となった仮説の数をその仮説長(単語数)
00726  * ごとにカウントする. 一定数を越えたらそれより短い仮説は以後展開しない. 
00727  * 
00728  * Introduce a kind of beam width to search tree: count the number of
00729  * popped hypotheses per the depth of the hypotheses, and when a count
00730  * in a certain depth reaches the threshold, all hypotheses shorter than
00731  * the depth will be dropped from candidates.
00732  *
00733  */
00734 
00748 static void
00749 wb_init(StackDecode *s)
00750 {
00751   int i;
00752   for(i=0;i<=MAXSEQNUM;i++) s->hypo_len_count[i] = 0;
00753   s->maximum_filled_length = -1;
00754 }
00755 
00782 static boolean
00783 wb_ok(StackDecode *s, NODE *now, int width)
00784 {
00785   if (now->seqnum <= s->maximum_filled_length) {
00786     /* word expansion is not allowed because a word expansion count
00787        at deeper level has already reached the limit */
00788     return FALSE;
00789   } else {
00790     /* word expansion is possible.  Increment the word expansion count
00791        of the given depth */
00792     s->hypo_len_count[now->seqnum]++;
00793     if (s->hypo_len_count[now->seqnum] > width) {
00794       /* the word expansion count of this level has reached the limit, so
00795          set the beam-filled depth to this level to inhibit further
00796          expansion of shorter hypotheses. */
00797       if (s->maximum_filled_length < now->seqnum) s->maximum_filled_length = now->seqnum;
00798     }
00799     return TRUE;
00800   }
00801 }
00802 
00803 #ifdef SCAN_BEAM
00804 /*
00805  * 2. Score envelope
00806  *
00807  * Viterbi計算量の削減: 入力フレームごとの最大尤度 (score envelope) を
00808  * 全仮説にわたって記録しておく. 仮説の前向き尤度計算時に，その envelope
00809  * から一定幅以上スコアが下回るとき，Viterbi パスの演算を中断する. 
00810  *
00811  * ここでは，取り出した仮説からフレームごとの score envelope を更新する
00812  * 部分が記述されている. Envelope を考慮した Viterbi 計算の実際は
00813  * scan_word() を参照のこと. 
00814  *
00815  * Reduce computation cost of hypothesis Viterbi processing by setting a
00816  * "score envelope" that holds the maximum scores at every frames
00817  * throughout the expanded hypotheses.  When calculating Viterbi path
00818  * on HMM trellis for updating score of popped hypothesis, Viterbi paths
00819  * that goes below a certain range from the score envelope are dropped.
00820  *
00821  * These functions are for updating the score envelope according to the
00822  * popped hypothesis.  For actual Viterbi process with score envelope, 
00823  * see scan_word().
00824  *
00825  */
00826 
00842 static void
00843 envl_init(StackDecode *s, int framenum)
00844 {
00845   int i;
00846   for(i=0;i<framenum;i++) s->framemaxscore[i] = LOG_ZERO;
00847 }
00848 
00865 static void
00866 envl_update(StackDecode *s, NODE *n, int framenum)
00867 {
00868   int t;
00869   for(t=framenum-1;t>=0;t--) {
00870     if (s->framemaxscore[t] < n->g[t]) s->framemaxscore[t] = n->g[t];
00871   }
00872 }
00873 #endif /* SCAN_BEAM */
00874 
00875 
00876 /**********************************************************************/
00877 /********** Short pause segmentation **********************************/
00878 /**********************************************************************/
00879 
00903 void
00904 segment_set_last_nword(NODE *hypo, RecogProcess *r)
00905 {
00906   int i;
00907   WORD_ID w;
00908 
00909   if (r->sp_break_last_nword_allow_override) {
00910     for(i=0;i<hypo->seqnum;i++) {
00911       w = hypo->seq[i];
00912       if (w != r->sp_break_last_word
00913           && !is_sil(w, r)
00914           && !r->lm->winfo->is_transparent[w]
00915           ) {
00916         r->sp_break_last_nword = w;
00917         break;
00918       }
00919     }
00920 #ifdef SP_BREAK_DEBUG
00921     printf("sp_break_last_nword=%d[%s]\n", r->sp_break_last_nword, r->lm->winfo->woutput[r->sp_break_last_nword]);
00922 #endif
00923   } else {
00924     r->sp_break_last_nword = WORD_INVALID;
00925   }
00926 }
00927 
00928 
00929 /**********************************************************************/
00930 /********* Debug output of hypothesis while search ********************/
00931 /**********************************************************************/
00932 
00947 static void
00948 put_hypo_woutput(NODE *hypo, WORD_INFO *winfo)
00949 {
00950   int i,w;
00951 
00952   if (hypo != NULL) {
00953     for (i=hypo->seqnum-1;i>=0;i--) {
00954       w = hypo->seq[i];
00955       jlog(" %s", winfo->woutput[w]);
00956     }
00957   }
00958   jlog("\n");  
00959 }
00960 
00975 static void
00976 put_hypo_wname(NODE *hypo, WORD_INFO *winfo)
00977 {
00978   int i,w;
00979 
00980   if (hypo != NULL) {
00981     for (i=hypo->seqnum-1;i>=0;i--) {
00982       w = hypo->seq[i];
00983       jlog(" %s", winfo->wname[w]);
00984     }
00985   }
00986   jlog("\n");  
00987 }
00988 
01001 static void
01002 store_result_pass2(NODE *hypo, RecogProcess *r)
01003 {
01004   int i;
01005   Sentence *s;
01006 
01007   s = &(r->result.sent[r->result.sentnum]);
01008 
01009   s->word_num = hypo->seqnum;
01010   for (i = 0; i < hypo->seqnum; i++) {
01011     s->word[i] = hypo->seq[hypo->seqnum - 1 - i];
01012   }
01013 #ifdef CONFIDENCE_MEASURE
01014   for (i = 0; i < hypo->seqnum; i++) {
01015     s->confidence[i] = hypo->cmscore[hypo->seqnum - 1 - i];
01016   }
01017 #endif
01018 
01019   s->score = hypo->score;
01020   s->score_lm = hypo->totallscore;
01021   s->score_am = hypo->score - hypo->totallscore;
01022   if (r->lmtype == LM_DFA) {
01023     /* output which grammar the hypothesis belongs to on multiple grammar */
01024     /* determine only by the last word */
01025     if (multigram_get_all_num(r->lm) > 0) {
01026       s->gram_id = multigram_get_gram_from_category(r->lm->winfo->wton[hypo->seq[0]], r->lm);
01027     } else {
01028       s->gram_id = 0;
01029     }
01030   }
01031 
01032   r->result.sentnum++;
01033   /* add to tail */
01034 }
01035 
01036 
01037 /**********************************************************************/
01038 /******** Output top 'ncan' hypotheses in a stack and free all ********/
01039 /**********************************************************************/
01040 
01083 static void
01084 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, RecogProcess *r, HTK_Param *param)
01085 {
01086   NODE *now;
01087   int num;
01088 
01089 #ifdef CM_NBEST 
01090   /* compute CM from the N-best sentence candidates */
01091   cm_compute_from_nbest(&(r->pass2), *r_start, *r_stacknum, r->config);
01092 #endif
01093   num = 0;
01094   while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) {
01095     num++;
01096     /* output result */
01097     store_result_pass2(now, r);
01098 
01099     /* if in sp segmentation mode, */
01100     /* set the last context-aware word for next recognition */
01101     if (r->lmtype == LM_PROB && r->config->successive.enabled && num == 1) segment_set_last_nword(now, r);
01102 
01103     free_node(now);
01104   }
01105   /* free the rest */
01106   if (now != NULL) free_node(now);
01107   free_all_nodes(*r_start);
01108 }  
01109 
01145 void
01146 pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final)
01147 {
01148   NODE *now;
01149   int i, j;
01150 
01151   /* 探索失敗 */
01152   /* search failed */
01153 
01154   /* make temporal hypothesis data from the result of previous 1st pass */
01155   now = newnode(r);
01156   for (i=0;i<r->pass1_wnum;i++) {
01157     now->seq[i] = r->pass1_wseq[r->pass1_wnum-1-i];
01158   }
01159   now->seqnum = r->pass1_wnum;
01160   now->score = r->pass1_score;
01161 #ifdef CONFIDENCE_MEASURE
01162   /* fill in null values */
01163 #ifdef CM_MULTIPLE_ALPHA
01164   for(j=0;j<jconf->annotate.cm_alpha_num;j++) {
01165     for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0;
01166   }
01167 #else
01168   for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0;
01169 #endif
01170 #endif /* CONFIDENCE_MEASURE */
01171   
01172   if (r->lmtype == LM_PROB && r->config->successive.enabled) {
01173     /* if in sp segment mode, */
01174     /* find segment restart words from 1st pass result */
01175     segment_set_last_nword(now, r);
01176   }
01177     
01178   if (use_1pass_as_final) {
01179     /* 第1パスの結果をそのまま出力する */
01180     /* output the result of the previous 1st pass as a final result. */
01181     store_result_pass2(now, r);
01182     r->result.status = J_RESULT_STATUS_SUCCESS;
01183   } else {
01184     /* store output as failure */
01185     r->result.status = J_RESULT_STATUS_FAIL;
01186     //callback_exec(CALLBACK_RESULT, r);
01187   }
01188   
01189   free_node(now);
01190 }
01191 
01192 
01193 /**********************************************************************/
01194 /********* Main stack decoding function *******************************/
01195 /**********************************************************************/
01196 
01224 void
01225 wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num)
01226 {
01227   /* 文仮説スタック */
01228   /* hypothesis stack (double-linked list) */
01229   int stacknum;                 /* current stack size */
01230   NODE *start = NULL;           /* top node */
01231   NODE *bottom = NULL;          /* bottom node */
01232 
01233   /* 認識結果格納スタック(結果はここへいったん集められる) */
01234   /* result sentence stack (found results will be stored here and then re-ordered) */
01235   int r_stacksize;
01236   int r_stacknum;
01237   NODE *r_start = NULL;
01238   NODE *r_bottom = NULL;
01239 
01240   /* ワークエリア */
01241   /* work area */
01242   NEXTWORD fornoise; /* Dummy NEXTWORD data for short-pause insertion handling */
01243 
01244   NEXTWORD **nextword, *nwroot; /* buffer to store predicted words */
01245   int maxnwnum;                 /* current allocated number of words in nextword */
01246   int nwnum;                    /* current number of words in nextword */
01247   NODE *now, *new;              /* popped current hypo., expanded new hypo. */
01248   NODE *now_noise;             /* for inserting/deleting noise word */
01249   boolean now_noise_calced;
01250   boolean acc;
01251   int t;
01252   int w,i,j;
01253   LOGPROB last_score = LOG_ZERO;
01254   /* for graph generation */
01255   LOGPROB prev_score;
01256   WordGraph *wordgraph_root = NULL;
01257   boolean merged_p;
01258 #ifdef GRAPHOUT_DYNAMIC
01259   int dynamic_merged_num = 0;
01260   WordGraph *wtmp;
01261   LOGPROB lscore_prev;
01262 #endif
01263 #ifdef GRAPHOUT_SEARCH
01264   int terminate_search_num = 0;
01265 #endif
01266 
01267   /* local temporal parameter */
01268   int stacksize, ncan, maxhypo, peseqlen;
01269   JCONF_SEARCH *jconf;
01270   WORD_INFO *winfo;
01271   NGRAM_INFO *ngram;
01272   DFA_INFO *gdfa;
01273   BACKTRELLIS *backtrellis;
01274   StackDecode *dwrk;
01275 
01276   if (r->lmtype == LM_DFA) {
01277     if (debug2_flag) jlog("DEBUG: only words in these categories will be expanded: %d-%d\n", cate_bgn, cate_bgn + cate_num-1);
01278   }
01279 
01280   /*
01281    * 初期化
01282    * Initialize
01283    */
01284 
01285   /* just for quick access */
01286   jconf = r->config;
01287   winfo = r->lm->winfo;
01288   if (r->lmtype == LM_PROB) {
01289     ngram = r->lm->ngram;
01290   } else if (r->lmtype == LM_DFA) {
01291     gdfa = r->lm->dfa;
01292   }
01293   backtrellis = r->backtrellis;
01294   dwrk = &(r->pass2);
01295 
01296   stacksize = jconf->pass2.stack_size;
01297   ncan = jconf->pass2.nbest;
01298   maxhypo = jconf->pass2.hypo_overflow;
01299   peseqlen = backtrellis->framelen;
01300 
01301   /* store data for sub routines */
01302   r->peseqlen = backtrellis->framelen;
01303   //recog->ccd_flag = recog->jconf->am.ccd_flag;
01304   /* 予測単語格納領域を確保 */
01305   /* malloc area for word prediction */
01306   /* the initial maximum number of nextwords is the size of vocabulary */
01307   nextword = nw_malloc(&maxnwnum, &nwroot, winfo->num);
01308   /* 前向きスコア計算用の領域を確保 */
01309   /* malloc are for forward viterbi (scan_word()) */
01310   malloc_wordtrellis(r);                /* scan_word用領域 */
01311   /* 仮説スタック初期化 */
01312   /* initialize hypothesis stack */
01313   start = bottom = NULL;
01314   stacknum = 0;
01315   /* 結果格納スタック初期化 */
01316   /* initialize result stack */
01317   r_stacksize = ncan;
01318   r_start = r_bottom = NULL;
01319   r_stacknum = 0;
01320 
01321   /* カウンタ初期化 */
01322   /* initialize counter */
01323   dwrk->popctr = 0;
01324   dwrk->genectr = 0;
01325   dwrk->pushctr = 0;
01326   dwrk->finishnum = 0;
01327   
01328 #ifdef CM_SEARCH
01329   /* initialize local stack */
01330   cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha
01331 #ifdef CM_MULTIPLE_ALPHA
01332           , jconf->annotate.cm_alpha_num
01333 #endif
01334           );
01335 #endif
01336 #ifdef SCAN_BEAM
01337   /* prepare and initialize score envelope */
01338   dwrk->framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
01339   envl_init(dwrk, peseqlen);
01340 #endif /* SCAN_BEAM */
01341 
01342   /* エンベロープ探索用の単語長別展開数カウンタを初期化 */
01343   /* initialize counters for envelope search */
01344   if (jconf->pass2.enveloped_bestfirst_width >= 0) wb_init(dwrk);
01345 
01346   if (jconf->graph.enabled) {
01347     wordgraph_init(r->wchmm);
01348   }
01349 
01350   /* 
01351    * 初期仮説(1単語からなる)を得, 文仮説スタックにいれる
01352    * get a set of initial words from LM function and push them as initial
01353    * hypotheses
01354    */
01355   /* the first words will be stored in nextword[] */
01356   if (r->lmtype == LM_PROB) {
01357     nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, r);
01358   } else if (r->lmtype == LM_DFA) {
01359     nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r);
01360     /* 溢れたら、バッファを増やして再チャレンジ */
01361     /* If the number of nextwords can exceed the buffer size, expand the
01362        nextword data area */
01363     while (nwnum < 0) {
01364       nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num);
01365       nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r);
01366     }
01367   }
01368 
01369   if (debug2_flag) {
01370     jlog("DEBUG: %d words in wordtrellis as first hypothesis\n", nwnum);
01371   }
01372   
01373   /* store them to stack */
01374   for (w = 0; w < nwnum; w++) {
01375     if (r->lmtype == LM_DFA) {
01376       /* limit word hypothesis */
01377       if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01378         continue;
01379       }
01380     }
01381     /* generate new hypothesis */
01382     new = newnode(r);
01383     start_word(new, nextword[w], param, r);
01384     if (r->lmtype == LM_DFA) {
01385       if (new->score <= LOG_ZERO) { /* not on trellis */
01386         free_node(new);
01387         continue;
01388       }
01389     }
01390     dwrk->genectr++;
01391 #ifdef CM_SEARCH
01392     /* store the local hypothesis to temporal stack */
01393     cm_store(dwrk, new);
01394 #else 
01395     /* put to stack */
01396     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01397       dwrk->current = new;
01398       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
01399       if (jconf->graph.enabled) {
01400         new->prevgraph = NULL;
01401         new->lastcontext = NULL;
01402       }
01403       dwrk->pushctr++;
01404     }
01405 #endif
01406   }
01407 #ifdef CM_SEARCH
01408   /* compute score sum */
01409   cm_sum_score(dwrk
01410 #ifdef CM_MULTIPLE_ALPHA
01411                , jconf->annotate.cm_alpha_bgn
01412                , jconf->annotate.cm_alpha_end
01413                , jconf->annotate.cm_alpha_step
01414 #endif
01415                );
01416 
01417   /* compute CM and put the generated hypotheses to global stack */
01418   while ((new = cm_get_node(dwrk)) != NULL) {
01419     cm_set_score(dwrk, new
01420 #ifdef CM_MULTIPLE_ALPHA
01421                  , jconf->annotate.cm_alpha_bgn
01422                  , jconf->annotate.cm_alpha_end
01423                  , jconf->annotate.cm_alpha_step
01424 #endif
01425                  );
01426 #ifdef CM_SEARCH_LIMIT
01427     if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres
01428 #ifdef CM_SEARCH_LIMIT_AFTER
01429         && dwrk->finishnum > 0
01430 #endif
01431         ) {
01432       free_node(new);
01433       continue;
01434     }
01435 #endif /* CM_SEARCH_LIMIT */
01436     
01437     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01438       dwrk->current = new;
01439       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
01440       if (r->graphout) {
01441         new->prevgraph = NULL;
01442         new->lastcontext = NULL;
01443       }
01444       dwrk->pushctr++;
01445     }
01446   }
01447 #endif
01448 
01449   if (debug2_flag) {
01450     jlog("DEBUG: %d pushed\n", dwrk->pushctr);
01451   }
01452   
01453   /********************/
01454   /* main search loop */
01455   /********************/
01456 
01457   for (;;) {
01458 
01459     /* if terminate signal has been received, cancel this input */
01460     /* 
01461      * if (recog->process_want_terminate) {
01462      *   jlog("DEBUG: process terminated by request\n");
01463      *   break;
01464      * }
01465      */
01466     
01467     /* 
01468      * 仮説スタックから最もスコアの高い仮説を取り出す
01469      * pop the top hypothesis from stack
01470      */
01471 #ifdef DEBUG
01472     jlog("DEBUG: get one hypothesis\n");
01473 #endif
01474     now = get_best_from_stack(&start,&stacknum);
01475     if (now == NULL) {  /* stack empty ---> 探索終了*/
01476       jlog("WARNING: %02d %s: hypothesis stack exhausted, terminate search now\n", r->config->id, r->config->name);
01477       jlog("STAT: %02d %s: %d sentences have been found\n", r->config->id, r->config->name, dwrk->finishnum);
01478       break;
01479     }
01480     /* (bogus score check) */
01481     if (now->score <= LOG_ZERO) {
01482       free_node(now);
01483       continue;
01484     }
01485 
01486 
01487     /* 単語グラフ用に pop 仮説の f スコアを一時保存 */
01488     if (r->graphout) {
01489       prev_score = now->score;
01490     }
01491 
01492     /* word envelope チェック */
01493     /* consult word envelope */
01494     if (jconf->pass2.enveloped_bestfirst_width >= 0) {
01495       if (!wb_ok(dwrk, now, jconf->pass2.enveloped_bestfirst_width)) {
01496         /* この仮説長における展開元仮説数の累計数は既に閾値を越えている. 
01497            そのため，この仮説は捨てる. */
01498         /* the number of popped hypotheses at the length already
01499            reaches its limit, so the current popped hypothesis should
01500            be discarded here with no expansion */
01501         if (debug2_flag) {
01502           jlog("DEBUG: popped but pruned by word envelope:");
01503           put_hypo_woutput(now, r->lm->winfo);
01504         }
01505         free_node(now);
01506         continue;
01507       }
01508     }
01509     
01510 #ifdef CM_SEARCH_LIMIT_POP
01511     if (now->cmscore[now->seqnum-1] < jconf->annotate.cm_cut_thres_pop) {
01512       free_node(now);
01513       continue;
01514     }
01515 #endif /* CM_SEARCH_LIMIT_POP */
01516 
01517     dwrk->popctr++;
01518 
01519     /* (for debug) 取り出した仮説とそのスコアを出力 */
01520     /*             output information of the popped hypothesis to stdout */
01521     if (debug2_flag) {
01522       jlog("DEBUG: --- pop %d:\n", dwrk->popctr);
01523       jlog("DEBUG:  "); put_hypo_woutput(now, r->lm->winfo);
01524       jlog("DEBUG:  "); put_hypo_wname(now, r->lm->winfo);
01525       jlog("DEBUG:  %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]);
01526       jlog("DEBUG:  last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt);
01527     }
01528 
01529     dwrk->current = now;
01530     //callback_exec(CALLBACK_DEBUG_PASS2_POP, r);
01531 
01532     if (r->graphout) {
01533 
01534 #ifdef GRAPHOUT_DYNAMIC
01535       /* merge last word in popped hypo if possible */
01536       wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p, jconf);
01537       if (wtmp != NULL) {               /* wtmp holds merged word */
01538         dynamic_merged_num++;
01539 
01540         lscore_prev = (now->prevgraph) ? now->prevgraph->lscore_tmp : 0.0;
01541 
01542         if (now->prevgraph != NULL) {
01543           if (now->prevgraph->saved) {
01544             j_internal_error("wchmm_fbs: already saved??\n");
01545           }
01546           wordgraph_free(now->prevgraph);
01547         }
01548 
01549         if (now->lastcontext != NULL
01550             && now->lastcontext != wtmp /* avoid self loop */
01551             ) {
01552           wordgraph_check_and_add_leftword(now->lastcontext, wtmp, lscore_prev);
01553 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT
01554           if (merged_p) {
01555             if (wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev) == FALSE) {
01556               merged_p = TRUE;
01557             } else {
01558               merged_p = FALSE;
01559             }
01560           } else {
01561             wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev);
01562           }
01563 #else
01564           wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev);
01565 #endif    
01566         }
01567         
01568         now->prevgraph = wtmp;  /* change word to the merged one */
01569 
01570         /*printf("last word merged\n");*/
01571         /* previous still remains at memory here... (will be purged later) */
01572       } else {
01573         wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01574       }
01575 #ifdef GRAPHOUT_SEARCH
01576       /* if recent hypotheses are included in the existing graph, terminate */
01577       if (merged_p && now->endflag == FALSE
01578 #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION
01579           /* Do not apply search termination by graph merging
01580              until the first sentence candidate is found. */
01581           && (jconf->graph.graphout_search_delay == FALSE || dwrk->finishnum > 0)
01582 #endif
01583           ) {
01584         terminate_search_num++;
01585         free_node(now);
01586         continue;
01587       }
01588 #endif
01589 #else  /* ~GRAPHOUT_DYNAMIC */
01590       /* always save */
01591       wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01592 #endif /* ~GRAPHOUT_DYNAMIC */
01593     }
01594 
01595     /* 取り出した仮説のスコアを元に score envelope を更新 */
01596     /* update score envelope using the popped hypothesis */
01597     envl_update(dwrk, now, peseqlen);
01598 
01599     /* 
01600      * 取り出した仮説の受理フラグが既に立っていれば，
01601      * その仮説は探索終了とみなし，結果として出力して次のループへ. 
01602      *
01603      * If the popped hypothesis already reached to the end, 
01604      * we can treat it as a recognition result.
01605      */
01606 #ifdef DEBUG
01607     VERMES("endflag check\n");
01608 #endif
01609     
01610     if (now->endflag) {
01611       if (debug2_flag) {
01612         jlog("DEBUG:  This is a full sentence candidate\n");
01613       }
01614       /* quick, dirty hack */
01615       if (now->score == last_score) {
01616         free_node(now);
01617         continue;
01618       } else {
01619         last_score = now->score;
01620       }
01621       
01622       dwrk->finishnum++;
01623       if (debug2_flag) {
01624         jlog("DEBUG:  %d-th sentence found\n", dwrk->finishnum);
01625       }
01626 
01627         /* 一定数の仮説が得られたあとスコアでソートするため，
01628            一時的に別のスタックに格納しておく */
01629         /* store the result to result stack
01630            after search is finished, they will be re-ordered and output */
01631         put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize);
01632         /* 指定数の文仮説が得られたなら探索を終了する */
01633         /* finish search if specified number of results are found */
01634         if (dwrk->finishnum >= ncan) {
01635           break;
01636         } else {
01637           continue;
01638         }
01639       
01640     } /* end of now->endflag */
01641 
01642     
01643     /* 
01644      * 探索失敗を検出する. 
01645      * 仮説数が maxhypo 以上展開されたら, もうこれ以上は探索しない
01646      *
01647      * detecting search failure:
01648      * if the number of expanded hypotheses reaches maxhypo, giveup further search
01649      */
01650 #ifdef DEBUG
01651     jlog("DEBUG: loop end check\n");
01652 #endif
01653     if (dwrk->popctr >= maxhypo) {
01654       jlog("WARNING: %02d %s: num of popped hypotheses reached the limit (%d)\n", r->config->id, r->config->name, maxhypo);
01655       /* (for debug) 探索失敗時に、スタックに残った情報を吐き出す */
01656       /* (for debug) output all hypothesis remaining in the stack */
01657       if (debug2_flag) put_all_in_stack(&start, &stacknum, r->lm->winfo);
01658       free_node(now);
01659       break;                    /* end of search */
01660     }
01661     /* 仮説長が一定値を越えたとき，その仮説を破棄する */
01662     /* check hypothesis word length overflow */
01663     if (now->seqnum >= MAXSEQNUM) {
01664       jlog("ERROR: sentence length exceeded system limit ( > %d)\n", MAXSEQNUM);
01665       free_node(now);
01666       continue;
01667     }
01668 
01669 #ifndef GRAPHOUT_PRECISE_BOUNDARY
01670     if (r->graphout) {
01671       /* if monophone (= no backscan), the tail g score should be kept here */
01672       /* else, updated tail g score will be computed in scan_word()  */
01673       if(!jconf->am.ccd_flag) {
01674         now->tail_g_score = now->g[now->bestt];
01675       }
01676     }
01677 #endif
01678 
01679     /*
01680      * 前向きスコアを更新する： 最後の単語の部分の前向きスコアを計算する. 
01681      * update forward score: compute forward trellis for the last word
01682      */
01683 #ifdef DEBUG
01684     jlog("DEBUG: scan_word\n");
01685 #endif
01686     scan_word(now, param, r);
01687     if (now->score < LOG_ZERO) { /* another end-of-search detecter */
01688       jlog("WARNING: too low score, ignore: score=%f",now->score);
01689       put_hypo_woutput(now, r->lm->winfo);
01690       free_node(now);
01691       continue;
01692     }
01693 
01694     /* 
01695      * 取り出した仮説が文として受理可能であれば，
01696      * 受理フラグを立ててをスタックにいれ直しておく. 
01697      * (次に取り出されたら解となる)
01698      *
01699      * if the current popped hypothesis is acceptable, set endflag
01700      * and return it to stack: it will become the recognition result
01701      * when popped again.
01702      */
01703 #ifdef DEBUG
01704     jlog("DEBUG: accept check\n");
01705 #endif
01706 
01707     if (r->lmtype == LM_PROB) {
01708       acc = ngram_acceptable(now, r);
01709     } else if (r->lmtype == LM_DFA) {
01710       acc = dfa_acceptable(now, r);
01711     }
01712     if (acc && now->estimated_next_t <= 5) {
01713       new = newnode(r);
01714       /* new に now の中身をコピーして，最終的なスコアを計算 */
01715       /* copy content of 'now' to 'new', and compute the final score */
01716       last_next_word(now, new, param, r);
01717       if (debug2_flag) {
01718         jlog("DEBUG:  This is acceptable as a sentence candidate\n");
01719       }
01720       /* g[] が入力始端に達していなければ棄却 */
01721       /* reject this sentence candidate if g[] does not reach the end */
01722       if (new->score <= LOG_ZERO) {
01723         if (debug2_flag) {
01724           jlog("DEBUG:  But invalid because Viterbi pass does not reach the 0th frame\n");
01725         }
01726         free_node(new);
01727         free_node(now);
01728         continue;
01729       }
01730       /* 受理フラグを立てて入れ直す */
01731       /* set endflag and push again  */
01732       if (debug2_flag) {
01733         jlog("DEBUG  This hypo itself was pushed with final score=%f\n", new->score);
01734       }
01735       new->endflag = TRUE;
01736       if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01737         if (r->graphout) {
01738           if (new->score > LOG_ZERO) {
01739             new->lastcontext = now->prevgraph;
01740             new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1],
01741                                               WORD_INVALID,
01742                                               (new->seqnum >= 2) ? new->seq[new->seqnum-2] : WORD_INVALID,
01743                                               0,
01744 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01745                                               /* wordend are shifted to the last */
01746 #ifdef PASS2_STRICT_IWCD
01747                                               new->wordend_frame[0],
01748 #else
01749                                               now->wordend_frame[0],
01750 #endif
01751 #else
01752                                               now->bestt,
01753 #endif
01754                                               new->score,
01755                                               prev_score,
01756                                               now->g[0],
01757 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01758 #ifdef PASS2_STRICT_IWCD
01759                                               new->wordend_gscore[0],
01760 #else
01761                                               now->wordend_gscore[0],
01762 #endif
01763 #else
01764                                               now->tail_g_score,
01765 #endif
01766                                               now->lscore,
01767 #ifdef CM_SEARCH
01768                                               new->cmscore[new->seqnum-1],
01769 #else
01770                                               LOG_ZERO,
01771 #endif
01772                                               r
01773                                               );
01774           } else {
01775             new->lastcontext = now->lastcontext;
01776             new->prevgraph = now->prevgraph;
01777           }
01778         } /* put_to_stack() != -1 */
01779       } /* recog->graphout */
01780       /* この仮説はここで終わらずに, ここからさらに単語展開する */
01781       /* continue with the 'now' hypothesis, not terminate here */
01782     }
01783     
01784     /*
01785      * この仮説から，次単語集合を決定する. 
01786      * 次単語集合は, この仮説の推定始端フレーム周辺に存在した
01787      * 第１パスのトレリス単語集合. 
01788      *
01789      * N-gramの場合は各単語の n-gram 接続確率が含まれる. 
01790      * DFA の場合は, その中でさらに DFA 上で接続可能なもののみが返ってくる
01791      */
01792     /*
01793      * Determine next word set that can connect to this hypothesis.
01794      * They come from the trellis word that has been survived at near the
01795      * beginning of the last word.
01796      *
01797      * In N-gram mode, they also contain N-gram probabilities toward the
01798      * source hypothesis.  In DFA mode, the word set is further reduced
01799      * by the grammatical constraint
01800      */
01801 #ifdef DEBUG
01802     jlog("DEBUG: get next words\n");
01803 #endif
01804     if (r->lmtype == LM_PROB) {
01805       nwnum = ngram_nextwords(now, nextword, maxnwnum, r);
01806     } else if (r->lmtype == LM_DFA) {
01807       nwnum = dfa_nextwords(now, nextword, maxnwnum, r);
01808       /* nextword が溢れたら、バッファを増やして再チャレンジ */
01809       /* If the number of nextwords can exceed the buffer size, expand the
01810          nextword data area */
01811       while (nwnum < 0) {
01812         nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num);
01813         nwnum = dfa_nextwords(now, nextword, maxnwnum, r);
01814       }
01815     }
01816     if (debug2_flag) {
01817       jlog("DEBUG:  %d words extracted from wordtrellis\n", nwnum);
01818     }
01819 
01820     /* 
01821      * 仮説と次単語集合から新たな文仮説を生成し，スタックにいれる. 
01822      */
01823     /*
01824      * generate new hypotheses from 'now' and 'nextword', 
01825      * and push them to stack
01826      */
01827 #ifdef DEBUG
01828     jlog("DEBUG: generate hypo\n");
01829 #endif
01830 
01831     if (r->lmtype == LM_DFA) {
01832       now_noise_calced = FALSE; /* TRUE is noise-inserted score has been calculated */
01833     }
01834     i = dwrk->pushctr;          /* store old value */
01835 
01836 #ifdef CM_SEARCH
01837     /* initialize local stack */
01838     cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha
01839 #ifdef CM_MULTIPLE_ALPHA
01840             , jconf->annotate.cm_alpha_num
01841 #endif
01842             );
01843 #endif
01844 
01845     /* for each nextword, generate a new hypothesis */
01846     for (w = 0; w < nwnum; w++) {
01847       if (r->lmtype == LM_DFA) {
01848         /* limit word hypothesis */
01849         if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01850           continue;
01851         }
01852       }
01853       new = newnode(r);
01854 
01855       if (r->lmtype == LM_DFA) {
01856 
01857         if (nextword[w]->can_insert_sp == TRUE) {
01858           /* ノイズを挟んだトレリススコアを計算し，挟まない場合との最大値を取る */
01859           /* compute hypothesis score with noise inserted */
01860           
01861           if (now_noise_calced == FALSE) {
01862             /* now に sp をつけた仮説 now_noise を作り,そのスコアを計算 */
01863             /* generate temporal hypothesis 'now_noise' which has short-pause
01864                word after the original 'now' */
01865             fornoise.id = gdfa->sp_id;
01866             now_noise = newnode(r);
01867             cpy_node(now_noise, now);
01868 #if 0
01869             now_noise_tmp = newnode(r);
01870             next_word(now, now_noise_tmp, &fornoise, param, r);
01871             scan_word(now_noise_tmp, param, r);
01872             for(t=0;t<peseqlen;t++) {
01873               now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]);
01874             }
01875             free_node(now_noise_tmp);
01876 #else
01877             /* expand NOISE only if it exists in backward trellis */
01878             /* begin patch by kashima */
01879             if (jconf->pass2.looktrellis_flag) {
01880               if(!dfa_look_around(&fornoise, now, r)){
01881                 free_node(now_noise);
01882                 free_node(new);
01883                 continue;
01884               }
01885             }
01886             /* end patch by kashima */
01887             
01888             /* now_nosie の スコア g[] を計算し，元の now の g[] と比較して
01889                高い方を採用 */
01890             /* compute trellis score g[], and adopt the maximum score
01891                for each frame compared with now->g[] */
01892             next_word(now, now_noise, &fornoise, param, r);
01893             scan_word(now_noise, param, r);
01894             for(t=0;t<peseqlen;t++) {
01895               now_noise->g[t] = max(now_noise->g[t], now->g[t]);
01896             }
01897             /* ノイズを挟んだ際を考慮したスコアを計算したので，
01898                ここで最後のノイズ単語を now_noise から消す */
01899             /* now that score has been computed considering pause insertion,
01900                we can delete the last noise word from now_noise here */
01901             now_noise->seqnum--;
01902 #endif
01903             now_noise_calced = TRUE;
01904           }
01905           
01906           /* expand word only if it exists in backward trellis */
01907           /* begin patch by kashima */
01908           if (jconf->pass2.looktrellis_flag) {
01909             if(!dfa_look_around(nextword[w], now_noise, r)){
01910               free_node(new);
01911               continue;
01912             }
01913           }
01914           /* end patch by kashima */
01915           
01916           /* 新しい仮説' new' を 'now_noise' から生成 */
01917           /* generate a new hypothesis 'new' from 'now_noise' */
01918           next_word(now_noise, new, nextword[w], param, r);
01919           
01920         } else {
01921           
01922           /* expand word only if it exists in backward trellis */
01923           /* begin patch by kashima */
01924           if (jconf->pass2.looktrellis_flag) {
01925             if(!dfa_look_around(nextword[w], now, r)){
01926               free_node(new);
01927               continue;
01928             }
01929           }
01930           /* end patch by kashima */
01931           
01932           /* 新しい仮説' new' を 'now_noise' から生成 */
01933           /* generate a new hypothesis 'new' from 'now_noise' */
01934           next_word(now, new, nextword[w], param, r);
01935           
01936         }
01937       }
01938 
01939       if (r->lmtype == LM_PROB) {
01940 
01941         /* 新しい仮説' new' を 'now_noise' から生成
01942            N-gram の場合はノイズを特別扱いしない */
01943         /* generate a new hypothesis 'new' from 'now'.
01944            pause insertion is treated as same as normal words in N-gram mode. */
01945         next_word(now, new, nextword[w], param, r);
01946 
01947       }
01948 
01949       if (new->score <= LOG_ZERO) { /* not on trellis */
01950         free_node(new);
01951         continue;
01952       }
01953         
01954       dwrk->genectr++;
01955 
01956 #ifdef CM_SEARCH
01957       /* store the local hypothesis to temporal stack */
01958       cm_store(dwrk, new);
01959 #else 
01960       /* 生成した仮説 'new' をスタックに入れる */
01961       /* push the generated hypothesis 'new' to stack */
01962 
01963       /* stack overflow */
01964       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01965         free_node(new);
01966         continue;
01967       }
01968       
01969       if (r->graphout) {
01970         /* assign a word arc to the last fixed word */
01971         new->lastcontext = now->prevgraph;
01972         new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
01973                                           new->seq[new->seqnum-1],
01974                                           (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
01975                                           new->bestt + 1,
01976 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01977 #ifdef PASS2_STRICT_IWCD
01978                                           /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
01979                                           new->wordend_frame[new->bestt],
01980 #else
01981                                           now->wordend_frame[new->bestt],
01982 #endif
01983 #else
01984                                           now->bestt,
01985 #endif
01986                                           new->score, prev_score,
01987 #ifdef PASS2_STRICT_IWCD
01988                                           new->g[new->bestt] - new->lscore,
01989 #else
01990                                           now->g[new->bestt+1],
01991 #endif
01992 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01993 #ifdef PASS2_STRICT_IWCD
01994                                           /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
01995                                           new->wordend_gscore[new->bestt],
01996 #else
01997                                           now->wordend_gscore[new->bestt],
01998 #endif
01999 #else
02000                                           now->tail_g_score,
02001 #endif
02002                                           now->lscore,
02003 #ifdef CM_SEARCH
02004                                           new->cmscore[new->seqnum-2],
02005 #else
02006                                           LOG_ZERO,
02007 #endif
02008                                           r
02009                                           );
02010       } /* recog->graphout */
02011       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
02012       if (debug2_flag) {
02013         j = new->seq[new->seqnum-1];
02014         jlog("DEBUG:  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
02015       }
02016       dwrk->current = new;
02017       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
02018       dwrk->pushctr++;
02019 #endif
02020     } /* end of nextword loop */
02021 
02022 #ifdef CM_SEARCH
02023     /* compute score sum */
02024     cm_sum_score(dwrk
02025 #ifdef CM_MULTIPLE_ALPHA
02026                  , jconf->annotate.cm_alpha_bgn 
02027                  , jconf->annotate.cm_alpha_end
02028                  , jconf->annotate.cm_alpha_step
02029 #endif
02030                  );
02031     /* compute CM and put the generated hypotheses to global stack */
02032     while ((new = cm_get_node(dwrk)) != NULL) {
02033       cm_set_score(dwrk, new
02034 #ifdef CM_MULTIPLE_ALPHA
02035                    , jconf->annotate.cm_alpha_bgn
02036                    , jconf->annotate.cm_alpha_end
02037                    , jconf->annotate.cm_alpha_step
02038 #endif
02039                    );
02040 #ifdef CM_SEARCH_LIMIT
02041       if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres
02042 #ifdef CM_SEARCH_LIMIT_AFTER
02043           && dwrk->finishnum > 0
02044 #endif
02045           ) {
02046         free_node(new);
02047         continue;
02048       }
02049 #endif /* CM_SEARCH_LIMIT */
02050       /*      j = new->seq[new->seqnum-1];
02051               printf("  %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/
02052 
02053       /* stack overflow */
02054       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
02055         free_node(new);
02056         continue;
02057       }
02058 
02059       if (r->graphout) {
02060 
02061         /* assign a word arc to the last fixed word */
02062         new->lastcontext = now->prevgraph;
02063         new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
02064                                           new->seq[new->seqnum-1],
02065                                           (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
02066                                           new->bestt + 1,
02067 #ifdef GRAPHOUT_PRECISE_BOUNDARY
02068 #ifdef PASS2_STRICT_IWCD
02069                                           new->wordend_frame[new->bestt],
02070 #else
02071                                           now->wordend_frame[new->bestt],
02072 #endif
02073 #else
02074                                           now->bestt,
02075 #endif
02076                                           new->score, prev_score,
02077 #ifdef PASS2_STRICT_IWCD
02078                                           new->g[new->bestt] - new->lscore,
02079 #else
02080                                           now->g[new->bestt+1],
02081 #endif
02082 #ifdef GRAPHOUT_PRECISE_BOUNDARY
02083 #ifdef PASS2_STRICT_IWCD
02084                                           new->wordend_gscore[new->bestt],
02085 #else
02086                                           now->wordend_gscore[new->bestt],
02087 #endif
02088 #else
02089                                           now->tail_g_score,
02090 #endif
02091                                           now->lscore,
02092 #ifdef CM_SEARCH
02093                                           new->cmscore[new->seqnum-2],
02094 #else
02095                                           LOG_ZERO,
02096 #endif
02097                                           r
02098                                           );
02099       } /* recog->graphout */
02100       
02101       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
02102       if (debug2_flag) {
02103         j = new->seq[new->seqnum-1];
02104         jlog("DEBUG:  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
02105       }
02106       dwrk->current = new;
02107       //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r);
02108       dwrk->pushctr++;
02109     }
02110 #endif
02111 
02112     if (debug2_flag) {
02113       jlog("DEBUG: %d pushed\n",dwrk->pushctr-i);
02114     }
02115     if (r->lmtype == LM_DFA) {
02116       if (now_noise_calced == TRUE) free_node(now_noise);
02117     }
02118     
02119     /* 
02120      * 取り出した仮説を捨てる
02121      * free the source hypothesis
02122      */
02123     free_node(now);
02124 
02125   }
02126   /***************/
02127   /* End of Loop */
02128   /***************/
02129 
02130   /* output */
02131   if (dwrk->finishnum == 0) {           /* if search failed */
02132 
02133     /* finalize result when no hypothesis was obtained */
02134     if (verbose_flag) {
02135       if (r->config->sw.fallback_pass1_flag) {
02136         jlog("%02d %s: got no candidates, output 1st pass result as a final result\n", r->config->id, r->config->name);
02137       } else {
02138         jlog("WARNING: %02d %s: got no candidates, search failed\n", r->config->id, r->config->name);
02139       }
02140     }
02141     pass2_finalize_on_no_result(r, r->config->sw.fallback_pass1_flag);
02142 
02143   } else {                      /* if at least 1 candidate found */
02144 
02145     if (debug2_flag) {
02146       jlog("STAT: %02d %s: got %d candidates\n", r->config->id, r->config->name, dwrk->finishnum);
02147     }
02148       /* 結果はまだ出力されていないので，文候補用スタック内をソートして
02149          ここで出力する */
02150       /* As all of the found candidate are in result stack, we sort them
02151          and output them here  */
02152       if (debug2_flag) jlog("DEBUG: done\n");
02153       result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, jconf->output.output_hypo_maxnum, r, param);
02154 
02155       r->result.status = J_RESULT_STATUS_SUCCESS;
02156       //callback_exec(CALLBACK_RESULT, r);
02157       //callback_exec(CALLBACK_EVENT_PASS2_END, r);
02158   }
02159   
02160   /* 各種カウンタを出力 */
02161   /* output counters */
02162   if (verbose_flag) {
02163     jlog("STAT: %02d %s: %d generated, %d pushed, %d nodes popped in %d\n",
02164          r->config->id, r->config->name,
02165          dwrk->genectr, dwrk->pushctr, dwrk->popctr, backtrellis->framelen);
02166     jlog_flush();
02167 #ifdef GRAPHOUT_DYNAMIC
02168     if (r->graphout) {
02169       jlog("STAT: %02d %s: graph: %d merged", r->config->id, r->config->name, dynamic_merged_num);
02170 #ifdef GRAPHOUT_SEARCH
02171       jlog("S, %d terminated", terminate_search_num);
02172 #endif
02173       jlog(" in %d\n", dwrk->popctr);
02174     }
02175 #endif
02176   }
02177     
02178   if (dwrk->finishnum > 0 && r->graphout) {
02179     if (verbose_flag) jlog("STAT: ------ wordgraph post-processing begin ------\n");
02180     /* garbage collection and word merging */
02181     /* words with no following word (except end of sentence) will be erased */
02182     wordgraph_purge_leaf_nodes(&wordgraph_root, r);
02183 #ifdef GRAPHOUT_DEPTHCUT
02184     /* perform word graph depth cutting here */
02185     wordgraph_depth_cut(&wordgraph_root, r);
02186 #endif
02187 
02188     /* if GRAPHOUT_PRECISE_BOUNDARY defined,
02189        propagate exact time boundary to the right context.
02190        words of different boundary will be duplicated here.
02191     */
02192     wordgraph_adjust_boundary(&wordgraph_root, r);
02193 
02194     if (jconf->graph.confnet) {
02195       /* CONFUSION NETWORK GENERATION */
02196 
02197       /* old merging functions should be skipped */
02198 
02199       /* finalize: sort and annotate ID */
02200       r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r);
02201       /* check coherence */
02202       wordgraph_check_coherence(wordgraph_root, r);
02203       /* compute graph CM by forward-backward processing */
02204       graph_forward_backward(wordgraph_root, r);
02205       if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n");
02206 
02207       r->result.wg = wordgraph_root;
02208 
02209       /* 
02210        * if (jconf->graph.lattice) {
02211        *   callback_exec(CALLBACK_RESULT_GRAPH, r);
02212        * }
02213        */
02214       
02215       /* parse the graph to extract order relationship */
02216       graph_make_order(wordgraph_root, r);
02217       /* create confusion network */
02218       r->result.confnet = confnet_create(wordgraph_root, r);
02219       /* output confusion network */
02220       //callback_exec(CALLBACK_RESULT_CONFNET, r);
02221       /* free area for order relationship */
02222       graph_free_order(r);
02223       /* free confusion network clusters */
02224       //cn_free_all(&(r->result.confnet));
02225 
02226     } else if (jconf->graph.lattice) {
02227       /* WORD LATTICE POSTPROCESSING */
02228 
02229       /* merge words with the same time and same score */
02230       wordgraph_compaction_thesame(&wordgraph_root);
02231       /* merge words with the same time (skip if "-graphrange -1") */
02232       wordgraph_compaction_exacttime(&wordgraph_root, r);
02233       /* merge words of near time (skip if "-graphrange" value <= 0 */
02234       wordgraph_compaction_neighbor(&wordgraph_root, r);
02235       /* finalize: sort and annotate ID */
02236       r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r);
02237       /* check coherence */
02238       wordgraph_check_coherence(wordgraph_root, r);
02239       /* compute graph CM by forward-backward processing */
02240       graph_forward_backward(wordgraph_root, r);
02241       if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n");
02242       /* output graph */
02243       r->result.wg = wordgraph_root;
02244       //callback_exec(CALLBACK_RESULT_GRAPH, r);
02245 
02246     } else {
02247       j_internal_error("InternalError: graph generation specified but no output format specified?\n");
02248     }
02249 
02250     /* clear all wordgraph */
02251     //wordgraph_clean(&(r->result.wg));
02252   } /* r->graphout */
02253 
02254   
02255   /* 終了処理 */
02256   /* finalize */
02257   nw_free(nextword, nwroot);
02258   free_all_nodes(start);
02259   free_wordtrellis(dwrk);
02260 #ifdef SCAN_BEAM
02261   free(dwrk->framemaxscore);
02262 #endif
02263   //result_sentence_free(r);
02264   clear_stocker(dwrk);
02265 }
02266 
02283 void
02284 wchmm_fbs_prepare(RecogProcess *r)
02285 {
02286   StackDecode *dwrk;
02287   dwrk = &(r->pass2);
02288   
02289   /* N-gram 用ワークエリアを確保 */
02290   /* malloc work area for N-gram */
02291   if (r->lmtype == LM_PROB && r->lm->ngram) {
02292     dwrk->cnword = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n);
02293     dwrk->cnwordrev = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n);
02294   } else {
02295     dwrk->cnword = dwrk->cnwordrev = NULL;
02296   }
02297   dwrk->stocker_root = NULL;
02298 #ifdef CONFIDENVE_MEASURE
02299 #ifdef CM_MULTIPLE_ALPHA
02300   dwrk->cmsumlist = NULL;
02301 #endif
02302 #ifdef CM_NBEST;
02303   dwrk->sentcm = NULL;
02304   dwrk->wordcm = NULL;
02305 #endif
02306 #endif
02307 }
02308 
02325 void
02326 wchmm_fbs_free(RecogProcess *r)
02327 {
02328   StackDecode *dwrk;
02329   dwrk = &(r->pass2);
02330 
02331   if (r->lmtype == LM_PROB && r->lm->ngram) {
02332     free(dwrk->cnword);
02333     free(dwrk->cnwordrev);
02334     dwrk->cnword = dwrk->cnwordrev = NULL;
02335   }
02336 
02337 #ifdef CONFIDENVE_MEASURE
02338 #ifdef CM_MULTIPLE_ALPHA
02339   if (dwrk->cmsumlist) {
02340     free(dwrk->cmsumlist);
02341     dwrk->cmsumlist = NULL;
02342   }
02343 #endif
02344 #ifdef CM_NBEST;
02345   if (dwrk->sentcm) {
02346     free(dwrk->sentcm);
02347     dwrk->sentcm = NULL;
02348   }
02349   if (dwrk->wordcm) {
02350     free(dwrk->wordcm);
02351     dwrk->wordcm = NULL;
02352   }
02353 #endif
02354 #endif
02355 }
02356 
02357 /* end of file */