Julius 4.2
libjulius/src/ngram_decode.c
/*
 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

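/* qsort() comparison function: order NEXTWORD entries by word ID */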
static int
compare_nw(NEXTWORD **a, NEXTWORD **b)
{
  if ((*a)->id > (*b)->id) return 1;
  if ((*a)->id < (*b)->id) return -1;
  return 0;
}

/* find a next-word candidate whose id is 'w' (binary search; nw[] must be sorted by word ID) */
static NEXTWORD *
search_nw(NEXTWORD **nw, WORD_ID w, int num)
{
  int left,right,mid;
  NEXTWORD *tmp;

  if (num == 0) return NULL;
  left = 0;
  right = num - 1;
  while (left < right) {
    mid = (left + right) / 2;
    if ((nw[mid])->id < w) {
      left = mid + 1;
    } else {
      right = mid;
    }
  }
  tmp = nw[left];
  if (tmp->id == w) {
    return tmp;
  } else {
    return NULL;
  }
}

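/* Estimate the probability of w[0] given the following words w[1..wlen-1]
   from a forward N-gram, as the difference between the cumulative log
   probability of the sequence including w[0] (p2) and that of the context
   alone (p1). */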
static LOGPROB
ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen)
{
  int i;
  LOGPROB p1, p2;

  p1 = 0.0;
  for(i = 1; i < ngram->n; i++) {
    if (i >= wlen) break;
    p1 += ngram_prob(ngram, i, &(w[1]));
  }
  p2 = 0.0;
  for(i = 0; i < ngram->n; i++) {
    if (i >= wlen) break;
    p2 += ngram_prob(ngram, i+1, w);
  }

  return(p2 - p1);
}

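/* Append to nw[] the word candidates that survived in the backward trellis
   at time frame 't', computing an LM score for each against the word
   context of hypothesis 'hypo'.  'oldnum' is the number of candidates
   already stored in nw[]; the new total is returned. */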
static int
pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
{
  int i;
  WORD_ID w;
  LOGPROB rawscore;
#ifdef WPAIR
  int w_old = WORD_INVALID;
#endif
  int num;
  int cnnum;
  int last_trans;
  StackDecode *dwrk;

  BACKTRELLIS *bt;
  WORD_INFO *winfo;
  NGRAM_INFO *ngram;
  LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans;

  num = oldnum;
  bt = r->backtrellis;
  winfo = r->lm->winfo;
  ngram = r->lm->ngram;
  lm_weight2 = r->config->lmp.lm_weight2;
  lm_penalty2 = r->config->lmp.lm_penalty2;
  lm_penalty_trans = r->config->lmp.lm_penalty_trans;
  dwrk = &(r->pass2);

  /* set the word context into cnword[] (starting at index 1), skipping transparent words */
  if (ngram) {
    cnnum = 0;
    last_trans = 0;
    for(i=hypo->seqnum-1;i>=0;i--) {
      if (! winfo->is_transparent[hypo->seq[i]]) {
        dwrk->cnword[cnnum+1] = hypo->seq[i];
        cnnum++;
        if (cnnum >= ngram->n - 1) break;
      } else {
        last_trans++;
      }
    }
    if (ngram->dir == DIR_RL) {
      for(i=0;i<cnnum;i++) {
        dwrk->cnwordrev[cnnum-1-i] = dwrk->cnword[i+1];
      }
    }
    /* map word IDs to N-gram entry IDs */
    if (ngram->dir == DIR_RL) {
      for(i=0;i<cnnum;i++) dwrk->cnwordrev[i] = winfo->wton[dwrk->cnwordrev[i]];
    } else {
      for(i=0;i<cnnum;i++) dwrk->cnword[i+1] = winfo->wton[dwrk->cnword[i+1]];
    }
  }

  /* look up the surviving words in the backtrellis at time frame 't' */
  for (i=0;i<bt->num[t];i++) {
    w = (bt->rw[t][i])->wid;
#ifdef WORD_GRAPH
    /* only words on the word graph are expanded */
    if (!(bt->rw[t][i])->within_wordgraph) continue;
#endif /* WORD_GRAPH */
#ifdef WPAIR
    /* several entries may share the same word ID with different previous
       words, so only one will be opened (the best one will be selected
       later by next_word()) */
    if (w == w_old) continue;   /* backtrellis is sorted by word ID */
    else w_old = w;
#endif /* WPAIR */
    /* skip if the word is already in the candidate list */
    if (search_nw(nw, w, oldnum) != NULL) continue;

    /* compute LM probability of the word */
    if (ngram) {
      /* compute N-gram probability */
      if (ngram->dir == DIR_RL) {
        /* just compute the N-gram probability of the word candidate */
        dwrk->cnwordrev[cnnum] = winfo->wton[w];
        rawscore = ngram_prob(ngram, cnnum + 1, dwrk->cnwordrev);
      } else {
        dwrk->cnword[0] = winfo->wton[w];
        rawscore = ngram_forw2back(ngram, dwrk->cnword, cnnum + 1);
      }
#ifdef CLASS_NGRAM
      rawscore += winfo->cprob[w];
#endif
    }
    if (r->lmvar == LM_NGRAM_USER) {
      /* call the user-defined function */
      /* note that the word context is ordered in the backward direction */
      rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore);
    }

    nw[num]->tre   = bt->rw[t][i];
    nw[num]->id    = w;
    nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2;
    if (winfo->is_transparent[w]) {
      /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/
      if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
        nw[num]->lscore += lm_penalty_trans;
      }
    }

    /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */
    num++;
  }

  return num;
}

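/* Collect next word candidates for hypothesis 'hypo' from the backward
   trellis, scanning time frames around the estimated word-end frame 'tm'
   within 'lookup_range' frames (bounded by 't_end').  nw[] is kept sorted
   by word ID so that duplicates can be detected by search_nw().
   Returns the number of candidates stored in nw[]. */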
static int
get_backtrellis_words(RecogProcess *r, NEXTWORD **nw, NODE *hypo, short tm, short t_end)
{
  int num = 0;
  int t, t_step;
  int oldnum=0;

  BACKTRELLIS *bt;
  int lookup_range;

  if (tm < 0) return(0);

  bt = r->backtrellis;
  lookup_range = r->config->pass2.lookup_range;

#ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP
  /* fix for 3.2 (01/10/18 by ri) */
  /* scan frames before and after the center; frames nearer to the center get priority */
  for (t_step = 0; t_step < lookup_range; t_step++) {
    /* before or center */
    t = tm - t_step;
    if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
    num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
    if (num > oldnum) {
      qsort(nw, num, sizeof(NEXTWORD *),
            (int (*)(const void *,const void *))compare_nw);
      oldnum = num;
    }
    if (t_step == 0) continue;  /* center */
    /* after */
    t = tm + t_step;
    if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
    num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
    if (num > oldnum) {
      qsort(nw, num, sizeof(NEXTWORD *),
            (int (*)(const void *,const void *))compare_nw);
      oldnum = num;
    }
  }

#else

  /* before the center frame */
  for(t = tm; t >= tm - lookup_range; t--) {
    if (t < 0) break;
    num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
    if (num > oldnum) {
      qsort(nw, num, sizeof(NEXTWORD *),
            (int (*)(const void *,const void *))compare_nw);
      oldnum = num;
    }
  }
  /* after the center frame */
  for(t = tm + 1; t < tm + lookup_range; t++) {
    if (t > bt->framelen - 1) break;
    if (t >= t_end) break;
    num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
    if (num > oldnum) {
      qsort(nw, num, sizeof(NEXTWORD *),
            (int (*)(const void *,const void *))compare_nw);
      oldnum = num;
    }
  }
#endif

  return num;
}

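/* Remove word candidates that must not be expanded from nw[]: nothing is
   expanded after the sentence-head silence word "<s>", and the sentence-end
   silence word "</s>" is never expanded as a next word.  Returns the number
   of remaining candidates. */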
static int
limit_nw(NEXTWORD **nw, NODE *hypo, int num, WORD_INFO *winfo)
{
  int src,dst;
  int newnum;

  /* no hypothesis will be generated after "<s>" */
  if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) {
    return(0);
  }

  dst = 0;
  for (src=0; src<num; src++) {
    if (nw[src]->id == winfo->tail_silwid) {
      /* do not expand "</s>" (it appears only as the initial hypothesis) */
      continue;
    }
#ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION
    /* inhibit expanding exactly the same trellis word twice */
    if (nw[src]->tre == hypo->tre) continue;
#endif

    if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD));
    dst++;
  }
  newnum = dst;

  return newnum;
}

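/* Set the initial word hypothesis for the 2nd pass into nw[0].  Normally
   this is the sentence-end silence word; in short-pause segmentation mode
   it is the best word that survived at the last frame of the segment.
   A unigram LM score is also assigned.  Returns the number of initial
   hypotheses (always 1). */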
int
ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r)
{

  if (r->config->successive.enabled) {
    /* in sp segment mode */
    if (r->sp_break_2_begin_word != WORD_INVALID) {
      /* the initial hypothesis is the best word that survived on the last
         frame of the segment */
      nw[0]->id = r->sp_break_2_begin_word;
    } else {
      /* this is the last segment of the utterance: the initial hypothesis
         is the sentence-end silence word (winfo->tail_silwid) */
      nw[0]->id = r->lm->winfo->tail_silwid;
    }
  } else {
    /* the initial hypothesis is the sentence-end silence word */
    nw[0]->id = r->lm->winfo->tail_silwid;
  }

  nw[0]->lscore = uni_prob(r->wchmm->ngram, r->wchmm->winfo->wton[nw[0]->id]);
#ifdef CLASS_NGRAM
  nw[0]->lscore += r->wchmm->winfo->cprob[nw[0]->id];
#endif
  nw[0]->lscore *= r->config->lmp.lm_weight2;
#ifndef FIX_PENALTY
  nw[0]->lscore += r->config->lmp.lm_penalty2;
#endif

  return 1;                     /* number of words = 1 */
}

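/* Return in nw[] the set of next word candidates that can be connected to
   the partial sentence hypothesis 'hypo' in the 2nd pass, together with
   their LM scores.  Candidates are gathered from the backward trellis
   around the estimated end frame of the hypothesis and then filtered by
   limit_nw().  Returns the number of candidates. */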
int
ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r)
{
  int num, num2;

  if (hypo->seqnum == 0) {
    j_internal_error("ngram_nextwords: hypo contains no word\n");
  }

  /* get the words that survived in the backtrellis at the estimated end
     frame of the hypothesis */
  num = get_backtrellis_words(r, nw, hypo, hypo->estimated_next_t, hypo->bestt);

  /* check and exclude words that cannot be expanded */
  num2 = limit_nw(nw, hypo, num, r->lm->winfo);

  if (debug2_flag) jlog("DEBUG: ngram_decode: %d-%d=%d unfolded\n",num, num-num2,num2);

  return(num2);
}

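/* Check whether the hypothesis 'hypo' can be accepted as a complete
   sentence: its last expanded word must be the sentence-head silence word,
   or, in short-pause segmentation mode, the word at the segment boundary
   determined on the 1st pass. */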
boolean
ngram_acceptable(NODE *hypo, RecogProcess *r)
{

  if (r->config->successive.enabled) {
    /* the last word must match the first word of the best hypothesis on the 1st pass */
    if (hypo->seq[hypo->seqnum-1] == r->sp_break_2_end_word) {
      return TRUE;
    }
  } else {
    /* the last word must be the sentence-head silence word */
    if (hypo->seq[hypo->seqnum-1] == r->lm->winfo->head_silwid) {
      return TRUE;
    }
  }
  return FALSE;
}

/* end of file */