Julius 4.2
|
00001 00041 /* 00042 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00043 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00044 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00045 * All rights reserved 00046 */ 00047 00048 #include <julius/julius.h> 00049 00068 static int 00069 compare_nw(NEXTWORD **a, NEXTWORD **b) 00070 { 00071 if ((*a)->id > (*b)->id) return 1; 00072 if ((*a)->id < (*b)->id) return -1; 00073 return 0; 00074 } 00075 00097 /* find next word candiate whose id 'w' */ 00098 static NEXTWORD * 00099 search_nw(NEXTWORD **nw, WORD_ID w, int num) 00100 { 00101 int left,right,mid; 00102 NEXTWORD *tmp; 00103 00104 if (num == 0) return NULL; 00105 left = 0; 00106 right = num - 1; 00107 while (left < right) { 00108 mid = (left + right) / 2; 00109 if ((nw[mid])->id < w) { 00110 left = mid + 1; 00111 } else { 00112 right = mid; 00113 } 00114 } 00115 tmp = nw[left]; 00116 if (tmp->id == w) { 00117 return tmp; 00118 } else { 00119 return NULL; 00120 } 00121 } 00122 00138 static LOGPROB 00139 ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen) 00140 { 00141 int i; 00142 LOGPROB p1, p2; 00143 00144 p1 = 0.0; 00145 for(i = 1; i < ngram->n; i++) { 00146 if (i >= wlen) break; 00147 p1 += ngram_prob(ngram, i, &(w[1])); 00148 } 00149 p2 = 0.0; 00150 for(i = 0; i < ngram->n; i++) { 00151 if (i >= wlen) break; 00152 p2 += ngram_prob(ngram, i+1, w); 00153 } 00154 00155 return(p2 - p1); 00156 } 00157 00191 static int 00192 pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t) 00193 { 00194 int i; 00195 WORD_ID w; 00196 LOGPROB rawscore; 00197 #ifdef WPAIR 00198 int w_old = WORD_INVALID; 00199 #endif 00200 int num; 00201 int cnnum; 00202 int last_trans; 00203 StackDecode *dwrk; 00204 00205 BACKTRELLIS *bt; 00206 WORD_INFO *winfo; 00207 NGRAM_INFO *ngram; 00208 LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans; 00209 00210 num = oldnum; 00211 bt = r->backtrellis; 00212 winfo = r->lm->winfo; 00213 ngram = r->lm->ngram; 00214 lm_weight2 = r->config->lmp.lm_weight2; 00215 lm_penalty2 = r->config->lmp.lm_penalty2; 00216 lm_penalty_trans = r->config->lmp.lm_penalty_trans; 00217 dwrk = &(r->pass2); 00218 00219 /* set word contexts to cnword[] from 1 considering transparent words */ 00220 if (ngram) { 00221 cnnum = 0; 00222 last_trans = 0; 00223 for(i=hypo->seqnum-1;i>=0;i--) { 00224 if (! winfo->is_transparent[hypo->seq[i]]) { 00225 dwrk->cnword[cnnum+1] = hypo->seq[i]; 00226 cnnum++; 00227 if (cnnum >= ngram->n - 1) break; 00228 } else { 00229 last_trans++; 00230 } 00231 } 00232 if (ngram->dir == DIR_RL) { 00233 for(i=0;i<cnnum;i++) { 00234 dwrk->cnwordrev[cnnum-1-i] = dwrk->cnword[i+1]; 00235 } 00236 } 00237 /* use ngram id */ 00238 if (ngram->dir == DIR_RL) { 00239 for(i=0;i<cnnum;i++) dwrk->cnwordrev[i] = winfo->wton[dwrk->cnwordrev[i]]; 00240 } else { 00241 for(i=0;i<cnnum;i++) dwrk->cnword[i+1] = winfo->wton[dwrk->cnword[i+1]]; 00242 } 00243 } 00244 00245 /* lookup survived words in backtrellis on time frame 't' */ 00246 for (i=0;i<bt->num[t];i++) { 00247 w = (bt->rw[t][i])->wid; 00248 #ifdef WORD_GRAPH 00249 /* only words on the word graphs are expanded */ 00250 if (!(bt->rw[t][i])->within_wordgraph) continue; 00251 #endif /* not WORD_GRAPH */ 00252 #ifdef WPAIR 00253 /* some word have same word ID with different previous word, so 00254 only one will be opened (best word will be selected later 00255 by next_word() */ 00256 if (w == w_old) continue; /* backtrellis is sorted by word ID */ 00257 else w_old = w; 00258 #endif /* WPAIR */ 00259 /* skip if already exist */ 00260 if (search_nw(nw, w, oldnum) != NULL) continue; 00261 00262 /* compute LM probability of the word */ 00263 if (ngram) { 00264 /* compute N-gram probability */ 00265 if (ngram->dir == DIR_RL) { 00266 /* just compute N-gram prob of the word candidate */ 00267 dwrk->cnwordrev[cnnum] = winfo->wton[w]; 00268 rawscore = ngram_prob(ngram, cnnum + 1, dwrk->cnwordrev); 00269 } else { 00270 dwrk->cnword[0] = winfo->wton[w]; 00271 rawscore = ngram_forw2back(ngram, dwrk->cnword, cnnum + 1); 00272 } 00273 #ifdef CLASS_NGRAM 00274 rawscore += winfo->cprob[w]; 00275 #endif 00276 } 00277 if (r->lmvar == LM_NGRAM_USER) { 00278 /* call user-defined function */ 00279 /* be careful that the word context is ordered in backward direction */ 00280 rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore); 00281 } 00282 00283 nw[num]->tre = bt->rw[t][i]; 00284 nw[num]->id = w; 00285 nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2; 00286 if (winfo->is_transparent[w]) { 00287 /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/ 00288 if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) { 00289 nw[num]->lscore += lm_penalty_trans; 00290 } 00291 } 00292 00293 /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */ 00294 num++; 00295 } 00296 00297 return num; 00298 } 00299 00333 static int 00334 get_backtrellis_words(RecogProcess *r, NEXTWORD **nw, NODE *hypo, short tm, short t_end) 00335 { 00336 int num = 0; 00337 int t, t_step; 00338 int oldnum=0; 00339 00340 BACKTRELLIS *bt; 00341 int lookup_range; 00342 00343 if (tm < 0) return(0); 00344 00345 bt = r->backtrellis; 00346 lookup_range = r->config->pass2.lookup_range; 00347 00348 #ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP 00349 /* fix for 3.2 (01/10/18 by ri) */ 00350 /* before and after (one near center frame has high priority) */ 00351 for (t_step = 0; t_step < lookup_range; t_step++) { 00352 /* before or center */ 00353 t = tm - t_step; 00354 if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue; 00355 num = pick_backtrellis_words(r, nw, oldnum, hypo, t); 00356 if (num > oldnum) { 00357 qsort(nw, num, sizeof(NEXTWORD *), 00358 (int (*)(const void *,const void *))compare_nw); 00359 oldnum = num; 00360 } 00361 if (t_step == 0) continue; /* center */ 00362 /* after */ 00363 t = tm + t_step; 00364 if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue; 00365 num = pick_backtrellis_words(r, nw, oldnum, hypo, t); 00366 if (num > oldnum) { 00367 qsort(nw, num, sizeof(NEXTWORD *), 00368 (int (*)(const void *,const void *))compare_nw); 00369 oldnum = num; 00370 } 00371 } 00372 00373 #else 00374 00375 /* before the center frame */ 00376 for(t = tm; t >= tm - lookup_range; t--) { 00377 if (t < 0) break; 00378 num = pick_backtrellis_words(r, nw, oldnum, hypo, t); 00379 if (num > oldnum) { 00380 qsort(nw, num, sizeof(NEXTWORD *), 00381 (int (*)(const void *,const void *))compare_nw); 00382 oldnum = num; 00383 } 00384 } 00385 /* after the center frame */ 00386 for(t = tm + 1; t < tm + lookup_range; t++) { 00387 if (t > bt->framelen - 1) break; 00388 if (t >= t_end) break; 00389 num = pick_backtrellis_words(r, nw, oldnum, hypo, t); 00390 if (num > oldnum) { 00391 qsort(nw, num, sizeof(NEXTWORD *), 00392 (int (*)(const void *,const void *))compare_nw); 00393 oldnum = num; 00394 } 00395 } 00396 #endif 00397 00398 return num; 00399 } 00400 00427 static int 00428 limit_nw(NEXTWORD **nw, NODE *hypo, int num, WORD_INFO *winfo) 00429 { 00430 int src,dst; 00431 int newnum; 00432 00433 /* <s>からは何も展開しない */ 00434 /* no hypothesis will be generated after "<s>" */ 00435 if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) { 00436 return(0); 00437 } 00438 00439 dst = 0; 00440 for (src=0; src<num; src++) { 00441 if (nw[src]->id == winfo->tail_silwid) { 00442 /* </s> は展開しない */ 00443 /* do not expand </s> (it only appears at start) */ 00444 continue; 00445 } 00446 #ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION 00447 /* 直前単語と同じトレリス単語は展開しない */ 00448 /* inhibit expanding the exactly the same trellis word twice */ 00449 if (nw[src]->tre == hypo->tre) continue; 00450 #endif 00451 00452 if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD)); 00453 dst++; 00454 } 00455 newnum = dst; 00456 00457 return newnum; 00458 } 00459 00460 00495 int 00496 ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r) 00497 { 00498 00499 if (r->config->successive.enabled) { 00500 /* in sp segment mode */ 00501 if (r->sp_break_2_begin_word != WORD_INVALID) { 00502 /* 初期仮説は 最終フレームに残った単語トレリス上の最尤単語 */ 00503 /* the initial hypothesis is the best word survived on the last frame of 00504 the segment */ 00505 nw[0]->id = r->sp_break_2_begin_word; 00506 } else { 00507 /* 最終セグメント: 初期仮説は 単語の末尾の無音単語(=winfo->tail_silwid) */ 00508 /* we are in the last of sentence: initial hypothesis is word-end silence word */ 00509 nw[0]->id = r->lm->winfo->tail_silwid; 00510 } 00511 } else { 00512 /* initial hypothesis should be word-end silence word */ 00513 nw[0]->id = r->lm->winfo->tail_silwid; 00514 } 00515 00516 nw[0]->lscore = uni_prob(r->wchmm->ngram, r->wchmm->winfo->wton[nw[0]->id]); 00517 #ifdef CLASS_NGRAM 00518 nw[0]->lscore += r->wchmm->winfo->cprob[nw[0]->id]; 00519 #endif 00520 nw[0]->lscore *= r->config->lmp.lm_weight2; 00521 #ifndef FIX_PENALTY 00522 nw[0]->lscore += r->config->lmp.lm_penalty2; 00523 #endif 00524 00525 return 1; /* number of words = 1 */ 00526 } 00527 00565 int 00566 ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r) 00567 { 00568 int num, num2; 00569 00570 if (hypo->seqnum == 0) { 00571 j_internal_error("ngram_nextwords: hypo contains no word\n"); 00572 } 00573 00574 /* 仮説の推定終端時刻において backtrellis内に残っている単語を得る */ 00575 /* get survived words on backtrellis at the estimated end frame */ 00576 num = get_backtrellis_words(r, nw, hypo, hypo->estimated_next_t, hypo->bestt); 00577 00578 /* 展開できない単語をチェックして外す */ 00579 /* exclude unallowed words */ 00580 num2 = limit_nw(nw, hypo, num, r->lm->winfo); 00581 00582 if (debug2_flag) jlog("DEBUG: ngram_decode: %d-%d=%d unfolded\n",num, num-num2,num2); 00583 00584 return(num2); 00585 } 00586 00615 boolean 00616 ngram_acceptable(NODE *hypo, RecogProcess *r) 00617 { 00618 00619 if (r->config->successive.enabled) { 00620 /* 最後の仮説が第1パス最尤仮説の最初の単語と一致しなければならない */ 00621 /* the last word should be equal to the first word on the best hypothesis on 1st pass */ 00622 if (hypo->seq[hypo->seqnum-1] == r->sp_break_2_end_word) { 00623 return TRUE; 00624 } 00625 } else { 00626 /* 最後の仮説が文頭無音単語でなければならない */ 00627 /* the last word should be head silence word */ 00628 if (hypo->seq[hypo->seqnum-1] == r->lm->winfo->head_silwid) { 00629 return TRUE; 00630 } 00631 } 00632 return FALSE; 00633 } 00634 00635 /* end of file */