/* Julius 4.2 */
00001 00159 /* 00160 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00161 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00162 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00163 * All rights reserved 00164 */ 00165 00166 #include <julius/julius.h> 00167 00168 /*----------------------------------------------------------------------*/ 00169 00186 void 00187 make_successor_list(WCHMM_INFO *wchmm) 00188 { 00189 int node; 00190 WORD_ID w; 00191 int i, j; 00192 int s; 00193 WORD_ID *scnumlist; 00194 WORD_ID *sclen; 00195 int scnum, new_scnum; 00196 int *scidmap; 00197 boolean *freemark; 00198 00199 jlog("STAT: make successor lists for factoring\n"); 00200 00201 /* 1. initialize */ 00202 /* initialize node->sclist index on wchmm tree */ 00203 for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0; 00204 00205 /* parse the tree to assign unique scid and get the maximum size of 00206 successor list */ 00207 scnum = 1; 00208 for (w=0;w<wchmm->winfo->num;w++) { 00209 for (i=0;i<wchmm->winfo->wlen[w];i++) { 00210 if (wchmm->state[wchmm->offset[w][i]].scid == 0) { 00211 wchmm->state[wchmm->offset[w][i]].scid = scnum; 00212 scnum++; 00213 } 00214 } 00215 if (wchmm->state[wchmm->wordend[w]].scid == 0) { 00216 wchmm->state[wchmm->wordend[w]].scid = scnum; 00217 scnum++; 00218 } 00219 } 00220 if (debug2_flag) { 00221 jlog("DEBUG: initial successor list size = %d\n", scnum); 00222 } 00223 00224 /* 2. count number of each successor */ 00225 sclen = (WORD_ID *)mymalloc(sizeof(WORD_ID) * scnum); 00226 for (i=1;i<scnum;i++) sclen[i] = 0; 00227 for (w=0;w<wchmm->winfo->num;w++) { 00228 for (i=0;i<wchmm->winfo->wlen[w];i++) { 00229 sclen[wchmm->state[wchmm->offset[w][i]].scid]++; 00230 } 00231 sclen[wchmm->state[wchmm->wordend[w]].scid]++; 00232 } 00233 00234 /* 3. 
delete bogus successor lists */ 00235 freemark = (boolean *)mymalloc(sizeof(boolean) * scnum); 00236 for (i=1;i<scnum;i++) freemark[i] = FALSE; 00237 for (w=0;w<wchmm->winfo->num;w++) { 00238 node = wchmm->wordend[w]; /* begin from the word end node */ 00239 i = wchmm->winfo->wlen[w]-1; 00240 while (i >= 0) { /* for each phoneme start node */ 00241 if (node == wchmm->offset[w][i]) { 00242 /* word with only 1 state: skip */ 00243 i--; 00244 continue; 00245 } 00246 if (wchmm->state[node].scid == 0) break; /* already parsed */ 00247 if (sclen[wchmm->state[node].scid] == sclen[wchmm->state[wchmm->offset[w][i]].scid]) { 00248 freemark[wchmm->state[node].scid] = TRUE; /* mark the node */ 00249 wchmm->state[node].scid = 0; 00250 } 00251 node = wchmm->offset[w][i]; 00252 i--; 00253 } 00254 } 00255 /* build compaction map */ 00256 scidmap = (int *)mymalloc(sizeof(int) * scnum); 00257 scidmap[0] = 0; 00258 j = 1; 00259 for (i=1;i<scnum;i++) { 00260 if (freemark[i]) { 00261 scidmap[i] = 0; 00262 } else { 00263 scidmap[i] = j; 00264 j++; 00265 } 00266 } 00267 new_scnum = j; 00268 if (debug2_flag) { 00269 jlog("DEBUG: compacted successor list size = %d\n", new_scnum); 00270 } 00271 00272 /* 4. rewrite scid and do compaction for new sclen */ 00273 for (node=0;node<wchmm->n;node++) { 00274 if (wchmm->state[node].scid > 0) { 00275 wchmm->state[node].scid = scidmap[wchmm->state[node].scid]; 00276 } 00277 } 00278 wchmm->sclen = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * new_scnum, &(wchmm->malloc_root)); 00279 for (i=1;i<scnum;i++) { 00280 if (scidmap[i] != 0) wchmm->sclen[scidmap[i]] = sclen[i]; 00281 } 00282 wchmm->scnum = new_scnum; 00283 00284 free(scidmap); 00285 free(freemark); 00286 free(sclen); 00287 00288 /* 5. 
now index completed, make word list for each list */ 00289 wchmm->sclist = (WORD_ID **)mybmalloc2(sizeof(WORD_ID *) * wchmm->scnum, &(wchmm->malloc_root)); 00290 scnumlist = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->scnum); 00291 for(i=1;i<wchmm->scnum;i++) { 00292 wchmm->sclist[i] = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->sclen[i], &(wchmm->malloc_root)); 00293 scnumlist[i] = 0; 00294 } 00295 { 00296 int scid; 00297 for (w=0;w<wchmm->winfo->num;w++) { 00298 for (i=0;i<wchmm->winfo->wlen[w];i++) { 00299 scid = wchmm->state[wchmm->offset[w][i]].scid; 00300 if (scid != 0) { 00301 wchmm->sclist[scid][scnumlist[scid]] = w; 00302 scnumlist[scid]++; 00303 if (scnumlist[scid] > wchmm->sclen[scid]) { 00304 jlog("hogohohoho\n"); 00305 exit(1); 00306 } 00307 } 00308 } 00309 /* at word end */ 00310 scid = wchmm->state[wchmm->wordend[w]].scid; 00311 if (scid != 0) { 00312 wchmm->sclist[scid][scnumlist[scid]] = w; 00313 scnumlist[scid]++; 00314 if (scnumlist[scid] > wchmm->sclen[scid]) { 00315 jlog("hogohohoho\n"); 00316 exit(1); 00317 } 00318 } 00319 } 00320 } 00321 free(scnumlist); 00322 00323 jlog("STAT: done\n"); 00324 } 00325 00326 #ifdef UNIGRAM_FACTORING 00327 00344 void 00345 make_successor_list_unigram_factoring(WCHMM_INFO *wchmm) 00346 { 00347 00348 #ifndef FAST_FACTOR1_SUCCESSOR_LIST 00349 00350 /* old way */ 00351 make_successor_list(wchmm); 00352 calc_all_unigram_factoring_values(wchmm); 00353 00354 #else /* ~FAST_FACTOR1_SUCCESSOR_LIST */ 00355 00356 /* new way */ 00357 00358 int node, node2; 00359 WORD_ID w, w2; 00360 int i, j, n, f; 00361 int s; 00362 LOGPROB tmpprob; 00363 WORD_ID *mtmp; 00364 00365 jlog("STAT: make successor lists for unigram factoring\n"); 00366 00367 /* 1. 
initialize */ 00368 /* initialize node->sclist index on wchmm tree */ 00369 for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0; 00370 00371 /* in unigram factoring, number of successor = vocabulary size */ 00372 wchmm->scnum = wchmm->winfo->num + 1; 00373 if (debug2_flag) { 00374 jlog("DEBUG: successor list size = %d\n", wchmm->scnum); 00375 } 00376 00377 /* allocate successor list for 1-gram factoring */ 00378 wchmm->scword = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->scnum, &(wchmm->malloc_root)); 00379 00380 /* 2. make successor list, and count needed fscore num */ 00381 f = 1; 00382 s = 1; 00383 for (w=0;w<wchmm->winfo->num;w++) { 00384 for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) { 00385 if (i < wchmm->winfo->wlen[w]) { 00386 node = wchmm->offset[w][i]; 00387 } else { 00388 node = wchmm->wordend[w]; 00389 } 00390 if (wchmm->state[node].scid == 0) { /* not assigned */ 00391 /* new node found, assign new and exit here */ 00392 wchmm->state[node].scid = s; 00393 wchmm->scword[s] = w; 00394 s++; 00395 if (s > wchmm->scnum) { 00396 jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n"); 00397 return; 00398 } 00399 break; 00400 } else if (wchmm->state[node].scid > 0) { 00401 /* that node has successor */ 00402 /* move it to the current first isolated node in that word */ 00403 w2 = wchmm->scword[wchmm->state[node].scid]; 00404 for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) { 00405 if (j < wchmm->winfo->wlen[w2]) { 00406 node2 = wchmm->offset[w2][j]; 00407 } else { 00408 node2 = wchmm->wordend[w2]; 00409 } 00410 if (wchmm->state[node2].scid == 0) { /* not assigned */ 00411 /* move successor to there */ 00412 wchmm->state[node2].scid = wchmm->state[node].scid; 00413 break; 00414 } 00415 } 00416 if (j >= wchmm->winfo->wlen[w2] + 1) { 00417 /* not found? 
*/ 00418 jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2); 00419 return; 00420 } 00421 /* make current node as fscore node */ 00422 n = f++; 00423 wchmm->state[node].scid = -n; 00424 /* not compute unigram factoring value yet */ 00425 } 00426 00427 } 00428 } 00429 00430 /* 2. allocate fscore buffer */ 00431 wchmm->fsnum = f; 00432 wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum); 00433 for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO; 00434 00435 /* 3. parse again to assign fscore values */ 00436 for (w=0;w<wchmm->winfo->num;w++) { 00437 for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) { 00438 if (i < wchmm->winfo->wlen[w]) { 00439 node = wchmm->offset[w][i]; 00440 } else { 00441 node = wchmm->wordend[w]; 00442 } 00443 if (wchmm->state[node].scid < 0) { 00444 /* update max */ 00445 if (wchmm->ngram) { 00446 tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w]) 00447 #ifdef CLASS_NGRAM 00448 + wchmm->winfo->cprob[w] 00449 #endif 00450 ; 00451 } else { 00452 tmpprob = LOG_ZERO; 00453 } 00454 if (wchmm->lmvar == LM_NGRAM_USER) { 00455 tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob); 00456 } 00457 n = - wchmm->state[node].scid; 00458 if (wchmm->fscore[n] < tmpprob) { 00459 wchmm->fscore[n] = tmpprob; 00460 } 00461 } 00462 00463 } 00464 } 00465 00466 #endif /* ~FAST_FACTOR1_SUCCESSOR_LIST */ 00467 00468 jlog("STAT: done\n"); 00469 } 00470 00471 #endif /* UNIGRAM_FACTORING */ 00472 00473 00492 void 00493 adjust_sc_index(WCHMM_INFO *wchmm) 00494 { 00495 WORD_ID w; 00496 int i,j,k; 00497 HMM_Logical *ltmp; 00498 int ltmp_state_num; 00499 int ato; 00500 LOGPROB prob; 00501 int node, scid; 00502 A_CELL2 *ac; 00503 00504 /* duplicate scid for HMMs with more than one arc from initial state */ 00505 for(w=0;w<wchmm->winfo->num;w++) { 00506 for(k=0;k<wchmm->winfo->wlen[w];k++) { 00507 node = wchmm->offset[w][k]; 00508 scid = wchmm->state[node].scid; 00509 if (scid == 0) continue; 00510 ltmp = 
wchmm->winfo->wseq[w][k]; 00511 ltmp_state_num = hmm_logical_state_num(ltmp); 00512 if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) { 00513 j = k + 1; 00514 if (j == wchmm->winfo->wlen[w]) { 00515 if (wchmm->state[wchmm->wordend[w]].scid == 0) { 00516 jlog("STAT: word %d: factoring node copied for skip phone\n", w); 00517 wchmm->state[wchmm->wordend[w]].scid = scid; 00518 } 00519 } else { 00520 if (wchmm->state[wchmm->offset[w][j]].scid == 0) { 00521 jlog("STAT: word %d: factoring node copied for skip phone\n", w); 00522 wchmm->state[wchmm->offset[w][j]].scid = scid; 00523 } 00524 } 00525 } 00526 for(ato=1;ato<ltmp_state_num;ato++) { 00527 prob = (hmm_logical_trans(ltmp))->a[0][ato]; 00528 if (prob != LOG_ZERO) { 00529 wchmm->state[node+ato-1].scid = scid; 00530 } 00531 } 00532 } 00533 } 00534 00535 /* move scid and fscore on the head state to the head grammar state */ 00536 for(i=0;i<wchmm->startnum;i++) { 00537 node = wchmm->startnode[i]; 00538 if (wchmm->state[node].out.state != NULL) { 00539 j_internal_error("adjust_sc_index: outprob exist in word-head node??\n"); 00540 } 00541 if (wchmm->next_a[node] != LOG_ZERO) { 00542 if (wchmm->state[node+1].scid != 0) { 00543 if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) { 00544 j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); 00545 } 00546 wchmm->state[node].scid = wchmm->state[node+1].scid; 00547 wchmm->state[node+1].scid = 0; 00548 } 00549 } 00550 for(ac=wchmm->ac[node];ac;ac=ac->next) { 00551 for(k=0;k<ac->n;k++) { 00552 if (wchmm->state[ac->arc[k]].scid != 0) { 00553 if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) { 00554 j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); 00555 } 00556 wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid; 00557 wchmm->state[ac->arc[k]].scid = 0; 00558 } 00559 } 00560 } 00561 } 00562 } 00563 00564 
00565 /* -------------------------------------------------------------------- */ 00566 /* factoring computation */ 00567 00586 void 00587 max_successor_cache_init(WCHMM_INFO *wchmm) 00588 { 00589 int i; 00590 LM_PROB_CACHE *l; 00591 WORD_ID wnum; 00592 00593 /* for word-internal */ 00594 l = &(wchmm->lmcache); 00595 00596 l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum); 00597 l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum); 00598 for (i=0;i<wchmm->scnum;i++) { 00599 l->lastwcache[i] = WORD_INVALID; 00600 } 00601 /* for cross-word */ 00602 if (wchmm->ngram) { 00603 wnum = wchmm->ngram->max_word_num; 00604 } else { 00605 wnum = wchmm->winfo->num; 00606 } 00607 #ifdef HASH_CACHE_IW 00608 l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100; 00609 if (l->iw_cache_num < 10) l->iw_cache_num = 10; 00610 #else 00611 l->iw_cache_num = wnum; 00612 #endif /* HASH_CACHE_IW */ 00613 l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num); 00614 for (i=0;i<l->iw_cache_num;i++) { 00615 l->iw_sc_cache[i] = NULL; 00616 } 00617 #ifdef HASH_CACHE_IW 00618 l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num); 00619 for (i=0;i<l->iw_cache_num;i++) { 00620 l->iw_lw_cache[i] = WORD_INVALID; 00621 } 00622 #endif 00623 } 00624 00637 static void 00638 max_successor_prob_iw_free(WCHMM_INFO *wchmm) 00639 { 00640 int i; 00641 LM_PROB_CACHE *l; 00642 l = &(wchmm->lmcache); 00643 for (i=0;i<l->iw_cache_num;i++) { 00644 if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]); 00645 l->iw_sc_cache[i] = NULL; 00646 } 00647 } 00648 00665 void 00666 max_successor_cache_free(WCHMM_INFO *wchmm) 00667 { 00668 free(wchmm->lmcache.probcache); 00669 free(wchmm->lmcache.lastwcache); 00670 max_successor_prob_iw_free(wchmm); 00671 free(wchmm->lmcache.iw_sc_cache); 00672 #ifdef HASH_CACHE_IW 00673 free(wchmm->lmcache.iw_lw_cache); 00674 #endif 00675 } 00676 00677 #ifdef UNIGRAM_FACTORING 00678 00719 void 00720 
make_iwcache_index(WCHMM_INFO *wchmm) 00721 { 00722 int i, node, num; 00723 00724 wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum); 00725 num = 0; 00726 for(i=0;i<wchmm->startnum;i++) { 00727 node = wchmm->startnode[i]; 00728 if (wchmm->state[node].scid >= 0) { /* not a factoring node (isolated node, has no 1-gram factoring value) */ 00729 wchmm->start2isolate[i] = num; 00730 num++; 00731 } else { /* factoring node (shared) */ 00732 wchmm->start2isolate[i] = -1; 00733 } 00734 } 00735 wchmm->isolatenum = num; 00736 } 00737 00738 #ifndef FAST_FACTOR1_SUCCESSOR_LIST 00739 00784 void 00785 calc_all_unigram_factoring_values(WCHMM_INFO *wchmm) 00786 { 00787 S_CELL *sc, *sctmp; 00788 LOGPROB tmpprob, maxprob; 00789 int i, n; 00790 00791 /* count needed number of 1-gram factoring nodes */ 00792 n = 0; 00793 for (i=1;i<wchmm->scnum;i++) { 00794 sc = wchmm->sclist[i]; 00795 if (sc == NULL) { 00796 j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n"); 00797 } 00798 if (sc->next != NULL) { 00799 /* more than two words, so compute maximum 1-gram probability */ 00800 n++; 00801 } 00802 } 00803 wchmm->fsnum = n + 1; 00804 /* allocate area */ 00805 wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum); 00806 /* assign values */ 00807 n = 1; 00808 for (i=1;i<wchmm->scnum;i++) { 00809 sc = wchmm->sclist[i]; 00810 if (sc->next != NULL) { 00811 maxprob = LOG_ZERO; 00812 for (sctmp = sc; sctmp; sctmp = sctmp->next) { 00813 if (wchmm->ngram) { 00814 tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word]) 00815 #ifdef CLASS_NGRAM 00816 + wchmm->winfo->cprob[sctmp->word] 00817 #endif 00818 ; 00819 } else { 00820 tmpprob = LOG_ZERO; 00821 } 00822 if (wchmm->lmvar == LM_NGRAM_USER) { 00823 tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob); 00824 } 00825 if (maxprob < tmpprob) maxprob = tmpprob; 00826 } 00827 wchmm->fscore[n] = maxprob; 00828 free_successor(wchmm, i); 00829 
wchmm->state[wchmm->sclist2node[i]].scid = - n; 00830 n++; 00831 } 00832 } 00833 /* garbage collection of factored sclist */ 00834 compaction_successor(wchmm); 00835 } 00836 00837 #endif 00838 00839 #else /* ~UNIGRAM_FACTORING */ 00840 00863 static LOGPROB 00864 calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node) 00865 { 00866 LOGPROB tmpprob, maxprob; 00867 WORD_ID lw, w; 00868 int i; 00869 int scid; 00870 00871 maxprob = LOG_ZERO; 00872 if (wchmm->ngram) { 00873 lw = wchmm->winfo->wton[lastword]; 00874 } 00875 00876 scid = wchmm->state[node].scid; 00877 00878 for (i = 0; i < wchmm->sclen[scid]; i++) { 00879 w = wchmm->sclist[scid][i]; 00880 if (wchmm->ngram) { 00881 tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[w]) 00882 #ifdef CLASS_NGRAM 00883 + wchmm->winfo->cprob[w] 00884 #endif 00885 ; 00886 } else { 00887 tmpprob = LOG_ZERO; 00888 } 00889 if (wchmm->lmvar == LM_NGRAM_USER) { 00890 tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, tmpprob); 00891 } 00892 if (maxprob < tmpprob) maxprob = tmpprob; 00893 } 00894 00895 return(maxprob); 00896 } 00897 00898 #endif /* ~UNIGRAM_FACTORING */ 00899 00942 LOGPROB 00943 max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node) 00944 { 00945 LOGPROB maxprob; 00946 WORD_ID last_nword, w; 00947 int scid; 00948 LM_PROB_CACHE *l; 00949 00950 l = &(wchmm->lmcache); 00951 00952 if (lastword != WORD_INVALID) { /* return nothing if no previous word */ 00953 if (wchmm->ngram) { 00954 last_nword = wchmm->winfo->wton[lastword]; 00955 } else { 00956 last_nword = lastword; 00957 } 00958 scid = wchmm->state[node].scid; 00959 #ifdef UNIGRAM_FACTORING 00960 if (scid < 0) { 00961 /* return 1-gram factoring value already calced */ 00962 return(wchmm->fscore[(- scid)]); 00963 } else { 00964 /* this node has only one successor */ 00965 /* return precise 2-gram score */ 00966 if (last_nword != l->lastwcache[scid]) { 00967 /* calc and cache */ 00968 w = wchmm->scword[scid]; 
00969 if (wchmm->ngram) { 00970 maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w]) 00971 #ifdef CLASS_NGRAM 00972 + wchmm->winfo->cprob[w] 00973 #endif 00974 ; 00975 } else { 00976 maxprob = LOG_ZERO; 00977 } 00978 if (wchmm->lmvar == LM_NGRAM_USER) { 00979 maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob); 00980 } 00981 l->lastwcache[scid] = last_nword; 00982 l->probcache[scid] = maxprob; 00983 return(maxprob); 00984 } else { 00985 /* return cached */ 00986 return (l->probcache[scid]); 00987 } 00988 } 00989 #else /* UNIGRAM_FACTORING */ 00990 /* 2-gram */ 00991 if (last_nword != l->lastwcache[scid]) { 00992 maxprob = calc_successor_prob(wchmm, lastword, node); 00993 /* store to cache */ 00994 l->lastwcache[scid] = last_nword; 00995 l->probcache[scid] = maxprob; 00996 return(maxprob); 00997 } else { 00998 return (l->probcache[scid]); 00999 } 01000 #endif /* UNIGRAM_FACTORING */ 01001 } else { 01002 return(0.0); 01003 #if 0 01004 maxprob = LOG_ZERO; 01005 for (sc=wchmm->state[node].sc;sc;sc=sc->next) { 01006 tmpprob = uni_prob(wchmm->ngram, sc->word); 01007 if (maxprob < tmpprob) maxprob = tmpprob; 01008 } 01009 return(maxprob); 01010 #endif 01011 } 01012 01013 } 01014 01049 LOGPROB * 01050 max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword) 01051 { 01052 int i, j, x, node; 01053 int last_nword; 01054 WORD_ID w; 01055 LM_PROB_CACHE *l; 01056 LOGPROB p; 01057 01058 l = &(wchmm->lmcache); 01059 01060 if (wchmm->ngram) { 01061 last_nword = wchmm->winfo->wton[lastword]; 01062 } else { 01063 last_nword = lastword; 01064 } 01065 01066 #ifdef HASH_CACHE_IW 01067 x = last_nword % l->iw_cache_num; 01068 if (l->iw_lw_cache[x] == last_nword) { /* cache hit */ 01069 return(l->iw_sc_cache[x]); 01070 } 01071 #else /* full cache */ 01072 if (l->iw_sc_cache[last_nword] != NULL) { /* cache hit */ 01073 return(l->iw_sc_cache[last_nword]); 01074 } 01075 x = last_nword; 01076 /* cache mis-hit, calc probs and cache them 
as new */ 01077 #endif 01078 /* allocate cache memory */ 01079 if (l->iw_sc_cache[x] == NULL) { 01080 #ifdef UNIGRAM_FACTORING 01081 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum); 01082 #else 01083 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum); 01084 #endif 01085 if (l->iw_sc_cache[x] == NULL) { /* malloc failed */ 01086 /* clear existing cache, and retry */ 01087 max_successor_prob_iw_free(wchmm); 01088 jlog("STAT: inter-word LM cache (%dMB) rehashed\n", 01089 (l->iw_cache_num * 01090 #ifdef UNIGRAM_FACTORING 01091 wchmm->isolatenum 01092 #else 01093 wchmm->startnum 01094 #endif 01095 ) / 1000 * sizeof(LOGPROB) / 1000); 01096 #ifdef UNIGRAM_FACTORING 01097 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum); 01098 #else 01099 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum); 01100 #endif 01101 if (l->iw_sc_cache[x] == NULL) { /* malloc failed again? */ 01102 j_internal_error("max_successor_prob_iw: cannot malloc\n"); 01103 } 01104 } 01105 } 01106 01107 /* calc prob for all startid */ 01108 #ifdef UNIGRAM_FACTORING 01109 for (j=0;j<wchmm->startnum;j++) { 01110 i = wchmm->start2isolate[j]; 01111 if (i == -1) continue; 01112 node = wchmm->startnode[j]; 01113 if (wchmm->state[node].scid <= 0) { 01114 /* should not happen!!! 
below is just for debugging */ 01115 j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n"); 01116 } else { 01117 w = wchmm->scword[wchmm->state[node].scid]; 01118 if (wchmm->ngram) { 01119 p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w]) 01120 #ifdef CLASS_NGRAM 01121 + wchmm->winfo->cprob[w] 01122 #endif 01123 ; 01124 } else { 01125 p = LOG_ZERO; 01126 } 01127 if (wchmm->lmvar == LM_NGRAM_USER) { 01128 p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p); 01129 } 01130 l->iw_sc_cache[x][i] = p; 01131 } 01132 } 01133 #else /* ~UNIGRAM_FACTORING */ 01134 for (i=0;i<wchmm->startnum;i++) { 01135 node = wchmm->startnode[i]; 01136 l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node); 01137 } 01138 #endif 01139 #ifdef HASH_CACHE_IW 01140 l->iw_lw_cache[x] = last_nword; 01141 #endif 01142 01143 return(l->iw_sc_cache[x]); 01144 } 01145 01195 boolean 01196 can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node) 01197 { 01198 int lc; 01199 int i; 01200 int s; 01201 01202 /* return TRUE if at least one subtree word can connect */ 01203 01204 s = wchmm->state[node].scid; 01205 01206 if (lastword == WORD_INVALID) { /* case at beginning-of-word */ 01207 for (i = 0; i < wchmm->sclen[s]; i++) { 01208 if (dfa_cp_begin(wchmm->dfa, wchmm->sclist[s][i]) == TRUE) return(TRUE); 01209 } 01210 return(FALSE); 01211 } else { 01212 lc = wchmm->winfo->wton[lastword]; 01213 for (i = 0; i < wchmm->sclen[s]; i++) { 01214 if (dfa_cp(wchmm->dfa, lc, wchmm->sclist[s][i]) == TRUE) return(TRUE); 01215 } 01216 return(FALSE); 01217 } 01218 } 01219 01220 /* end of file */