Julius: libjulius/src/factoring_sub.c ソースファイル

Julius 4.1.5
00001 
00159 /*
00160  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00161  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00162  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00163  * All rights reserved
00164  */
00165 
00166 #include <julius/julius.h>
00167 
00168 /*----------------------------------------------------------------------*/
00169 
00190 static void
00191 add_successor(WCHMM_INFO *wchmm, int node, WORD_ID w)
00192 {
00193   S_CELL *sctmp, *sc;
00194 
00195   /* malloc a new successor list element */
00196   sctmp=(S_CELL *) mymalloc(sizeof(S_CELL));
00197   /* assign word ID to the new element */
00198   sctmp->word = w;
00199   /* add the new element to existing list (keeping order) */
00200   if (wchmm->state[node].scid == 0) {
00201     j_internal_error("add_successor: sclist id not assigned to branch node?\n");
00202   }
00203   sc = wchmm->sclist[wchmm->state[node].scid];
00204   if (sc == NULL || sctmp->word < sc->word) {
00205     sctmp->next = sc;
00206     wchmm->sclist[wchmm->state[node].scid] = sctmp;
00207   } else {
00208     for(;sc;sc=sc->next) {
00209       if (sc->next == NULL || sctmp->word < (sc->next)->word) {
00210         if (sctmp->word == sc->word) break; /* avoid duplication */
00211         sctmp->next = sc->next;
00212         sc->next = sctmp;
00213         break;
00214       }
00215     }
00216   }
00217 }
00218 
00239 static boolean
00240 match_successor(WCHMM_INFO *wchmm, int node1, int node2)
00241 {
00242   S_CELL *sc1,*sc2;
00243 
00244   /* assume successor is sorted by ID */
00245   if (wchmm->state[node1].scid == 0 || wchmm->state[node2].scid == 0) {
00246     j_internal_error("match_successor: sclist id not assigned to branch node?\n");
00247   }
00248   sc1 = wchmm->sclist[wchmm->state[node1].scid];
00249   sc2 = wchmm->sclist[wchmm->state[node2].scid];
00250   for (;;) {
00251     if (sc1 == NULL || sc2 == NULL) {
00252       if (sc1 == NULL && sc2 == NULL) {
00253         return TRUE;
00254       } else {
00255         return FALSE;
00256       }
00257     } else if (sc1->word != sc2->word) {
00258       return FALSE;
00259     }
00260     sc1 = sc1->next;
00261     sc2 = sc2->next;
00262   }
00263 }
00264 
00279 static void
00280 free_successor(WCHMM_INFO *wchmm, int scid)
00281 {
00282   S_CELL *sc;
00283   S_CELL *sctmp;
00284 
00285   /* free sclist */
00286   sc = wchmm->sclist[scid];
00287   while (sc != NULL) {
00288     sctmp = sc;
00289     sc = sc->next;
00290     free(sctmp);
00291   }
00292 }
00293 
00308 static void
00309 compaction_successor(WCHMM_INFO *wchmm)
00310 {
00311   int src, dst;
00312 
00313   dst = 1;
00314   for(src=1;src<wchmm->scnum;src++) {
00315     if (wchmm->state[wchmm->sclist2node[src]].scid <= 0) {
00316       /* already freed, skip */
00317       continue;
00318     }
00319     if (dst != src) {
00320       wchmm->sclist[dst] = wchmm->sclist[src];
00321       wchmm->sclist2node[dst] = wchmm->sclist2node[src];
00322       wchmm->state[wchmm->sclist2node[dst]].scid = dst;
00323     }
00324     dst++;
00325   }
00326   if (debug2_flag) {
00327     jlog("DEBUG: successor list shrinked from %d to %d\n", wchmm->scnum, dst);
00328   }
00329   wchmm->scnum = dst;
00330 }
00331 
00346 static void
00347 shrink_successor(WCHMM_INFO *wchmm)
00348 {
00349   if (wchmm->sclist) {
00350     wchmm->sclist = (S_CELL **)myrealloc(wchmm->sclist, sizeof(S_CELL *) * wchmm->scnum);
00351   }
00352   if (wchmm->sclist2node) {
00353     wchmm->sclist2node = (int *)myrealloc(wchmm->sclist2node, sizeof(int) * wchmm->scnum);
00354   }
00355 }
00356 
00373 void
00374 make_successor_list(WCHMM_INFO *wchmm)
00375 {
00376   int node;
00377   WORD_ID w;
00378   int i;
00379   boolean *freemark;
00380   int s;
00381 
00382   jlog("STAT: make successor lists for factoring\n");
00383 
00384   /* 1. initialize */
00385   /* initialize node->sclist index on wchmm tree */
00386   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00387 
00388   /* parse the tree to get the maximum size of successor list */
00389   s = 1;
00390   for (w=0;w<wchmm->winfo->num;w++) {
00391     for (i=0;i<wchmm->winfo->wlen[w];i++) {
00392       if (wchmm->state[wchmm->offset[w][i]].scid == 0) {
00393         wchmm->state[wchmm->offset[w][i]].scid = s;
00394         s++;
00395       }
00396     }
00397     if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00398       wchmm->state[wchmm->wordend[w]].scid = s;
00399       s++;
00400     }
00401   }
00402   wchmm->scnum = s;
00403   if (debug2_flag) {
00404     jlog("DEBUG: initial successor list size = %d\n", wchmm->scnum);
00405   }
00406 
00407   /* allocate successor list for the maximum size */
00408   wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
00409   for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
00410   wchmm->sclist2node = (int *)mymalloc(sizeof(int) * wchmm->scnum);
00411 
00412   /* allocate misc. work area */
00413   freemark = (boolean *)mymalloc(sizeof(boolean) * wchmm->scnum);
00414   for (i=1;i<wchmm->scnum;i++) freemark[i] = FALSE;
00415 
00416   /* 2. make initial successor list: assign at all possible nodes */
00417   for (w=0;w<wchmm->winfo->num;w++) {
00418     /* at each start node of phonemes */
00419     for (i=0;i<wchmm->winfo->wlen[w];i++) {
00420       wchmm->sclist2node[wchmm->state[wchmm->offset[w][i]].scid] = wchmm->offset[w][i];
00421       add_successor(wchmm, wchmm->offset[w][i], w);
00422     }
00423     /* at word end */
00424     wchmm->sclist2node[wchmm->state[wchmm->wordend[w]].scid] = wchmm->wordend[w];
00425     add_successor(wchmm, wchmm->wordend[w], w);
00426   }
00427   
00428   /* 3. erase unnecessary successor list */
00429   /* sucessor list same as the previous node is not needed, so */
00430   /* parse lexicon tree from every leaf to find the same succesor list */
00431   for (w=0;w<wchmm->winfo->num;w++) {
00432     node = wchmm->wordend[w];   /* begin from the word end node */
00433     i = wchmm->winfo->wlen[w]-1;
00434     while (i >= 0) {            /* for each phoneme start node */
00435       if (node == wchmm->offset[w][i]) {
00436         /* word with only 1 state: skip */
00437         i--;
00438         continue;
00439       }
00440       if (match_successor(wchmm, node, wchmm->offset[w][i])) {
00441         freemark[wchmm->state[node].scid] = TRUE;       /* mark the node */
00442       }
00443 /* 
00444  *       if (freemark[wchmm->offset[w][i]] != FALSE) {
00445  *         break;
00446  *       }
00447  */
00448       node = wchmm->offset[w][i];
00449       i--;
00450     }
00451   }
00452   /* really free */
00453   for (i=1;i<wchmm->scnum;i++) {
00454     if (freemark[i] == TRUE) {
00455       free_successor(wchmm, i);
00456       /* reset node -> sclist link */
00457       wchmm->state[wchmm->sclist2node[i]].scid = 0;
00458     }
00459   }
00460   /* garbage collection of deleted sclist */
00461   compaction_successor(wchmm);
00462 
00463   free(freemark);
00464 
00465   jlog("STAT: done\n");
00466 }
00467 
00468 #ifdef UNIGRAM_FACTORING
00469 
00486 void
00487 make_successor_list_unigram_factoring(WCHMM_INFO *wchmm)
00488 {
00489 
00490 #ifndef FAST_FACTOR1_SUCCESSOR_LIST
00491 
00492   /* old way */
00493   make_successor_list(wchmm);
00494   calc_all_unigram_factoring_values(wchmm);
00495 
00496 #else  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
00497 
00498   /* new way */
00499 
00500   int node, node2;
00501   WORD_ID w, w2;
00502   int i, j, n, f;
00503   int s;
00504   LOGPROB tmpprob;
00505 
00506   jlog("STAT: make successor lists for unigram factoring\n");
00507 
00508   /* 1. initialize */
00509   /* initialize node->sclist index on wchmm tree */
00510   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00511 
00512   /* in unigram factoring, number of successor = vocabulary size */
00513   wchmm->scnum = wchmm->winfo->num + 1;
00514   if (debug2_flag) {
00515     jlog("DEBUG: successor list size = %d\n", wchmm->scnum);
00516   }
00517 
00518   /* allocate successor list */
00519   wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
00520   for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
00521   /* sclist2node is not used */
00522 
00523   /* 2. make successor list, and count needed fscore num */
00524   f = 1;
00525   s = 1;
00526   for (w=0;w<wchmm->winfo->num;w++) {
00527     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00528       if (i < wchmm->winfo->wlen[w]) {
00529         node = wchmm->offset[w][i];
00530       } else {
00531         node = wchmm->wordend[w];
00532       }
00533       if (wchmm->state[node].scid == 0) { /* not assigned */
00534         /* new node found, assign new and exit here */
00535         wchmm->state[node].scid = s++;
00536         if (s > wchmm->scnum) {
00537           jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n");
00538           return;
00539         }
00540         add_successor(wchmm, node, w);
00541         break;
00542       } else if (wchmm->state[node].scid > 0) {
00543         /* that node has sclist */
00544         /* move it to the current first isolated node in that word */
00545         w2 = wchmm->sclist[wchmm->state[node].scid]->word;
00546         for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) {
00547           if (j < wchmm->winfo->wlen[w2]) {
00548             node2 = wchmm->offset[w2][j];
00549           } else {
00550             node2 = wchmm->wordend[w2];
00551           }
00552           if (wchmm->state[node2].scid == 0) { /* not assigned */
00553             /* move sclist to there */
00554             wchmm->state[node2].scid = wchmm->state[node].scid;
00555             break;
00556           }
00557         }
00558         if (j >= wchmm->winfo->wlen[w2] + 1) {
00559           /* not found? */
00560           jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2);
00561           return;
00562         }
00563         /* make current node as fscore node */
00564         n = f++;
00565         wchmm->state[node].scid = -n;
00566         /* not compute unigram factoring value yet */
00567       }
00568 
00569     }
00570   }
00571 
00572   /* 2. allocate fscore buffer */
00573   wchmm->fsnum = f;
00574   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00575   for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO;
00576 
00577   /* 3. parse again to assign fscore values */
00578   for (w=0;w<wchmm->winfo->num;w++) {
00579     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00580       if (i < wchmm->winfo->wlen[w]) {
00581         node = wchmm->offset[w][i];
00582       } else {
00583         node = wchmm->wordend[w];
00584       }
00585       if (wchmm->state[node].scid < 0) {
00586         /* update max */
00587         if (wchmm->ngram) {
00588           tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w])
00589 #ifdef CLASS_NGRAM
00590             + wchmm->winfo->cprob[w]
00591 #endif
00592             ;
00593         } else {
00594           tmpprob = LOG_ZERO;
00595         }
00596         if (wchmm->lmvar == LM_NGRAM_USER) {
00597           tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob);
00598         }
00599         n = - wchmm->state[node].scid;
00600         if (wchmm->fscore[n] < tmpprob) {
00601           wchmm->fscore[n] = tmpprob;
00602         }
00603       }
00604 
00605     }
00606   }
00607 
00608 #endif  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
00609 
00610   jlog("STAT: done\n");
00611 }
00612 
00613 #endif /* UNIGRAM_FACTORING */
00614 
00615 
00634 void
00635 adjust_sc_index(WCHMM_INFO *wchmm)
00636 {
00637   WORD_ID w;
00638   int i,j,k;
00639   HMM_Logical *ltmp;
00640   int ltmp_state_num;
00641   int ato;
00642   LOGPROB prob;
00643   int node, scid;
00644   A_CELL2 *ac;
00645   
00646   /* duplicate scid for HMMs with more than one arc from initial state */
00647   for(w=0;w<wchmm->winfo->num;w++) {
00648     for(k=0;k<wchmm->winfo->wlen[w];k++) {
00649       node = wchmm->offset[w][k];
00650       scid = wchmm->state[node].scid;
00651       if (scid == 0) continue;
00652       ltmp = wchmm->winfo->wseq[w][k];
00653       ltmp_state_num = hmm_logical_state_num(ltmp);
00654       if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) {
00655         j = k + 1;
00656         if (j == wchmm->winfo->wlen[w]) {
00657           if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00658             jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00659             wchmm->state[wchmm->wordend[w]].scid = scid;
00660           }
00661         } else {
00662           if (wchmm->state[wchmm->offset[w][j]].scid == 0) {
00663             jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00664             wchmm->state[wchmm->offset[w][j]].scid = scid;
00665           }
00666         }
00667       }
00668       for(ato=1;ato<ltmp_state_num;ato++) {
00669         prob = (hmm_logical_trans(ltmp))->a[0][ato];
00670         if (prob != LOG_ZERO) {
00671           wchmm->state[node+ato-1].scid = scid;
00672         }
00673       }
00674     }
00675   }
00676 
00677   /* move scid and fscore on the head state to the head grammar state */
00678   for(i=0;i<wchmm->startnum;i++) {
00679     node = wchmm->startnode[i];
00680     if (wchmm->state[node].out.state != NULL) {
00681       j_internal_error("adjust_sc_index: outprob exist in word-head node??\n");
00682     }
00683     if (wchmm->next_a[node] != LOG_ZERO) {
00684       if (wchmm->state[node+1].scid != 0) {
00685         if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) {
00686           j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00687         }
00688         wchmm->state[node].scid = wchmm->state[node+1].scid;
00689         wchmm->state[node+1].scid = 0;
00690       }
00691     }
00692     for(ac=wchmm->ac[node];ac;ac=ac->next) {
00693       for(k=0;k<ac->n;k++) {
00694         if (wchmm->state[ac->arc[k]].scid != 0) {
00695           if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) {
00696             j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00697           }
00698           wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid;
00699           wchmm->state[ac->arc[k]].scid = 0;
00700         }
00701       }
00702     }
00703   }
00704 }
00705 
00706 
00707 /* -------------------------------------------------------------------- */
00708 /* factoring computation */
00709 
00728 void
00729 max_successor_cache_init(WCHMM_INFO *wchmm)
00730 {
00731   int i;
00732   LM_PROB_CACHE *l;
00733   WORD_ID wnum;
00734 
00735   /* finally shrink the memory area of successor list here */
00736   shrink_successor(wchmm);
00737 
00738   /* for word-internal */
00739   l = &(wchmm->lmcache);
00740 
00741   l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum);
00742   l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum);
00743   for (i=0;i<wchmm->scnum;i++) {
00744     l->lastwcache[i] = WORD_INVALID;
00745   }
00746   /* for cross-word */
00747   if (wchmm->ngram) {
00748     wnum = wchmm->ngram->max_word_num;
00749   } else {
00750     wnum = wchmm->winfo->num;
00751   }
00752 #ifdef HASH_CACHE_IW
00753   l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100;
00754   if (l->iw_cache_num < 10) l->iw_cache_num = 10;
00755 #else
00756   l->iw_cache_num = wnum;
00757 #endif /* HASH_CACHE_IW */
00758   l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num);
00759   for (i=0;i<l->iw_cache_num;i++) {
00760     l->iw_sc_cache[i] = NULL;
00761   }
00762 #ifdef HASH_CACHE_IW
00763   l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num);
00764   for (i=0;i<l->iw_cache_num;i++) {
00765     l->iw_lw_cache[i] = WORD_INVALID;
00766   }
00767 #endif
00768 }
00769 
00782 static void
00783 max_successor_prob_iw_free(WCHMM_INFO *wchmm)
00784 {
00785   int i;
00786   LM_PROB_CACHE *l;
00787   l = &(wchmm->lmcache);
00788   for (i=0;i<l->iw_cache_num;i++) {
00789     if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]);
00790     l->iw_sc_cache[i] = NULL;
00791   }
00792 }
00793 
00810 void
00811 max_successor_cache_free(WCHMM_INFO *wchmm)
00812 {
00813   free(wchmm->lmcache.probcache);
00814   free(wchmm->lmcache.lastwcache);
00815   max_successor_prob_iw_free(wchmm);
00816   free(wchmm->lmcache.iw_sc_cache);
00817 #ifdef HASH_CACHE_IW
00818   free(wchmm->lmcache.iw_lw_cache);
00819 #endif
00820 }
00821 
00822 #ifdef UNIGRAM_FACTORING
00823 
00864 void
00865 make_iwcache_index(WCHMM_INFO *wchmm)
00866 {
00867   int i, node, num;
00868 
00869   wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum);
00870   num = 0;
00871   for(i=0;i<wchmm->startnum;i++) {
00872     node = wchmm->startnode[i];
00873     if (wchmm->state[node].scid >= 0) { /* not a factoring node (isolated node, has no 1-gram factoring value) */
00874       wchmm->start2isolate[i] = num;
00875       num++;
00876     } else {                    /* factoring node (shared) */
00877       wchmm->start2isolate[i] = -1;
00878     }
00879   }
00880   wchmm->isolatenum = num;
00881 }
00882 
00927 void
00928 calc_all_unigram_factoring_values(WCHMM_INFO *wchmm)
00929 {
00930   S_CELL *sc, *sctmp;
00931   LOGPROB tmpprob, maxprob;
00932   int i, n;
00933 
00934   /* count needed number of 1-gram factoring nodes */
00935   n = 0;
00936   for (i=1;i<wchmm->scnum;i++) {
00937     sc = wchmm->sclist[i];
00938     if (sc == NULL) {
00939       j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n");
00940     }
00941     if (sc->next != NULL) {
00942       /* more than two words, so compute maximum 1-gram probability */
00943       n++;
00944     }
00945   }
00946   wchmm->fsnum = n + 1;
00947   /* allocate area */
00948   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00949   /* assign values */
00950   n = 1;
00951   for (i=1;i<wchmm->scnum;i++) {
00952     sc = wchmm->sclist[i];
00953     if (sc->next != NULL) {
00954       maxprob = LOG_ZERO;
00955       for (sctmp = sc; sctmp; sctmp = sctmp->next) {
00956         if (wchmm->ngram) {
00957           tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word])
00958 #ifdef CLASS_NGRAM
00959             + wchmm->winfo->cprob[sctmp->word] 
00960 #endif
00961             ;
00962         } else {
00963           tmpprob = LOG_ZERO;
00964         }
00965         if (wchmm->lmvar == LM_NGRAM_USER) {
00966           tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob);
00967         }
00968         if (maxprob < tmpprob) maxprob = tmpprob;
00969       }
00970       wchmm->fscore[n] = maxprob;
00971       free_successor(wchmm, i);
00972       wchmm->state[wchmm->sclist2node[i]].scid = - n;
00973       n++;
00974     }
00975   }
00976   /* garbage collection of factored sclist */
00977   compaction_successor(wchmm);
00978 }
00979 
00980 #else  /* ~UNIGRAM_FACTORING */
00981 
01004 static LOGPROB
01005 calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01006 {
01007   S_CELL *sc;
01008   LOGPROB tmpprob, maxprob;
01009   WORD_ID lw;
01010 
01011   maxprob = LOG_ZERO;
01012   if (wchmm->ngram) {
01013     lw = wchmm->winfo->wton[lastword];
01014   }
01015 
01016   for (sc = wchmm->sclist[wchmm->state[node].scid]; sc; sc = sc->next) {
01017     if (wchmm->ngram) {
01018       tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[sc->word])
01019 #ifdef CLASS_NGRAM
01020         + wchmm->winfo->cprob[sc->word]
01021 #endif
01022         ;
01023     } else {
01024       tmpprob = LOG_ZERO;
01025     }
01026     if (wchmm->lmvar == LM_NGRAM_USER) {
01027       tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, sc->word, tmpprob);
01028     }
01029     if (maxprob < tmpprob) maxprob = tmpprob;
01030   }
01031 
01032   return(maxprob);
01033 }
01034 
01035 #endif  /* ~UNIGRAM_FACTORING */
01036 
01079 LOGPROB
01080 max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01081 {
01082   LOGPROB maxprob;
01083   WORD_ID last_nword, w;
01084   int scid;
01085   LM_PROB_CACHE *l;
01086 
01087   l = &(wchmm->lmcache);
01088 
01089   if (lastword != WORD_INVALID) { /* return nothing if no previous word */
01090     if (wchmm->ngram) {
01091       last_nword = wchmm->winfo->wton[lastword];
01092     } else {
01093       last_nword = lastword;
01094     }
01095     scid = wchmm->state[node].scid;
01096 #ifdef UNIGRAM_FACTORING
01097     if (scid < 0) {
01098       /* return 1-gram factoring value already calced */
01099       return(wchmm->fscore[(- scid)]);
01100     } else {
01101       /* this node has only one successor */
01102       /* return precise 2-gram score */
01103       if (last_nword != l->lastwcache[scid]) {
01104         /* calc and cache */
01105         w = (wchmm->sclist[scid])->word;
01106         if (wchmm->ngram) {
01107           maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
01108 #ifdef CLASS_NGRAM
01109             + wchmm->winfo->cprob[w]
01110 #endif
01111             ;
01112         } else {
01113           maxprob = LOG_ZERO;
01114         }
01115         if (wchmm->lmvar == LM_NGRAM_USER) {
01116           maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob);
01117         }
01118         l->lastwcache[scid] = last_nword;
01119         l->probcache[scid] = maxprob;
01120         return(maxprob);
01121       } else {
01122         /* return cached */
01123         return (l->probcache[scid]);
01124       }
01125     }
01126 #else  /* UNIGRAM_FACTORING */
01127     /* 2-gram */
01128     if (last_nword != l->lastwcache[scid]) {
01129       maxprob = calc_successor_prob(wchmm, lastword, node);
01130       /* store to cache */
01131       l->lastwcache[scid] = last_nword;
01132       l->probcache[scid] = maxprob;
01133       return(maxprob);
01134     } else {
01135       return (l->probcache[scid]);
01136     }
01137 #endif /* UNIGRAM_FACTORING */
01138   } else {
01139     return(0.0);
01140 #if 0
01141     maxprob = LOG_ZERO;
01142     for (sc=wchmm->state[node].sc;sc;sc=sc->next) {
01143       tmpprob = uni_prob(wchmm->ngram, sc->word);
01144       if (maxprob < tmpprob) maxprob = tmpprob;
01145     }
01146     return(maxprob);
01147 #endif
01148   }
01149 
01150 }
01151 
01186 LOGPROB *
01187 max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword)
01188 {
01189   int i, j, x, node;
01190   int last_nword;
01191   WORD_ID w;
01192   LM_PROB_CACHE *l;
01193   LOGPROB p;
01194 
01195   l = &(wchmm->lmcache);
01196 
01197   if (wchmm->ngram) {
01198     last_nword = wchmm->winfo->wton[lastword];
01199   } else {
01200     last_nword = lastword;
01201   }
01202 
01203 #ifdef HASH_CACHE_IW
01204   x = last_nword % l->iw_cache_num;
01205   if (l->iw_lw_cache[x] == last_nword) { /* cache hit */
01206     return(l->iw_sc_cache[x]);
01207   }
01208 #else  /* full cache */
01209   if (l->iw_sc_cache[last_nword] != NULL) { /* cache hit */
01210     return(l->iw_sc_cache[last_nword]);
01211   }
01212   x = last_nword;
01213   /* cache mis-hit, calc probs and cache them as new */
01214 #endif
01215   /* allocate cache memory */
01216   if (l->iw_sc_cache[x] == NULL) {
01217 #ifdef UNIGRAM_FACTORING
01218     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01219 #else
01220     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01221 #endif
01222     if (l->iw_sc_cache[x] == NULL) { /* malloc failed */
01223       /* clear existing cache, and retry */
01224       max_successor_prob_iw_free(wchmm);
01225       jlog("STAT: inter-word LM cache (%dMB) rehashed\n",
01226                (l->iw_cache_num * 
01227 #ifdef UNIGRAM_FACTORING
01228                 wchmm->isolatenum
01229 #else
01230                 wchmm->startnum
01231 #endif
01232                 ) / 1000 * sizeof(LOGPROB) / 1000);
01233 #ifdef UNIGRAM_FACTORING
01234       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01235 #else
01236       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01237 #endif
01238       if (l->iw_sc_cache[x] == NULL) { /* malloc failed again? */
01239         j_internal_error("max_successor_prob_iw: cannot malloc\n");
01240       }
01241     }
01242   }
01243 
01244   /* calc prob for all startid */
01245 #ifdef UNIGRAM_FACTORING
01246   for (j=0;j<wchmm->startnum;j++) {
01247     i = wchmm->start2isolate[j];
01248     if (i == -1) continue;
01249     node = wchmm->startnode[j];
01250     if (wchmm->state[node].scid <= 0) {
01251       /* should not happen!!! below is just for debugging */
01252       j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n");
01253     } else {
01254       w = (wchmm->sclist[wchmm->state[node].scid])->word;
01255       if (wchmm->ngram) {
01256         p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
01257 #ifdef CLASS_NGRAM
01258           + wchmm->winfo->cprob[w]
01259 #endif
01260           ;
01261       } else {
01262         p = LOG_ZERO;
01263       }
01264       if (wchmm->lmvar == LM_NGRAM_USER) {
01265         p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p);
01266       }
01267       l->iw_sc_cache[x][i] = p;
01268     }
01269   }
01270 #else  /* ~UNIGRAM_FACTORING */
01271   for (i=0;i<wchmm->startnum;i++) {
01272     node = wchmm->startnode[i];
01273     l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node);
01274   }
01275 #endif
01276 #ifdef HASH_CACHE_IW
01277   l->iw_lw_cache[x] = last_nword;
01278 #endif
01279 
01280   return(l->iw_sc_cache[x]);
01281 }
01282 
01332 boolean
01333 can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01334 {
01335   int lc;
01336   S_CELL *sc;
01337 
01338   /* return TRUE if at least one subtree word can connect */
01339 
01340   if (lastword == WORD_INVALID) { /* case at beginning-of-word */
01341     for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
01342       if (dfa_cp_begin(wchmm->dfa, sc->word) == TRUE) return(TRUE);
01343     }
01344     return(FALSE);
01345   } else {
01346     lc = wchmm->winfo->wton[lastword];
01347     for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
01348       if (dfa_cp(wchmm->dfa, lc, sc->word) == TRUE) return(TRUE);
01349     }
01350     return(FALSE);
01351   }
01352 }
01353 
01354 /* end of file */