Julius 4.2
libjulius/src/factoring_sub.c
説明を見る。
00001 
00159 /*
00160  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00161  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00162  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00163  * All rights reserved
00164  */
00165 
00166 #include <julius/julius.h>
00167 
00168 /*----------------------------------------------------------------------*/
00169 
00186 void
00187 make_successor_list(WCHMM_INFO *wchmm)
00188 {
00189   int node;
00190   WORD_ID w;
00191   int i, j;
00192   int s;
00193   WORD_ID *scnumlist;
00194   WORD_ID *sclen;
00195   int scnum, new_scnum;
00196   int *scidmap;
00197   boolean *freemark;
00198 
00199   jlog("STAT: make successor lists for factoring\n");
00200 
00201   /* 1. initialize */
00202   /* initialize node->sclist index on wchmm tree */
00203   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00204 
00205   /* parse the tree to assign unique scid and get the maximum size of
00206      successor list */
00207   scnum = 1;
00208   for (w=0;w<wchmm->winfo->num;w++) {
00209     for (i=0;i<wchmm->winfo->wlen[w];i++) {
00210       if (wchmm->state[wchmm->offset[w][i]].scid == 0) {
00211         wchmm->state[wchmm->offset[w][i]].scid = scnum;
00212         scnum++;
00213       }
00214     }
00215     if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00216       wchmm->state[wchmm->wordend[w]].scid = scnum;
00217       scnum++;
00218     }
00219   }
00220   if (debug2_flag) {
00221     jlog("DEBUG: initial successor list size = %d\n", scnum);
00222   }
00223 
00224   /* 2. count number of each successor */
00225   sclen = (WORD_ID *)mymalloc(sizeof(WORD_ID) * scnum);
00226   for (i=1;i<scnum;i++) sclen[i] = 0;
00227   for (w=0;w<wchmm->winfo->num;w++) {
00228     for (i=0;i<wchmm->winfo->wlen[w];i++) {
00229       sclen[wchmm->state[wchmm->offset[w][i]].scid]++;
00230     }
00231     sclen[wchmm->state[wchmm->wordend[w]].scid]++;
00232   }
00233 
00234   /* 3. delete bogus successor lists */
00235   freemark = (boolean *)mymalloc(sizeof(boolean) * scnum);
00236   for (i=1;i<scnum;i++) freemark[i] = FALSE;
00237   for (w=0;w<wchmm->winfo->num;w++) {
00238     node = wchmm->wordend[w];   /* begin from the word end node */
00239     i = wchmm->winfo->wlen[w]-1;
00240     while (i >= 0) {            /* for each phoneme start node */
00241       if (node == wchmm->offset[w][i]) {
00242         /* word with only 1 state: skip */
00243         i--;
00244         continue;
00245       }
00246       if (wchmm->state[node].scid == 0) break; /* already parsed */
00247       if (sclen[wchmm->state[node].scid] == sclen[wchmm->state[wchmm->offset[w][i]].scid]) {
00248         freemark[wchmm->state[node].scid] = TRUE;       /* mark the node */
00249         wchmm->state[node].scid = 0;
00250       }
00251       node = wchmm->offset[w][i];
00252       i--;
00253     }
00254   }
00255   /* build compaction map */
00256   scidmap = (int *)mymalloc(sizeof(int) * scnum);
00257   scidmap[0] = 0;
00258   j = 1;
00259   for (i=1;i<scnum;i++) {
00260     if (freemark[i]) {
00261       scidmap[i] = 0;
00262     } else {
00263       scidmap[i] = j;
00264       j++;
00265     }
00266   }
00267   new_scnum = j;
00268   if (debug2_flag) {
00269     jlog("DEBUG: compacted successor list size = %d\n", new_scnum);
00270   }
00271 
00272   /* 4. rewrite scid and do compaction for new sclen */
00273   for (node=0;node<wchmm->n;node++) {
00274     if (wchmm->state[node].scid > 0) {
00275       wchmm->state[node].scid = scidmap[wchmm->state[node].scid];
00276     }
00277   }
00278   wchmm->sclen = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * new_scnum, &(wchmm->malloc_root));
00279   for (i=1;i<scnum;i++) {
00280     if (scidmap[i] != 0) wchmm->sclen[scidmap[i]] = sclen[i];
00281   }
00282   wchmm->scnum = new_scnum;
00283 
00284   free(scidmap);
00285   free(freemark);
00286   free(sclen);
00287 
00288   /* 5. now index completed, make word list for each list */
00289   wchmm->sclist = (WORD_ID **)mybmalloc2(sizeof(WORD_ID *) * wchmm->scnum, &(wchmm->malloc_root));
00290   scnumlist = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->scnum);
00291   for(i=1;i<wchmm->scnum;i++) {
00292     wchmm->sclist[i] = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->sclen[i], &(wchmm->malloc_root));
00293     scnumlist[i] = 0;
00294   }
00295   {
00296     int scid;
00297     for (w=0;w<wchmm->winfo->num;w++) {
00298       for (i=0;i<wchmm->winfo->wlen[w];i++) {
00299         scid = wchmm->state[wchmm->offset[w][i]].scid;
00300         if (scid != 0) {
00301           wchmm->sclist[scid][scnumlist[scid]] = w;
00302           scnumlist[scid]++;
00303           if (scnumlist[scid] > wchmm->sclen[scid]) {
00304             jlog("hogohohoho\n");
00305             exit(1);
00306           }
00307         }
00308       }
00309       /* at word end */
00310       scid = wchmm->state[wchmm->wordend[w]].scid;
00311       if (scid != 0) {
00312         wchmm->sclist[scid][scnumlist[scid]] = w;
00313         scnumlist[scid]++;
00314           if (scnumlist[scid] > wchmm->sclen[scid]) {
00315             jlog("hogohohoho\n");
00316             exit(1);
00317           }
00318       }
00319     }
00320   }
00321   free(scnumlist);
00322 
00323   jlog("STAT: done\n");
00324 }
00325 
00326 #ifdef UNIGRAM_FACTORING
00327 
00344 void
00345 make_successor_list_unigram_factoring(WCHMM_INFO *wchmm)
00346 {
00347 
00348 #ifndef FAST_FACTOR1_SUCCESSOR_LIST
00349 
00350   /* old way */
00351   make_successor_list(wchmm);
00352   calc_all_unigram_factoring_values(wchmm);
00353 
00354 #else  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
00355 
00356   /* new way */
00357 
00358   int node, node2;
00359   WORD_ID w, w2;
00360   int i, j, n, f;
00361   int s;
00362   LOGPROB tmpprob;
00363   WORD_ID *mtmp;
00364 
00365   jlog("STAT: make successor lists for unigram factoring\n");
00366 
00367   /* 1. initialize */
00368   /* initialize node->sclist index on wchmm tree */
00369   for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00370 
00371   /* in unigram factoring, number of successor = vocabulary size */
00372   wchmm->scnum = wchmm->winfo->num + 1;
00373   if (debug2_flag) {
00374     jlog("DEBUG: successor list size = %d\n", wchmm->scnum);
00375   }
00376 
00377   /* allocate successor list for 1-gram factoring */
00378   wchmm->scword = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->scnum, &(wchmm->malloc_root));
00379 
00380   /* 2. make successor list, and count needed fscore num */
00381   f = 1;
00382   s = 1;
00383   for (w=0;w<wchmm->winfo->num;w++) {
00384     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00385       if (i < wchmm->winfo->wlen[w]) {
00386         node = wchmm->offset[w][i];
00387       } else {
00388         node = wchmm->wordend[w];
00389       }
00390       if (wchmm->state[node].scid == 0) { /* not assigned */
00391         /* new node found, assign new and exit here */
00392         wchmm->state[node].scid = s;
00393         wchmm->scword[s] = w;
00394         s++;
00395         if (s > wchmm->scnum) {
00396           jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n");
00397           return;
00398         }
00399         break;
00400       } else if (wchmm->state[node].scid > 0) {
00401         /* that node has successor */
00402         /* move it to the current first isolated node in that word */
00403         w2 = wchmm->scword[wchmm->state[node].scid];
00404         for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) {
00405           if (j < wchmm->winfo->wlen[w2]) {
00406             node2 = wchmm->offset[w2][j];
00407           } else {
00408             node2 = wchmm->wordend[w2];
00409           }
00410           if (wchmm->state[node2].scid == 0) { /* not assigned */
00411             /* move successor to there */
00412             wchmm->state[node2].scid = wchmm->state[node].scid;
00413             break;
00414           }
00415         }
00416         if (j >= wchmm->winfo->wlen[w2] + 1) {
00417           /* not found? */
00418           jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2);
00419           return;
00420         }
00421         /* make current node as fscore node */
00422         n = f++;
00423         wchmm->state[node].scid = -n;
00424         /* not compute unigram factoring value yet */
00425       }
00426 
00427     }
00428   }
00429 
00430   /* 2. allocate fscore buffer */
00431   wchmm->fsnum = f;
00432   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00433   for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO;
00434 
00435   /* 3. parse again to assign fscore values */
00436   for (w=0;w<wchmm->winfo->num;w++) {
00437     for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00438       if (i < wchmm->winfo->wlen[w]) {
00439         node = wchmm->offset[w][i];
00440       } else {
00441         node = wchmm->wordend[w];
00442       }
00443       if (wchmm->state[node].scid < 0) {
00444         /* update max */
00445         if (wchmm->ngram) {
00446           tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w])
00447 #ifdef CLASS_NGRAM
00448             + wchmm->winfo->cprob[w]
00449 #endif
00450             ;
00451         } else {
00452           tmpprob = LOG_ZERO;
00453         }
00454         if (wchmm->lmvar == LM_NGRAM_USER) {
00455           tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob);
00456         }
00457         n = - wchmm->state[node].scid;
00458         if (wchmm->fscore[n] < tmpprob) {
00459           wchmm->fscore[n] = tmpprob;
00460         }
00461       }
00462 
00463     }
00464   }
00465 
00466 #endif  /* ~FAST_FACTOR1_SUCCESSOR_LIST */
00467 
00468   jlog("STAT: done\n");
00469 }
00470 
00471 #endif /* UNIGRAM_FACTORING */
00472 
00473 
00492 void
00493 adjust_sc_index(WCHMM_INFO *wchmm)
00494 {
00495   WORD_ID w;
00496   int i,j,k;
00497   HMM_Logical *ltmp;
00498   int ltmp_state_num;
00499   int ato;
00500   LOGPROB prob;
00501   int node, scid;
00502   A_CELL2 *ac;
00503   
00504   /* duplicate scid for HMMs with more than one arc from initial state */
00505   for(w=0;w<wchmm->winfo->num;w++) {
00506     for(k=0;k<wchmm->winfo->wlen[w];k++) {
00507       node = wchmm->offset[w][k];
00508       scid = wchmm->state[node].scid;
00509       if (scid == 0) continue;
00510       ltmp = wchmm->winfo->wseq[w][k];
00511       ltmp_state_num = hmm_logical_state_num(ltmp);
00512       if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) {
00513         j = k + 1;
00514         if (j == wchmm->winfo->wlen[w]) {
00515           if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00516             jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00517             wchmm->state[wchmm->wordend[w]].scid = scid;
00518           }
00519         } else {
00520           if (wchmm->state[wchmm->offset[w][j]].scid == 0) {
00521             jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00522             wchmm->state[wchmm->offset[w][j]].scid = scid;
00523           }
00524         }
00525       }
00526       for(ato=1;ato<ltmp_state_num;ato++) {
00527         prob = (hmm_logical_trans(ltmp))->a[0][ato];
00528         if (prob != LOG_ZERO) {
00529           wchmm->state[node+ato-1].scid = scid;
00530         }
00531       }
00532     }
00533   }
00534 
00535   /* move scid and fscore on the head state to the head grammar state */
00536   for(i=0;i<wchmm->startnum;i++) {
00537     node = wchmm->startnode[i];
00538     if (wchmm->state[node].out.state != NULL) {
00539       j_internal_error("adjust_sc_index: outprob exist in word-head node??\n");
00540     }
00541     if (wchmm->next_a[node] != LOG_ZERO) {
00542       if (wchmm->state[node+1].scid != 0) {
00543         if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) {
00544           j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00545         }
00546         wchmm->state[node].scid = wchmm->state[node+1].scid;
00547         wchmm->state[node+1].scid = 0;
00548       }
00549     }
00550     for(ac=wchmm->ac[node];ac;ac=ac->next) {
00551       for(k=0;k<ac->n;k++) {
00552         if (wchmm->state[ac->arc[k]].scid != 0) {
00553           if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) {
00554             j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00555           }
00556           wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid;
00557           wchmm->state[ac->arc[k]].scid = 0;
00558         }
00559       }
00560     }
00561   }
00562 }
00563 
00564 
00565 /* -------------------------------------------------------------------- */
00566 /* factoring computation */
00567 
00586 void
00587 max_successor_cache_init(WCHMM_INFO *wchmm)
00588 {
00589   int i;
00590   LM_PROB_CACHE *l;
00591   WORD_ID wnum;
00592 
00593   /* for word-internal */
00594   l = &(wchmm->lmcache);
00595 
00596   l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum);
00597   l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum);
00598   for (i=0;i<wchmm->scnum;i++) {
00599     l->lastwcache[i] = WORD_INVALID;
00600   }
00601   /* for cross-word */
00602   if (wchmm->ngram) {
00603     wnum = wchmm->ngram->max_word_num;
00604   } else {
00605     wnum = wchmm->winfo->num;
00606   }
00607 #ifdef HASH_CACHE_IW
00608   l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100;
00609   if (l->iw_cache_num < 10) l->iw_cache_num = 10;
00610 #else
00611   l->iw_cache_num = wnum;
00612 #endif /* HASH_CACHE_IW */
00613   l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num);
00614   for (i=0;i<l->iw_cache_num;i++) {
00615     l->iw_sc_cache[i] = NULL;
00616   }
00617 #ifdef HASH_CACHE_IW
00618   l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num);
00619   for (i=0;i<l->iw_cache_num;i++) {
00620     l->iw_lw_cache[i] = WORD_INVALID;
00621   }
00622 #endif
00623 }
00624 
00637 static void
00638 max_successor_prob_iw_free(WCHMM_INFO *wchmm)
00639 {
00640   int i;
00641   LM_PROB_CACHE *l;
00642   l = &(wchmm->lmcache);
00643   for (i=0;i<l->iw_cache_num;i++) {
00644     if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]);
00645     l->iw_sc_cache[i] = NULL;
00646   }
00647 }
00648 
00665 void
00666 max_successor_cache_free(WCHMM_INFO *wchmm)
00667 {
00668   free(wchmm->lmcache.probcache);
00669   free(wchmm->lmcache.lastwcache);
00670   max_successor_prob_iw_free(wchmm);
00671   free(wchmm->lmcache.iw_sc_cache);
00672 #ifdef HASH_CACHE_IW
00673   free(wchmm->lmcache.iw_lw_cache);
00674 #endif
00675 }
00676 
00677 #ifdef UNIGRAM_FACTORING
00678 
00719 void
00720 make_iwcache_index(WCHMM_INFO *wchmm)
00721 {
00722   int i, node, num;
00723 
00724   wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum);
00725   num = 0;
00726   for(i=0;i<wchmm->startnum;i++) {
00727     node = wchmm->startnode[i];
00728     if (wchmm->state[node].scid >= 0) { /* not a factoring node (isolated node, has no 1-gram factoring value) */
00729       wchmm->start2isolate[i] = num;
00730       num++;
00731     } else {                    /* factoring node (shared) */
00732       wchmm->start2isolate[i] = -1;
00733     }
00734   }
00735   wchmm->isolatenum = num;
00736 }
00737 
00738 #ifndef FAST_FACTOR1_SUCCESSOR_LIST
00739 
00784 void
00785 calc_all_unigram_factoring_values(WCHMM_INFO *wchmm)
00786 {
00787   S_CELL *sc, *sctmp;
00788   LOGPROB tmpprob, maxprob;
00789   int i, n;
00790 
00791   /* count needed number of 1-gram factoring nodes */
00792   n = 0;
00793   for (i=1;i<wchmm->scnum;i++) {
00794     sc = wchmm->sclist[i];
00795     if (sc == NULL) {
00796       j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n");
00797     }
00798     if (sc->next != NULL) {
00799       /* more than two words, so compute maximum 1-gram probability */
00800       n++;
00801     }
00802   }
00803   wchmm->fsnum = n + 1;
00804   /* allocate area */
00805   wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00806   /* assign values */
00807   n = 1;
00808   for (i=1;i<wchmm->scnum;i++) {
00809     sc = wchmm->sclist[i];
00810     if (sc->next != NULL) {
00811       maxprob = LOG_ZERO;
00812       for (sctmp = sc; sctmp; sctmp = sctmp->next) {
00813         if (wchmm->ngram) {
00814           tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word])
00815 #ifdef CLASS_NGRAM
00816             + wchmm->winfo->cprob[sctmp->word] 
00817 #endif
00818             ;
00819         } else {
00820           tmpprob = LOG_ZERO;
00821         }
00822         if (wchmm->lmvar == LM_NGRAM_USER) {
00823           tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob);
00824         }
00825         if (maxprob < tmpprob) maxprob = tmpprob;
00826       }
00827       wchmm->fscore[n] = maxprob;
00828       free_successor(wchmm, i);
00829       wchmm->state[wchmm->sclist2node[i]].scid = - n;
00830       n++;
00831     }
00832   }
00833   /* garbage collection of factored sclist */
00834   compaction_successor(wchmm);
00835 }
00836 
00837 #endif
00838 
00839 #else  /* ~UNIGRAM_FACTORING */
00840 
00863 static LOGPROB
00864 calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
00865 {
00866   LOGPROB tmpprob, maxprob;
00867   WORD_ID lw, w;
00868   int i;
00869   int scid;
00870 
00871   maxprob = LOG_ZERO;
00872   if (wchmm->ngram) {
00873     lw = wchmm->winfo->wton[lastword];
00874   }
00875 
00876   scid = wchmm->state[node].scid;
00877 
00878   for (i = 0; i < wchmm->sclen[scid]; i++) {
00879     w = wchmm->sclist[scid][i];
00880     if (wchmm->ngram) {
00881       tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[w])
00882 #ifdef CLASS_NGRAM
00883         + wchmm->winfo->cprob[w]
00884 #endif
00885         ;
00886     } else {
00887       tmpprob = LOG_ZERO;
00888     }
00889     if (wchmm->lmvar == LM_NGRAM_USER) {
00890       tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, tmpprob);
00891     }
00892     if (maxprob < tmpprob) maxprob = tmpprob;
00893   }
00894 
00895   return(maxprob);
00896 }
00897 
00898 #endif  /* ~UNIGRAM_FACTORING */
00899 
00942 LOGPROB
00943 max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
00944 {
00945   LOGPROB maxprob;
00946   WORD_ID last_nword, w;
00947   int scid;
00948   LM_PROB_CACHE *l;
00949 
00950   l = &(wchmm->lmcache);
00951 
00952   if (lastword != WORD_INVALID) { /* return nothing if no previous word */
00953     if (wchmm->ngram) {
00954       last_nword = wchmm->winfo->wton[lastword];
00955     } else {
00956       last_nword = lastword;
00957     }
00958     scid = wchmm->state[node].scid;
00959 #ifdef UNIGRAM_FACTORING
00960     if (scid < 0) {
00961       /* return 1-gram factoring value already calced */
00962       return(wchmm->fscore[(- scid)]);
00963     } else {
00964       /* this node has only one successor */
00965       /* return precise 2-gram score */
00966       if (last_nword != l->lastwcache[scid]) {
00967         /* calc and cache */
00968         w = wchmm->scword[scid];
00969         if (wchmm->ngram) {
00970           maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
00971 #ifdef CLASS_NGRAM
00972             + wchmm->winfo->cprob[w]
00973 #endif
00974             ;
00975         } else {
00976           maxprob = LOG_ZERO;
00977         }
00978         if (wchmm->lmvar == LM_NGRAM_USER) {
00979           maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob);
00980         }
00981         l->lastwcache[scid] = last_nword;
00982         l->probcache[scid] = maxprob;
00983         return(maxprob);
00984       } else {
00985         /* return cached */
00986         return (l->probcache[scid]);
00987       }
00988     }
00989 #else  /* UNIGRAM_FACTORING */
00990     /* 2-gram */
00991     if (last_nword != l->lastwcache[scid]) {
00992       maxprob = calc_successor_prob(wchmm, lastword, node);
00993       /* store to cache */
00994       l->lastwcache[scid] = last_nword;
00995       l->probcache[scid] = maxprob;
00996       return(maxprob);
00997     } else {
00998       return (l->probcache[scid]);
00999     }
01000 #endif /* UNIGRAM_FACTORING */
01001   } else {
01002     return(0.0);
01003 #if 0
01004     maxprob = LOG_ZERO;
01005     for (sc=wchmm->state[node].sc;sc;sc=sc->next) {
01006       tmpprob = uni_prob(wchmm->ngram, sc->word);
01007       if (maxprob < tmpprob) maxprob = tmpprob;
01008     }
01009     return(maxprob);
01010 #endif
01011   }
01012 
01013 }
01014 
01049 LOGPROB *
01050 max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword)
01051 {
01052   int i, j, x, node;
01053   int last_nword;
01054   WORD_ID w;
01055   LM_PROB_CACHE *l;
01056   LOGPROB p;
01057 
01058   l = &(wchmm->lmcache);
01059 
01060   if (wchmm->ngram) {
01061     last_nword = wchmm->winfo->wton[lastword];
01062   } else {
01063     last_nword = lastword;
01064   }
01065 
01066 #ifdef HASH_CACHE_IW
01067   x = last_nword % l->iw_cache_num;
01068   if (l->iw_lw_cache[x] == last_nword) { /* cache hit */
01069     return(l->iw_sc_cache[x]);
01070   }
01071 #else  /* full cache */
01072   if (l->iw_sc_cache[last_nword] != NULL) { /* cache hit */
01073     return(l->iw_sc_cache[last_nword]);
01074   }
01075   x = last_nword;
01076   /* cache mis-hit, calc probs and cache them as new */
01077 #endif
01078   /* allocate cache memory */
01079   if (l->iw_sc_cache[x] == NULL) {
01080 #ifdef UNIGRAM_FACTORING
01081     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01082 #else
01083     l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01084 #endif
01085     if (l->iw_sc_cache[x] == NULL) { /* malloc failed */
01086       /* clear existing cache, and retry */
01087       max_successor_prob_iw_free(wchmm);
01088       jlog("STAT: inter-word LM cache (%dMB) rehashed\n",
01089                (l->iw_cache_num * 
01090 #ifdef UNIGRAM_FACTORING
01091                 wchmm->isolatenum
01092 #else
01093                 wchmm->startnum
01094 #endif
01095                 ) / 1000 * sizeof(LOGPROB) / 1000);
01096 #ifdef UNIGRAM_FACTORING
01097       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01098 #else
01099       l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01100 #endif
01101       if (l->iw_sc_cache[x] == NULL) { /* malloc failed again? */
01102         j_internal_error("max_successor_prob_iw: cannot malloc\n");
01103       }
01104     }
01105   }
01106 
01107   /* calc prob for all startid */
01108 #ifdef UNIGRAM_FACTORING
01109   for (j=0;j<wchmm->startnum;j++) {
01110     i = wchmm->start2isolate[j];
01111     if (i == -1) continue;
01112     node = wchmm->startnode[j];
01113     if (wchmm->state[node].scid <= 0) {
01114       /* should not happen!!! below is just for debugging */
01115       j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n");
01116     } else {
01117       w = wchmm->scword[wchmm->state[node].scid];
01118       if (wchmm->ngram) {
01119         p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
01120 #ifdef CLASS_NGRAM
01121           + wchmm->winfo->cprob[w]
01122 #endif
01123           ;
01124       } else {
01125         p = LOG_ZERO;
01126       }
01127       if (wchmm->lmvar == LM_NGRAM_USER) {
01128         p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p);
01129       }
01130       l->iw_sc_cache[x][i] = p;
01131     }
01132   }
01133 #else  /* ~UNIGRAM_FACTORING */
01134   for (i=0;i<wchmm->startnum;i++) {
01135     node = wchmm->startnode[i];
01136     l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node);
01137   }
01138 #endif
01139 #ifdef HASH_CACHE_IW
01140   l->iw_lw_cache[x] = last_nword;
01141 #endif
01142 
01143   return(l->iw_sc_cache[x]);
01144 }
01145 
01195 boolean
01196 can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01197 {
01198   int lc;
01199   int i;
01200   int s;
01201 
01202   /* return TRUE if at least one subtree word can connect */
01203 
01204   s = wchmm->state[node].scid;
01205 
01206   if (lastword == WORD_INVALID) { /* case at beginning-of-word */
01207     for (i = 0; i < wchmm->sclen[s]; i++) {
01208       if (dfa_cp_begin(wchmm->dfa, wchmm->sclist[s][i]) == TRUE) return(TRUE);
01209     }
01210     return(FALSE);
01211   } else {
01212     lc = wchmm->winfo->wton[lastword];
01213     for (i = 0; i < wchmm->sclen[s]; i++) {
01214       if (dfa_cp(wchmm->dfa, lc, wchmm->sclist[s][i]) == TRUE) return(TRUE);
01215     }
01216     return(FALSE);
01217   }
01218 }
01219 
01220 /* end of file */