Julius: libjulius/src/wchmm.c ソースファイル

Julius 4.1.5
00001 
00037 /*
00038  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00039  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00040  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00041  * All rights reserved
00042  */
00043 
00044 /* wchmm = word conjunction HMM = lexicon tree */
00045 
00046 #include <julius/julius.h>
00047 
00048 
00049 #define WCHMM_SIZE_CHECK                ///< If defined, do wchmm size estimation (for debug only)
00050 
00051 /**************************************************************/
00052 /*********** Initialization of tree lexicon *******************/
00053 /**************************************************************/
00054 
00069 WCHMM_INFO *
00070 wchmm_new()
00071 {
00072   WCHMM_INFO *w;
00073   w = (WCHMM_INFO *)mymalloc(sizeof(WCHMM_INFO));
00074   w->lmtype = LM_UNDEF;
00075   w->lmvar  = LM_UNDEF;
00076   w->ngram = NULL;
00077   w->dfa = NULL;
00078   w->winfo = NULL;
00079   w->malloc_root = NULL;
00080 #ifdef PASS1_IWCD
00081   w->lcdset_category_root = NULL;
00082   w->lcdset_mroot = NULL;
00083 #endif /* PASS1_IWCD */
00084   w->wrk.out_from_len = 0;
00085   /* reset user function entry point */
00086   w->uni_prob_user = NULL;
00087   w->bi_prob_user = NULL;
00088   return w;
00089 }
00090 
00103 static void
00104 wchmm_init(WCHMM_INFO *wchmm)
00105 {
00106   /* the resulting tree size is typically half of total state num */
00107   wchmm->maxwcn = wchmm->winfo->totalstatenum / 2;
00108   wchmm->state = (WCHMM_STATE *)mymalloc(sizeof(WCHMM_STATE)*wchmm->maxwcn);
00109   wchmm->self_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn);
00110   wchmm->next_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn);
00111   wchmm->ac = (A_CELL2 **)mymalloc(sizeof(A_CELL2 *)*wchmm->maxwcn);
00112   wchmm->stend = (WORD_ID *)mymalloc(sizeof(WORD_ID)*wchmm->maxwcn);
00113   wchmm->offset = (int **)mymalloc(sizeof(int *)*wchmm->winfo->num);
00114   wchmm->wordend = (int *)mymalloc(sizeof(int)*wchmm->winfo->num);
00115   wchmm->maxstartnum = STARTNODE_STEP;
00116   wchmm->startnode = (int *)mymalloc(sizeof(int)*STARTNODE_STEP);
00117   wchmm->startnum = 0;
00118   if (wchmm->category_tree) {
00119     wchmm->start2wid = (WORD_ID *)mymalloc(sizeof(WORD_ID)*STARTNODE_STEP);
00120   }
00121   if (wchmm->hmminfo->multipath) {
00122     wchmm->wordbegin = (int *)mymalloc(sizeof(int)*wchmm->winfo->num);
00123     wchmm->wrk.out_from = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn);
00124     wchmm->wrk.out_from_next = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn);
00125     wchmm->wrk.out_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn);
00126     wchmm->wrk.out_a_next = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn);
00127     wchmm->wrk.out_from_len = wchmm->winfo->maxwn;
00128   } else {
00129     wchmm->wordend_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->winfo->num);
00130   }
00131 #ifdef PASS1_IWCD
00132   wchmm->outstyle = (unsigned char *)mymalloc(sizeof(unsigned char)*wchmm->maxwcn);
00133 #endif
00134 #ifdef UNIGRAM_FACTORING
00135   wchmm->start2isolate = NULL;
00136   wchmm->isolatenum = 0;
00137 #endif
00138   if (!wchmm->category_tree) {
00139     wchmm->sclist = NULL;
00140     wchmm->sclist2node = NULL;
00141 #ifdef UNIGRAM_FACTORING
00142     wchmm->fscore = NULL;
00143 #endif
00144   }
00145 
00146   wchmm->n = 0;
00147 }
00148 
00161 static void
00162 wchmm_extend(WCHMM_INFO *wchmm)
00163 {
00164   /* practical value! */
00165   wchmm->maxwcn += wchmm->winfo->totalstatenum / 6;
00166   wchmm->state = (WCHMM_STATE *)myrealloc(wchmm->state, sizeof(WCHMM_STATE)*wchmm->maxwcn);
00167   wchmm->self_a = (LOGPROB *)myrealloc(wchmm->self_a, sizeof(LOGPROB)*wchmm->maxwcn);
00168   wchmm->next_a = (LOGPROB *)myrealloc(wchmm->next_a, sizeof(LOGPROB)*wchmm->maxwcn);
00169   wchmm->ac = (A_CELL2 **)myrealloc(wchmm->ac, sizeof(A_CELL2 *)*wchmm->maxwcn);
00170   wchmm->stend = (WORD_ID *)myrealloc(wchmm->stend, sizeof(WORD_ID)*wchmm->maxwcn);
00171 #ifdef PASS1_IWCD
00172   wchmm->outstyle = (unsigned char *)myrealloc(wchmm->outstyle, sizeof(unsigned char)*wchmm->maxwcn);
00173 #endif
00174 }
00175 
00188 static void
00189 wchmm_extend_startnode(WCHMM_INFO *wchmm)
00190 {
00191   wchmm->maxstartnum += STARTNODE_STEP;
00192   wchmm->startnode = (int *)myrealloc(wchmm->startnode, sizeof(int) * wchmm->maxstartnum);
00193   if (wchmm->category_tree) {
00194     wchmm->start2wid = (WORD_ID *)myrealloc(wchmm->start2wid, sizeof(WORD_ID) * wchmm->maxstartnum);
00195   }
00196 }
00197 
00212 void
00213 wchmm_free(WCHMM_INFO *w)
00214 {
00215   S_CELL *sc, *sctmp;
00216   int i;
00217   /* wchmm->state[i].ac malloced by mybmalloc2() */
00218   /* wchmm->offset[][] malloced by mybmalloc2() */
00219 #ifdef PASS1_IWCD
00220   /* LRC_INFO, RC_INFO in wchmm->state[i].outsty malloced by mybmalloc2() */
00221 #endif
00222   /* they all will be freed by a single mybfree2() call */
00223   mybfree2(&(w->malloc_root));
00224   if (!w->category_tree) {
00225     if (w->sclist != NULL) {
00226       for(i=1;i<w->scnum;i++) {
00227         sc = w->sclist[i];
00228         while(sc) {
00229           sctmp = sc->next;
00230           free(sc);
00231           sc = sctmp;
00232         }
00233       }
00234       free(w->sclist);
00235     }
00236     if (w->sclist2node != NULL) free(w->sclist2node);
00237 #ifdef UNIGRAM_FACTORING
00238     if (w->fscore != NULL) free(w->fscore);
00239 #endif
00240   }
00241 #ifdef UNIGRAM_FACTORING
00242   if (w->start2isolate != NULL) free(w->start2isolate);
00243 #endif
00244 #ifdef PASS1_IWCD
00245   free(w->outstyle);
00246 #endif
00247   if (w->hmminfo->multipath) {
00248     free(w->wordbegin);
00249   } else {
00250     free(w->wordend_a);
00251   }
00252   if (w->category_tree) free(w->start2wid);
00253   free(w->startnode);
00254   free(w->wordend);
00255   free(w->offset);
00256   free(w->stend);
00257   free(w->ac);
00258   free(w->next_a);
00259   free(w->self_a);
00260   free(w->state);
00261 #ifdef PASS1_IWCD
00262   if (w->category_tree) lcdset_remove_with_category_all(w);
00263 #endif /* PASS1_IWCD */
00264   if (w->wrk.out_from_len != 0) {
00265     free(w->wrk.out_from);
00266     free(w->wrk.out_from_next);
00267     free(w->wrk.out_a);
00268     free(w->wrk.out_a_next);
00269     w->wrk.out_from_len = 0;
00270   }
00271   free(w);
00272 }
00273 
00274 
00275 /**************************************************************/
00276 /*********** Word sort functions for tree construction ********/
00277 /**************************************************************/
00278 
00297 static int
00298 compare_wseq(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo)
00299 {
00300   int len1, len2, n;
00301   int p=0;
00302   
00303   len1 = winfo->wlen[*widx1];
00304   len2 = winfo->wlen[*widx2];
00305 
00306   n=0;
00307   /*  while (n < len1 && n < len2 && (p = (int)winfo->wseq[*widx1][n] - (int)winfo->wseq[*widx2][n]) == 0 ) n++;*/
00308   while (n < len1 && n < len2 && (p = strcmp((winfo->wseq[*widx1][n])->name, (winfo->wseq[*widx2][n])->name)) == 0 ) n++;
00309   if (n < len1) {
00310     if (n < len2) {
00311       /* differ */
00312       return(p);
00313     } else {
00314       /* 2 is part of 1 */
00315       return(1);
00316     }
00317   } else {
00318     if (n < len2) {
00319       /* 1 is part of 2 */
00320       return(-1);
00321     } else {
00322       /* same */
00323       return(0);
00324     }
00325   }
00326 }
00327 
00346 static void
00347 wchmm_sort_idx_by_wseq(WORD_INFO *winfo, WORD_ID *windex, WORD_ID bgn, WORD_ID len)
00348 {
00349   qsort_reentrant(&(windex[bgn]), len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_wseq, winfo);
00350 }
00351 
00370 static int
00371 compare_category(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo)
00372 {
00373   int c1,c2;
00374   c1 = winfo->wton[*widx1];
00375   c2 = winfo->wton[*widx2];
00376   return(c1 - c2);
00377 }
00378 
00395 static void
00396 wchmm_sort_idx_by_category(WORD_INFO *winfo, WORD_ID *windex, WORD_ID len)
00397 {
00398   qsort_reentrant(windex, len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_category, winfo);
00399 }
00400   
00401 
00402 /**********************************************************************/
00403 /************** Subroutines to link part of words  ********************/
00404 /**********************************************************************/
00405 
00427 static int
00428 wchmm_check_match(WORD_INFO *winfo, int i, int j)
00429 {
00430   int k,tmplen;
00431 
00432   for (tmplen=0,k=0;k<winfo->wlen[i];k++) {
00433     if (k > winfo->wlen[j]-1)
00434       break;
00435     if (! (strmatch(winfo->wseq[i][k]->name, winfo->wseq[j][k]->name)))
00436       break;
00437     tmplen++;
00438   }
00439   return(tmplen);
00440 }
00441 
00454 static void
00455 acc_init(WCHMM_INFO *wchmm, int node)
00456 {
00457   wchmm->self_a[node] = LOG_ZERO;
00458   wchmm->next_a[node] = LOG_ZERO;
00459   wchmm->ac[node] = NULL;
00460 }
00461 
00478 static void
00479 add_ac(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc)
00480 {
00481   A_CELL2 *ac2;
00482 
00483   for(ac2=wchmm->ac[node];ac2;ac2=ac2->next) {
00484     if (ac2->n < A_CELL2_ALLOC_STEP) break;
00485   }
00486   if (ac2 == NULL) {
00487     ac2 = (A_CELL2 *)mybmalloc2(sizeof(A_CELL2), &(wchmm->malloc_root));
00488     ac2->n = 0;
00489     ac2->next = wchmm->ac[node];
00490     wchmm->ac[node] = ac2;
00491   }
00492   ac2->arc[ac2->n] = arc;
00493   ac2->a[ac2->n]   = a;
00494   ac2->n++;
00495 }
00496 
00515 static void
00516 add_wacc(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc)
00517 {
00518   if (arc == node) {
00519     wchmm->self_a[node] = a;
00520   } else if (arc == node + 1) {
00521     wchmm->next_a[node] = a;
00522   } else {
00523     add_ac(wchmm, node, a, arc);
00524   }
00525 }
00526 
00553 static void
00554 get_outtrans_list(WCHMM_INFO *wchmm, WORD_ID w, int pos, int *node, LOGPROB *a, int *num, int maxnum, boolean insert_sp)
00555 {
00556   HMM_Logical *ltmp;
00557   int states;
00558   int k;
00559   LOGPROB prob;
00560   int oldnum;
00561 
00562   if (pos < 0) {
00563     
00564     /* set the word-beginning node, and return */
00565     node[*num] = wchmm->wordbegin[w];
00566     a[*num] = 0.0;
00567     (*num)++;
00568     
00569   } else {
00570 
00571     ltmp = wchmm->winfo->wseq[w][pos];
00572     states = hmm_logical_state_num(ltmp);
00573 
00574     /* check initial->final state */
00575     if ((hmm_logical_trans(ltmp))->a[0][states-1] != LOG_ZERO) {
00576       /* recursive call for previous phone */
00577       oldnum = *num;
00578       get_outtrans_list(wchmm, w, pos-1, node, a, num, maxnum, FALSE); /* previous phone should not be an sp-inserted phone */
00579       /* add probability of the skip transition to all the previous ones */
00580       for(k=oldnum;k<*num;k++) {
00581         a[k] += (hmm_logical_trans(ltmp))->a[0][states-1];
00582       }
00583     }
00584     /* add to list the arcs from output state to final state */
00585     for (k = 1; k < states - 1; k++) {
00586       prob = (hmm_logical_trans(ltmp))->a[k][states-1];
00587       if (prob != LOG_ZERO) {
00588         if (*num >= maxnum) {
00589           j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum);
00590         }
00591         node[*num] = wchmm->offset[w][pos] + k - 1;
00592         a[*num] = prob;
00593         (*num)++;
00594       }
00595     }
00596     /* for -iwsp, add outgoing arc from the tail sp model
00597        only if need_sp == TRUE.
00598        need_sp should be TRUE only when the connecting [pos] phone is also an end phone of the to-be-added word (i.e. homophone word)
00599      */
00600     /*  */
00601     if (insert_sp) {
00602       /* consider sp */
00603       for (k = 1; k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {
00604         prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][hmm_logical_state_num(wchmm->hmminfo->sp)-1];
00605         if (prob != LOG_ZERO) {
00606           if (*num >= maxnum) {
00607             j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum);
00608           }
00609           node[*num] = wchmm->offset[w][pos] + (states - 2) + k - 1;
00610           a[*num] = prob;
00611           (*num)++;
00612         }
00613       }
00614     }
00615   }
00616   /*printf("   %d(%s)-%d:\"%s\", num=%d\n", w, wchmm->winfo->woutput[w], pos,
00617     (pos < 0) ? "BGN" : wchmm->winfo->wseq[w][pos]->name, *num);*/
00618   return;
00619 }  
00620 
00639 static void
00640 wchmm_link_hmm(WCHMM_INFO *wchmm, int from_node, int to_node, HTK_HMM_Trans *tinfo)
00641 {     
00642   A_CELL2 *actmp;
00643   LOGPROB a;
00644   int i, j;
00645   boolean tflag;
00646 
00647   /* get transition probability to outer state in tinfo */
00648   for(i = tinfo->statenum - 2; i >= 0; i--) {
00649     if ((a = tinfo->a[i][tinfo->statenum-1]) != LOG_ZERO) { /* found */
00650       /* check if the arc already exist */
00651       tflag = FALSE;
00652       if (to_node == from_node && wchmm->self_a[from_node] == a) {
00653         tflag = TRUE;
00654       } else if (to_node == from_node + 1 && wchmm->next_a[from_node] == a) {
00655         tflag = TRUE;
00656       } else {
00657         for (actmp = wchmm->ac[from_node]; actmp; actmp = actmp->next) {
00658           for(j=0;j<actmp->n;j++) {
00659             if (actmp->arc[j] == to_node && actmp->a[j] == a) {
00660               tflag = TRUE;
00661               break;
00662             }
00663           }
00664           if (tflag == TRUE) break;
00665         }
00666       }
00667       if (tflag) break;
00668       /* add the arc to wchmm */
00669       add_wacc(wchmm, from_node, a, to_node);
00670       return;                   /* exit function here */
00671     }
00672   }      
00673   j_internal_error("wchmm_link_hmm: No arc to endstate?\n");
00674 }
00675 
00696 static void
00697 wchmm_link_subword(WCHMM_INFO *wchmm, int from_word, int from_seq, int to_word, int to_seq)
00698 {     
00699   HMM_Logical *last;
00700   int lastp;
00701 
00702   last = wchmm->winfo->wseq[from_word][from_seq];
00703   lastp = wchmm->offset[from_word][from_seq] + hmm_logical_state_num(last)-2 -1;
00704   wchmm_link_hmm(wchmm, lastp, wchmm->offset[to_word][to_seq],
00705                  hmm_logical_trans(last));
00706 }
00707 
00708 /**************************************************************/
00709 /******** homophone processing: duplicating leaf nodes ********/
00710 /**************************************************************/
00711 
00751 static void
00752 wchmm_duplicate_state(WCHMM_INFO *wchmm, int node, int word) /* source node, new word */
00753 {
00754   int j, n;
00755   int n_src, n_prev;
00756   A_CELL2       *ac;
00757   HMM_Logical *lastphone;
00758 
00759   /* 1 state will newly created: expand tree if needed */
00760   if (wchmm->n + 1 >= wchmm->maxwcn) {
00761     wchmm_extend(wchmm);
00762   }
00763   /* n: the target new node to which 'node' is copied */
00764   n = wchmm->n;
00765 
00766   n_src = node;
00767 
00768   /* copy output probability info */
00769 #ifdef PASS1_IWCD
00770   {
00771     RC_INFO *rcnew;
00772     LRC_INFO *lrcnew;
00773     wchmm->outstyle[n] = wchmm->outstyle[n_src];
00774     if (wchmm->outstyle[n] == AS_RSET) {
00775       /* duplicate RC_INFO because it has its own cache */
00776       rcnew = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root));
00777       memcpy(rcnew, wchmm->state[n_src].out.rset, sizeof(RC_INFO));
00778       wchmm->state[n].out.rset = rcnew;
00779     } else if (wchmm->outstyle[n] == AS_LRSET) {
00780       /* duplicate LRC_INFO because it has its own cache */
00781       lrcnew = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root));
00782       memcpy(lrcnew, wchmm->state[n_src].out.lrset, sizeof(LRC_INFO));
00783       wchmm->state[n].out.lrset = lrcnew;
00784     } else {
00785       /* share same info, simply copy the pointer */
00786       memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(ACOUSTIC_SPEC));
00787     }
00788   }
00789 #else  /* ~PASS1_IWCD */
00790   memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(HTK_HMM_State *));
00791 #endif
00792 
00793   lastphone = wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-1];
00794   acc_init(wchmm, n);
00795 
00796   /* add self transition arc */
00797   wchmm->self_a[n] = wchmm->self_a[n_src];
00798   
00799   /* copy transition arcs whose destination is the source node to new node */
00800   if (hmm_logical_state_num(lastphone) == 3) { /* = 1 state */
00801     /* phone with only 1 state should be treated carefully */
00802     if (wchmm->winfo->wlen[word] == 1) { /* word consists of only this phone */
00803       /* no arcs need to be copied: this is also a start node of a word */
00804       wchmm->offset[word][0] = n;
00805       /* index the new word-beginning node as startnode (old ststart) */
00806       if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) {
00807         wchmm->startnode[wchmm->startnum] = n;
00808         if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word;
00809         /* expand data area if necessary */
00810         if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm);
00811       }
00812     } else {
00813       /* copy arcs from the last state of the previous phone */
00814       n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-2]
00815         + hmm_logical_state_num(wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-2]) - 3;
00816       if(n_src == n_prev + 1) {
00817         add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n);
00818       } else {
00819         for(ac=wchmm->ac[n_prev];ac;ac=ac->next) {
00820           for(j=0;j<ac->n;j++) {
00821             if (ac->arc[j] == n_src) {
00822               add_wacc(wchmm, n_prev, ac->a[j], n);
00823             }
00824           }
00825         }
00826       }
00827       /* also update the last offset (== wordend in this case) */
00828       wchmm->offset[word][wchmm->winfo->wlen[word]-1] = n;
00829     }
00830   } else {                      /* phone with more than 2 states */
00831     /* copy arcs from/to the source node to new node */
00832     for (n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-1]; n_prev < n_src; n_prev++) {
00833       if (n_src == n_prev + 1) {
00834         add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n);
00835       } else {
00836         for(ac=wchmm->ac[n_prev];ac;ac=ac->next) {
00837           for(j=0;j<ac->n;j++) {
00838             if (ac->arc[j] == n_src) {
00839               add_wacc(wchmm, n_prev, ac->a[j], n);
00840             }
00841           }
00842         }
00843       }
00844       if (n_prev == n_src + 1) {
00845         add_wacc(wchmm, n, wchmm->next_a[n_src], n_prev);
00846       } else {
00847         for(ac=wchmm->ac[n_src];ac;ac=ac->next) {
00848           for(j=0;j<ac->n;j++) {
00849             if (ac->arc[j] == n_prev) {
00850               add_wacc(wchmm, n, ac->a[j], n_prev);
00851             }
00852           }
00853         }
00854       }
00855     }
00856   }
00857 
00858   /* map word <-> node */
00859   wchmm->stend[n]   = word;     /* 'n' is an end node of word 'word' */
00860   wchmm->wordend[word] = n;     /* the word end node of 'word' is 'n' */
00861 
00862   /* new state has been created: increment the size */
00863   wchmm->n++;
00864   
00865 }
00866 
00881 static int
00882 wchmm_duplicate_leafnode(WCHMM_INFO *wchmm)
00883 {
00884   int w, nlast, n, narc, narc_model;
00885   boolean *dupw;                /* node marker */
00886   A_CELL2 *actmp;
00887   int dupcount;
00888 
00889   dupcount = 0;
00890 
00891   nlast = wchmm->n;
00892   dupw = (boolean *)mymalloc(sizeof(boolean) * nlast);
00893   for(n=0;n<nlast;n++) dupw[n] = FALSE; /* initialize all marker */
00894 
00895   for (w=0;w<wchmm->winfo->num;w++) {
00896     n = wchmm->wordend[w];
00897     if (dupw[n]) {              /* if already marked (2nd time or later */
00898       wchmm_duplicate_state(wchmm, n, w); dupcount++; /* duplicate */
00899     } else {                    /* if not marked yet (1st time) */
00900       /* try to find an arc outside the word */
00901       {
00902         /* count number of model-internal arc from the last state */
00903         HMM_Logical *lastphone;
00904         HTK_HMM_Trans *tinfo;
00905         int laststate, i;
00906         lastphone = wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1];
00907         laststate = hmm_logical_state_num(lastphone) - 2;
00908         tinfo = hmm_logical_trans(lastphone);
00909         narc_model=0;
00910         for(i=1;i<hmm_logical_state_num(lastphone)-1;i++) {
00911           if (tinfo->a[laststate][i] != LOG_ZERO) narc_model++;
00912         }
00913         /* count number of actual arc from the last state in the tree */
00914         narc = 0;
00915         if (wchmm->self_a[n] != LOG_ZERO) narc++;
00916         if (wchmm->next_a[n] != LOG_ZERO) narc++;
00917         for(actmp=wchmm->ac[n];actmp;actmp=actmp->next) narc += actmp->n;
00918       }
00919       /* if both number does not match, it means it is not a single word tail */
00920       if (narc_model != narc) {
00921         /* word 'w' is embedded as part of other words at this node 'n' */
00922         /* duplicate this node now */
00923         wchmm_duplicate_state(wchmm, n, w); dupcount++;
00924         /* as new node has been assigned as word end node of word 'w',
00925            reset this source node as it is not the word end node */
00926         wchmm->stend[n] = WORD_INVALID;
00927       } else {
00928         /* no arc to other node found, it means it is a single word tail */
00929         /* as this is first time, only make sure that this node is word end of [w] */
00930         wchmm->stend[n] = w;
00931       }
00932       /* mark node 'n' */
00933       dupw[n] = TRUE;
00934     }
00935   }
00936   free(dupw);
00937 
00938   return(dupcount);
00939 }
00940 
00941 /**************************************************************/
00942 /*************** add a word to wchmm lexicon tree *************/
00943 /**************************************************************/
00944 
00969 static boolean
00970 wchmm_add_word(WCHMM_INFO *wchmm, int word, int matchlen, int matchword, boolean enable_iwsp)
00971 {
00972   boolean ok_p;
00973   int   j,k,n;
00974   int   add_head, add_tail, add_to;
00975   int   word_len, matchword_len;
00976   HMM_Logical *ltmp;
00977   int ato;
00978   LOGPROB prob;
00979   int ntmp;
00980   int ltmp_state_num;
00981 #ifdef PASS1_IWCD
00982   CD_Set *lcd = NULL;
00983 #endif
00984   int *out_from;
00985   int *out_from_next;
00986   LOGPROB *out_a;
00987   LOGPROB *out_a_next;
00988 
00989   
00990   /* for multipath handling */
00991   int out_num_prev, out_num_next;
00992   int kkk;
00993 
00994   ok_p = TRUE;
00995   if (wchmm->hmminfo->multipath) {
00996     out_from = wchmm->wrk.out_from;
00997     out_from_next = wchmm->wrk.out_from_next;
00998     out_a = wchmm->wrk.out_a;
00999     out_a_next = wchmm->wrk.out_a_next;
01000   }
01001   
01002 /* 
01003  *   if (matchlen > 0) {
01004  *     printf("--\n");
01005  *     put_voca(stdout, wchmm->winfo, word);
01006  *     put_voca(stdout, wchmm->winfo, matchword);
01007  *     printf("matchlen=%d\n", matchlen);
01008  *   }
01009  */
01010   
01011   /* variable abbreviations */
01012   n = wchmm->n;
01013   word_len      = wchmm->winfo->wlen[word];
01014   matchword_len = wchmm->winfo->wlen[matchword];
01015 
01016   /* malloc phone offset area */
01017   wchmm->offset[word] = (int *)mybmalloc2(sizeof(int)*word_len, &(wchmm->malloc_root));
01018 
01019   /* allocate unshared (new) part */
01020   add_head = matchlen;
01021   add_tail = word_len - 1;
01022   add_to   = matchlen - 1;
01023 
01024   if (wchmm->hmminfo->multipath) {
01025     /* make word-beginning node if needed */
01026     if (matchlen == 0) {
01027       /* create word-beginning node */
01028       wchmm->wordbegin[word] = n;
01029       wchmm->stend[n] = WORD_INVALID;
01030       acc_init(wchmm, n);
01031       wchmm->state[n].out.state = NULL;
01032       /* index the new word-beginning node as startnode (old ststart) */
01033       wchmm->startnode[wchmm->startnum] = n;
01034       if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word;
01035       /* expand data area if necessary */
01036       if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm);
01037       if (++n >= wchmm->maxwcn) wchmm_extend(wchmm);
01038     } else {
01039       wchmm->wordbegin[word] = wchmm->wordbegin[matchword];
01040     }
01041 
01042     /* now n is at beginning of output state */
01043 
01044     /* store the initial outgoing arcs to out_from[] and out_a[] */
01045     out_num_prev = 0;
01046     if (matchlen == 0) {
01047       /* set the word-beginning node */
01048       out_from[0] = wchmm->wordbegin[word];
01049       out_a[0] = 0.0;
01050       out_num_prev = 1;
01051     } else {
01052       /*printf("%d(%s)\n", word, wchmm->winfo->woutput[word]);*/
01053       /* on -iwsp, trailing sp is needed only when no phone will be created */
01054       get_outtrans_list(wchmm, matchword, add_to, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, (enable_iwsp && add_tail - add_head + 1 <= 0) ? TRUE : FALSE);
01055       /*printf("NUM=%d\n", out_num_prev);*/
01056     }
01057   } else { /*  end of multipath block */
01058     if (matchlen == 0) {
01059       if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) {
01060         /* index the new word-beginning node as startnode (old ststart) */
01061         wchmm->startnode[wchmm->startnum] = n;
01062         if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word;
01063         /* expand data area if necessary */
01064         if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm);
01065       }
01066     }
01067   }
01068   
01069   if (add_tail - add_head + 1 > 0) { /* there are new phones to be created */
01070       ntmp = n;
01071       for (j=add_head; j <= add_tail; j++) { /* for each new phones */
01072         ltmp = wchmm->winfo->wseq[word][j];
01073         ltmp_state_num = hmm_logical_state_num(ltmp);
01074 #ifdef PASS1_IWCD
01075         if (wchmm->ccd_flag) {
01076           /* in the triphone lexicon tree, the last phone of a word has
01077              left-context cdset */
01078           if (wchmm->winfo->wlen[word] > 1 && j == wchmm->winfo->wlen[word] - 1) {
01079             if (wchmm->category_tree) {
01080 #ifdef USE_OLD_IWCD
01081               lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name);
01082 #else
01083               lcd = lcdset_lookup_with_category(wchmm, ltmp, wchmm->winfo->wton[word]);
01084               if (lcd == NULL) {
01085                 /* no category-aware cdset found.  This is case when no word
01086                    can follow this word grammatically.
01087                    so fallback to normal state */
01088                 jlog("WARNING: wchmm: no lcdset found for [%s::%04d], fallback to [%s]\n", ltmp->name, wchmm->winfo->wton[word], ltmp->name);
01089                 lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name);
01090               }
01091 #endif
01092             } else {
01093               lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name);
01094             }
01095             if (lcd == NULL) {
01096               jlog("ERROR: wchmm: at word #%d: no lcdset found for [%s]\n", word, ltmp->name);
01097               ok_p = FALSE;
01098             }
01099           }
01100         }
01101 #endif /* PASS1_IWCD */
01102         for (k = 1; k < ltmp_state_num - 1; k++) { /* for each state in the phone */
01103           /* set state output prob info */
01104 #ifdef PASS1_IWCD
01105           if (wchmm->ccd_flag) {
01106             /* output info of triphones needs special handling */
01107             if (wchmm->winfo->wlen[word] == 1) { /* word with only 1 phone */
01108               wchmm->outstyle[ntmp] = AS_LRSET;
01109               wchmm->state[ntmp].out.lrset = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root));
01110               (wchmm->state[ntmp].out.lrset)->hmm       = ltmp;
01111               (wchmm->state[ntmp].out.lrset)->state_loc = k;
01112               if (wchmm->category_tree) {
01113                 (wchmm->state[ntmp].out.lrset)->category  = wchmm->winfo->wton[word];
01114               }
01115             } else if (j == 0) {        /* head phone of a word */
01116               wchmm->outstyle[ntmp] = AS_RSET;
01117               wchmm->state[ntmp].out.rset = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root));
01118               (wchmm->state[ntmp].out.rset)->hmm       = ltmp;
01119               (wchmm->state[ntmp].out.rset)->state_loc = k;
01120             } else if (j == wchmm->winfo->wlen[word] - 1) { /* last phone of a word */
01121               wchmm->outstyle[ntmp] = AS_LSET;
01122               wchmm->state[ntmp].out.lset = &(lcd->stateset[k]);
01123             } else {
01124               wchmm->outstyle[ntmp] = AS_STATE;
01125               if (ltmp->is_pseudo) {
01126                 jlog("WARNING: wchmm: word-internal phone should not be pseudo\n");
01127                 put_voca(stdout, wchmm->winfo, word);
01128                 ok_p = FALSE;
01129               }
01130               wchmm->state[ntmp].out.state = ltmp->body.defined->s[k];
01131             }
01132           } else {
01133             /* monophone */
01134             if (ltmp->is_pseudo) {
01135               j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n");
01136               put_voca(stdout, wchmm->winfo, word);
01137               ok_p = FALSE;
01138             }
01139             wchmm->outstyle[ntmp] = AS_STATE;
01140             wchmm->state[ntmp].out.state = ltmp->body.defined->s[k];
01141           }
01142 #else  /* ~PASS1_IWCD */
01143           if (ltmp->is_pseudo) {
01144             j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n");
01145             put_voca(stdout, wchmm->winfo, word);
01146             ok_p = FALSE;
01147           }
01148           wchmm->state[ntmp].out = ltmp->body.defined->s[k];
01149 #endif /* PASS1_IWCD */
01150           
01151           /* initialize other info */
01152           acc_init(wchmm, ntmp);
01153           wchmm->stend[ntmp] = WORD_INVALID;
01154           if (! wchmm->hmminfo->multipath) {
01155             /* make transition arc from HMM transition info */
01156             for (ato = 1; ato < ltmp_state_num; ato++) {
01157               prob = (hmm_logical_trans(ltmp))->a[k][ato];
01158               if (prob != LOG_ZERO) {
01159                   if (j == add_tail && k == ltmp_state_num - 2 && ato == ltmp_state_num - 1) {
01160                     /* arc outside new part will be handled later */
01161                   } else {
01162                     add_wacc(wchmm, ntmp, prob, ntmp + ato - k);
01163                   }
01164                 }
01165             }
01166           }
01167           
01168           ntmp++;
01169           /* expand wchmm if neccesary */
01170           if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);
01171         } /* end of state loop */
01172       } /* end of phone loop */
01173 
01174       if (wchmm->hmminfo->multipath) {
01175           
01176         /* On multipath version, the skip transition should be handled! */
01177       
01178         /* make transition arc from HMM transition info */
01179         ntmp = n;
01180         for (j = add_head; j <= add_tail; j++) {
01181           ltmp = wchmm->winfo->wseq[word][j];
01182           ltmp_state_num = hmm_logical_state_num(ltmp);
01183           out_num_next = 0;
01184           /* arc from initial state ... need arc expansion from previous phone */
01185           for (ato = 1; ato < ltmp_state_num; ato++) {
01186             prob = (hmm_logical_trans(ltmp))->a[0][ato];
01187             if (prob != LOG_ZERO) {
01188               /* expand arc from previous HMM */
01189               if (ato == ltmp_state_num - 1) {
01190                 /* to final state ... just register states for next expansion */
01191                 for(kkk=0; kkk<out_num_prev; kkk++) {
01192                   out_from_next[out_num_next] = out_from[kkk];
01193                   out_a_next[out_num_next] = out_a[kkk] + prob;
01194                   out_num_next++;
01195                 }
01196               } else {
01197                 for(kkk=0; kkk<out_num_prev; kkk++) {
01198                   add_wacc(wchmm, out_from[kkk], out_a[kkk] + prob, ntmp + ato - 1);
01199                 }
01200               }
01201             }
01202           } /* end of state loop */
01203           /* from outprob state */
01204           for(k = 1; k < ltmp_state_num - 1; k++) {
01205             for (ato = 1; ato < ltmp_state_num; ato++) {
01206               prob = (hmm_logical_trans(ltmp))->a[k][ato];
01207               if (prob != LOG_ZERO) {
01208                 if (ato == ltmp_state_num - 1) {
01209                   /* to final state ... register states for next expansion */
01210                   out_from_next[out_num_next] = ntmp;
01211                   out_a_next[out_num_next] = prob;
01212                   out_num_next++;
01213                 } else {
01214                   add_wacc(wchmm, ntmp, prob, ntmp + ato - k);
01215                 }
01216               }
01217             }
01218             ntmp++;
01219           } /* end of state loop */
01220           /* swap out list for next phone */
01221           for(kkk=0;kkk<out_num_next;kkk++) {
01222             out_from[kkk] = out_from_next[kkk];
01223             out_a[kkk] = out_a_next[kkk];
01224           }
01225           out_num_prev = out_num_next;
01226         }       /* end of phone loop */
01227       } /* end of multipath block */
01228       
01229   } /* new phone node creation loop for this word */
01230 
01231 
01232   /*************************************/
01233   /* Short Pause appending (multipath) */
01234   /*************************************/
01235   
01236   /* if -iwsp, add noise model to the end of word at ntmp */
01237   if (wchmm->hmminfo->multipath && enable_iwsp && add_tail - add_head + 1 > 0) { /* there are new phones to be created */
01238     int ntmp_bak;
01239     
01240     /* set short pause state info */
01241     ntmp_bak = ntmp;
01242     if (wchmm->hmminfo->sp->is_pseudo) {
01243       for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {
01244         wchmm->outstyle[ntmp] = AS_LSET;
01245         wchmm->state[ntmp].out.lset = &(wchmm->hmminfo->sp->body.pseudo->stateset[k]);
01246         acc_init(wchmm, ntmp);
01247         wchmm->stend[ntmp] = WORD_INVALID;
01248         ntmp++;
01249         if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);
01250       }
01251     } else {
01252       for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {
01253         wchmm->outstyle[ntmp] = AS_STATE;
01254         wchmm->state[ntmp].out.state = wchmm->hmminfo->sp->body.defined->s[k];
01255         acc_init(wchmm, ntmp);
01256         wchmm->stend[ntmp] = WORD_INVALID;
01257         ntmp++;
01258         if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm);
01259       }
01260     }
01261     ntmp = ntmp_bak;
01262     /* connect incoming arcs from previous phone */
01263     out_num_next = 0;
01264     for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) {
01265       prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[0][ato];
01266       if (prob != LOG_ZERO) {
01267         /* to control short pause insertion, transition probability toward
01268          the word-end short pause will be given a penalty */
01269         prob += wchmm->hmminfo->iwsp_penalty;
01270         if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) {
01271           /* model has a model skip transition, just inherit them to next */
01272           for(kkk=0; kkk<out_num_prev; kkk++) {
01273             out_from_next[out_num_next] = out_from[kkk];
01274             out_a_next[out_num_next] = out_a[kkk] + prob;
01275             out_num_next++;
01276           }
01277         } else {
01278           /* connect incoming arcs from previous phone to this phone */
01279           for(kkk=0; kkk<out_num_prev; kkk++) {
01280             add_wacc(wchmm, out_from[kkk], out_a[kkk] + prob, ntmp + ato - 1);
01281           }
01282         }
01283       }
01284     }
01285     /* if short pause model doesn't have a model skip transition, also add it */
01286     if (hmm_logical_trans(wchmm->hmminfo->sp)->a[0][hmm_logical_state_num(wchmm->hmminfo->sp)-1] == LOG_ZERO) {
01287       /* to make insertion sp model to have no effect on the original path,
01288          the skip transition probability should be 0.0 (=100%) */
01289       prob = 0.0;
01290       for(kkk=0; kkk<out_num_prev; kkk++) {
01291         out_from_next[out_num_next] = out_from[kkk];
01292         out_a_next[out_num_next] = out_a[kkk] + prob;
01293         out_num_next++;
01294       }
01295     }
01296     /* connect arcs within model, and store new outgoing arcs for wordend node */
01297     for (k = 1; k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) {
01298       for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) {
01299         prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][ato];
01300         if (prob != LOG_ZERO) {
01301           if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) {
01302             out_from_next[out_num_next] = ntmp;
01303             out_a_next[out_num_next] = prob;
01304             out_num_next++;
01305           } else {
01306             add_wacc(wchmm, ntmp, prob, ntmp + ato - k);
01307           }
01308         }
01309       }
01310       ntmp++;
01311     }
01312     /* swap work area for next */
01313     for(kkk=0;kkk<out_num_next;kkk++) {
01314       out_from[kkk] = out_from_next[kkk];
01315       out_a[kkk] = out_a_next[kkk];
01316     }
01317     out_num_prev = out_num_next;
01318 
01319   } /* end of inter-word short pause appending block */
01320 
01321   /* make mapping: word <-> node on wchmm */
01322   for (j=0;j<word_len;j++) {
01323     if (j < add_head) { /* shared part */
01324       wchmm->offset[word][j] = wchmm->offset[matchword][j];
01325     } else if (add_tail < j) { /* shared tail part (should not happen..) */
01326       wchmm->offset[word][j] = wchmm->offset[matchword][j+(matchword_len-word_len)];
01327     } else {                    /* newly created part */
01328       wchmm->offset[word][j] = n;
01329       n += hmm_logical_state_num(wchmm->winfo->wseq[word][j]) - 2;
01330     }
01331   }
01332 
01333 
01334   if (wchmm->hmminfo->multipath) {
01335     /* create word-end node */
01336 
01337     /* paranoia check if the short-pause addition has been done well */
01338     if (enable_iwsp && add_tail - add_head + 1 > 0) {
01339       n += hmm_logical_state_num(wchmm->hmminfo->sp) - 2;
01340       if (n != ntmp) j_internal_error("wchmm_add_word: cannot match\n");
01341     }
01342     
01343     /* create word-end node */
01344     wchmm->wordend[word] = n;   /* tail node of 'word' is 'n' */
01345     wchmm->stend[n] = word;     /* node 'k' is a tail node of 'word' */
01346     acc_init(wchmm, n);
01347     wchmm->state[n].out.state = NULL;
01348     
01349     /* connect the final outgoing arcs in out_from[] to the word end node */
01350     for(k = 0; k < out_num_prev; k++) {
01351       add_wacc(wchmm, out_from[k], out_a[k], n);
01352     }
01353     n++;
01354     if (n >= wchmm->maxwcn) wchmm_extend(wchmm);
01355     
01356     if (matchlen == 0) {
01357       /* check if the new word has whole word-skipping transition */
01358       /* (use out_from and out_num_prev temporary) */
01359       out_num_prev = 0;
01360       get_outtrans_list(wchmm, word, word_len-1, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, enable_iwsp);
01361       for(k=0;k<out_num_prev;k++) {
01362         if (out_from[k] == wchmm->wordbegin[word]) {
01363           jlog("ERROR: *** ERROR: WORD SKIPPING TRANSITION NOT ALLOWED ***\n");
01364           jlog("ERROR:   Word id=%d (%s[%s]) has \"word skipping transition\".\n", word, wchmm->winfo->wname[word], wchmm->winfo->woutput[word]);
01365           jlog("ERROR:   All HMMs in the word:\n    ");
01366           for(kkk=0;kkk<wchmm->winfo->wlen[word];kkk++) {
01367             jlog("%s ", wchmm->winfo->wseq[word][kkk]->name);
01368           }
01369           jlog("\n");
01370           jlog("ERROR:  has transitions from initial state to final state.\n");
01371           jlog("ERROR:  This type of word skipping is not supported.\n");
01372           ok_p = FALSE;
01373         }
01374       }
01375     }
01376 
01377     wchmm->n = n;
01378 
01379   } else {
01380 
01381     wchmm->n = n;
01382     k = wchmm->offset[word][word_len-1] + hmm_logical_state_num(wchmm->winfo->wseq[word][word_len-1])-2 -1;
01383     wchmm->wordend[word] = k;   /* tail node of 'word' is 'k' */
01384     wchmm->stend[k] = word;     /* node 'k' is a tail node of 'word' */
01385     
01386     if (matchlen != 0 && add_tail - add_head + 1 > 0) {
01387       /* new part has been created in the above procedure: */
01388       /* now make link from shared part to the new part */
01389       wchmm_link_subword(wchmm, matchword,add_to,word,add_head);        
01390     }
01391 
01392   }
01393 
01394   return(ok_p);
01395   
01396 }
01397 
01398 /*************************************************************/
01399 /**** parse whole structure (after wchmm has been built) *****/
01400 /*************************************************************/
01401 
01416 static void
01417 wchmm_calc_wordend_arc(WCHMM_INFO *wchmm)
01418 {
01419   WORD_ID w;
01420   HTK_HMM_Trans *tr;
01421   LOGPROB a;
01422 
01423   for (w=0;w<wchmm->winfo->num;w++) {
01424     tr = hmm_logical_trans(wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1]);
01425     a = tr->a[tr->statenum-2][tr->statenum-1];
01426     wchmm->wordend_a[w] = a;
01427   }
01428 }
01429 
01430 #ifdef SEPARATE_BY_UNIGRAM
01431 
01432 /********************************************************************/
01433 /****** for separation (linearization) of high-frequent words *******/
01434 /********************************************************************/
01435 
01454 static int
01455 compare_prob(LOGPROB *a, LOGPROB *b)
01456 {
01457   if (*a < *b)  return (1);
01458   if (*a > *b)  return (-1);
01459   return(0);
01460 }
01461 
01480 static LOGPROB
01481 get_nbest_uniprob(WCHMM_INFO *wchmm, int n)
01482 {
01483   LOGPROB *u_p;
01484   WORD_ID w;
01485   LOGPROB x;
01486   WORD_INFO *winfo;
01487   NGRAM_INFO *ngram;
01488 
01489   winfo = wchmm->winfo;
01490   ngram = wchmm->ngram;
01491 
01492   if (n < 1) n = 1;
01493   if (n > winfo->num) n = winfo->num;
01494 
01495   /* store all unigram probability to u_p[] */
01496   u_p = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
01497   for(w=0;w<winfo->num;w++) {
01498     if (ngram) {
01499       x = uni_prob(ngram, winfo->wton[w])
01500 #ifdef CLASS_NGRAM
01501         + winfo->cprob[w]
01502 #endif
01503         ;
01504     } else {
01505       x = LOG_ZERO;
01506     }
01507     if (wchmm->lmvar == LM_NGRAM_USER) {
01508       x = (*(wchmm->uni_prob_user))(wchmm->winfo, w, x);
01509     }
01510     u_p[w] = x;
01511   }
01512 
01513   /* sort them downward */
01514   qsort(u_p, winfo->num, sizeof(LOGPROB),
01515         (int (*)(const void *,const void *))compare_prob);
01516 
01517   /* return the Nth value */
01518   x = u_p[n-1];
01519   free(u_p);
01520   return(x);
01521 }
01522 
01523 #endif
01524 
01525 /**********************************************************/
01526 /****** MAKE WCHMM (LEXICON TREE) --- main function *******/
01527 /**********************************************************/
01528 
01529 #define COUNT_STEP 500         ///< Word count step for debug progress output
01530 
01552 boolean
01553 build_wchmm(WCHMM_INFO *wchmm, JCONF_LM *lmconf)
01554 {
01555   int i,j;
01556   int matchword=0, sharelen=0, maxsharelen=0;
01557   int num_duplicated;
01558 #ifdef SEPARATE_BY_UNIGRAM
01559   LOGPROB separate_thres;
01560   LOGPROB p;
01561 #endif
01562   boolean ok_p;
01563 
01564   /* lingustic infos must be set before build_wchmm() is called */
01565   /* check if necessary lingustic info is already assigned (for debug) */
01566   if (wchmm->winfo == NULL
01567       || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL)
01568       || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL)
01569       ) {
01570     jlog("ERROR: wchmm: linguistic info not available!!\n");
01571     return FALSE;
01572   }
01573 
01574   ok_p = TRUE;
01575   
01576 #ifdef SEPARATE_BY_UNIGRAM
01577   /* 上位[separate_wnum]番目の1-gramスコアを求める */
01578   /* 1-gramスコアがこの値以上のものは木から分ける */
01579   separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum);
01580 #endif
01581 
01582 #ifdef PASS1_IWCD
01583 #ifndef USE_OLD_IWCD
01584   if (wchmm->category_tree) {
01585     if (wchmm->ccd_flag) {
01586       /* 全てのカテゴリID付き lcd_set を作成 */
01587       lcdset_register_with_category_all(wchmm);
01588     }
01589   }
01590 #endif
01591 #endif /* PASS1_IWCD */
01592   
01593 
01594   /* wchmmを初期化 */
01595   wchmm_init(wchmm);
01596 
01597   /* カウンタリセット */
01598   wchmm->separated_word_count=0;
01599 
01600   jlog("STAT: wchmm: Building HMM lexicon tree (left-to-right)\n");
01601   for (i=0;i<wchmm->winfo->num;i++) {
01602 
01603     if (wchmm->lmtype == LM_PROB) {
01604       if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {
01605         /* 先頭/末尾の無音モデルは木構造化せず，
01606          * 先頭の無音単語の先頭への遷移，末尾単語の末尾からの遷移は作らない*/
01607         /* sharelen=0でそのまま */
01608         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01609           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01610           ok_p = FALSE;
01611         }
01612         continue;
01613       }
01614 #ifndef NO_SEPARATE_SHORT_WORD
01615       if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {
01616         /* 長さの短い単語を木構造化しない(ここでは1音節) */
01617         /* sharelen=0でそのまま */
01618         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01619           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01620           ok_p = FALSE;
01621         }
01622         wchmm->separated_word_count++;
01623         continue;
01624       }
01625 #endif
01626 #ifdef SEPARATE_BY_UNIGRAM
01627       if (wchmm->ngram) {
01628         p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
01629 #ifdef CLASS_NGRAM
01630           + wchmm->winfo->cprob[i]
01631 #endif
01632           ;
01633       } else {
01634         p = LOG_ZERO;
01635       }
01636       if (wchmm->lmvar == LM_NGRAM_USER) {
01637         p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p);
01638       }
01639       if (p >= separate_thres && wchmm->separated_word_count < lmconf->separate_wnum) {
01640         /* 頻度の高い単語を木構造化しない */
01641         /* separate_thres は上位separate_wnum番目のスコア */
01642         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01643           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01644           ok_p = FALSE;
01645         }
01646         wchmm->separated_word_count++;
01647         continue;
01648       }
01649 #endif
01650     }
01651 
01652     /* 最も長く音素を共有出来る単語を探す */
01653     maxsharelen=0;
01654     for (j=0;j<i;j++) {
01655       if (wchmm->category_tree  && wchmm->lmtype == LM_DFA) {
01656         if (wchmm->winfo->wton[i] != wchmm->winfo->wton[j]) continue;
01657       }
01658       sharelen = wchmm_check_match(wchmm->winfo, i, j);
01659       if (sharelen == wchmm->winfo->wlen[i] && sharelen == wchmm->winfo->wlen[j]) {
01660        /* word に同音語が存在する */
01661        /* 必ず最大の長さであり，重複カウントを避けるためここで抜ける */
01662        maxsharelen = sharelen;
01663        matchword = j;
01664        break;
01665       }
01666       if (sharelen > maxsharelen) {
01667        matchword = j;
01668        maxsharelen = sharelen;
01669       }
01670     }
01671     if (wchmm_add_word(wchmm, i, maxsharelen, matchword, lmconf->enable_iwsp) == FALSE) {
01672       jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01673       ok_p = FALSE;
01674     }
01675   }
01676 
01677 #if 0
01678   /* 木構造を作らない */
01679   for (i=0;i<wchmm->winfo->num;i++) {
01680     if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01681       jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01682       ok_p = FALSE;
01683     }
01684   }
01685 #endif  
01686   jlog("STAT:  %5d words ended     (%6d nodes)\n",i,wchmm->n);
01687 
01688   if (! wchmm->hmminfo->multipath) {
01689     /* 同一音素系列を持つ単語同士の leaf node を2重化して区別する */
01690     num_duplicated = wchmm_duplicate_leafnode(wchmm);
01691     jlog("STAT:  %d leaf nodes are made unshared\n", num_duplicated);
01692     
01693     /* 単語の終端から外への遷移確率を求めておく */
01694     wchmm_calc_wordend_arc(wchmm);
01695   }
01696 
01697   /* wchmmの整合性をチェックする */
01698   check_wchmm(wchmm);
01699 
01700   /* factoring用に各状態に後続単語のリストを付加する */
01701   if (!wchmm->category_tree) {
01702 
01703 #ifdef UNIGRAM_FACTORING
01704     if (wchmm->lmtype == LM_PROB) {
01705       /* 同時に前もってfactoring値を計算 */
01706       make_successor_list_unigram_factoring(wchmm);
01707       jlog("STAT:  1-gram factoring values has been pre-computed\n");
01708     } else {
01709       make_successor_list(wchmm);
01710     }
01711 #else 
01712     make_successor_list(wchmm);
01713 #endif /* UNIGRAM_FACTORING */
01714     
01715     if (wchmm->hmminfo->multipath) {
01716       /* 構築された factoring 情報をスキップ遷移および文頭文法ノードにコピー */
01717       adjust_sc_index(wchmm);
01718     }
01719     
01720 #ifdef UNIGRAM_FACTORING
01721     if (wchmm->lmtype == LM_PROB) {
01722       /* 単語間LMキャッシュが必要なノードのリストを作る */
01723       make_iwcache_index(wchmm);
01724     }
01725 #endif /* UNIGRAM_FACTORING */
01726 
01727     /* sclist2node is no longer used */
01728     if (wchmm->sclist2node != NULL) {
01729       free(wchmm->sclist2node);
01730       wchmm->sclist2node = NULL;
01731     }
01732 
01733   }
01734 
01735   jlog("STAT: done\n");
01736 
01737   return ok_p;
01738 }
01739 
01765 boolean
01766 build_wchmm2(WCHMM_INFO *wchmm, JCONF_LM *lmconf)
01767 {
01768   int i,j, last_i;
01769   int num_duplicated;
01770   WORD_ID *windex;
01771 #ifdef SEPARATE_BY_UNIGRAM
01772   LOGPROB separate_thres;
01773   LOGPROB p;
01774 #endif
01775   boolean ok_p;
01776   boolean ret;
01777 
01778   /* lingustic infos must be set before build_wchmm() is called */
01779   /* check if necessary lingustic info is already assigned (for debug) */
01780   if (wchmm->winfo == NULL
01781       || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL)
01782       || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL)
01783       ) {
01784     jlog("ERROR: wchmm: linguistic info not available!!\n");
01785     return FALSE;
01786   }
01787 
01788   ok_p = TRUE;
01789   
01790   wchmm->separated_word_count = 0;
01791   
01792   jlog("STAT: Building HMM lexicon tree\n");
01793   
01794   if (wchmm->lmtype == LM_PROB) {
01795 #ifdef SEPARATE_BY_UNIGRAM
01796     /* compute score threshold beforehand to separate words from tree */
01797     /* here we will separate best [separate_wnum] words from tree */
01798     separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum);
01799 #endif
01800   }
01801 
01802 #ifdef PASS1_IWCD
01803 #ifndef USE_OLD_IWCD
01804   if (wchmm->category_tree) {
01805     if (wchmm->ccd_flag) {
01806       /* when Julian mode (category-tree) and triphone is used,
01807          make all category-indexed context-dependent phone set (cdset) here */
01808       /* these will be assigned on the last phone of each word on tree */
01809       lcdset_register_with_category_all(wchmm);
01810     }
01811   }
01812 #endif
01813 #endif /* PASS1_IWCD */
01814 
01815  /* initialize wchmm */
01816   wchmm_init(wchmm);
01817 
01818   /* make sorted word index ordered by phone sequence */
01819   windex = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->winfo->num);
01820   for(i=0;i<wchmm->winfo->num;i++) windex[i] = i;
01821 
01822   if (wchmm->category_tree && wchmm->lmtype == LM_DFA) {
01823 
01824     /* sort by category -> sort by word ID in each category */
01825     wchmm_sort_idx_by_category(wchmm->winfo, windex, wchmm->winfo->num);
01826     {
01827       int last_cate;
01828       last_i = 0;
01829       last_cate = wchmm->winfo->wton[windex[0]];
01830       for(i = 1;i<wchmm->winfo->num;i++) {
01831         if (wchmm->winfo->wton[windex[i]] != last_cate) {
01832           wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, i - last_i);
01833           last_cate = wchmm->winfo->wton[windex[i]];
01834           last_i = i;
01835         }
01836       }
01837       wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, wchmm->winfo->num - last_i);
01838     }
01839 
01840   } else {
01841 
01842     /* sort by word ID for whole vocabulary */
01843     wchmm_sort_idx_by_wseq(wchmm->winfo, windex, 0, wchmm->winfo->num);
01844 
01845   }
01846 
01847 /* 
01848  *   {
01849  *     int i,w;
01850  *     for(i=0;i<wchmm->winfo->num;i++) {
01851  *       w = windex[i];
01852  *       printf("%d: cate=%4d wid=%4d %s\n",i, wchmm->winfo->wton[w], w, wchmm->winfo->woutput[w]);
01853  *     }
01854  *   }
01855  */
01856 
01857   /* incrementaly add words to lexicon tree */
01858   /* now for each word, the previous word (last_i) is always the most matched one */
01859   last_i = WORD_INVALID;
01860   for (j=0;j<wchmm->winfo->num;j++) {
01861     i = windex[j];
01862 
01863     if (wchmm->lmtype == LM_PROB) {
01864 
01865       /* start/end silence word should not be shared */
01866       if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) {
01867         /* add whole word as new (sharelen=0) */
01868         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01869           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01870           ok_p = FALSE;
01871         }
01872         continue;
01873       }
01874 #ifndef NO_SEPARATE_SHORT_WORD
01875       /* separate short words from tree */
01876       if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) {
01877         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01878           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01879           ok_p = FALSE;
01880         }
01881         wchmm->separated_word_count++;
01882         continue;
01883       }
01884 #endif
01885 #ifdef SEPARATE_BY_UNIGRAM
01886       if (wchmm->ngram) {
01887         p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i])
01888 #ifdef CLASS_NGRAM
01889           + wchmm->winfo->cprob[i]
01890 #endif
01891           ;
01892       } else {
01893         p = LOG_ZERO;
01894       }
01895       if (wchmm->lmvar == LM_NGRAM_USER) {
01896         p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p);
01897       }
01898       /* separate high-frequent words from tree (threshold = separate_thres) */
01899       if (p >= separate_thres && wchmm->separated_word_count < lmconf->separate_wnum) {
01900         if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) {
01901           jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01902           ok_p = FALSE;
01903         }
01904         wchmm->separated_word_count++;
01905         continue;
01906       }
01907 #endif
01908     }
01909 
01910     if (last_i == WORD_INVALID) { /* first word */
01911       ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp);
01912     } else {
01913       /* the previous word (last_i) is always the most matched one */
01914       if (wchmm->category_tree && wchmm->lmtype == LM_DFA) {
01915         if (wchmm->winfo->wton[i] != wchmm->winfo->wton[last_i]) {
01916           ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp);
01917         } else {
01918           ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp);
01919         }
01920       } else {
01921         ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp);
01922       }
01923     }
01924     if (ret == FALSE) {
01925       jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n");
01926       ok_p = FALSE;
01927     }
01928     last_i = i;
01929     
01930   } /* end of add word loop */
01931   
01932   /*j_printerr("\r %5d words ended     (%6d nodes)\n",j,wchmm->n);*/
01933 
01934   /* free work area */
01935   free(windex);
01936 
01937   if (wchmm->hmminfo->multipath) {
01938     jlog("STAT: lexicon size: %d nodes\n", wchmm->n);
01939   } else {
01940     /* duplicate leaf nodes of homophone/embedded words */
01941     jlog("STAT: lexicon size: %d", wchmm->n);
01942     num_duplicated = wchmm_duplicate_leafnode(wchmm);
01943     jlog("+%d=%d\n", num_duplicated, wchmm->n);
01944   }
01945 
01946   if (! wchmm->hmminfo->multipath) {
01947     /* calculate transition probability of word end node to outside */
01948     wchmm_calc_wordend_arc(wchmm);
01949   }
01950 
01951   /* check wchmm coherence (internal debug) */
01952   check_wchmm(wchmm);
01953 
01954   /* make successor list for all branch nodes for N-gram factoring */
01955   if (!wchmm->category_tree) {
01956 
01957 #ifdef UNIGRAM_FACTORING
01958     if (wchmm->lmtype == LM_PROB) {
01959       /* for 1-gram factoring, we can compute the values before search */
01960        make_successor_list_unigram_factoring(wchmm);
01961        jlog("STAT:  1-gram factoring values has been pre-computed\n");
01962     } else {
01963       make_successor_list(wchmm);
01964     }
01965 #else
01966     make_successor_list(wchmm);
01967 #endif /* UNIGRAM_FACTORING */
01968     if (wchmm->hmminfo->multipath) {
01969       /* Copy the factoring data according to the skip transitions and startword nodes */
01970       adjust_sc_index(wchmm);
01971     }
01972 #ifdef UNIGRAM_FACTORING
01973     if (wchmm->lmtype == LM_PROB) {
01974       /* make list of start nodes that needs inter-word LM cache */
01975       make_iwcache_index(wchmm);
01976     }
01977 #endif /* UNIGRAM_FACTORING */
01978 
01979     /* sclist2node is no longer used */
01980     if (wchmm->sclist2node != NULL) {
01981       free(wchmm->sclist2node);
01982       wchmm->sclist2node = NULL;
01983     }
01984 
01985   }
01986 
01987   //jlog("STAT: done\n");
01988 
01989 #ifdef WCHMM_SIZE_CHECK
01990   if (debug2_flag) {
01991     /* detailed check of lexicon tree size (inaccurate!) */
01992     jlog("STAT: --- memory size of word lexicon ---\n");
01993     jlog("STAT: wchmm: %d words, %d nodes\n", wchmm->winfo->num, wchmm->n);
01994     jlog("STAT: %9d bytes: wchmm->state[node] (exclude ac, sc)\n", sizeof(WCHMM_STATE) * wchmm->n);
01995     {
01996       int count1 = 0;
01997       int count2 = 0;
01998       int count3 = 0;
01999       for(i=0;i<wchmm->n;i++) {
02000         if (wchmm->self_a[i] != LOG_ZERO) count1++;
02001         if (wchmm->next_a[i] != LOG_ZERO) count2++;
02002         if (wchmm->ac[i] != NULL) count3++;
02003       }
02004       jlog("STAT: %9d bytes: wchmm->self_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count1 / (float)wchmm->n);
02005       jlog("STAT: %9d bytes: wchmm->next_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count2 / (float)wchmm->n);
02006       jlog("STAT: %9d bytes: wchmm->ac[node] (%4.1f%% used)\n", sizeof(A_CELL2 *) * wchmm->n, 100.0 * count3 / (float)wchmm->n);
02007     }
02008     jlog("STAT: %9d bytes: wchmm->stend[node]\n", sizeof(WORD_ID) * wchmm->n);
02009     {
02010       int w,count;
02011       count = 0;
02012       for(w=0;w<wchmm->winfo->num;w++) {
02013         count += wchmm->winfo->wlen[w] * sizeof(int) + sizeof(int *);
02014       }
02015       jlog("STAT: %9d bytes: wchmm->offset[w][]\n", count);
02016     }
02017     if (wchmm->hmminfo->multipath) {
02018       jlog("STAT: %9d bytes: wchmm->wordbegin[w]\n", wchmm->winfo->num * sizeof(int));
02019     }
02020     jlog("STAT: %9d bytes: wchmm->wordend[w]\n", wchmm->winfo->num * sizeof(int));
02021     jlog("STAT: %9d bytes: wchmm->startnode[]\n", wchmm->startnum * sizeof(int));
02022     if (wchmm->category_tree) {
02023       jlog("STAT: %9d bytes: wchmm->start2wid[]\n", wchmm->startnum * sizeof(WORD_ID));
02024     }
02025 #ifdef UNIGRAM_FACTORING
02026     if (wchmm->lmtype == LM_PROB) {
02027       jlog("STAT: %9d bytes: wchmm->start2isolate[]\n", wchmm->isolatenum * sizeof(int));
02028     }
02029 #endif
02030     if (!wchmm->hmminfo->multipath) {
02031       jlog("STAT: %9d bytes: wchmm->wordend_a[]\n", wchmm->winfo->num * sizeof(LOGPROB));
02032     }
02033 #ifdef PASS1_IWCD
02034     jlog("STAT: %9d bytes: wchmm->outstyle[]\n", wchmm->n * sizeof(unsigned char));
02035     {
02036       int c;
02037       c = 0;
02038       for(i=0;i<wchmm->n;i++) {
02039         switch(wchmm->outstyle[i]) {
02040         case AS_RSET:
02041           c += sizeof(RC_INFO);
02042           break;
02043         case AS_LRSET:
02044           c += sizeof(LRC_INFO);
02045           break;
02046       }
02047       }
02048       if (c > 0) jlog("STAT: %9d bytes: wchmm->out (RC_INFO / LRC_INFO)\n", c);
02049     }
02050 #endif
02051     if (!wchmm->category_tree) {
02052       jlog("STAT: %9d bytes: wchmm->sclist[]\n", wchmm->scnum * sizeof(S_CELL *));
02053       jlog("STAT: %9d bytes: wchmm->sclist2node[]\n", wchmm->scnum * sizeof(int));
02054 #ifdef UNIGRAM_FACTORING
02055       if (wchmm->lmtype == LM_PROB) {
02056         jlog("STAT: %9d bytes: wchmm->fscore[]\n", wchmm->fsnum * sizeof(LOGPROB));
02057       }
02058 #endif  
02059     }
02060     
02061     {
02062       int count, n;
02063       A_CELL2 *ac;
02064       count = 0;
02065       for(n=0;n<wchmm->n;n++) {
02066         for(ac=wchmm->ac[n];ac;ac=ac->next) {
02067           count += sizeof(A_CELL2);
02068         }
02069       }
02070       jlog("STAT: %9d bytes: A_CELL2\n", count);
02071     }
02072     if (!wchmm->category_tree) {
02073       jlog("STAT: %9d bytes: sclist\n", wchmm->scnum * sizeof(S_CELL *));
02074       jlog("STAT: %9d bytes: sclist2node\n", wchmm->scnum * sizeof(int));
02075     }
02076 
02077   }
02078 
02079 #endif /* WCHMM_SIZE_CHECK */
02080 
02081 
02082   return ok_p;
02083 
02084 }
02085 
02086 
02101 void
02102 print_wchmm_info(WCHMM_INFO *wchmm)
02103 {
02104   int n,i, rootnum;
02105 
02106   if (wchmm->hmminfo->multipath) {
02107     rootnum = wchmm->startnum;
02108   } else {
02109     if (wchmm->lmtype == LM_PROB) {
02110       rootnum = wchmm->startnum + 1;    /* including winfo->head_silwid */
02111     } else if (wchmm->lmtype == LM_DFA) {
02112       rootnum = wchmm->startnum;
02113     }
02114   }
02115   
02116   jlog(" Lexicon tree:\n");
02117   jlog("\t total node num = %6d\n", wchmm->n);
02118   if (wchmm->lmtype == LM_PROB) {
02119     jlog("\t  root node num = %6d\n", rootnum);
02120 #ifdef NO_SEPARATE_SHORT_WORD
02121 #ifdef SEPARATE_BY_UNIGRAM
02122     jlog("\t(%d hi-freq. words are separated from tree lexicon)\n", wchmm->separated_word_count);
02123 #else
02124     jlog(" (no words are separated from tree)\n");
02125 #endif /* SEPARATE_BY_UNIGRAM */
02126 #else
02127     jlog(" (%d short words (<= %d phonemes) are separated from tree)\n", wchmm->separated_word_count, SHORT_WORD_LEN);
02128 #endif /* NO_SEPARATE_SHORT_WORD */
02129   }
02130   if (wchmm->lmtype == LM_DFA) {
02131     jlog("\t  root node num = %6d\n", rootnum);
02132   }
02133   for(n=0,i=0;i<wchmm->n;i++) {
02134     if (wchmm->stend[i] != WORD_INVALID) n++;
02135   }
02136   jlog("\t  leaf node num = %6d\n", n);
02137   if (!wchmm->category_tree) {
02138     jlog("\t fact. node num = %6d\n", wchmm->scnum - 1);
02139   }
02140 }
02141 
02142 /* end of file */