Julius 4.2
libsent/src/hmminfo/chkhmmlist.c
説明を見る。
00001 
00032 /*
00033  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00034  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00035  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00036  * All rights reserved
00037  */
00038 
00039 #include <sent/htk_hmm.h>
00040 #include <sent/vocabulary.h>
00041 
00047 void
00048 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
00049 {
00050   HMM_Logical *lg;
00051   char p[MAX_HMMNAME_LEN];
00052   BASEPHONE *match = NULL, *new;
00053   APATNODE *root;
00054   int n;
00055 
00056   n = 0;
00057   root = NULL;
00058   for(lg=hmminfo->lgstart; lg; lg=lg->next) {
00059     center_name(lg->name, p);
00060     if (root != NULL) {
00061       match = aptree_search_data(p, root);
00062       if (match != NULL && strmatch(match->name, p)) continue;
00063     }
00064     new = (BASEPHONE *)mybmalloc2(sizeof(BASEPHONE), &(hmminfo->mroot));
00065     new->bgnflag = FALSE;
00066     new->endflag = FALSE;
00067     new->name = (char *)mybmalloc2(strlen(p)+1, &(hmminfo->mroot));
00068     strcpy(new->name, p);
00069     if (root == NULL) root = aptree_make_root_node(new, &(hmminfo->mroot));
00070     else aptree_add_entry(new->name, new, match->name, &root, &(hmminfo->mroot));
00071     n++;
00072   }
00073   hmminfo->basephone.num = n;
00074   hmminfo->basephone.root = root;
00075 }
00076 
00082 static void
00083 print_callback_detail(void *x)
00084 {
00085   BASEPHONE *b = x;
00086   printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag);
00087 }
00088 
00094 static void
00095 print_callback_name(void *x)
00096 {
00097   BASEPHONE *b = x;
00098   printf("%s, ", b->name);
00099 }
00105 void
00106 print_all_basephone_detail(HMM_basephone *base)
00107 {
00108   aptree_traverse_and_do(base->root, print_callback_detail);
00109 }
00115 void
00116 print_all_basephone_name(HMM_basephone *base)
00117 {
00118   aptree_traverse_and_do(base->root, print_callback_name);
00119   printf("\n");
00120 }
00121 
00122 static int bncnt;               /* work counter: base phones with bgnflag set; reset in count_all_phone(), incremented in count_callback() */
00123 static int edcnt;               /* work counter: base phones with endflag set; reset in count_all_phone(), incremented in count_callback() */
00124 
00130 static void
00131 count_callback(void *x)
00132 {
00133   BASEPHONE *b = x;
00134   if (b->bgnflag) bncnt++;
00135   if (b->endflag) edcnt++;
00136 }
00137 
00144 static void
00145 count_all_phone(HMM_basephone *base)
00146 {
00147   bncnt = edcnt = 0;
00148   aptree_traverse_and_do(base->root, count_callback);
00149   base->bgnnum = bncnt;
00150   base->endnum = edcnt;
00151 }
00152 
00159 static boolean
00160 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
00161 {
00162   WORD_ID w;
00163   char p[MAX_HMMNAME_LEN];
00164   char *key;
00165   BASEPHONE *match;
00166   boolean ok_p = TRUE;
00167 
00168   /* mark what is at beginning of word (can be right context) */
00169   for(w=0;w<winfo->num;w++) {
00170     if (w == winfo->head_silwid) continue;
00171     key = center_name(winfo->wseq[w][0]->name, p);
00172     match = aptree_search_data(key, base->root);
00173     if (match != NULL && strmatch(match->name, key)) {
00174       match->bgnflag = TRUE;
00175     } else {
00176       /* not found!!! */
00177       jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key);
00178       ok_p = FALSE;
00179     }
00180   }
00181   /* mark what is at end of word (can be left context) */
00182   for(w=0;w<winfo->num;w++) {
00183     if (w == winfo->tail_silwid) continue;
00184     key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
00185     match = aptree_search_data(key, base->root);
00186     if (match != NULL && strmatch(match->name, key)) {
00187       match->endflag = TRUE;
00188     } else {
00189       /* not found!!! */
00190       jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key);
00191       ok_p = FALSE;
00192     }
00193   }
00194 
00195   return ok_p;
00196 }
00197 
00198 
00199 /* check whether all possible triphones exist in the logical HMM set */
00200 /* temporary storage shared with the aptree traversal callbacks below */
00201 static HTK_HMM_INFO *local_hmminfo; /* HMM definition being tested; set by test_interword_triphone() */
00202 static WORD_INFO *local_winfo;  /* word dictionary being tested; set by test_interword_triphone() */
00203 static APATNODE *local_root;    /* copy of hmminfo->basephone.root, traversed again from triphone_callback_left() */
00204 static WORD_ID current_w;       /* ID of the word whose edge phones are currently being checked */
00205 static char gbuf[MAX_HMMNAME_LEN];              /* shared name buffer: written by triphone_callback_normal()/_left(), read by triphone_callback_right() */
00206 
00207 static APATNODE *error_root;    /* index of missing triphone names collected by add_to_error(); NULL while empty */
00208 static int error_num;           /* number of distinct names registered under error_root */
00209 
00216 static void
00217 add_to_error(char *lostname, HTK_HMM_INFO *hmminfo)
00218 {
00219   char *match = NULL, *new;
00220   if (error_root != NULL) {
00221     match = aptree_search_data(lostname, error_root);
00222     if (match != NULL && strmatch(match, lostname)) return;
00223   }
00224   new = (char *)mybmalloc2(strlen(lostname)+1, &(hmminfo->mroot));
00225   strcpy(new, lostname);
00226   if (error_root == NULL) error_root = aptree_make_root_node(new, &(hmminfo->mroot));
00227   else aptree_add_entry(new, new, match, &error_root, &(hmminfo->mroot));
00228 
00229   error_num++;
00230 }
00231 
/**
 * Traversal callback: print one stored name on its own line to stdout.
 *
 * @param x  pointer to a NUL-terminated string.
 */
static void
print_error_callback(void *x)
{
  const char *name = (const char *)x;

  printf("%s\n", name);
}
00243 
00251 static void
00252 triphone_callback_normal(void *x)
00253 {
00254   BASEPHONE *b = x;
00255   WORD_ID w = current_w;
00256   HMM_Logical *lg, *found;
00257 
00258   if (b->endflag) {             /* x can appear as end of word */
00259     lg = local_winfo->wseq[w][0];
00260     strcpy(gbuf, lg->name);
00261     add_left_context(gbuf, b->name);
00262     /* printf("checking \"%s\" - \"%s\"\n", b->name, lg->name); */
00263     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00264       if (lg->is_pseudo) {
00265         printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00266         add_to_error(gbuf, local_hmminfo);
00267       }
00268     }
00269   }
00270   if (b->bgnflag) {             /* x can appear as beginning of word */
00271     lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
00272     strcpy(gbuf, lg->name);
00273     add_right_context(gbuf, b->name);
00274     /* printf("checking \"%s\" - \"%s\"\n", lg->name, b->name); */
00275     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00276       if (lg->is_pseudo) {
00277         printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00278         add_to_error(gbuf, local_hmminfo);
00279       }
00280     }
00281   }
00282 }
00283 
00284 /* for words consisting of only one phone, every combination of
00285    "x - current_w + x" must be checked */
00293 static void
00294 triphone_callback_right(void *x)
00295 {
00296   BASEPHONE *b = x;
00297   WORD_ID w = current_w;
00298   HMM_Logical *lg, *found;
00299   static char buf[MAX_HMMNAME_LEN];
00300 
00301   if (b->bgnflag) {
00302     lg = local_winfo->wseq[w][0];
00303     strcpy(buf, gbuf);
00304     add_right_context(buf, b->name);
00305     /* printf("    checking \"%s\" - \"%s\"\n", gbuf, b->name); */
00306     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) {
00307       if (lg->is_pseudo) {
00308         printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
00309         add_to_error(buf, local_hmminfo);
00310       }
00311     }
00312   }
00313 }
00314 
00322 static void
00323 triphone_callback_left(void *x)
00324 {
00325   BASEPHONE *b = x;
00326   WORD_ID w = current_w;
00327   HMM_Logical *lg;
00328 
00329   if (b->endflag) {
00330     lg = local_winfo->wseq[w][0];
00331     strcpy(gbuf, lg->name);
00332     add_left_context(gbuf, b->name);
00333     aptree_traverse_and_do(local_root, triphone_callback_right);
00334   }
00335 }
00336 
00344 void
00345 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00346 {
00347   WORD_ID w;
00348   local_hmminfo = hmminfo;
00349   local_winfo = winfo;
00350   local_root = hmminfo->basephone.root;
00351   error_root = NULL;
00352   error_num = 0;
00353 
00354   printf("Inter-word triphone existence test...\n");
00355   for(w=0;w<winfo->num;w++) {
00356     current_w = w;
00357     if (winfo->wlen[w] > 1) {
00358       /* check beginning phone and ending phone of this word */
00359       aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal);
00360     } else {
00361       /* for word of only 1 phoneme, check both */
00362       aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left);
00363     }
00364   }
00365   if (error_root == NULL) {
00366     printf("passed\n");
00367   } else {
00368     printf("following triphones are missing in HMMList:\n");
00369     aptree_traverse_and_do(error_root, print_error_callback);
00370     printf("total %d missing inter-word triphones\n", error_num);
00371   }
00372 }
00373 
00374 
00375 
00385 boolean
00386 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00387 {
00388   /* gather base phones and word-{head,tail} phones */
00389   jlog("Stat: chkhmmlist: Exploring HMM database and lexicon tree:\n");
00390   if (mark_word_edge(winfo, &(hmminfo->basephone)) == FALSE) {
00391     return FALSE;
00392   }
00393   count_all_phone(&(hmminfo->basephone));
00394   return TRUE;
00395 }
00396 
00403 void
00404 print_phone_info(FILE *fp, HTK_HMM_INFO *hmminfo)
00405 {
00406   /* output information */
00407   fprintf(fp, "%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);
00408   if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) {
00409     fprintf(fp, "   no HMMList, physical HMM names are redirected to logicalHMM\n");
00410   } else {
00411     if (hmminfo->is_triphone) {
00412       fprintf(fp, "%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00413     } else {
00414       fprintf(fp, "%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00415     }
00416   }
00417   if (hmminfo->totalpseudonum != 0) {
00418     fprintf(fp, "%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum);
00419   }
00420   fprintf(fp, "%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
00421   fprintf(fp, "%5d base phones in logical HMM\n", hmminfo->basephone.num);
00422   fprintf(fp, "%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
00423 }