Julius 4.2
|
00001 00032 /* 00033 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00034 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00035 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00036 * All rights reserved 00037 */ 00038 00039 #include <sent/htk_hmm.h> 00040 #include <sent/vocabulary.h> 00041 00047 void 00048 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo) 00049 { 00050 HMM_Logical *lg; 00051 char p[MAX_HMMNAME_LEN]; 00052 BASEPHONE *match = NULL, *new; 00053 APATNODE *root; 00054 int n; 00055 00056 n = 0; 00057 root = NULL; 00058 for(lg=hmminfo->lgstart; lg; lg=lg->next) { 00059 center_name(lg->name, p); 00060 if (root != NULL) { 00061 match = aptree_search_data(p, root); 00062 if (match != NULL && strmatch(match->name, p)) continue; 00063 } 00064 new = (BASEPHONE *)mybmalloc2(sizeof(BASEPHONE), &(hmminfo->mroot)); 00065 new->bgnflag = FALSE; 00066 new->endflag = FALSE; 00067 new->name = (char *)mybmalloc2(strlen(p)+1, &(hmminfo->mroot)); 00068 strcpy(new->name, p); 00069 if (root == NULL) root = aptree_make_root_node(new, &(hmminfo->mroot)); 00070 else aptree_add_entry(new->name, new, match->name, &root, &(hmminfo->mroot)); 00071 n++; 00072 } 00073 hmminfo->basephone.num = n; 00074 hmminfo->basephone.root = root; 00075 } 00076 00082 static void 00083 print_callback_detail(void *x) 00084 { 00085 BASEPHONE *b = x; 00086 printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag); 00087 } 00088 00094 static void 00095 print_callback_name(void *x) 00096 { 00097 BASEPHONE *b = x; 00098 printf("%s, ", b->name); 00099 } 00105 void 00106 print_all_basephone_detail(HMM_basephone *base) 00107 { 00108 aptree_traverse_and_do(base->root, print_callback_detail); 00109 } 00115 void 00116 print_all_basephone_name(HMM_basephone *base) 00117 { 00118 aptree_traverse_and_do(base->root, print_callback_name); 00119 printf("\n"); 00120 } 00121 00122 static int bncnt; 00123 static int edcnt; 00124 00130 static void 00131 count_callback(void *x) 00132 { 00133 BASEPHONE *b = x; 00134 if (b->bgnflag) bncnt++; 00135 if (b->endflag) edcnt++; 00136 } 00137 00144 static void 00145 count_all_phone(HMM_basephone *base) 00146 { 00147 bncnt = edcnt = 0; 00148 aptree_traverse_and_do(base->root, count_callback); 00149 base->bgnnum = bncnt; 00150 base->endnum = edcnt; 00151 } 00152 00159 static boolean 00160 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base) 00161 { 00162 WORD_ID w; 00163 char p[MAX_HMMNAME_LEN]; 00164 char *key; 00165 BASEPHONE *match; 00166 boolean ok_p = TRUE; 00167 00168 /* mark what is at beginning of word (can be right context) */ 00169 for(w=0;w<winfo->num;w++) { 00170 if (w == winfo->head_silwid) continue; 00171 key = center_name(winfo->wseq[w][0]->name, p); 00172 match = aptree_search_data(key, base->root); 00173 if (match != NULL && strmatch(match->name, key)) { 00174 match->bgnflag = TRUE; 00175 } else { 00176 /* not found!!! */ 00177 jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key); 00178 ok_p = FALSE; 00179 } 00180 } 00181 /* mark what is at end of word (can be left context) */ 00182 for(w=0;w<winfo->num;w++) { 00183 if (w == winfo->tail_silwid) continue; 00184 key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p); 00185 match = aptree_search_data(key, base->root); 00186 if (match != NULL && strmatch(match->name, key)) { 00187 match->endflag = TRUE; 00188 } else { 00189 /* not found!!! */ 00190 jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key); 00191 ok_p = FALSE; 00192 } 00193 } 00194 00195 return ok_p; 00196 } 00197 00198 00199 /* check if all possible triphones are exist in logical HMM */ 00200 /* temporal storage for aptree() callback */ 00201 static HTK_HMM_INFO *local_hmminfo; 00202 static WORD_INFO *local_winfo; 00203 static APATNODE *local_root; 00204 static WORD_ID current_w; 00205 static char gbuf[MAX_HMMNAME_LEN]; 00206 00207 static APATNODE *error_root; 00208 static int error_num; 00209 00216 static void 00217 add_to_error(char *lostname, HTK_HMM_INFO *hmminfo) 00218 { 00219 char *match = NULL, *new; 00220 if (error_root != NULL) { 00221 match = aptree_search_data(lostname, error_root); 00222 if (match != NULL && strmatch(match, lostname)) return; 00223 } 00224 new = (char *)mybmalloc2(strlen(lostname)+1, &(hmminfo->mroot)); 00225 strcpy(new, lostname); 00226 if (error_root == NULL) error_root = aptree_make_root_node(new, &(hmminfo->mroot)); 00227 else aptree_add_entry(new, new, match, &error_root, &(hmminfo->mroot)); 00228 00229 error_num++; 00230 } 00231 00237 static void 00238 print_error_callback(void *x) 00239 { 00240 char *p = x; 00241 printf("%s\n", p); 00242 } 00243 00251 static void 00252 triphone_callback_normal(void *x) 00253 { 00254 BASEPHONE *b = x; 00255 WORD_ID w = current_w; 00256 HMM_Logical *lg, *found; 00257 00258 if (b->endflag) { /* x can appear as end of word */ 00259 lg = local_winfo->wseq[w][0]; 00260 strcpy(gbuf, lg->name); 00261 add_left_context(gbuf, b->name); 00262 /* printf("checking \"%s\" - \"%s\"\n", b->name, lg->name); */ 00263 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) { 00264 if (lg->is_pseudo) { 00265 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name); 00266 add_to_error(gbuf, local_hmminfo); 00267 } 00268 } 00269 } 00270 if (b->bgnflag) { /* x can appear as beginning of word */ 00271 lg = local_winfo->wseq[w][local_winfo->wlen[w]-1]; 00272 strcpy(gbuf, lg->name); 00273 add_right_context(gbuf, b->name); 00274 /* printf("checking \"%s\" - \"%s\"\n", lg->name, b->name); */ 00275 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) { 00276 if (lg->is_pseudo) { 00277 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name); 00278 add_to_error(gbuf, local_hmminfo); 00279 } 00280 } 00281 } 00282 } 00283 00284 /* for words with only one phone, all combination of "x - current_w + x" 00285 should be checked */ 00293 static void 00294 triphone_callback_right(void *x) 00295 { 00296 BASEPHONE *b = x; 00297 WORD_ID w = current_w; 00298 HMM_Logical *lg, *found; 00299 static char buf[MAX_HMMNAME_LEN]; 00300 00301 if (b->bgnflag) { 00302 lg = local_winfo->wseq[w][0]; 00303 strcpy(buf, gbuf); 00304 add_right_context(buf, b->name); 00305 /* printf(" checking \"%s\" - \"%s\"\n", gbuf, b->name); */ 00306 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) { 00307 if (lg->is_pseudo) { 00308 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name); 00309 add_to_error(buf, local_hmminfo); 00310 } 00311 } 00312 } 00313 } 00314 00322 static void 00323 triphone_callback_left(void *x) 00324 { 00325 BASEPHONE *b = x; 00326 WORD_ID w = current_w; 00327 HMM_Logical *lg; 00328 00329 if (b->endflag) { 00330 lg = local_winfo->wseq[w][0]; 00331 strcpy(gbuf, lg->name); 00332 add_left_context(gbuf, b->name); 00333 aptree_traverse_and_do(local_root, triphone_callback_right); 00334 } 00335 } 00336 00344 void 00345 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) 00346 { 00347 WORD_ID w; 00348 local_hmminfo = hmminfo; 00349 local_winfo = winfo; 00350 local_root = hmminfo->basephone.root; 00351 error_root = NULL; 00352 error_num = 0; 00353 00354 printf("Inter-word triphone existence test...\n"); 00355 for(w=0;w<winfo->num;w++) { 00356 current_w = w; 00357 if (winfo->wlen[w] > 1) { 00358 /* check beginning phone and ending phone of this word */ 00359 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal); 00360 } else { 00361 /* for word of only 1 phoneme, check both */ 00362 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left); 00363 } 00364 } 00365 if (error_root == NULL) { 00366 printf("passed\n"); 00367 } else { 00368 printf("following triphones are missing in HMMList:\n"); 00369 aptree_traverse_and_do(error_root, print_error_callback); 00370 printf("total %d missing inter-word triphones\n", error_num); 00371 } 00372 } 00373 00374 00375 00385 boolean 00386 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) 00387 { 00388 /* gather base phones and word-{head,tail} phones */ 00389 jlog("Stat: chkhmmlist: Exploring HMM database and lexicon tree:\n"); 00390 if (mark_word_edge(winfo, &(hmminfo->basephone)) == FALSE) { 00391 return FALSE; 00392 } 00393 count_all_phone(&(hmminfo->basephone)); 00394 return TRUE; 00395 } 00396 00403 void 00404 print_phone_info(FILE *fp, HTK_HMM_INFO *hmminfo) 00405 { 00406 /* output information */ 00407 fprintf(fp, "%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum); 00408 if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) { 00409 fprintf(fp, " no HMMList, physical HMM names are redirected to logicalHMM\n"); 00410 } else { 00411 if (hmminfo->is_triphone) { 00412 fprintf(fp, "%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum); 00413 } else { 00414 fprintf(fp, "%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum); 00415 } 00416 } 00417 if (hmminfo->totalpseudonum != 0) { 00418 fprintf(fp, "%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum); 00419 } 00420 fprintf(fp, "%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum); 00421 fprintf(fp, "%5d base phones in logical HMM\n", hmminfo->basephone.num); 00422 fprintf(fp, "%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum); 00423 }