Julius 4.2
|
00001 00019 /* 00020 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00021 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00022 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00023 * All rights reserved 00024 */ 00025 00026 #include <sent/stddefs.h> 00027 #include <sent/vocabulary.h> 00028 #include <sent/htk_hmm.h> 00029 00030 /* 00031 * dictinary format: 00032 * 00033 * 1 words per line. 00034 * 00035 * fields: OutputString phone1 phone2 .... 00036 * 00037 * OutputString 00038 * String to output when the word is recognized. 00039 * 00040 * phone1 phone2 .... 00041 * sequence of logical HMM name (normally phoneme) 00042 * to express the pronunciation 00043 */ 00044 00045 #define PHONEMELEN_STEP 30 ///< Memory allocation step for phoneme sequence 00046 static char buf[MAXLINELEN]; 00047 static char bufbak[MAXLINELEN]; 00048 00055 static void 00056 add_to_error(WORD_INFO *winfo, char *name) 00057 { 00058 char *buf; 00059 char *match; 00060 00061 buf = (char *)mymalloc(strlen(name) + 1); 00062 strcpy(buf, name); 00063 if (winfo->errph_root == NULL) { 00064 winfo->errph_root = aptree_make_root_node(buf, &(winfo->mroot)); 00065 } else { 00066 match = aptree_search_data(buf, winfo->errph_root); 00067 if (match == NULL || !strmatch(match, buf)) { 00068 aptree_add_entry(buf, buf, match, &(winfo->errph_root), &(winfo->mroot)); 00069 } 00070 } 00071 } 00072 00078 static void 00079 callback_list_error(void *x) 00080 { 00081 char *name; 00082 name = x; 00083 jlog("Error: voca_load_wordlist: %s\n", name); 00084 } 00090 static void 00091 list_error(WORD_INFO *winfo) 00092 { 00093 jlog("Error: voca_load_wordlist: begin missing phones\n"); 00094 aptree_traverse_and_do(winfo->errph_root, callback_list_error); 00095 jlog("Error: voca_load_wordlist: end missing phones\n"); 00096 } 00097 00113 boolean 00114 voca_load_word_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) 00115 { 00116 WORD_ID vnum; 00117 00118 winfo->linenum++; 00119 vnum = winfo->num; 00120 if (vnum >= winfo->maxnum) { 00121 if (winfo_expand(winfo) == FALSE) return FALSE; 00122 } 00123 if (voca_load_wordlist_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag), headphone, tailphone, contextphone) == FALSE) { 00124 return FALSE; 00125 } 00126 winfo->num = vnum; 00127 return TRUE; 00128 } 00141 boolean 00142 voca_load_wordlist(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) 00143 { 00144 boolean ret; 00145 00146 voca_load_start(winfo, hmminfo, FALSE); 00147 while (getl(buf, sizeof(buf), fp) != NULL) { 00148 if (voca_load_word_line(buf, winfo, hmminfo, headphone, tailphone, contextphone) == FALSE) break; 00149 } 00150 ret = voca_load_end(winfo); 00151 00152 return(ret); 00153 } 00154 00155 00168 boolean 00169 voca_load_wordlist_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) 00170 { 00171 boolean ret; 00172 00173 voca_load_start(winfo, hmminfo, FALSE); 00174 while (getl_fp(buf, sizeof(buf), fp) != NULL) { 00175 if (voca_load_word_line(buf, winfo, hmminfo, headphone, tailphone, contextphone) == FALSE) break; 00176 } 00177 ret = voca_load_end(winfo); 00178 00179 return(ret); 00180 } 00181 00198 boolean 00199 voca_load_wordlist_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone) 00200 { 00201 char *ptmp, *lp = NULL, *p; 00202 static char cbuf[MAX_HMMNAME_LEN]; 00203 static HMM_Logical **tmpwseq = NULL; 00204 static int tmpmaxlen; 00205 int len; 00206 HMM_Logical *tmplg; 00207 boolean pok, first; 00208 int vnum; 00209 00210 vnum = *vnum_p; 00211 00212 if (strmatch(buf, "DICEND")) return FALSE; 00213 00214 /* allocate temporal work area for the first call */ 00215 if (tmpwseq == NULL) { 00216 tmpmaxlen = PHONEMELEN_STEP; 00217 tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen); 00218 } 00219 00220 /* backup whole line for debug output */ 00221 strcpy(bufbak, buf); 00222 00223 /* Output string */ 00224 if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) { 00225 jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00226 winfo->errnum++; 00227 *ok_flag = FALSE; 00228 return TRUE; 00229 } 00230 winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); 00231 00232 /* reset transparent flag */ 00233 winfo->is_transparent[vnum] = FALSE; 00234 00235 /* just move pointer to next token */ 00236 if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { 00237 jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00238 winfo->errnum++; 00239 *ok_flag = FALSE; 00240 return TRUE; 00241 } 00242 #ifdef CLASS_NGRAM 00243 winfo->cprob[vnum] = 0.0; /* prob = 1.0, logprob = 0.0 */ 00244 #endif 00245 00246 if (ptmp[0] == '@') { /* class N-gram prob */ 00247 #ifdef CLASS_NGRAM 00248 /* word probability within the class (for class N-gram) */ 00249 /* format: classname @classprob wordname [output] phoneseq */ 00250 /* classname equals to wname, and wordname will be omitted */ 00251 /* format: @%f (log scale) */ 00252 /* if "@" not found or "@0", it means class == word */ 00253 if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { 00254 jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00255 winfo->errnum++; 00256 *ok_flag = FALSE; 00257 return TRUE; 00258 } 00259 if (ptmp[1] == '\0') { /* space between '@' and figures */ 00260 jlog("Error: voca_load_wordlist: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak); 00261 winfo->errnum++; 00262 *ok_flag = FALSE; 00263 return TRUE; 00264 } 00265 winfo->cprob[vnum] = atof(&(ptmp[1])); 00266 if (winfo->cprob[vnum] != 0.0) winfo->cwnum++; 00267 /* read next word entry (just skip them) */ 00268 if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { 00269 jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum,bufbak); 00270 winfo->errnum++; 00271 *ok_flag = FALSE; 00272 return TRUE; 00273 } 00274 /* move to the next word entry */ 00275 if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { 00276 jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00277 winfo->errnum++; 00278 *ok_flag = FALSE; 00279 return TRUE; 00280 } 00281 #else /* ~CLASS_NGRAM */ 00282 jlog("Error: voca_load_wordlist: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak); 00283 winfo->errnum++; 00284 *ok_flag = FALSE; 00285 return TRUE; 00286 #endif /* CLASS_NGRAM */ 00287 } 00288 00289 /* OutputString */ 00290 switch(ptmp[0]) { 00291 case '[': /* ignore transparency */ 00292 ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0); 00293 break; 00294 case '{': /* ignore transparency */ 00295 ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0); 00296 break; 00297 default: 00298 /* ALLOW no entry for output */ 00299 /* same as wname is used */ 00300 ptmp = winfo->wname[vnum]; 00301 } 00302 if (ptmp == NULL) { 00303 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00304 winfo->errnum++; 00305 *ok_flag = FALSE; 00306 return TRUE; 00307 } 00308 winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); 00309 00310 /* phoneme sequence */ 00311 if (hmminfo == NULL) { 00312 /* don't read */ 00313 winfo->wseq[vnum] = NULL; 00314 winfo->wlen[vnum] = 0; 00315 } else { 00316 00317 len = 0; 00318 first = TRUE; 00319 pok = TRUE; 00320 00321 for (;;) { 00322 if (do_conv) { 00323 if (first) { 00324 /* init phone cycler */ 00325 cycle_triphone(NULL); 00326 /* insert head phone at beginning of word */ 00327 if (contextphone) { 00328 cycle_triphone(contextphone); 00329 } else { 00330 cycle_triphone("NULL_C"); 00331 } 00332 if ((lp = mystrtok(NULL, " \t\n")) == NULL) { 00333 jlog("Error: voca_load_wordlist: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak); 00334 winfo->errnum++; 00335 *ok_flag = FALSE; 00336 return TRUE; 00337 } 00338 p = cycle_triphone(lp); 00339 first = FALSE; 00340 } else { /* do_conv, not first */ 00341 if (lp != NULL) { /* some token processed at last loop */ 00342 lp = mystrtok(NULL, " \t\n"); 00343 if (lp != NULL) { 00344 /* token exist */ 00345 p = cycle_triphone(lp); 00346 } else { 00347 /* no more token, insert tail phone at end of word */ 00348 if (contextphone) { 00349 p = cycle_triphone(contextphone); 00350 } else { 00351 p = cycle_triphone("NULL_C"); 00352 } 00353 } 00354 } else { /* no more token at last input */ 00355 /* flush tone cycler */ 00356 p = cycle_triphone_flush(); 00357 } 00358 } 00359 } else { /* not do_conv */ 00360 if (first) { 00361 p = lp = headphone; 00362 first = FALSE; 00363 } else { 00364 if (lp != NULL) { /* some token processed at last loop */ 00365 p = lp = mystrtok(NULL, " \t\n"); 00366 /* if no more token, use tailphone */ 00367 if (lp == NULL) p = tailphone; 00368 } else { 00369 /* no more token at last input, exit loop */ 00370 p = NULL; 00371 } 00372 } 00373 } 00374 if (p == NULL) break; 00375 /* for headphone and tailphone, their context should not be handled */ 00376 /* and when they appear as context they should be replaced by contextphone */ 00377 if (do_conv) { 00378 center_name(p, cbuf); 00379 if (contextphone) { 00380 if (strmatch(cbuf, contextphone)) { 00381 if (len == 0) { 00382 p = headphone; 00383 } else if (lp == NULL) { 00384 p = tailphone; 00385 } 00386 } 00387 } else { 00388 if (strmatch(cbuf, "NULL_C")) { 00389 if (len == 0) { 00390 p = headphone; 00391 } else if (lp == NULL) { 00392 p = tailphone; 00393 } 00394 } else { 00395 if (strnmatch(p, "NULL_C", 6)) { 00396 if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) { 00397 p = cbuf; 00398 } else { 00399 p = rightcenter_name(p, cbuf); 00400 } 00401 } else if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) { 00402 p = leftcenter_name(p, cbuf); 00403 } 00404 } 00405 } 00406 } 00407 //printf("[[%s]]\n", p); 00408 00409 /* both defined/pseudo phone is allowed */ 00410 tmplg = htk_hmmdata_lookup_logical(hmminfo, p); 00411 if (tmplg == NULL) { 00412 /* not found */ 00413 if (do_conv) { 00414 /* logical phone was not found */ 00415 jlog("Error: voca_load_wordlist: line %d: logical phone \"%s\" not found\n", linenum, p); 00416 snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p); 00417 } else { 00418 jlog("Error: voca_load_wordlist: line %d: phone \"%s\" not found\n", linenum, p); 00419 snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p); 00420 } 00421 add_to_error(winfo, cbuf); 00422 pok = FALSE; 00423 } else { 00424 /* found */ 00425 if (len >= tmpmaxlen) { 00426 /* expand wseq area by PHONEMELEN_STEP */ 00427 tmpmaxlen += PHONEMELEN_STEP; 00428 tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen); 00429 } 00430 /* store to temporal buffer */ 00431 tmpwseq[len] = tmplg; 00432 } 00433 len++; 00434 } 00435 if (!pok) { /* error in phoneme */ 00436 jlog("Error: voca_load_wordlist: the line content was: %s\n", bufbak); 00437 winfo->errnum++; 00438 *ok_flag = FALSE; 00439 return TRUE; 00440 } 00441 if (len == 0) { 00442 jlog("Error: voca_load_wordlist: line %d: no phone specified:\n> %s\n", linenum, bufbak); 00443 winfo->errnum++; 00444 *ok_flag = FALSE; 00445 return TRUE; 00446 } 00447 /* store to winfo */ 00448 winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot)); 00449 memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len); 00450 winfo->wlen[vnum] = len; 00451 winfo->wton[vnum] = 0; 00452 } 00453 00454 vnum++; 00455 *vnum_p = vnum; 00456 00457 return(TRUE); 00458 } 00459