Julius 4.2
|
00001 00025 /* 00026 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00027 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00028 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00029 * All rights reserved 00030 */ 00031 00032 #include <sent/stddefs.h> 00033 #include <sent/vocabulary.h> 00034 #include <sent/htk_hmm.h> 00035 00036 /* 00037 * dictinary format: 00038 * 00039 * 1 words per line. 00040 * 00041 * fields: GrammarEntry [OutputString] phone1 phone2 .... 00042 * 00043 * GrammarEntry 00044 * (for N-gram) 00045 * word name in N-gram 00046 * (for DFA) 00047 * terminal symbol ID 00048 * 00049 * [OutputString] 00050 * String to output when the word is recognized. 00051 * 00052 * {OutputString} 00053 * String to output when the word is recognized. 00054 * Also specifies that this word is transparent 00055 * 00056 * phone1 phon2 .... 00057 * sequence of logical HMM name (normally phoneme) 00058 * to express the pronunciation 00059 */ 00060 00061 #define PHONEMELEN_STEP 30 ///< Memory allocation step for phoneme sequence 00062 static char buf[MAXLINELEN]; 00063 static char bufbak[MAXLINELEN]; 00064 00065 static char trbuf[3][20]; 00066 static char chbuf[30]; 00067 static char nophone[1]; 00068 static int trp_l; 00069 static int trp; 00070 static int trp_r; 00071 00079 char * 00080 cycle_triphone(char *p) 00081 { 00082 int i; 00083 00084 if (p == NULL) { /* initialize */ 00085 nophone[0]='\0'; 00086 for(i=0;i<3;i++) trbuf[i][0] = '\0'; 00087 trp_l = 0; 00088 trp = 1; 00089 trp_r = 2; 00090 return NULL; 00091 } 00092 00093 strcpy(trbuf[trp_r],p); 00094 00095 chbuf[0]='\0'; 00096 if (trbuf[trp_l][0] != '\0') { 00097 strcat(chbuf,trbuf[trp_l]); 00098 strcat(chbuf,HMM_LC_DLIM); 00099 } 00100 if (trbuf[trp][0] == '\0') { 00101 i = trp_l; 00102 trp_l = trp; 00103 trp = trp_r; 00104 trp_r = i; 00105 return NULL; 00106 } 00107 strcat(chbuf, trbuf[trp]); 00108 if (trbuf[trp_r][0] != '\0') { 00109 strcat(chbuf,HMM_RC_DLIM); 00110 strcat(chbuf,trbuf[trp_r]); 00111 } 00112 i = trp_l; 00113 trp_l = trp; 00114 trp = trp_r; 00115 trp_r = i; 00116 00117 return(chbuf); 00118 } 00119 00125 char * 00126 cycle_triphone_flush() 00127 { 00128 return(cycle_triphone(nophone)); 00129 } 00130 00137 static void 00138 add_to_error(WORD_INFO *winfo, char *name) 00139 { 00140 char *buf; 00141 char *match; 00142 00143 buf = (char *)mymalloc(strlen(name) + 1); 00144 strcpy(buf, name); 00145 if (winfo->errph_root == NULL) { 00146 winfo->errph_root = aptree_make_root_node(buf, &(winfo->mroot)); 00147 } else { 00148 match = aptree_search_data(buf, winfo->errph_root); 00149 if (match == NULL || !strmatch(match, buf)) { 00150 aptree_add_entry(buf, buf, match, &(winfo->errph_root), &(winfo->mroot)); 00151 } 00152 } 00153 } 00154 00160 static void 00161 callback_list_error(void *x) 00162 { 00163 char *name; 00164 name = x; 00165 jlog("Error: voca_load_htkdict: %s\n", name); 00166 } 00172 static void 00173 list_error(WORD_INFO *winfo) 00174 { 00175 jlog("Error: voca_load_htkdict: begin missing phones\n"); 00176 aptree_traverse_and_do(winfo->errph_root, callback_list_error); 00177 jlog("Error: voca_load_htkdict: end missing phones\n"); 00178 } 00179 00185 void 00186 voca_set_stats(WORD_INFO *winfo) 00187 { 00188 int w,p,n; 00189 int maxwn; 00190 int maxwlen; 00191 int states; 00192 int models; 00193 int trnum; 00194 00195 maxwn = 0; 00196 maxwlen = 0; 00197 states = 0; 00198 models = 0; 00199 trnum = 0; 00200 for (w=0;w<winfo->num;w++) { 00201 models += winfo->wlen[w]; 00202 if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w]; 00203 n = 0; 00204 for (p=0;p<winfo->wlen[w];p++) { 00205 n += hmm_logical_state_num(winfo->wseq[w][p]) - 2; 00206 } 00207 if (maxwn < n) maxwn = n; 00208 states += n; 00209 if (winfo->is_transparent[w]) trnum++; 00210 } 00211 winfo->maxwn = maxwn; 00212 winfo->maxwlen = maxwlen; 00213 winfo->totalstatenum = states; 00214 winfo->totalmodelnum = models; 00215 winfo->totaltransnum = trnum; 00216 } 00217 00227 void 00228 voca_load_start(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) 00229 { 00230 winfo->ok_flag = TRUE; 00231 winfo->linenum = 0; 00232 if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) { 00233 winfo->do_conv = TRUE; 00234 } else { 00235 winfo->do_conv = FALSE; 00236 } 00237 winfo_init(winfo); 00238 winfo->num = 0; 00239 } 00240 00254 boolean 00255 voca_load_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) 00256 { 00257 WORD_ID vnum; 00258 00259 winfo->linenum++; 00260 vnum = winfo->num; 00261 if (vnum >= winfo->maxnum) { 00262 if (winfo_expand(winfo) == FALSE) return FALSE; 00263 } 00264 if (voca_load_htkdict_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag)) == FALSE) { 00265 return FALSE; 00266 } 00267 winfo->num = vnum; 00268 return TRUE; 00269 } 00270 00283 boolean 00284 voca_load_end(WORD_INFO *winfo) 00285 { 00286 voca_set_stats(winfo); 00287 if (!winfo->ok_flag) { 00288 if (winfo->errph_root != NULL) list_error(winfo); 00289 } 00290 return(winfo->ok_flag); 00291 } 00292 00293 00304 boolean 00305 voca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) 00306 { 00307 boolean ret; 00308 00309 voca_load_start(winfo, hmminfo, ignore_tri_conv); 00310 while (getl(buf, sizeof(buf), fp) != NULL) { 00311 if (voca_load_line(buf, winfo, hmminfo) == FALSE) break; 00312 } 00313 ret = voca_load_end(winfo); 00314 00315 return(ret); 00316 } 00317 00318 00329 boolean 00330 voca_load_htkdict_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) 00331 { 00332 boolean ret; 00333 00334 voca_load_start(winfo, hmminfo, ignore_tri_conv); 00335 while(getl_fp(buf, MAXLINELEN, fp) != NULL) { 00336 if (voca_load_line(buf, winfo, hmminfo) == FALSE) break; 00337 } 00338 ret = voca_load_end(winfo); 00339 00340 return(ret); 00341 } 00342 00353 boolean 00354 voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) 00355 { 00356 voca_load_line(entry, winfo, hmminfo); 00357 return(voca_load_end(winfo)); 00358 } 00359 00373 boolean 00374 voca_load_htkdict_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag) 00375 { 00376 char *ptmp, *lp = NULL, *p; 00377 static char cbuf[MAX_HMMNAME_LEN]; 00378 HMM_Logical **tmpwseq; 00379 int len; 00380 HMM_Logical *tmplg; 00381 boolean pok; 00382 int vnum; 00383 00384 vnum = *vnum_p; 00385 00386 if (strmatch(buf, "DICEND")) return FALSE; 00387 00388 /* allocate temporal work area for the first call */ 00389 if (winfo->work == NULL) { 00390 winfo->work_num = PHONEMELEN_STEP; 00391 winfo->work = (void *)mybmalloc2(sizeof(HMM_Logical *) * winfo->work_num, &(winfo->mroot)); 00392 } 00393 tmpwseq = (HMM_Logical **)winfo->work; 00394 00395 /* backup whole line for debug output */ 00396 strcpy(bufbak, buf); 00397 00398 /* GrammarEntry */ 00399 if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) { 00400 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00401 winfo->errnum++; 00402 *ok_flag = FALSE; 00403 return TRUE; 00404 } 00405 winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); 00406 00407 /* just move pointer to next token */ 00408 if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { 00409 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00410 winfo->errnum++; 00411 *ok_flag = FALSE; 00412 return TRUE; 00413 } 00414 #ifdef CLASS_NGRAM 00415 winfo->cprob[vnum] = 0.0; /* prob = 1.0, logprob = 0.0 */ 00416 #endif 00417 00418 if (ptmp[0] == '@') { /* class N-gram prob */ 00419 #ifdef CLASS_NGRAM 00420 /* word probability within the class (for class N-gram) */ 00421 /* format: classname @classprob wordname [output] phoneseq */ 00422 /* classname equals to wname, and wordname will be omitted */ 00423 /* format: @%f (log scale) */ 00424 /* if "@" not found or "@0", it means class == word */ 00425 if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { 00426 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00427 winfo->errnum++; 00428 *ok_flag = FALSE; 00429 return TRUE; 00430 } 00431 if (ptmp[1] == '\0') { /* space between '@' and figures */ 00432 jlog("Error: voca_load_htkdict: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak); 00433 winfo->errnum++; 00434 *ok_flag = FALSE; 00435 return TRUE; 00436 } 00437 winfo->cprob[vnum] = atof(&(ptmp[1])); 00438 if (winfo->cprob[vnum] != 0.0) winfo->cwnum++; 00439 /* read next word entry (just skip them) */ 00440 if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) { 00441 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum,bufbak); 00442 winfo->errnum++; 00443 *ok_flag = FALSE; 00444 return TRUE; 00445 } 00446 /* move to the next word entry */ 00447 if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) { 00448 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00449 winfo->errnum++; 00450 *ok_flag = FALSE; 00451 return TRUE; 00452 } 00453 #else /* ~CLASS_NGRAM */ 00454 jlog("Error: voca_load_htkdict: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak); 00455 winfo->errnum++; 00456 *ok_flag = FALSE; 00457 return TRUE; 00458 #endif /* CLASS_NGRAM */ 00459 } 00460 00461 /* OutputString */ 00462 switch(ptmp[0]) { 00463 case '[': /* not transparent word */ 00464 winfo->is_transparent[vnum] = FALSE; 00465 ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0); 00466 break; 00467 case '{': /* transparent word */ 00468 winfo->is_transparent[vnum] = TRUE; 00469 ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0); 00470 break; 00471 default: 00472 #if 1 00473 /* ALLOW no entry for output */ 00474 /* same as wname is used */ 00475 winfo->is_transparent[vnum] = FALSE; 00476 ptmp = winfo->wname[vnum]; 00477 #else 00478 /* error */ 00479 jlog("Error: voca_load_htkdict: line %d: missing output string??\n> %s\n", linenum, bufbak); 00480 winfo->errnum++; 00481 *ok_flag = FALSE; 00482 return TRUE; 00483 #endif 00484 } 00485 if (ptmp == NULL) { 00486 jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak); 00487 winfo->errnum++; 00488 *ok_flag = FALSE; 00489 return TRUE; 00490 } 00491 winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp); 00492 00493 /* phoneme sequence */ 00494 if (hmminfo == NULL) { 00495 /* don't read */ 00496 winfo->wseq[vnum] = NULL; 00497 winfo->wlen[vnum] = 0; 00498 } else { 00499 00500 /* store converted phone sequence to temporal bufffer */ 00501 len = 0; 00502 00503 if (do_conv) { 00504 /* convert phoneme to triphone expression (word-internal) */ 00505 cycle_triphone(NULL); 00506 if ((lp = mystrtok(NULL, " \t\n")) == NULL) { 00507 jlog("Error: voca_load_htkdict: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak); 00508 winfo->errnum++; 00509 *ok_flag = FALSE; 00510 return TRUE; 00511 } 00512 cycle_triphone(lp); 00513 } 00514 00515 pok = TRUE; 00516 for (;;) { 00517 if (do_conv) { 00518 /* if (lp != NULL) jlog(" %d%s",len,lp);*/ 00519 if (lp != NULL) lp = mystrtok(NULL, " \t\n"); 00520 if (lp != NULL) p = cycle_triphone(lp); 00521 else p = cycle_triphone_flush(); 00522 } else { 00523 p = mystrtok(NULL, " \t\n"); 00524 } 00525 if (p == NULL) break; 00526 00527 /* both defined/pseudo phone is allowed */ 00528 tmplg = htk_hmmdata_lookup_logical(hmminfo, p); 00529 if (tmplg == NULL) { 00530 /* not found */ 00531 if (do_conv) { 00532 /* both defined or pseudo phone are not found */ 00533 if (len == 0 && lp == NULL) { 00534 jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", linenum, p, p); 00535 snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p); 00536 } else if (len == 0) { 00537 jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", linenum, p, p); 00538 snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p); 00539 } else if (lp == NULL) { 00540 jlog("Error: voca_load_htkdict: line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", linenum, p, p); 00541 snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p); 00542 } else { 00543 jlog("Error: voca_load_htkdict: line %d: triphone \"%s\" not found\n", linenum, p); 00544 snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p); 00545 } 00546 } else { 00547 jlog("Error: voca_load_htkdict: line %d: phone \"%s\" not found\n", linenum, p); 00548 snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p); 00549 } 00550 add_to_error(winfo, cbuf); 00551 pok = FALSE; 00552 } else { 00553 /* found */ 00554 if (len >= winfo->work_num) { 00555 /* expand wseq area by PHONEMELEN_STEP */ 00556 winfo->work_num += PHONEMELEN_STEP; 00557 winfo->work = (void *)mybmalloc2(sizeof(HMM_Logical *) * winfo->work_num, &(winfo->mroot)); 00558 memcpy(winfo->work, tmpwseq, sizeof(HMM_Logical *) * (winfo->work_num - PHONEMELEN_STEP)); 00559 tmpwseq = (HMM_Logical **)winfo->work; 00560 } 00561 /* store to temporal buffer */ 00562 tmpwseq[len] = tmplg; 00563 } 00564 len++; 00565 } 00566 if (!pok) { /* error in phoneme */ 00567 jlog("Error: voca_load_htkdict: the line content was: %s\n", bufbak); 00568 winfo->errnum++; 00569 *ok_flag = FALSE; 00570 return TRUE; 00571 } 00572 if (len == 0) { 00573 jlog("Error: voca_load_htkdict: line %d: no phone specified:\n> %s\n", linenum, bufbak); 00574 winfo->errnum++; 00575 *ok_flag = FALSE; 00576 return TRUE; 00577 } 00578 /* store to winfo */ 00579 winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot)); 00580 memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len); 00581 winfo->wlen[vnum] = len; 00582 } 00583 00584 vnum++; 00585 00586 *vnum_p = vnum; 00587 00588 return(TRUE); 00589 } 00590 00602 boolean 00603 voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) 00604 { 00605 WORD_ID w; 00606 int ph; 00607 char *p; 00608 HMM_Logical *tmplg; 00609 boolean ok_flag = TRUE; 00610 00611 for (w=0;w<winfo->num;w++) { 00612 cycle_triphone(NULL); 00613 cycle_triphone(winfo->wseq[w][0]->name); 00614 00615 for (ph = 0; ph < winfo->wlen[w] ; ph++) { 00616 if (ph == winfo->wlen[w] - 1) { 00617 p = cycle_triphone_flush(); 00618 } else { 00619 p = cycle_triphone(winfo->wseq[w][ph + 1]->name); 00620 } 00621 if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) { 00622 jlog("Error: voca_load_htkdict: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p); 00623 ok_flag = FALSE; 00624 continue; 00625 } 00626 winfo->wseq[w][ph] = tmplg; 00627 } 00628 } 00629 return (ok_flag); 00630 } 00631 00643 boolean 00644 voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset) 00645 { 00646 WORD_ID n, w; 00647 int i; 00648 00649 n = woffset; 00650 while (n >= dstinfo->maxnum) { 00651 if (winfo_expand(dstinfo) == FALSE) return FALSE; 00652 } 00653 for(w=0;w<srcinfo->num;w++) { 00654 /* copy data */ 00655 dstinfo->wlen[n] = srcinfo->wlen[w]; 00656 if (srcinfo->wname[w]) { 00657 dstinfo->wname[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->wname[w])+1, &(dstinfo->mroot)), srcinfo->wname[w]); 00658 } else { 00659 dstinfo->wname[n] = NULL; 00660 } 00661 if (srcinfo->woutput[w]) { 00662 dstinfo->woutput[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->woutput[w])+1, &(dstinfo->mroot)), srcinfo->woutput[w]); 00663 } else { 00664 dstinfo->woutput[n] = NULL; 00665 } 00666 if (srcinfo->wlen[w] > 0) { 00667 dstinfo->wseq[n] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * srcinfo->wlen[w], &(dstinfo->mroot)); 00668 for(i=0;i<srcinfo->wlen[w];i++) { 00669 dstinfo->wseq[n][i] = srcinfo->wseq[w][i]; 00670 } 00671 } else { 00672 dstinfo->wseq[n] = NULL; 00673 } 00674 #ifdef CLASS_NGRAM 00675 dstinfo->cprob[n] = srcinfo->cprob[w]; 00676 if (dstinfo->cprob[n] != 0.0) dstinfo->cwnum++; 00677 #endif 00678 dstinfo->is_transparent[n] = srcinfo->is_transparent[w]; 00679 /* offset category ID by coffset */ 00680 dstinfo->wton[n] = srcinfo->wton[w] + coffset; 00681 00682 n++; 00683 if (n >= dstinfo->maxnum) { 00684 if (winfo_expand(dstinfo) == FALSE) return FALSE; 00685 } 00686 00687 } 00688 dstinfo->num = n; 00689 00690 /* compute maxwn */ 00691 voca_set_stats(dstinfo); 00692 00693 return TRUE; 00694 } 00695