Julius 4.2
libsent/src/voca/voca_load_wordlist.c
説明を見る。
00001 
00019 /*
00020  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00021  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00022  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00023  * All rights reserved
00024  */
00025 
00026 #include <sent/stddefs.h>
00027 #include <sent/vocabulary.h>
00028 #include <sent/htk_hmm.h>
00029 
00030 /* 
00031  * dictionary format:
00032  * 
00033  * one word per line.
00034  * 
00035  * fields: OutputString phone1 phone2 ....
00036  * 
00037  *     OutputString
00038  *                 String to output when the word is recognized.
00039  *
00040  *     phone1 phone2 ....
00041  *                 sequence of logical HMM names (normally phonemes)
00042  *                 that expresses the pronunciation
00043  */
00044 
00045 #define PHONEMELEN_STEP  30     ///< Memory allocation step for phoneme sequence
00046 static char buf[MAXLINELEN];    ///< Line buffer filled by getl()/getl_fp() in the file-loading loops
00047 static char bufbak[MAXLINELEN]; ///< Backup of the whole input line, printed in error messages
00048 
00055 static void
00056 add_to_error(WORD_INFO *winfo, char *name)
00057 {
00058   char *buf;
00059   char *match;
00060 
00061   buf = (char *)mymalloc(strlen(name) + 1);
00062   strcpy(buf, name);
00063   if (winfo->errph_root == NULL) {
00064     winfo->errph_root = aptree_make_root_node(buf, &(winfo->mroot));
00065   } else {
00066     match = aptree_search_data(buf, winfo->errph_root);
00067     if (match == NULL || !strmatch(match, buf)) {
00068       aptree_add_entry(buf, buf, match, &(winfo->errph_root), &(winfo->mroot));
00069     }
00070   }
00071 }
00072 
/**
 * Traversal callback: log one stored entry as an error line.
 *
 * @param x [in] entry data, treated as a NUL-terminated string
 */
static void
callback_list_error(void *x)
{
  jlog("Error: voca_load_wordlist: %s\n", (char *)x);
}
00090 static void
00091 list_error(WORD_INFO *winfo)
00092 {
00093   jlog("Error: voca_load_wordlist: begin missing phones\n");
00094   aptree_traverse_and_do(winfo->errph_root, callback_list_error);
00095   jlog("Error: voca_load_wordlist: end missing phones\n");
00096 }
00097 
00113 boolean
00114 voca_load_word_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone)
00115 {
00116   WORD_ID vnum;
00117 
00118   winfo->linenum++;
00119   vnum = winfo->num;
00120   if (vnum >= winfo->maxnum) {
00121     if (winfo_expand(winfo) == FALSE) return FALSE;
00122   }
00123   if (voca_load_wordlist_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag), headphone, tailphone, contextphone) == FALSE) {
00124     return FALSE;
00125   }
00126   winfo->num = vnum;
00127   return TRUE;
00128 }
00141 boolean
00142 voca_load_wordlist(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone)
00143 {
00144   boolean ret;
00145 
00146   voca_load_start(winfo, hmminfo, FALSE);
00147   while (getl(buf, sizeof(buf), fp) != NULL) {
00148     if (voca_load_word_line(buf, winfo, hmminfo, headphone, tailphone, contextphone) == FALSE) break;
00149   }
00150   ret = voca_load_end(winfo);
00151 
00152   return(ret);
00153 }
00154 
00155 
00168 boolean
00169 voca_load_wordlist_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone)
00170 {
00171   boolean ret;
00172 
00173   voca_load_start(winfo, hmminfo, FALSE);
00174   while (getl_fp(buf, sizeof(buf), fp) != NULL) {
00175     if (voca_load_word_line(buf, winfo, hmminfo, headphone, tailphone, contextphone) == FALSE) break;
00176   }
00177   ret = voca_load_end(winfo);
00178 
00179   return(ret);
00180 }
00181 
00198 boolean
00199 voca_load_wordlist_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone)
00200 {
00201   char *ptmp, *lp = NULL, *p;
00202   static char cbuf[MAX_HMMNAME_LEN];
00203   static HMM_Logical **tmpwseq = NULL;
00204   static int tmpmaxlen;
00205   int len;
00206   HMM_Logical *tmplg;
00207   boolean pok, first;
00208   int vnum;
00209 
00210   vnum = *vnum_p;
00211 
00212   if (strmatch(buf, "DICEND")) return FALSE;
00213 
00214   /* allocate temporal work area for the first call */
00215   if (tmpwseq == NULL) {
00216     tmpmaxlen = PHONEMELEN_STEP;
00217     tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
00218   }
00219 
00220   /* backup whole line for debug output */
00221   strcpy(bufbak, buf);
00222   
00223   /* Output string */
00224   if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) {
00225     jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00226     winfo->errnum++;
00227     *ok_flag = FALSE;
00228     return TRUE;
00229   }
00230   winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00231 
00232   /* reset transparent flag */
00233   winfo->is_transparent[vnum] = FALSE;
00234 
00235   /* just move pointer to next token */
00236   if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00237     jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00238     winfo->errnum++;
00239     *ok_flag = FALSE;
00240     return TRUE;
00241   }
00242 #ifdef CLASS_NGRAM
00243   winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
00244 #endif
00245   
00246   if (ptmp[0] == '@') {         /* class N-gram prob */
00247 #ifdef CLASS_NGRAM
00248     /* word probability within the class (for class N-gram) */
00249     /* format: classname @classprob wordname [output] phoneseq */
00250     /* classname equals to wname, and wordname will be omitted */
00251     /* format: @%f (log scale) */
00252     /* if "@" not found or "@0", it means class == word */
00253     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00254       jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00255       winfo->errnum++;
00256       *ok_flag = FALSE;
00257       return TRUE;
00258     }
00259     if (ptmp[1] == '\0') {      /* space between '@' and figures */
00260       jlog("Error: voca_load_wordlist: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak);
00261       winfo->errnum++;
00262       *ok_flag = FALSE;
00263       return TRUE;
00264     }
00265     winfo->cprob[vnum] = atof(&(ptmp[1]));
00266     if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
00267     /* read next word entry (just skip them) */
00268     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00269       jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum,bufbak);
00270       winfo->errnum++;
00271       *ok_flag = FALSE;
00272       return TRUE;
00273     }
00274     /* move to the next word entry */
00275     if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00276       jlog("Error: voca_load_wordlist: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00277       winfo->errnum++;
00278       *ok_flag = FALSE;
00279       return TRUE;
00280     }
00281 #else  /* ~CLASS_NGRAM */
00282     jlog("Error: voca_load_wordlist: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak);
00283     winfo->errnum++;
00284     *ok_flag = FALSE;
00285     return TRUE;
00286 #endif /* CLASS_NGRAM */
00287   }
00288 
00289   /* OutputString */
00290   switch(ptmp[0]) {
00291   case '[':                     /* ignore transparency */
00292     ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
00293     break;
00294   case '{':                     /* ignore transparency */
00295     ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
00296     break;
00297   default:
00298     /* ALLOW no entry for output */
00299     /* same as wname is used */
00300     ptmp = winfo->wname[vnum];
00301   }
00302   if (ptmp == NULL) {
00303     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00304     winfo->errnum++;
00305     *ok_flag = FALSE;
00306     return TRUE;
00307   }
00308   winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00309     
00310   /* phoneme sequence */
00311   if (hmminfo == NULL) {
00312     /* don't read */
00313     winfo->wseq[vnum] = NULL;
00314     winfo->wlen[vnum] = 0;
00315   } else {
00316 
00317     len = 0;
00318     first = TRUE;
00319     pok = TRUE;
00320 
00321     for (;;) {
00322       if (do_conv) {
00323         if (first) {
00324           /* init phone cycler */
00325           cycle_triphone(NULL);
00326           /* insert head phone at beginning of word */
00327           if (contextphone) {
00328             cycle_triphone(contextphone);
00329           } else {
00330             cycle_triphone("NULL_C");
00331           }
00332           if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
00333             jlog("Error: voca_load_wordlist: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak);
00334             winfo->errnum++;
00335             *ok_flag = FALSE;
00336             return TRUE;
00337           }
00338           p = cycle_triphone(lp);
00339           first = FALSE;
00340         } else {                /* do_conv, not first */
00341           if (lp != NULL) {     /* some token processed at last loop */
00342             lp = mystrtok(NULL, " \t\n");
00343             if (lp != NULL) {
00344               /* token exist */
00345               p = cycle_triphone(lp);
00346             } else {
00347               /* no more token, insert tail phone at end of word */
00348               if (contextphone) {
00349                 p = cycle_triphone(contextphone);
00350               } else {
00351                 p = cycle_triphone("NULL_C");
00352               }
00353             }
00354           } else {              /* no more token at last input  */
00355             /* flush tone cycler */
00356             p = cycle_triphone_flush();
00357           }
00358         }
00359       } else {                  /* not do_conv */
00360         if (first) {
00361           p = lp = headphone;
00362           first = FALSE;
00363         } else {
00364           if (lp != NULL) {     /* some token processed at last loop */
00365             p = lp = mystrtok(NULL, " \t\n");
00366             /* if no more token, use tailphone */
00367             if (lp == NULL) p = tailphone;
00368           } else {
00369             /* no more token at last input, exit loop */
00370             p = NULL;
00371           }
00372         }
00373       }
00374       if (p == NULL) break;
00375       /* for headphone and tailphone, their context should not be handled */
00376       /* and when they appear as context they should be replaced by contextphone */
00377       if (do_conv) {
00378         center_name(p, cbuf);
00379         if (contextphone) {
00380           if (strmatch(cbuf, contextphone)) {
00381             if (len == 0) {
00382               p = headphone;
00383             } else if (lp == NULL) {
00384               p = tailphone;
00385             }
00386           }
00387         } else {
00388           if (strmatch(cbuf, "NULL_C")) {
00389             if (len == 0) {
00390               p = headphone;
00391             } else if (lp == NULL) {
00392               p = tailphone;
00393             }
00394           } else {
00395             if (strnmatch(p, "NULL_C", 6)) {
00396               if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) {
00397                 p = cbuf;
00398               } else {
00399                 p = rightcenter_name(p, cbuf);
00400               }
00401             } else if (strnmatch(&(p[strlen(p)-6]), "NULL_C", 6)) {
00402               p = leftcenter_name(p, cbuf);
00403             }
00404           }
00405         }
00406       }
00407       //printf("[[%s]]\n", p);
00408 
00409       /* both defined/pseudo phone is allowed */
00410       tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
00411       if (tmplg == NULL) {
00412         /* not found */
00413         if (do_conv) {
00414           /* logical phone was not found */
00415           jlog("Error: voca_load_wordlist: line %d: logical phone \"%s\" not found\n", linenum, p);
00416           snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
00417         } else {
00418           jlog("Error: voca_load_wordlist: line %d: phone \"%s\" not found\n", linenum, p);
00419           snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
00420         }
00421         add_to_error(winfo, cbuf);
00422         pok = FALSE;
00423       } else {
00424         /* found */
00425         if (len >= tmpmaxlen) {
00426           /* expand wseq area by PHONEMELEN_STEP */
00427           tmpmaxlen += PHONEMELEN_STEP;
00428           tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
00429         }
00430         /* store to temporal buffer */
00431         tmpwseq[len] = tmplg;
00432       }
00433       len++;
00434     }
00435     if (!pok) {                 /* error in phoneme */
00436       jlog("Error: voca_load_wordlist: the line content was: %s\n", bufbak);
00437       winfo->errnum++;
00438       *ok_flag = FALSE;
00439       return TRUE;
00440     }
00441     if (len == 0) {
00442       jlog("Error: voca_load_wordlist: line %d: no phone specified:\n> %s\n", linenum, bufbak);
00443       winfo->errnum++;
00444       *ok_flag = FALSE;
00445       return TRUE;
00446     }
00447     /* store to winfo */
00448     winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot));
00449     memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len);
00450     winfo->wlen[vnum] = len;
00451     winfo->wton[vnum] = 0;
00452   }
00453 
00454   vnum++;
00455   *vnum_p = vnum;
00456   
00457   return(TRUE);
00458 }
00459