Julius 4.2
libsent/src/voca/voca_load_htkdict.c
説明を見る。
00001 
00025 /*
00026  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00027  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00028  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00029  * All rights reserved
00030  */
00031 
00032 #include <sent/stddefs.h>
00033 #include <sent/vocabulary.h>
00034 #include <sent/htk_hmm.h>
00035 
00036 /* 
00037  * dictionary format:
00038  * 
00039  * 1 word per line.
00040  * 
00041  * fields: GrammarEntry [OutputString] phone1 phone2 ....
00042  * 
00043  *     GrammarEntry
00044  *                 (for N-gram)
00045  *                 word name in N-gram
00046  *                 (for DFA)
00047  *                 terminal symbol ID
00048  *
00049  *     [OutputString]
00050  *                 String to output when the word is recognized.
00051  *
00052  *     {OutputString}
00053  *                 String to output when the word is recognized.
00054  *                 Also specifies that this word is transparent
00055  * 
00056  *     phone1 phone2 ....
00057  *                 sequence of logical HMM name (normally phoneme)
00058  *                 to express the pronunciation
00059  */
00060 
00061 #define PHONEMELEN_STEP  30     ///< Memory allocation step for phoneme sequence
00062 static char buf[MAXLINELEN];    ///< Line buffer filled by getl()/getl_fp() in the dictionary readers below
00063 static char bufbak[MAXLINELEN]; ///< Copy of the line being parsed, kept so error messages can show it
00064 
#define TRIPHONE_SLOT_LEN 20    ///< Size of one context slot (phone name including terminator)
/// Size of the composed-name buffer: three slot-length names plus the two
/// context delimiters and the terminator.  The 4 spare bytes assume short
/// delimiters -- TODO confirm against HMM_LC_DLIM / HMM_RC_DLIM.
#define TRIPHONE_NAME_LEN (3 * TRIPHONE_SLOT_LEN + 4)

static char trbuf[3][TRIPHONE_SLOT_LEN]; ///< Ring of three phone names (left / center / right context)
static char chbuf[TRIPHONE_NAME_LEN];    ///< Buffer holding the composed triphone name
static char nophone[1];         ///< Empty string passed as "no more phone" when flushing
static int  trp_l;              ///< Index into trbuf of the current left context
static int  trp;                ///< Index into trbuf of the current center phone
static int  trp_r;              ///< Index into trbuf of the current right context
00071 
00079 char *
00080 cycle_triphone(char *p)
00081 {
00082   int i;
00083   
00084   if (p == NULL) {              /* initialize */
00085     nophone[0]='\0';
00086     for(i=0;i<3;i++) trbuf[i][0] = '\0';
00087     trp_l = 0;
00088     trp   = 1;
00089     trp_r = 2;
00090     return NULL;
00091   }
00092 
00093   strcpy(trbuf[trp_r],p);
00094 
00095   chbuf[0]='\0';
00096   if (trbuf[trp_l][0] != '\0') {
00097     strcat(chbuf,trbuf[trp_l]);
00098     strcat(chbuf,HMM_LC_DLIM);
00099   }
00100   if (trbuf[trp][0] == '\0') {
00101     i = trp_l;
00102     trp_l = trp;
00103     trp = trp_r;
00104     trp_r = i;
00105     return NULL;
00106   }
00107   strcat(chbuf, trbuf[trp]);
00108   if (trbuf[trp_r][0] != '\0') {
00109     strcat(chbuf,HMM_RC_DLIM);
00110     strcat(chbuf,trbuf[trp_r]);
00111   }
00112   i = trp_l;
00113   trp_l = trp;
00114   trp = trp_r;
00115   trp_r = i;
00116 
00117   return(chbuf);
00118 }
00119 
00125 char *
00126 cycle_triphone_flush()
00127 {
00128   return(cycle_triphone(nophone));
00129 }
00130 
00137 static void
00138 add_to_error(WORD_INFO *winfo, char *name)
00139 {
00140   char *buf;
00141   char *match;
00142 
00143   buf = (char *)mymalloc(strlen(name) + 1);
00144   strcpy(buf, name);
00145   if (winfo->errph_root == NULL) {
00146     winfo->errph_root = aptree_make_root_node(buf, &(winfo->mroot));
00147   } else {
00148     match = aptree_search_data(buf, winfo->errph_root);
00149     if (match == NULL || !strmatch(match, buf)) {
00150       aptree_add_entry(buf, buf, match, &(winfo->errph_root), &(winfo->mroot));
00151     }
00152   }
00153 }
00154 
/**
 * Traversal callback: log one recorded missing-phone name.
 *
 * @param x [in] entry data, treated as a NUL-terminated string
 */
static void
callback_list_error(void *x)
{
  jlog("Error: voca_load_htkdict: %s\n", (char *)x);
}
00172 static void
00173 list_error(WORD_INFO *winfo)
00174 {
00175   jlog("Error: voca_load_htkdict: begin missing phones\n");
00176   aptree_traverse_and_do(winfo->errph_root, callback_list_error);
00177   jlog("Error: voca_load_htkdict: end missing phones\n");
00178 }
00179 
00185 void
00186 voca_set_stats(WORD_INFO *winfo)
00187 {
00188   int w,p,n;
00189   int maxwn;
00190   int maxwlen;
00191   int states;
00192   int models;
00193   int trnum;
00194 
00195   maxwn = 0;
00196   maxwlen = 0;
00197   states = 0;
00198   models = 0;
00199   trnum = 0;
00200   for (w=0;w<winfo->num;w++) {
00201     models += winfo->wlen[w];
00202     if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w];
00203     n = 0;
00204     for (p=0;p<winfo->wlen[w];p++) {
00205       n += hmm_logical_state_num(winfo->wseq[w][p]) - 2;
00206     }
00207     if (maxwn < n) maxwn = n;
00208     states += n;
00209     if (winfo->is_transparent[w]) trnum++;
00210   }
00211   winfo->maxwn = maxwn;
00212   winfo->maxwlen = maxwlen;
00213   winfo->totalstatenum = states;
00214   winfo->totalmodelnum = models;
00215   winfo->totaltransnum = trnum;
00216 }
00217 
00227 void
00228 voca_load_start(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00229 {
00230   winfo->ok_flag = TRUE;
00231   winfo->linenum = 0;
00232   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) {
00233     winfo->do_conv = TRUE;
00234   } else {
00235     winfo->do_conv = FALSE;
00236   }
00237   winfo_init(winfo);
00238   winfo->num = 0;
00239 }
00240 
00254 boolean
00255 voca_load_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00256 {
00257   WORD_ID vnum;
00258 
00259   winfo->linenum++;
00260   vnum = winfo->num;
00261   if (vnum >= winfo->maxnum) {
00262     if (winfo_expand(winfo) == FALSE) return FALSE;
00263   }
00264   if (voca_load_htkdict_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag)) == FALSE) {
00265     return FALSE;
00266   }
00267   winfo->num = vnum;
00268   return TRUE;
00269 }
00270 
00283 boolean
00284 voca_load_end(WORD_INFO *winfo)
00285 {
00286   voca_set_stats(winfo);
00287   if (!winfo->ok_flag) {
00288     if (winfo->errph_root != NULL) list_error(winfo);
00289   }
00290   return(winfo->ok_flag);
00291 }
00292 
00293 
00304 boolean
00305 voca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00306 {
00307   boolean ret;
00308 
00309   voca_load_start(winfo, hmminfo, ignore_tri_conv);
00310   while (getl(buf, sizeof(buf), fp) != NULL) {
00311     if (voca_load_line(buf, winfo, hmminfo) == FALSE) break;
00312   }
00313   ret = voca_load_end(winfo);
00314 
00315   return(ret);
00316 }
00317 
00318 
00329 boolean
00330 voca_load_htkdict_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00331 {
00332   boolean ret;
00333 
00334   voca_load_start(winfo, hmminfo, ignore_tri_conv);
00335   while(getl_fp(buf, MAXLINELEN, fp) != NULL) {
00336     if (voca_load_line(buf, winfo, hmminfo) == FALSE) break;
00337   }
00338   ret = voca_load_end(winfo);
00339 
00340   return(ret);
00341 }
00342 
00353 boolean
00354 voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00355 {
00356   voca_load_line(entry, winfo, hmminfo);
00357   return(voca_load_end(winfo));
00358 }
00359 
00373 boolean
00374 voca_load_htkdict_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag)
00375 {
00376   char *ptmp, *lp = NULL, *p;
00377   static char cbuf[MAX_HMMNAME_LEN];
00378   HMM_Logical **tmpwseq;
00379   int len;
00380   HMM_Logical *tmplg;
00381   boolean pok;
00382   int vnum;
00383 
00384   vnum = *vnum_p;
00385 
00386   if (strmatch(buf, "DICEND")) return FALSE;
00387 
00388   /* allocate temporal work area for the first call */
00389   if (winfo->work == NULL) {
00390     winfo->work_num = PHONEMELEN_STEP;
00391     winfo->work = (void *)mybmalloc2(sizeof(HMM_Logical *) * winfo->work_num, &(winfo->mroot));
00392   }
00393   tmpwseq = (HMM_Logical **)winfo->work;
00394 
00395   /* backup whole line for debug output */
00396   strcpy(bufbak, buf);
00397   
00398   /* GrammarEntry */
00399   if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) {
00400     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00401     winfo->errnum++;
00402     *ok_flag = FALSE;
00403     return TRUE;
00404   }
00405   winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00406 
00407   /* just move pointer to next token */
00408   if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00409     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00410     winfo->errnum++;
00411     *ok_flag = FALSE;
00412     return TRUE;
00413   }
00414 #ifdef CLASS_NGRAM
00415   winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
00416 #endif
00417   
00418   if (ptmp[0] == '@') {         /* class N-gram prob */
00419 #ifdef CLASS_NGRAM
00420     /* word probability within the class (for class N-gram) */
00421     /* format: classname @classprob wordname [output] phoneseq */
00422     /* classname equals to wname, and wordname will be omitted */
00423     /* format: @%f (log scale) */
00424     /* if "@" not found or "@0", it means class == word */
00425     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00426       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00427       winfo->errnum++;
00428       *ok_flag = FALSE;
00429       return TRUE;
00430     }
00431     if (ptmp[1] == '\0') {      /* space between '@' and figures */
00432       jlog("Error: voca_load_htkdict: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak);
00433       winfo->errnum++;
00434       *ok_flag = FALSE;
00435       return TRUE;
00436     }
00437     winfo->cprob[vnum] = atof(&(ptmp[1]));
00438     if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
00439     /* read next word entry (just skip them) */
00440     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00441       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum,bufbak);
00442       winfo->errnum++;
00443       *ok_flag = FALSE;
00444       return TRUE;
00445     }
00446     /* move to the next word entry */
00447     if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00448       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00449       winfo->errnum++;
00450       *ok_flag = FALSE;
00451       return TRUE;
00452     }
00453 #else  /* ~CLASS_NGRAM */
00454     jlog("Error: voca_load_htkdict: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak);
00455     winfo->errnum++;
00456     *ok_flag = FALSE;
00457     return TRUE;
00458 #endif /* CLASS_NGRAM */
00459   }
00460 
00461   /* OutputString */
00462   switch(ptmp[0]) {
00463   case '[':                     /* not transparent word */
00464     winfo->is_transparent[vnum] = FALSE;
00465     ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
00466     break;
00467   case '{':                     /* transparent word */
00468     winfo->is_transparent[vnum] = TRUE;
00469     ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
00470     break;
00471   default:
00472 #if 1
00473     /* ALLOW no entry for output */
00474     /* same as wname is used */
00475     winfo->is_transparent[vnum] = FALSE;
00476     ptmp = winfo->wname[vnum];
00477 #else
00478     /* error */
00479     jlog("Error: voca_load_htkdict: line %d: missing output string??\n> %s\n", linenum, bufbak);
00480     winfo->errnum++;
00481     *ok_flag = FALSE;
00482     return TRUE;
00483 #endif
00484   }
00485   if (ptmp == NULL) {
00486     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00487     winfo->errnum++;
00488     *ok_flag = FALSE;
00489     return TRUE;
00490   }
00491   winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00492     
00493   /* phoneme sequence */
00494   if (hmminfo == NULL) {
00495     /* don't read */
00496     winfo->wseq[vnum] = NULL;
00497     winfo->wlen[vnum] = 0;
00498   } else {
00499 
00500     /* store converted phone sequence to temporal bufffer */
00501     len = 0;
00502       
00503     if (do_conv) {
00504       /* convert phoneme to triphone expression (word-internal) */
00505       cycle_triphone(NULL);
00506       if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
00507         jlog("Error: voca_load_htkdict: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak);
00508         winfo->errnum++;
00509         *ok_flag = FALSE;
00510         return TRUE;
00511       }
00512       cycle_triphone(lp);
00513     }
00514 
00515     pok = TRUE;
00516     for (;;) {
00517       if (do_conv) {
00518 /*      if (lp != NULL) jlog(" %d%s",len,lp);*/
00519         if (lp != NULL) lp = mystrtok(NULL, " \t\n");
00520         if (lp != NULL) p = cycle_triphone(lp);
00521         else p = cycle_triphone_flush();
00522       } else {
00523         p = mystrtok(NULL, " \t\n");
00524       }
00525       if (p == NULL) break;
00526 
00527       /* both defined/pseudo phone is allowed */
00528       tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
00529       if (tmplg == NULL) {
00530         /* not found */
00531         if (do_conv) {
00532           /* both defined or pseudo phone are not found */
00533           if (len == 0 && lp == NULL) {
00534             jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", linenum, p, p);
00535             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p);
00536           } else if (len == 0) {
00537             jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", linenum, p, p);
00538             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p);
00539           } else if (lp == NULL) {
00540             jlog("Error: voca_load_htkdict: line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", linenum, p, p);
00541             snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p);
00542           } else {
00543             jlog("Error: voca_load_htkdict: line %d: triphone \"%s\" not found\n", linenum, p);
00544             snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
00545           }
00546         } else {
00547           jlog("Error: voca_load_htkdict: line %d: phone \"%s\" not found\n", linenum, p);
00548           snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
00549         }
00550         add_to_error(winfo, cbuf);
00551         pok = FALSE;
00552       } else {
00553         /* found */
00554         if (len >= winfo->work_num) {
00555           /* expand wseq area by PHONEMELEN_STEP */
00556           winfo->work_num += PHONEMELEN_STEP;
00557           winfo->work = (void *)mybmalloc2(sizeof(HMM_Logical *) * winfo->work_num, &(winfo->mroot));
00558           memcpy(winfo->work, tmpwseq, sizeof(HMM_Logical *) * (winfo->work_num - PHONEMELEN_STEP));
00559           tmpwseq = (HMM_Logical **)winfo->work;
00560         }
00561         /* store to temporal buffer */
00562         tmpwseq[len] = tmplg;
00563       }
00564       len++;
00565     }
00566     if (!pok) {                 /* error in phoneme */
00567       jlog("Error: voca_load_htkdict: the line content was: %s\n", bufbak);
00568       winfo->errnum++;
00569       *ok_flag = FALSE;
00570       return TRUE;
00571     }
00572     if (len == 0) {
00573       jlog("Error: voca_load_htkdict: line %d: no phone specified:\n> %s\n", linenum, bufbak);
00574       winfo->errnum++;
00575       *ok_flag = FALSE;
00576       return TRUE;
00577     }
00578     /* store to winfo */
00579     winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot));
00580     memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len);
00581     winfo->wlen[vnum] = len;
00582   }
00583 
00584   vnum++;
00585 
00586   *vnum_p = vnum;
00587   
00588   return(TRUE);
00589 }
00590 
00602 boolean
00603 voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00604 {
00605   WORD_ID w;
00606   int ph;
00607   char *p;
00608   HMM_Logical *tmplg;
00609   boolean ok_flag = TRUE;
00610   
00611   for (w=0;w<winfo->num;w++) {
00612     cycle_triphone(NULL);
00613     cycle_triphone(winfo->wseq[w][0]->name);
00614 
00615     for (ph = 0; ph < winfo->wlen[w] ; ph++) {
00616       if (ph == winfo->wlen[w] - 1) {
00617         p = cycle_triphone_flush();
00618       } else {
00619         p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
00620       }
00621       if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
00622         jlog("Error: voca_load_htkdict: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
00623         ok_flag = FALSE;
00624         continue;
00625       }
00626       winfo->wseq[w][ph] = tmplg;
00627     }
00628   }
00629   return (ok_flag);
00630 }
00631 
00643 boolean
00644 voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset)
00645 {
00646   WORD_ID n, w;
00647   int i;
00648 
00649   n = woffset;
00650   while (n >= dstinfo->maxnum) {
00651     if (winfo_expand(dstinfo) == FALSE) return FALSE;
00652   }
00653   for(w=0;w<srcinfo->num;w++) {
00654     /* copy data */
00655     dstinfo->wlen[n] = srcinfo->wlen[w];
00656     if (srcinfo->wname[w]) {
00657       dstinfo->wname[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->wname[w])+1, &(dstinfo->mroot)), srcinfo->wname[w]);
00658     } else {
00659       dstinfo->wname[n] = NULL;
00660     }
00661     if (srcinfo->woutput[w]) {
00662       dstinfo->woutput[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->woutput[w])+1, &(dstinfo->mroot)), srcinfo->woutput[w]);
00663     } else {
00664       dstinfo->woutput[n] = NULL;
00665     }
00666     if (srcinfo->wlen[w] > 0) {
00667       dstinfo->wseq[n] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * srcinfo->wlen[w], &(dstinfo->mroot));
00668       for(i=0;i<srcinfo->wlen[w];i++) {
00669         dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
00670       }
00671     } else {
00672       dstinfo->wseq[n] = NULL;
00673     }
00674 #ifdef CLASS_NGRAM
00675     dstinfo->cprob[n] = srcinfo->cprob[w];
00676     if (dstinfo->cprob[n] != 0.0) dstinfo->cwnum++;
00677 #endif
00678     dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
00679     /* offset category ID by coffset */
00680     dstinfo->wton[n] = srcinfo->wton[w] + coffset;
00681     
00682     n++;
00683     if (n >= dstinfo->maxnum) {
00684       if (winfo_expand(dstinfo) == FALSE) return FALSE;
00685     }
00686 
00687   }
00688   dstinfo->num = n;
00689 
00690   /* compute maxwn */
00691   voca_set_stats(dstinfo);
00692 
00693   return TRUE;
00694 }
00695