Julius 4.1.5
libsent/src/hmminfo/rdhmmdef.c
説明を見る。
00001 
00036 /*
00037  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00038  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00039  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00040  * All rights reserved
00041  */
00042 
00043 #include <sent/stddefs.h>
00044 #include <sent/htk_param.h>
00045 #include <sent/htk_hmm.h>
00046 
00047 #define MAXBUFLEN  4096         ///< Maximum length of a line in the input
00048 
00049 char *rdhmmdef_token;           ///< Token most recently produced by read_token(); NULL once the input is exhausted
00050 static char *buf = NULL;        ///< Line buffer of MAXBUFLEN bytes, allocated on the first read_token() call
00051 static int line;                ///< Input line counter maintained by read_token() and reported by rderr()
00052 
00053 /* global functions for rdhmmdef_*.c */
00054 
00060 void
00061 rderr(char *str)
00062 {
00063   if (rdhmmdef_token == NULL) { /* end of file */
00064     jlog("Error: rdhmmdef: %s on end of file\n", str);
00065   } else {
00066     jlog("Error: rdhmmdef: read error at line %d: %s\n", line, (str) ? str : "parse error");
00067   }
00068   jlog_flush();
00069   exit(1);
00070 }
00071 
00079 char *
00080 read_token(FILE *fp)
00081 {
00082   if (buf != NULL) {
00083     /* already have buffer */
00084     if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) {
00085       /* return next token */
00086       return rdhmmdef_token;
00087     }
00088   } else {
00089     /* init: allocate buffer for the first time */
00090     buf = (char *)mymalloc(MAXBUFLEN);
00091     line = 1;
00092   }
00093   /* read new 1 line */
00094   if (getl(buf, MAXBUFLEN, fp) == NULL) {
00095     rdhmmdef_token = NULL;
00096   } else {
00097     rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM);
00098     line++;
00099   }
00100   return rdhmmdef_token;
00101 }
00102 
00108 static void
00109 conv_log_arc(HTK_HMM_INFO *hmm)
00110 {
00111   HTK_HMM_Trans *tr;
00112   int i,j;
00113   LOGPROB l;
00114 
00115   for (tr = hmm->trstart; tr; tr = tr->next) {
00116     for(i=0;i<tr->statenum;i++) {
00117       for(j=0;j<tr->statenum;j++) {
00118         l = tr->a[i][j];
00119         tr->a[i][j] = (l != 0.0) ? (float)log10(l) : LOG_ZERO;
00120       }
00121     }
00122   }
00123 }
00129 void
00130 htk_hmm_inverse_variances(HTK_HMM_INFO *hmm)
00131 {
00132   HTK_HMM_Var *v;
00133   int i;
00134 
00135   for (v = hmm->vrstart; v; v = v->next) {
00136     for(i=0;i<v->len;i++) {
00137       v->vec[i] = 1.0 / v->vec[i];
00138     }
00139   }
00140 }
00141 
00142 #ifdef ENABLE_MSD
00143 
00148 void
00149 htk_hmm_check_msd(HTK_HMM_INFO *hmm)
00150 {
00151   HTK_HMM_PDF *m;
00152   int vlen;
00153   int i;
00154 
00155   hmm->has_msd = FALSE;
00156   for (m = hmm->pdfstart; m; m = m->next) {
00157     /* skip tied-mixture pdf */
00158     if (m->tmix) continue;
00159     /* check if vector length are the same */
00160     vlen = hmm->opt.stream_info.vsize[m->stream_id];
00161     for(i=0;i<m->mix_num;i++) {
00162       if (m->b[i]->meanlen != vlen) {
00163         jlog("Stat: rdhmmdef: assume MSD-HMM since Gaussian dimension are not consistent\n");
00164         hmm->has_msd = TRUE;
00165         return;
00166       }
00167     }
00168   }
00169 }
00170 #endif
00171 
00183 boolean
00184 rdhmmdef(FILE *fp, HTK_HMM_INFO *hmm)
00185 {
00186   char macrosw;
00187   char *name;
00188 
00189   /* variances in htkdefs are not inversed yet */
00190   hmm->variance_inversed = FALSE;
00191 
00192   /* read the first token */
00193   read_token(fp);
00194   
00195   /* the toplevel loop */
00196   while (rdhmmdef_token != NULL) {/* break on EOF */
00197     if (rdhmmdef_token[0] != '~') { /* toplevel commands are always macro */
00198       return FALSE;
00199     }
00200     macrosw = rdhmmdef_token[1];
00201     read_token(fp);             /* read next token after the "~.."  */
00202     switch(macrosw) {
00203     case 'o':                   /* global option */
00204       if (set_global_opt(fp,hmm) == FALSE) {
00205         return FALSE;
00206       }
00207       break;
00208     case 't':                   /* transition macro */
00209       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00210       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00211       read_token(fp);
00212       def_trans_macro(name, fp, hmm);
00213       break;
00214     case 's':                   /* state macro */
00215       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00216       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00217       read_token(fp);
00218       def_state_macro(name, fp, hmm);
00219       break;
00220     case 'm':                   /* density (mixture) macro */
00221       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00222       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00223       read_token(fp);
00224       def_dens_macro(name, fp, hmm);
00225       break;
00226     case 'h':                   /* HMM define */
00227       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00228       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00229       read_token(fp);
00230       def_HMM(name, fp, hmm);
00231       break;
00232     case 'v':                   /* Variance macro */
00233       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00234       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00235       read_token(fp);
00236       def_var_macro(name, fp, hmm);
00237       break;
00238     case 'w':                   /* Stream weight macro */
00239       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00240       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00241       read_token(fp);
00242       def_streamweight_macro(name, fp, hmm);
00243       break;
00244     case 'r':                   /* Regression class macro (ignore) */
00245       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00246       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00247       read_token(fp);
00248       def_regtree_macro(name, fp, hmm);
00249       break;
00250     case 'p':                   /* Mixture pdf macro (extension of HTS) */
00251       name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00252       if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00253       read_token(fp);
00254       def_mpdf_macro(name, fp, hmm);
00255       break;
00256     }
00257   }
00258 
00259   /* convert transition prob to log scale */
00260   conv_log_arc(hmm);
00261 
00262   jlog("Stat: rdhmmdef: ascii format HMM definition\n");
00263   
00264   /* check limitation */
00265   if (check_all_hmm_limit(hmm)) {
00266     jlog("Stat: rdhmmdef: limit check passed\n");
00267   } else {
00268     jlog("Error: rdhmmdef: cannot handle this HMM due to system limitation\n");
00269     return FALSE;
00270   }
00271 
00272   /* determine whether this model needs multi-path handling */
00273   hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
00274   if (hmm->need_multipath) {
00275     jlog("Stat: rdhmmdef: this HMM requires multipath handling at decoding\n");
00276   } else {
00277     jlog("Stat: rdhmmdef: this HMM does not need multipath handling\n");
00278   }
00279   
00280   /* inverse all variance values for faster computation */
00281   if (! hmm->variance_inversed) {
00282     htk_hmm_inverse_variances(hmm);
00283     hmm->variance_inversed = TRUE;
00284   }
00285 
00286   /* check HMM parameter option type */
00287   if (!check_hmm_options(hmm)) {
00288     jlog("Error: rdhmmdef: hmm options check failed\n");
00289     return FALSE;
00290   }
00291 
00292   /* add ID number for all HTK_HMM_State */
00293   /* also calculate the maximum number of mixture */
00294   {
00295     HTK_HMM_State *stmp;
00296     int n, max, s, mix;
00297     n = 0;
00298     max = 0;
00299     for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
00300       for(s=0;s<stmp->nstream;s++) {
00301         mix = stmp->pdf[s]->mix_num;
00302         if (max < mix) max = mix;
00303       }
00304       stmp->id = n++;
00305       if (n >= MAX_STATE_NUM) {
00306         jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM);
00307         return FALSE;
00308       }
00309     }
00310     hmm->totalstatenum = n;
00311     hmm->maxmixturenum = max;
00312   }
00313   /* compute total number of HMM models and maximum length */
00314   {
00315     HTK_HMM_Data *dtmp;
00316     int n, maxlen;
00317     n = 0;
00318     maxlen = 0;
00319     for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
00320       if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
00321       n++;
00322     }
00323     hmm->maxstatenum = maxlen;
00324     hmm->totalhmmnum = n;
00325   }
00326   /* compute total number of Gaussians */
00327   {
00328     HTK_HMM_Dens *dtmp;
00329     int n = 0;
00330     for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) {
00331       n++;
00332     }
00333     hmm->totalmixnum = n;
00334   }
00335   /* check of HMM name length exceed the maximum */
00336   {
00337     HTK_HMM_Dens *dtmp;
00338     int n = 0;
00339     for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) {
00340       n++;
00341     }
00342     hmm->totalmixnum = n;
00343   }
00344   /* compute total number of mixture PDFs */
00345   {
00346     HTK_HMM_PDF *p;
00347     int n = 0;
00348     for (p = hmm->pdfstart; p; p = p->next) {
00349       n++;
00350     }
00351     hmm->totalpdfnum = n;
00352   }
00353 #ifdef ENABLE_MSD
00354   /* check if MSD-HMM */
00355   htk_hmm_check_msd(hmm);
00356 #endif
00357 
00358   return(TRUE);                 /* success */
00359 }