/* Julius 4.1.5 */
00001 00036 /* 00037 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00038 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00039 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00040 * All rights reserved 00041 */ 00042 00043 #include <sent/stddefs.h> 00044 #include <sent/htk_param.h> 00045 #include <sent/htk_hmm.h> 00046 00047 #define MAXBUFLEN 4096 ///< Maximum length of a line in the input 00048 00049 char *rdhmmdef_token; 00050 static char *buf = NULL; 00051 static int line; 00052 00053 /* global functions for rdhmmdef_*.c */ 00054 00060 void 00061 rderr(char *str) 00062 { 00063 if (rdhmmdef_token == NULL) { /* end of file */ 00064 jlog("Error: rdhmmdef: %s on end of file\n", str); 00065 } else { 00066 jlog("Error: rdhmmdef: read error at line %d: %s\n", line, (str) ? str : "parse error"); 00067 } 00068 jlog_flush(); 00069 exit(1); 00070 } 00071 00079 char * 00080 read_token(FILE *fp) 00081 { 00082 if (buf != NULL) { 00083 /* already have buffer */ 00084 if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) { 00085 /* return next token */ 00086 return rdhmmdef_token; 00087 } 00088 } else { 00089 /* init: allocate buffer for the first time */ 00090 buf = (char *)mymalloc(MAXBUFLEN); 00091 line = 1; 00092 } 00093 /* read new 1 line */ 00094 if (getl(buf, MAXBUFLEN, fp) == NULL) { 00095 rdhmmdef_token = NULL; 00096 } else { 00097 rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM); 00098 line++; 00099 } 00100 return rdhmmdef_token; 00101 } 00102 00108 static void 00109 conv_log_arc(HTK_HMM_INFO *hmm) 00110 { 00111 HTK_HMM_Trans *tr; 00112 int i,j; 00113 LOGPROB l; 00114 00115 for (tr = hmm->trstart; tr; tr = tr->next) { 00116 for(i=0;i<tr->statenum;i++) { 00117 for(j=0;j<tr->statenum;j++) { 00118 l = tr->a[i][j]; 00119 tr->a[i][j] = (l != 0.0) ? 
(float)log10(l) : LOG_ZERO; 00120 } 00121 } 00122 } 00123 } 00129 void 00130 htk_hmm_inverse_variances(HTK_HMM_INFO *hmm) 00131 { 00132 HTK_HMM_Var *v; 00133 int i; 00134 00135 for (v = hmm->vrstart; v; v = v->next) { 00136 for(i=0;i<v->len;i++) { 00137 v->vec[i] = 1.0 / v->vec[i]; 00138 } 00139 } 00140 } 00141 00142 #ifdef ENABLE_MSD 00143 00148 void 00149 htk_hmm_check_msd(HTK_HMM_INFO *hmm) 00150 { 00151 HTK_HMM_PDF *m; 00152 int vlen; 00153 int i; 00154 00155 hmm->has_msd = FALSE; 00156 for (m = hmm->pdfstart; m; m = m->next) { 00157 /* skip tied-mixture pdf */ 00158 if (m->tmix) continue; 00159 /* check if vector length are the same */ 00160 vlen = hmm->opt.stream_info.vsize[m->stream_id]; 00161 for(i=0;i<m->mix_num;i++) { 00162 if (m->b[i]->meanlen != vlen) { 00163 jlog("Stat: rdhmmdef: assume MSD-HMM since Gaussian dimension are not consistent\n"); 00164 hmm->has_msd = TRUE; 00165 return; 00166 } 00167 } 00168 } 00169 } 00170 #endif 00171 00183 boolean 00184 rdhmmdef(FILE *fp, HTK_HMM_INFO *hmm) 00185 { 00186 char macrosw; 00187 char *name; 00188 00189 /* variances in htkdefs are not inversed yet */ 00190 hmm->variance_inversed = FALSE; 00191 00192 /* read the first token */ 00193 read_token(fp); 00194 00195 /* the toplevel loop */ 00196 while (rdhmmdef_token != NULL) {/* break on EOF */ 00197 if (rdhmmdef_token[0] != '~') { /* toplevel commands are always macro */ 00198 return FALSE; 00199 } 00200 macrosw = rdhmmdef_token[1]; 00201 read_token(fp); /* read next token after the "~.." 
*/ 00202 switch(macrosw) { 00203 case 'o': /* global option */ 00204 if (set_global_opt(fp,hmm) == FALSE) { 00205 return FALSE; 00206 } 00207 break; 00208 case 't': /* transition macro */ 00209 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00210 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00211 read_token(fp); 00212 def_trans_macro(name, fp, hmm); 00213 break; 00214 case 's': /* state macro */ 00215 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00216 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00217 read_token(fp); 00218 def_state_macro(name, fp, hmm); 00219 break; 00220 case 'm': /* density (mixture) macro */ 00221 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00222 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00223 read_token(fp); 00224 def_dens_macro(name, fp, hmm); 00225 break; 00226 case 'h': /* HMM define */ 00227 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00228 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00229 read_token(fp); 00230 def_HMM(name, fp, hmm); 00231 break; 00232 case 'v': /* Variance macro */ 00233 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00234 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00235 read_token(fp); 00236 def_var_macro(name, fp, hmm); 00237 break; 00238 case 'w': /* Stream weight macro */ 00239 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00240 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00241 read_token(fp); 00242 def_streamweight_macro(name, fp, hmm); 00243 break; 00244 case 'r': /* Regression class macro (ignore) */ 00245 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00246 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long"); 00247 read_token(fp); 00248 def_regtree_macro(name, fp, hmm); 00249 break; 00250 case 'p': /* Mixture pdf macro (extension of HTS) */ 00251 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot)); 00252 if (strlen(name) >= 
MAX_HMMNAME_LEN) rderr("Macro name too long"); 00253 read_token(fp); 00254 def_mpdf_macro(name, fp, hmm); 00255 break; 00256 } 00257 } 00258 00259 /* convert transition prob to log scale */ 00260 conv_log_arc(hmm); 00261 00262 jlog("Stat: rdhmmdef: ascii format HMM definition\n"); 00263 00264 /* check limitation */ 00265 if (check_all_hmm_limit(hmm)) { 00266 jlog("Stat: rdhmmdef: limit check passed\n"); 00267 } else { 00268 jlog("Error: rdhmmdef: cannot handle this HMM due to system limitation\n"); 00269 return FALSE; 00270 } 00271 00272 /* determine whether this model needs multi-path handling */ 00273 hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm); 00274 if (hmm->need_multipath) { 00275 jlog("Stat: rdhmmdef: this HMM requires multipath handling at decoding\n"); 00276 } else { 00277 jlog("Stat: rdhmmdef: this HMM does not need multipath handling\n"); 00278 } 00279 00280 /* inverse all variance values for faster computation */ 00281 if (! hmm->variance_inversed) { 00282 htk_hmm_inverse_variances(hmm); 00283 hmm->variance_inversed = TRUE; 00284 } 00285 00286 /* check HMM parameter option type */ 00287 if (!check_hmm_options(hmm)) { 00288 jlog("Error: rdhmmdef: hmm options check failed\n"); 00289 return FALSE; 00290 } 00291 00292 /* add ID number for all HTK_HMM_State */ 00293 /* also calculate the maximum number of mixture */ 00294 { 00295 HTK_HMM_State *stmp; 00296 int n, max, s, mix; 00297 n = 0; 00298 max = 0; 00299 for (stmp = hmm->ststart; stmp; stmp = stmp->next) { 00300 for(s=0;s<stmp->nstream;s++) { 00301 mix = stmp->pdf[s]->mix_num; 00302 if (max < mix) max = mix; 00303 } 00304 stmp->id = n++; 00305 if (n >= MAX_STATE_NUM) { 00306 jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM); 00307 return FALSE; 00308 } 00309 } 00310 hmm->totalstatenum = n; 00311 hmm->maxmixturenum = max; 00312 } 00313 /* compute total number of HMM models and maximum length */ 00314 { 00315 HTK_HMM_Data *dtmp; 00316 int n, maxlen; 00317 n = 0; 00318 
maxlen = 0; 00319 for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) { 00320 if (maxlen < dtmp->state_num) maxlen = dtmp->state_num; 00321 n++; 00322 } 00323 hmm->maxstatenum = maxlen; 00324 hmm->totalhmmnum = n; 00325 } 00326 /* compute total number of Gaussians */ 00327 { 00328 HTK_HMM_Dens *dtmp; 00329 int n = 0; 00330 for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) { 00331 n++; 00332 } 00333 hmm->totalmixnum = n; 00334 } 00335 /* check of HMM name length exceed the maximum */ 00336 { 00337 HTK_HMM_Dens *dtmp; 00338 int n = 0; 00339 for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) { 00340 n++; 00341 } 00342 hmm->totalmixnum = n; 00343 } 00344 /* compute total number of mixture PDFs */ 00345 { 00346 HTK_HMM_PDF *p; 00347 int n = 0; 00348 for (p = hmm->pdfstart; p; p = p->next) { 00349 n++; 00350 } 00351 hmm->totalpdfnum = n; 00352 } 00353 #ifdef ENABLE_MSD 00354 /* check if MSD-HMM */ 00355 htk_hmm_check_msd(hmm); 00356 #endif 00357 00358 return(TRUE); /* success */ 00359 }