00001
00027
00028
00029
00030
00031
00032
00033 #include <sent/stddefs.h>
00034 #include <sent/htk_param.h>
00035 #include <sent/htk_hmm.h>
00036
00037 #undef DMES
00038
00039 static boolean gzfile;
00040
00049 static void
00050 rdn(FILE *fp, void *buf, size_t unitbyte, int unitnum)
00051 {
00052 size_t tmp;
00053 if (gzfile) {
00054 tmp = myfread(buf, unitbyte, unitnum, fp);
00055 } else {
00056 tmp = fread(buf, unitbyte, unitnum, fp);
00057 }
00058 if (tmp < (size_t)unitnum) {
00059 perror("ngram_read_bin");
00060 j_error("read failed\n");
00061 }
00062 #ifndef WORDS_BIGENDIAN
00063 if (unitbyte != 1) {
00064 swap_bytes(buf, unitbyte, unitnum);
00065 }
00066 #endif
00067 }
00068
00069 static char buf[MAXLINELEN];
00070
00078 static char *
00079 rdn_str(FILE *fp, HTK_HMM_INFO *hmm)
00080 {
00081 int c;
00082 int len;
00083 char *p;
00084
00085 len = 0;
00086 while ((c = gzfile ? myfgetc(fp) : fgetc(fp)) != -1) {
00087 if (len >= MAXLINELEN) j_error("Error: string len exceeded %d bytes\n", len);
00088 buf[len++] = c;
00089 if (c == '\0') break;
00090 }
00091 if (len == 1) {
00092 p = NULL;
00093 } else {
00094 p = (char *)mybmalloc2(len, &(hmm->mroot));
00095 strcpy(p, buf);
00096 }
00097 return(p);
00098 }
00099
00100
00101
00102 static char *binhmm_header = BINHMM_HEADER;
00103 static char *binhmm_header_v2 = BINHMM_HEADER_V2;
00104
00111 static void
00112 rd_para(FILE *fp, Value *para)
00113 {
00114 short version;
00115
00116 rdn(fp, &version, sizeof(short), 1);
00117 switch(version) {
00118 case 1:
00119 rdn(fp, &(para->smp_period), sizeof(long), 1);
00120 rdn(fp, &(para->smp_freq), sizeof(long), 1);
00121 rdn(fp, &(para->framesize), sizeof(int), 1);
00122 rdn(fp, &(para->frameshift), sizeof(int), 1);
00123 rdn(fp, &(para->preEmph), sizeof(float), 1);
00124 rdn(fp, &(para->lifter), sizeof(int), 1);
00125 rdn(fp, &(para->fbank_num), sizeof(int), 1);
00126 rdn(fp, &(para->delWin), sizeof(int), 1);
00127 rdn(fp, &(para->accWin), sizeof(int), 1);
00128 rdn(fp, &(para->silFloor), sizeof(float), 1);
00129 rdn(fp, &(para->escale), sizeof(float), 1);
00130 rdn(fp, &(para->hipass), sizeof(int), 1);
00131 rdn(fp, &(para->lopass), sizeof(int), 1);
00132 rdn(fp, &(para->enormal), sizeof(int), 1);
00133 rdn(fp, &(para->raw_e), sizeof(int), 1);
00134 rdn(fp, &(para->ss_alpha), sizeof(float), 1);
00135 rdn(fp, &(para->ss_floor), sizeof(float), 1);
00136 rdn(fp, &(para->zmeanframe), sizeof(int), 1);
00137 break;
00138 default:
00139 j_error("Error: read_binhmm: unknown embedded parameter format version: %d\n", version);
00140 }
00141 }
00142
00153 static boolean
00154 rd_header(FILE *fp, HTK_HMM_INFO *hmm, Value *para)
00155 {
00156 char *p, *q;
00157 boolean emp, inv;
00158
00159 p = rdn_str(fp, hmm);
00160 if (strmatch(p, binhmm_header)) {
00161
00162 hmm->variance_inversed = FALSE;
00163 } else if (strmatch(p, binhmm_header_v2)) {
00164
00165 emp = inv = FALSE;
00166 q = rdn_str(fp, hmm);
00167 if (q != NULL) {
00168 while(*q == '_') {
00169 q++;
00170 switch (*q) {
00171 case BINHMM_HEADER_V2_EMBEDPARA:
00172
00173 emp = TRUE;
00174 break;
00175 case BINHMM_HEADER_V2_VARINV:
00176 inv = TRUE;
00177 break;
00178 }
00179 q++;
00180 }
00181 }
00182 if (emp) {
00183 para->loaded = 1;
00184 rd_para(fp, para);
00185 j_printerr("(acoutic analysis conf embedded)...");
00186 }
00187 if (inv) {
00188 hmm->variance_inversed = TRUE;
00189 j_printerr("(varinv)...");
00190 } else {
00191 hmm->variance_inversed = FALSE;
00192 }
00193 } else {
00194
00195 return FALSE;
00196 }
00197 return TRUE;
00198 }
00199
00200
00201
00209 static void
00210 rd_opt(FILE *fp, HTK_HMM_Options *opt)
00211 {
00212 rdn(fp, &(opt->stream_info.num), sizeof(short), 1);
00213 rdn(fp, opt->stream_info.vsize, sizeof(short), 50);
00214 rdn(fp, &(opt->vec_size), sizeof(short), 1);
00215 rdn(fp, &(opt->cov_type), sizeof(short), 1);
00216 rdn(fp, &(opt->dur_type), sizeof(short), 1);
00217 rdn(fp, &(opt->param_type), sizeof(short), 1);
00218 }
00219
00226 static void
00227 rd_type(FILE *fp, HTK_HMM_INFO *hmm)
00228 {
00229 rdn(fp, &(hmm->is_tied_mixture), sizeof(boolean), 1);
00230 rdn(fp, &(hmm->maxmixturenum), sizeof(int), 1);
00231 }
00232
00233
00234
00235 static HTK_HMM_Trans **tr_index;
00236 static unsigned int tr_num;
00237
00248 static void
00249 rd_trans(FILE *fp, HTK_HMM_INFO *hmm)
00250 {
00251 HTK_HMM_Trans *t;
00252 unsigned int idx;
00253 int i;
00254 PROB *atmp;
00255
00256 rdn(fp, &tr_num, sizeof(unsigned int), 1);
00257 tr_index = (HTK_HMM_Trans **)mymalloc(sizeof(HTK_HMM_Trans *) * tr_num);
00258
00259 hmm->trstart = NULL;
00260 hmm->tr_root = NULL;
00261 for (idx = 0; idx < tr_num; idx++) {
00262 t = (HTK_HMM_Trans *)mybmalloc2(sizeof(HTK_HMM_Trans), &(hmm->mroot));
00263 t->name = rdn_str(fp, hmm);
00264 rdn(fp, &(t->statenum), sizeof(short), 1);
00265 t->a = (PROB **)mybmalloc2(sizeof(PROB *) * t->statenum, &(hmm->mroot));
00266 atmp = (PROB *)mybmalloc2(sizeof(PROB) * t->statenum * t->statenum, &(hmm->mroot));
00267 for (i=0;i<t->statenum;i++) {
00268 t->a[i] = &(atmp[i*t->statenum]);
00269 rdn(fp, t->a[i], sizeof(PROB), t->statenum);
00270 }
00271 trans_add(hmm, t);
00272 tr_index[idx] = t;
00273 }
00274
00275 #ifdef DMES
00276 j_printf("%d transition maxtix read\n", tr_num);
00277 #endif
00278 }
00279
00280
00281 static HTK_HMM_Var **vr_index;
00282 static unsigned int vr_num;
00283
00294 static void
00295 rd_var(FILE *fp, HTK_HMM_INFO *hmm)
00296 {
00297 HTK_HMM_Var *v;
00298 unsigned int idx;
00299
00300 rdn(fp, &vr_num, sizeof(unsigned int), 1);
00301 vr_index = (HTK_HMM_Var **)mymalloc(sizeof(HTK_HMM_Var *) * vr_num);
00302
00303 hmm->vrstart = NULL;
00304 hmm->vr_root = NULL;
00305 for (idx = 0; idx < vr_num; idx++) {
00306 v = (HTK_HMM_Var *)mybmalloc2(sizeof(HTK_HMM_Var), &(hmm->mroot));
00307 v->name = rdn_str(fp, hmm);
00308 rdn(fp, &(v->len), sizeof(short), 1);
00309 v->vec = (VECT *)mybmalloc2(sizeof(VECT) * v->len, &(hmm->mroot));
00310 rdn(fp, v->vec, sizeof(VECT), v->len);
00311 vr_index[idx] = v;
00312 var_add(hmm, v);
00313 }
00314 #ifdef DMES
00315 j_printf("%d variance read\n", vr_num);
00316 #endif
00317 }
00318
00319
00320
00321 static HTK_HMM_Dens **dens_index;
00322 static unsigned int dens_num;
00323
00335 static void
00336 rd_dens(FILE *fp, HTK_HMM_INFO *hmm)
00337 {
00338 HTK_HMM_Dens *d;
00339 unsigned int idx;
00340 unsigned int vid;
00341
00342 rdn(fp, &dens_num, sizeof(unsigned int), 1);
00343 hmm->totalmixnum = dens_num;
00344 dens_index = (HTK_HMM_Dens **)mymalloc(sizeof(HTK_HMM_Dens *) * dens_num);
00345
00346 hmm->dnstart = NULL;
00347 hmm->dn_root = NULL;
00348 for (idx = 0; idx < dens_num; idx++) {
00349 d = (HTK_HMM_Dens *)mybmalloc2(sizeof(HTK_HMM_Dens), &(hmm->mroot));
00350 d->name = rdn_str(fp, hmm);
00351 rdn(fp, &(d->meanlen), sizeof(short), 1);
00352 d->mean = (VECT *)mybmalloc2(sizeof(VECT) * d->meanlen, &(hmm->mroot));
00353 rdn(fp, d->mean, sizeof(VECT), d->meanlen);
00354 rdn(fp, &vid, sizeof(unsigned int), 1);
00355 d->var = vr_index[vid];
00356 rdn(fp, &(d->gconst), sizeof(LOGPROB), 1);
00357 dens_index[idx] = d;
00358 dens_add(hmm, d);
00359 }
00360 #ifdef DMES
00361 j_printf("%d gaussian densities read\n", dens_num);
00362 #endif
00363 }
00364
00365
00366
00367 static GCODEBOOK **tm_index;
00368 static unsigned int tm_num;
00369
00381 static void
00382 rd_tmix(FILE *fp, HTK_HMM_INFO *hmm)
00383 {
00384 GCODEBOOK *tm;
00385 unsigned int idx;
00386 unsigned int did;
00387 int i;
00388
00389 rdn(fp, &tm_num, sizeof(unsigned int), 1);
00390 hmm->codebooknum = tm_num;
00391 tm_index = (GCODEBOOK **)mymalloc(sizeof(GCODEBOOK *) * tm_num);
00392 hmm->maxcodebooksize = 0;
00393
00394 hmm->codebook_root = NULL;
00395 for (idx = 0; idx < tm_num; idx++) {
00396 tm = (GCODEBOOK *)mybmalloc2(sizeof(GCODEBOOK), &(hmm->mroot));
00397 tm->name = rdn_str(fp, hmm);
00398 rdn(fp, &(tm->num), sizeof(int), 1);
00399 if (hmm->maxcodebooksize < tm->num) hmm->maxcodebooksize = tm->num;
00400 tm->d = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * tm->num, &(hmm->mroot));
00401 for(i=0;i<tm->num;i++) {
00402 rdn(fp, &did, sizeof(unsigned int), 1);
00403 if (did >= dens_num) {
00404 tm->d[i] = NULL;
00405 } else {
00406 tm->d[i] = dens_index[did];
00407 }
00408 }
00409 tm->id = idx;
00410 tm_index[idx] = tm;
00411 codebook_add(hmm, tm);
00412 }
00413 #ifdef DMES
00414 j_printf("%d tied-mixture codebooks read\n", tm_num);
00415 #endif
00416 }
00417
00418
00419 static HTK_HMM_State **st_index;
00420 static unsigned int st_num;
00421
00434 static void
00435 rd_state(FILE *fp, HTK_HMM_INFO *hmm)
00436 {
00437 HTK_HMM_State *s;
00438 unsigned int idx;
00439 unsigned int did;
00440 int i;
00441
00442 rdn(fp, &st_num, sizeof(unsigned int), 1);
00443 hmm->totalstatenum = st_num;
00444 st_index = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * st_num);
00445
00446 hmm->ststart = NULL;
00447 hmm->st_root = NULL;
00448 for (idx = 0; idx < st_num; idx++) {
00449 s = (HTK_HMM_State *)mybmalloc2(sizeof(HTK_HMM_State), &(hmm->mroot));
00450 s->name = rdn_str(fp, hmm);
00451 rdn(fp, &(s->mix_num), sizeof(short), 1);
00452 if (s->mix_num == -1) {
00453
00454 rdn(fp, &did, sizeof(unsigned int), 1);
00455 s->b = (HTK_HMM_Dens **)tm_index[did];
00456 s->mix_num = (tm_index[did])->num;
00457 } else {
00458
00459 s->b = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * s->mix_num, &(hmm->mroot));
00460 for (i=0;i<s->mix_num;i++) {
00461 rdn(fp, &did, sizeof(unsigned int), 1);
00462 if (did >= dens_num) {
00463 s->b[i] = NULL;
00464 } else {
00465 s->b[i] = dens_index[did];
00466 }
00467 }
00468 }
00469 s->bweight = (PROB *)mybmalloc2(sizeof(PROB) * s->mix_num, &(hmm->mroot));
00470 rdn(fp, s->bweight, sizeof(PROB), s->mix_num);
00471 s->id = idx;
00472 st_index[idx] = s;
00473 state_add(hmm, s);
00474 }
00475 #ifdef DMES
00476 j_printf("%d states read\n", st_num);
00477 #endif
00478 }
00479
00491 static void
00492 rd_data(FILE *fp, HTK_HMM_INFO *hmm)
00493 {
00494 HTK_HMM_Data *d;
00495 unsigned int md_num;
00496 unsigned int sid, tid;
00497 unsigned int idx;
00498 int i;
00499
00500 rdn(fp, &(md_num), sizeof(unsigned int), 1);
00501 hmm->totalhmmnum = md_num;
00502
00503 hmm->start = NULL;
00504 hmm->physical_root = NULL;
00505 for (idx = 0; idx < md_num; idx++) {
00506 d = (HTK_HMM_Data *)mybmalloc2(sizeof(HTK_HMM_Data), &(hmm->mroot));
00507 d->name = rdn_str(fp, hmm);
00508 rdn(fp, &(d->state_num), sizeof(short), 1);
00509 d->s = (HTK_HMM_State **)mybmalloc2(sizeof(HTK_HMM_State *) * d->state_num, &(hmm->mroot));
00510 for (i=0;i<d->state_num;i++) {
00511 rdn(fp, &sid, sizeof(unsigned int), 1);
00512 if (sid > hmm->totalstatenum) {
00513 d->s[i] = NULL;
00514 } else {
00515 d->s[i] = st_index[sid];
00516 }
00517 }
00518 rdn(fp, &tid, sizeof(unsigned int), 1);
00519 d->tr = tr_index[tid];
00520 htk_hmmdata_add(hmm, d);
00521 }
00522 #ifdef DMES
00523 j_printf("%d HMM model definition read\n", md_num);
00524 #endif
00525 }
00526
00527
00528
00539 boolean
00540 read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para)
00541 {
00542
00543 gzfile = gzfile_p;
00544
00545
00546 if (rd_header(fp, hmm, para) == FALSE) {
00547 return FALSE;
00548 }
00549
00550 j_printerr("(binary)...");
00551
00552
00553 rd_opt(fp, &(hmm->opt));
00554
00555
00556 rd_type(fp, hmm);
00557
00558
00559 rd_trans(fp, hmm);
00560
00561
00562 rd_var(fp, hmm);
00563
00564
00565 rd_dens(fp, hmm);
00566
00567
00568 if (hmm->is_tied_mixture) {
00569 rd_tmix(fp, hmm);
00570 }
00571
00572
00573 rd_state(fp, hmm);
00574
00575
00576 rd_data(fp, hmm);
00577
00578
00579 free(tr_index);
00580 free(vr_index);
00581 free(dens_index);
00582 if (hmm->is_tied_mixture) free(tm_index);
00583 free(st_index);
00584
00585
00586 {
00587 HTK_HMM_Data *dtmp;
00588 int maxlen = 0;
00589 for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
00590 if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
00591 }
00592 hmm->maxstatenum = maxlen;
00593 }
00594
00595 if (! hmm->variance_inversed) {
00596
00597 htk_hmm_inverse_variances(hmm);
00598 hmm->variance_inversed = TRUE;
00599 }
00600
00601 j_printerr("finished\n");
00602
00603 return (TRUE);
00604 }