Julius 4.1.5
libsent/src/hmminfo/cdset.c
説明を見る。
00001 
00066 /*
00067  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00068  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00069  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00070  * All rights reserved
00071  */
00072 
00073 #include <sent/stddefs.h>
00074 #include <sent/htk_param.h>
00075 #include <sent/htk_hmm.h>
00076 
00078 
00079 
/** Number of state-pointer slots added each time a CD_State_Set array is (re)allocated. */
#define CD_STATE_SET_STEP 10
00081 
00087 static void
00088 cdset_init(HTK_HMM_INFO *hmminfo)
00089 {
00090   hmminfo->cdset_info.cdtree = NULL;
00091 }
00092 
00098 static CD_Set *
00099 cdset_new()
00100 {
00101   return((CD_Set *)mymalloc(sizeof(CD_Set)));
00102 }
00103 
00112 CD_Set *
00113 cdset_lookup(HTK_HMM_INFO *hmminfo, char *cdstr)
00114 {
00115   CD_Set *cd;
00116   cd = aptree_search_data(cdstr, hmminfo->cdset_info.cdtree);
00117   if (cd != NULL && strmatch(cdstr, cd->name)) {
00118     return cd;
00119   } else {
00120     return NULL;
00121   }
00122 }
00123 
00132 CD_Set *
00133 lcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
00134 {
00135   char buf[MAX_HMMNAME_LEN];
00136 
00137   return(cdset_lookup(hmminfo, leftcenter_name(hmmname, buf)));
00138 }
00139 
00148 CD_Set *
00149 rcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
00150 {
00151   char buf[MAX_HMMNAME_LEN];
00152 
00153   return(cdset_lookup(hmminfo, rightcenter_name(hmmname, buf)));
00154 }
00155 
00156 
00162 static void
00163 put_cdset(void *ptr)
00164 {
00165   int i;
00166   CD_Set *a;
00167 
00168   a = ptr;
00169   printf("name: %s\n", a->name);
00170   /* printf("state_num: %d\n", a->state_num); */
00171   for(i=0;i<a->state_num;i++) {
00172     if (a->stateset[i].num == 0) {
00173       printf("\t[state %d]  not exist\n", i);
00174     } else {
00175       printf("\t[state %d]  %d variants\n", i, a->stateset[i].num);
00176     }
00177     /*
00178       for(j=0;j<a->stateset[i].num;j++) {
00179         put_htk_state(stdout, a->stateset[i].s[j]);
00180       }
00181     */
00182   }
00183 }
00184 
00190 void
00191 put_all_cdinfo(HTK_HMM_INFO *hmminfo)
00192 {
00193   aptree_traverse_and_do(hmminfo->cdset_info.cdtree, put_cdset);
00194 }
00195 
00196 
00206 boolean
00207 regist_cdset(APATNODE **root, HTK_HMM_Data *d, char *cdname, BMALLOC_BASE **mroot)
00208 {
00209   boolean need_new;
00210   CD_State_Set *tmp;
00211   CD_Set *lset = NULL, *lmatch = NULL;
00212   int j,n;
00213   boolean changed = FALSE;
00214 
00215   if (strlen(cdname) >= MAX_HMMNAME_LEN) {
00216     jlog("Error: cdset: HMM name exceeds limit (%d): %s!\n", MAX_HMMNAME_LEN, cdname);
00217     jlog("Error: cdset: Please increase the value of MAX_HMMNAME_LEN (current = %d)\n", MAX_HMMNAME_LEN);
00218     exit(1);
00219   }
00220   
00221   /* check if the cdset already exist */
00222   need_new = TRUE;
00223   if (*root != NULL) {
00224     lmatch = aptree_search_data(cdname, *root);
00225     if (lmatch != NULL && strmatch(lmatch->name, cdname)) {
00226       /* exist, add to it later */
00227       lset = lmatch;
00228       need_new = FALSE;
00229       /* if the state num is larger than allocated, expand the lset */
00230       if (d->state_num > lset->state_num) {
00231         lset->stateset = (CD_State_Set *)myrealloc(lset->stateset, sizeof(CD_State_Set) * d->state_num);
00232         /* 0 1 ... (lset->state_num-1) */
00233         /* N A ... N                   */
00234         /* 0 1 ...                     ... (d->state_num-1) */
00235         /* N A ... A ..................... N                */
00236         /* malloc new area to expanded state (N to A above) */
00237         for(j = lset->state_num - 1; j < d->state_num - 1; j++) {
00238           lset->stateset[j].maxnum = CD_STATE_SET_STEP;
00239           lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
00240           lset->stateset[j].num = 0;
00241         }
00242         lset->stateset[d->state_num-1].s = NULL;
00243         lset->stateset[d->state_num-1].num = 0;
00244         lset->stateset[d->state_num-1].maxnum = 0;
00245         
00246         lset->state_num = d->state_num;
00247 
00248         /* update transition table */
00249         lset->tr = d->tr;
00250 
00251         changed = TRUE;
00252       }
00253     }
00254   }
00255 
00256   if (need_new) {
00257     /* allocate as new with blank data */
00258     lset = cdset_new();
00259     lset->name = strdup(cdname);
00260     lset->state_num = d->state_num;
00261     lset->stateset = (CD_State_Set *)mymalloc(sizeof(CD_State_Set) * lset->state_num);
00262     /* assume first and last state has no outprob */
00263     lset->stateset[0].s = lset->stateset[lset->state_num-1].s = NULL;
00264     lset->stateset[0].num = lset->stateset[lset->state_num-1].num = 0;
00265     lset->stateset[0].maxnum = lset->stateset[lset->state_num-1].maxnum = 0;
00266     for(j=1;j<lset->state_num-1; j++) {
00267       /* pre-allocate only the first step */
00268       lset->stateset[j].maxnum = CD_STATE_SET_STEP;
00269       lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
00270       lset->stateset[j].num = 0;
00271     }
00272     /* assign transition table of first found %HMM (ad-hoc?) */
00273     lset->tr = d->tr;
00274     /* add to search index tree */
00275     if (*root == NULL) {
00276       *root = aptree_make_root_node(lset, mroot);
00277     } else {
00278       aptree_add_entry(lset->name, lset, lmatch->name, root, mroot);
00279     }
00280 
00281     changed = TRUE;
00282   }
00283     
00284   /* register each HMM states to the lcdset */
00285   for (j=1;j<d->state_num-1;j++) {
00286     tmp = &(lset->stateset[j]);
00287     /* check if the state has already registered */
00288     for(n = 0; n < tmp->num ; n++) {
00289       if (tmp->s[n] == d->s[j]) { /* compare by pointer */
00290         /*jlog("\tstate %d has same\n", n);*/
00291         break;
00292       }
00293     }
00294     if (n < tmp->num ) continue;        /* same state found, cancel regist. */
00295     
00296     /* expand storage area if necessary */
00297     if (tmp->num >= tmp->maxnum) {
00298       tmp->maxnum += CD_STATE_SET_STEP;
00299       tmp->s = (HTK_HMM_State **)myrealloc(tmp->s, sizeof(HTK_HMM_State *) * tmp->maxnum);
00300     }
00301     
00302     tmp->s[tmp->num] = d->s[j];
00303     tmp->num++;
00304 
00305     changed = TRUE;
00306   }
00307 
00308   return(changed);
00309 }
00310 
00319 boolean
00320 make_cdset(HTK_HMM_INFO *hmminfo)
00321 {
00322   HMM_Logical *lg;
00323   char buf[MAX_HMMNAME_LEN];
00324 
00325   cdset_init(hmminfo);
00326   /* make cdset name from logical HMM name */
00327   /* left-context set: "a-k" for /a-k+i/, /a-k+o/, ...
00328      for 1st pass (word end) */
00329   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00330     if (lg->is_pseudo) continue;
00331     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, leftcenter_name(lg->name, buf), &(hmminfo->cdset_root));
00332   }
00333   /* right-context set: "a+o" for /b-a+o/, /t-a+o/, ...
00334      for 2nd pass (word beginning) */
00335   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00336     if (lg->is_pseudo) continue;
00337     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, rightcenter_name(lg->name, buf), &(hmminfo->cdset_root));
00338   }
00339   /* both-context set: "a" for all triphone with same base phone "a"
00340      for 1st pass (1 phoneme word, with no previous word hypo.) */
00341   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00342     if (lg->is_pseudo) continue;
00343     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, center_name(lg->name, buf), &(hmminfo->cdset_root));
00344   }
00345 
00346   /* now that cdset is completely built */
00347   
00348   return(TRUE);
00349 }
00350 
00356 static void
00357 callback_free_lcdset_content(void *arg)
00358 {
00359   CD_Set *d;
00360   int j;
00361 
00362   d = arg;
00363   for(j=0;j<d->state_num;j++) {
00364     if (d->stateset[j].s != NULL) free(d->stateset[j].s);
00365   }
00366   free(d->stateset);
00367   free(d->name);
00368   free(d);
00369 }
00370 
00378 void
00379 free_cdset(APATNODE **root, BMALLOC_BASE **mroot)
00380 {
00381   if (*root != NULL) {
00382     aptree_traverse_and_do(*root, callback_free_lcdset_content);
00383     mybfree2(mroot);
00384     *root = NULL;
00385   }
00386 }
00387