00001
00038
00039
00040
00041
00042
00043
00044
00045 #include <julius.h>
00046
/*
 * Alignment unit selectors, passed as the per_what argument of
 * make_phseq() and do_align().
 */
enum {
  PER_WORD = 1,    /* one segment per word */
  PER_PHONEME = 2, /* one segment per phoneme */
  PER_STATE = 3    /* one segment per HMM state */
};
00050
00076 static HMM_Logical **
00077 make_phseq(WORD_ID *wseq, short num
00078 #ifdef MULTIPATH_VERSION
00079 , boolean **has_sp_ret
00080 #endif
00081 , int *num_ret, int **end_ret, int per_what)
00082 {
00083 HMM_Logical **ph;
00084 #ifdef MULTIPATH_VERSION
00085 boolean *has_sp;
00086 int k;
00087 #endif
00088 int phnum;
00089 WORD_ID tmpw, w;
00090 int i, j, pn, st, endn;
00091 HMM_Logical *tmpp, *ret;
00092
00093
00094
00095 phnum = 0;
00096 for (w=0;w<num;w++) phnum += winfo->wlen[wseq[w]];
00097 ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum);
00098 #ifdef MULTIPATH_VERSION
00099 has_sp = (boolean *)mymalloc(sizeof(boolean) * phnum);
00100 #endif
00101
00102 #ifdef MULTIPATH_VERSION
00103 st = 1;
00104 #else
00105 st = 0;
00106 #endif
00107 pn = 0;
00108 endn = 0;
00109 for (w=0;w<num;w++) {
00110 tmpw = wseq[w];
00111 for (i=0;i<winfo->wlen[tmpw];i++) {
00112 tmpp = winfo->wseq[tmpw][i];
00113
00114 if (ccd_flag) {
00115 if (w > 0 && i == 0) {
00116
00117 if ((ret = get_left_context_HMM(tmpp, ph[pn-1]->name, hmminfo)) != NULL) {
00118 tmpp = ret;
00119 }
00120
00121
00122 }
00123 if (w < num-1 && i == winfo->wlen[tmpw] - 1) {
00124 if ((ret = get_right_context_HMM(tmpp, winfo->wseq[wseq[w+1]][0]->name, hmminfo)) != NULL) {
00125 tmpp = ret;
00126 }
00127 }
00128 }
00129 ph[pn] = tmpp;
00130 #ifdef MULTIPATH_VERSION
00131 if (enable_iwsp && i == winfo->wlen[tmpw] - 1) {
00132 has_sp[pn] = TRUE;
00133 } else {
00134 has_sp[pn] = FALSE;
00135 }
00136 #endif
00137 if (per_what == PER_STATE) {
00138 for (j=0;j<hmm_logical_state_num(tmpp)-2;j++) {
00139 (*end_ret)[endn++] = st + j;
00140 }
00141 #ifdef MULTIPATH_VERSION
00142 if (enable_iwsp && has_sp[pn]) {
00143 for (k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00144 (*end_ret)[endn++] = st + j + k;
00145 }
00146 }
00147 #endif
00148 }
00149 st += hmm_logical_state_num(tmpp) - 2;
00150 #ifdef MULTIPATH_VERSION
00151 if (enable_iwsp && has_sp[pn]) {
00152 st += hmm_logical_state_num(hmminfo->sp) - 2;
00153 }
00154 #endif
00155 if (per_what == PER_PHONEME) (*end_ret)[endn++] = st - 1;
00156 pn++;
00157 }
00158 if (per_what == PER_WORD) (*end_ret)[endn++] = st - 1;
00159 }
00160 *num_ret = phnum;
00161 #ifdef MULTIPATH_VERSION
00162 *has_sp_ret = has_sp;
00163 #endif
00164 return ph;
00165 }
00166
00167
00186 static void
00187 do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what)
00188 {
00189 HMM_Logical **phones;
00190 #ifdef MULTIPATH_VERSION
00191 boolean *has_sp;
00192 int k;
00193 #endif
00194 int phonenum;
00195 HMM *shmm;
00196 int *end_state;
00197 int *end_frame;
00198 LOGPROB *end_score;
00199 LOGPROB allscore;
00200 WORD_ID w;
00201 int i, rlen;
00202 int end_num = 0;
00203 int *id_seq, *phloc = NULL, *stloc = NULL;
00204 int j,n,p;
00205
00206
00207 switch(per_what) {
00208 case PER_WORD:
00209 j_printf("=== word alignment begin ===\n");
00210 end_num = wnum;
00211 phloc = (int *)mymalloc(sizeof(int)*wnum);
00212 i = 0;
00213 for(w=0;w<wnum;w++) {
00214 phloc[w] = i;
00215 i += winfo->wlen[words[w]];
00216 }
00217 break;
00218 case PER_PHONEME:
00219 j_printf("=== phoneme alignment begin ===\n");
00220 end_num = 0;
00221 for(w=0;w<wnum;w++) end_num += winfo->wlen[words[w]];
00222 break;
00223 case PER_STATE:
00224 j_printf("=== state alignment begin ===\n");
00225 end_num = 0;
00226 for(w=0;w<wnum;w++) {
00227 for (i=0;i<winfo->wlen[words[w]]; i++) {
00228 end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2;
00229 }
00230 #ifdef MULTIPATH_VERSION
00231 if (enable_iwsp) {
00232 end_num += hmm_logical_state_num(hmminfo->sp) - 2;
00233 }
00234 #endif
00235 }
00236 phloc = (int *)mymalloc(sizeof(int)*end_num);
00237 stloc = (int *)mymalloc(sizeof(int)*end_num);
00238 {
00239 n = 0;
00240 p = 0;
00241 for(w=0;w<wnum;w++) {
00242 for(i=0;i<winfo->wlen[words[w]]; i++) {
00243 for(j=0; j<hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; j++) {
00244 phloc[n] = p;
00245 stloc[n] = j + 1;
00246 n++;
00247 }
00248 #ifdef MULTIPATH_VERSION
00249 if (enable_iwsp && i == winfo->wlen[words[w]] - 1) {
00250 for(k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00251 phloc[n] = p;
00252 stloc[n] = j + 1 + k + end_num;
00253 n++;
00254 }
00255 }
00256 #endif
00257 p++;
00258 }
00259 }
00260 }
00261
00262 break;
00263 }
00264 end_state = (int *)mymalloc(sizeof(int) * end_num);
00265
00266
00267 phones = make_phseq(words, wnum
00268 #ifdef MULTIPATH_VERSION
00269 , &has_sp
00270 #endif
00271 , &phonenum, &end_state, per_what);
00272
00273 shmm = new_make_word_hmm(hmminfo, phones, phonenum
00274 #ifdef MULTIPATH_VERSION
00275 , has_sp
00276 #endif
00277 );
00278
00279
00280 allscore = viterbi_segment(shmm, param, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen);
00281
00282
00283 {
00284 int i,p,n;
00285 j_printf("id: from to n_score applied HMMs (logical[physical] or {pseudo})\n");
00286 j_printf("------------------------------------------------------------\n");
00287 for (i=0;i<rlen;i++) {
00288 j_printf("%2d: %4d %4d %f ", id_seq[i], (i == 0) ? 0 : end_frame[i-1]+1, end_frame[i], end_score[i]);
00289 switch(per_what) {
00290 case PER_WORD:
00291 for(p=0;p<winfo->wlen[words[id_seq[i]]];p++) {
00292 n = phloc[id_seq[i]] + p;
00293 if (phones[n]->is_pseudo) {
00294 j_printf(" %s{%s}", phones[n]->name, phones[n]->body.pseudo->name);
00295 } else if (strmatch(phones[n]->name, phones[n]->body.defined->name)) {
00296 j_printf(" %s", phones[n]->name);
00297 } else {
00298 j_printf(" %s[%s]", phones[n]->name, phones[n]->body.defined->name);
00299 }
00300 }
00301 break;
00302 case PER_PHONEME:
00303 n = id_seq[i];
00304 if (phones[n]->is_pseudo) {
00305 j_printf(" {%s}", phones[n]->name);
00306 } else if (strmatch(phones[n]->name, phones[n]->body.defined->name)) {
00307 j_printf(" %s", phones[n]->name);
00308 } else {
00309 j_printf(" %s[%s]", phones[n]->name, phones[n]->body.defined->name);
00310 }
00311 break;
00312 case PER_STATE:
00313 n = phloc[id_seq[i]];
00314 if (phones[n]->is_pseudo) {
00315 j_printf(" {%s}", phones[n]->name);
00316 } else if (strmatch(phones[n]->name, phones[n]->body.defined->name)) {
00317 j_printf(" %s", phones[n]->name);
00318 } else {
00319 j_printf(" %s[%s]", phones[n]->name, phones[n]->body.defined->name);
00320 }
00321 #ifdef MULTIPATH_VERSION
00322 if (enable_iwsp && stloc[id_seq[i]] > end_num) {
00323 j_printf(" #%d (sp)", stloc[id_seq[i]] - end_num);
00324 } else {
00325 j_printf(" #%d", stloc[id_seq[i]]);
00326 }
00327 #else
00328 j_printf(" #%d", stloc[id_seq[i]]);
00329 #endif
00330 break;
00331 }
00332 j_printf("\n");
00333 }
00334 }
00335 j_printf("re-computed AM score: %f\n", allscore);
00336
00337 free_hmm(shmm);
00338 free(id_seq);
00339 free(phones);
00340 #ifdef MULTIPATH_VERSION
00341 free(has_sp);
00342 #endif
00343 free(end_score);
00344 free(end_frame);
00345 free(end_state);
00346
00347 switch(per_what) {
00348 case PER_WORD:
00349 free(phloc);
00350 j_printf("=== word alignment end ===\n");
00351 break;
00352 case PER_PHONEME:
00353 j_printf("=== phoneme alignment end ===\n");
00354 break;
00355 case PER_STATE:
00356 free(phloc);
00357 free(stloc);
00358 j_printf("=== state alignment end ===\n");
00359 }
00360
00361 }
00362
00363
00380 void
00381 word_align(WORD_ID *words, short wnum, HTK_Param *param)
00382 {
00383 do_align(words, wnum, param, PER_WORD);
00384 }
00385
00402 void
00403 word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param)
00404 {
00405 WORD_ID *words;
00406 int w;
00407 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum);
00408 for (w=0;w<wnum;w++) words[w] = revwords[wnum-w-1];
00409 do_align(words, wnum, param, PER_WORD);
00410 free(words);
00411 }
00412
00429 void
00430 phoneme_align(WORD_ID *words, short num, HTK_Param *param)
00431 {
00432 do_align(words, num, param, PER_PHONEME);
00433 }
00434
00451 void
00452 phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param)
00453 {
00454 WORD_ID *words;
00455 int p;
00456 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00457 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00458 do_align(words, num, param, PER_PHONEME);
00459 free(words);
00460 }
00461
00478 void
00479 state_align(WORD_ID *words, short num, HTK_Param *param)
00480 {
00481 do_align(words, num, param, PER_STATE);
00482 }
00483
00500 void
00501 state_rev_align(WORD_ID *revwords, short num, HTK_Param *param)
00502 {
00503 WORD_ID *words;
00504 int p;
00505 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00506 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00507 do_align(words, num, param, PER_STATE);
00508 free(words);
00509 }