Julius 4.2
|
00001 00030 /* 00031 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00032 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00033 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00034 * All rights reserved 00035 */ 00036 00037 #include <julius/julius.h> 00038 00055 static void 00056 print_winfo_w(WORD_INFO *winfo, WORD_ID word, boolean ngram_exist) 00057 { 00058 int i; 00059 if (word >= winfo->num) return; 00060 printf("--winfo\n"); 00061 printf("wname = %s\n",winfo->wname[word]); 00062 printf("woutput = %s\n",winfo->woutput[word]); 00063 printf("\ntransp = %s\n", (winfo->is_transparent[word]) ? "yes" : "no"); 00064 printf("wlen = %d\n",winfo->wlen[word]); 00065 printf("wseq ="); 00066 for (i=0;i<winfo->wlen[word];i++) { 00067 printf(" %s",winfo->wseq[word][i]->name); 00068 } 00069 printf("\nwseq_def="); 00070 for (i=0;i<winfo->wlen[word];i++) { 00071 if (winfo->wseq[word][i]->is_pseudo) { 00072 printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name); 00073 } else { 00074 printf(" %s",winfo->wseq[word][i]->body.defined->name); 00075 } 00076 } 00077 if (ngram_exist) { 00078 printf("\nwton = %d\n",winfo->wton[word]); 00079 #ifdef CLASS_NGRAM 00080 printf("cprob = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word])); 00081 #endif 00082 } 00083 00084 } 00085 00100 static void 00101 print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word) 00102 { 00103 int i; 00104 if (word >= wchmm->winfo->num) return; 00105 printf("--wchmm (word)\n"); 00106 printf("offset ="); 00107 for (i=0;i<wchmm->winfo->wlen[word];i++) { 00108 printf(" %d",wchmm->offset[word][i]); 00109 } 00110 printf("\n"); 00111 if (wchmm->hmminfo->multipath) { 00112 printf("wordbegin = %d\n",wchmm->wordbegin[word]); 00113 } 00114 printf("wordend = %d\n",wchmm->wordend[word]); 00115 } 00116 00131 static void 00132 print_wchmm_s(WCHMM_INFO *wchmm, int node) 00133 { 00134 printf("--wchmm (node)\n"); 00135 printf("stend = %d\n",wchmm->stend[node]); 00136 if (wchmm->hmminfo->multipath) { 00137 if (wchmm->state[node].out.state == NULL) { 00138 printf("NO OUTPUT\n"); 00139 return; 00140 } 00141 } 00142 #ifdef PASS1_IWCD 00143 printf("outstyle= "); 00144 switch(wchmm->outstyle[node]) { 00145 case AS_STATE: 00146 printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id); 00147 break; 00148 case AS_LSET: 00149 printf("AS_LSET (%d variants)\n", (wchmm->state[node].out.lset)->num); 00150 break; 00151 case AS_RSET: 00152 if ((wchmm->state[node].out.rset)->hmm->is_pseudo) { 00153 printf("AS_RSET (name=\"%s\", pseudo=\"%s\", loc=%d)\n", 00154 (wchmm->state[node].out.rset)->hmm->name, 00155 (wchmm->state[node].out.rset)->hmm->body.pseudo->name, 00156 (wchmm->state[node].out.rset)->state_loc); 00157 } else { 00158 printf("AS_RSET (name=\"%s\", defined=\"%s\", loc=%d)\n", 00159 (wchmm->state[node].out.rset)->hmm->name, 00160 (wchmm->state[node].out.rset)->hmm->body.defined->name, 00161 (wchmm->state[node].out.rset)->state_loc); 00162 } 00163 break; 00164 case AS_LRSET: 00165 if ((wchmm->state[node].out.rset)->hmm->is_pseudo) { 00166 printf("AS_LRSET (name=\"%s\", pseudo=\"%s\", loc=%d)\n", 00167 (wchmm->state[node].out.lrset)->hmm->name, 00168 (wchmm->state[node].out.lrset)->hmm->body.pseudo->name, 00169 (wchmm->state[node].out.lrset)->state_loc); 00170 } else { 00171 printf("AS_LRSET (name=\"%s\", defined=\"%s\", loc=%d)\n", 00172 (wchmm->state[node].out.lrset)->hmm->name, 00173 (wchmm->state[node].out.lrset)->hmm->body.defined->name, 00174 (wchmm->state[node].out.lrset)->state_loc); 00175 } 00176 break; 00177 default: 00178 printf("UNKNOWN???\n"); 00179 } 00180 #endif /* PASS1_IWCD */ 00181 } 00182 00197 static void 00198 print_wchmm_s_arc(WCHMM_INFO *wchmm, int node) 00199 { 00200 A_CELL2 *ac; 00201 int i = 0; 00202 int j; 00203 printf("arcs:\n"); 00204 if (wchmm->self_a[node] != LOG_ZERO) { 00205 printf(" %d %f(%f)\n", node, wchmm->self_a[node], pow(10.0, wchmm->self_a[node])); 00206 i++; 00207 } 00208 if (wchmm->next_a[node] != LOG_ZERO) { 00209 printf(" %d %f(%f)\n", node + 1, wchmm->next_a[node], pow(10.0, wchmm->next_a[node])); 00210 i++; 00211 } 00212 for(ac = wchmm->ac[node]; ac; ac = ac->next) { 00213 for (j=0;j<ac->n;j++) { 00214 printf(" %d %f(%f)\n",ac->arc[j],ac->a[j],pow(10.0, ac->a[j])); 00215 i++; 00216 } 00217 } 00218 printf(" total %d arcs\n",i); 00219 } 00220 00235 static void 00236 print_wchmm_s_successor(WCHMM_INFO *wchmm, int node) 00237 { 00238 int i = 0, j; 00239 int scid; 00240 00241 scid = wchmm->state[node].scid; 00242 if (scid == 0) { 00243 printf("no successors\n"); 00244 } else if (scid < 0) { 00245 printf("successor id: %d\n", scid); 00246 #ifdef UNIGRAM_FACTORING 00247 if (wchmm->lmtype == LM_PROB) { 00248 printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]); 00249 } 00250 #endif 00251 } else { 00252 #ifdef UNIGRAM_FACTORING 00253 printf("successor id: %d\n", scid); 00254 printf(" %d\n", wchmm->scword[scid]); 00255 #else 00256 printf("successor id: %d\n", scid); 00257 for (j = 0; j < wchmm->sclen[scid]; j++) { 00258 printf(" %d\n", wchmm->sclist[scid][j]); 00259 i++; 00260 } 00261 printf(" total %d successors\n",i); 00262 #endif 00263 } 00264 } 00265 00280 static void 00281 print_hmminfo(char *name, HTK_HMM_INFO *hmminfo) 00282 { 00283 HMM_Logical *l; 00284 00285 l = htk_hmmdata_lookup_logical(hmminfo, name); 00286 if (l == NULL) { 00287 printf("no HMM named \"%s\"\n", name); 00288 } else { 00289 put_logical_hmm(stdout, l); 00290 } 00291 } 00292 00307 static void 00308 print_ngraminfo(NGRAM_INFO *ngram, int nid) 00309 { 00310 printf("-- N-gram entry --\n"); 00311 printf("nid = %d\n", nid); 00312 printf("name = %s\n", ngram->wname[nid]); 00313 } 00314 00315 00331 void 00332 wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */ 00333 { 00334 #define MAXNAMELEN 24 00335 char buf[MAXNAMELEN], *name; 00336 int arg, newline; 00337 WORD_ID argw; 00338 boolean endflag; 00339 00340 printf("\n\n"); 00341 printf("********************************************\n"); 00342 printf("******** LM & LEXICON CHECK MODE *********\n"); 00343 printf("********************************************\n"); 00344 printf("\n"); 00345 00346 for (endflag = FALSE; endflag == FALSE;) { 00347 printf("===== syntax: command arg (\"H\" for help) > "); 00348 if (fgets(buf, MAXNAMELEN, stdin) == NULL) break; 00349 name = ""; 00350 arg = 0; 00351 if (isalpha(buf[0]) != 0 && buf[1] == ' ') { 00352 newline = strlen(buf)-1; 00353 if (buf[newline] == '\n') { 00354 buf[newline] = '\0'; 00355 } 00356 if (buf[2] != '\0') { 00357 name = buf + 2; 00358 arg = atoi(name); 00359 } 00360 } 00361 switch(buf[0]) { 00362 case 'w': /* word info */ 00363 argw = arg; 00364 print_winfo_w(wchmm->winfo, argw, (wchmm->ngram) ? TRUE : FALSE); 00365 print_wchmm_w(wchmm, argw); 00366 break; 00367 case 'n': /* node info */ 00368 print_wchmm_s(wchmm, arg); 00369 break; 00370 case 'a': /* arc list */ 00371 print_wchmm_s_arc(wchmm, arg); 00372 break; 00373 #if 0 00374 case 'r': /* reverse arc list */ 00375 print_wchmm_r_arc(arg); 00376 break; 00377 #endif 00378 case 's': /* successor word list */ 00379 if (wchmm->category_tree) { 00380 printf("Error: this is category tree (no successor list)\n"); 00381 } else { 00382 print_wchmm_s_successor(wchmm, arg); 00383 } 00384 break; 00385 case 't': /* node total info of above */ 00386 print_wchmm_s(wchmm, arg); 00387 print_wchmm_s_arc(wchmm, arg); 00388 #if 0 00389 print_wchmm_r_arc(arg); 00390 #endif 00391 if (!wchmm->category_tree) { 00392 print_wchmm_s_successor(wchmm, arg); 00393 } 00394 break; 00395 case 'h': /* hmm state info */ 00396 print_hmminfo(name, wchmm->hmminfo); 00397 break; 00398 case 'l': /* N-gram language model info */ 00399 if (wchmm->lmtype == LM_PROB) { 00400 print_ngraminfo(wchmm->ngram, arg); 00401 } else { 00402 printf("Error: this is not an N-gram model\n"); 00403 } 00404 break; 00405 case 'q': /* quit */ 00406 endflag = TRUE; 00407 break; 00408 default: /* help */ 00409 printf("syntax: [command_character] [number(#)]\n"); 00410 printf(" w [word_id] ... show word info\n"); 00411 printf(" n [state] ... show wchmm state info\n"); 00412 printf(" a [state] ... show arcs from the state\n"); 00413 #if 0 00414 printf(" r [state] ... show arcs to the state\n"); 00415 #endif 00416 printf(" s [state] ... show successor list of the state\n"); 00417 printf(" h [hmmname] ... show HMM info of the name\n"); 00418 printf(" l [nwid] ... N-gram entry info\n"); 00419 printf(" H ... print this help\n"); 00420 printf(" q ... quit\n"); 00421 break; 00422 } 00423 } 00424 printf("\n"); 00425 printf("********************************************\n"); 00426 printf("***** END OF LM & LEXICON CHECK MODE *****\n"); 00427 printf("********************************************\n"); 00428 printf("\n"); 00429 } 00430 00431 00446 void 00447 check_wchmm(WCHMM_INFO *wchmm) 00448 { 00449 int i; 00450 boolean ok_flag; 00451 int node; 00452 WORD_ID w; 00453 00454 ok_flag = TRUE; 00455 00456 if (wchmm->hmminfo->multipath) { 00457 00458 /* check word-beginning nodes */ 00459 for(i=0;i<wchmm->startnum;i++) { 00460 node = wchmm->startnode[i]; 00461 if (wchmm->state[node].out.state != NULL) { 00462 printf("Error: word-beginning node %d has output function!\n", node); 00463 ok_flag = FALSE; 00464 } 00465 } 00466 /* examine if word->state and state->word mapping is correct */ 00467 for(w=0;w<wchmm->winfo->num;w++) { 00468 if (wchmm->stend[wchmm->wordend[w]] != w) { 00469 printf("Error: no match of word end for word %d!!\n", w); 00470 ok_flag = FALSE; 00471 } 00472 } 00473 00474 } else { 00475 00476 /* examine if word->state and state->word mapping is correct */ 00477 for (i=0;i<wchmm->winfo->num;i++) { 00478 if (wchmm->stend[wchmm->wordend[i]]!=i) { 00479 printf("end ga awanai!!!: word=%d, node=%d, value=%d\n", 00480 i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]); 00481 ok_flag = FALSE; 00482 } 00483 } 00484 } 00485 00486 #if 0 00487 /* check if the last state is unique and has only one output arc */ 00488 { 00489 int n; 00490 A_CELL *ac; 00491 00492 i = 0; 00493 for (n=0;n<wchmm->n;n++) { 00494 if (wchmm->stend[n] != WORD_INVALID) { 00495 i++; 00496 for (ac=wchmm->state[n].ac; ac; ac=ac->next) { 00497 if (ac->arc == n) continue; 00498 if (!wchmm->hmminfo->multipath && wchmm->ststart[ac->arc] != WORD_INVALID) continue; 00499 break; 00500 } 00501 if (ac != NULL) { 00502 printf("node %d is shared?\n",n); 00503 ok_flag = FALSE; 00504 } 00505 } 00506 } 00507 if (i != wchmm->winfo->num ) { 00508 printf("num of heads of words in wchmm not match word num!!\n"); 00509 printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num); 00510 ok_flag = FALSE; 00511 } 00512 } 00513 #endif 00514 00515 /* if check failed, go into interactive mode */ 00516 if (!ok_flag) { 00517 wchmm_check_interactive(wchmm); 00518 } 00519 00520 jlog("STAT: coordination check passed\n"); 00521 } 00522 00523 /* end of file */