Julius 4.2
libjulius/src/wchmm_check.c
説明を見る。
00001 
00030 /*
00031  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00032  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00033  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00034  * All rights reserved
00035  */
00036 
00037 #include <julius/julius.h>
00038 
00055 static void
00056 print_winfo_w(WORD_INFO *winfo, WORD_ID word, boolean ngram_exist)
00057 {
00058   int i;
00059   if (word >= winfo->num) return;
00060   printf("--winfo\n");
00061   printf("wname   = %s\n",winfo->wname[word]);
00062   printf("woutput = %s\n",winfo->woutput[word]);
00063   printf("\ntransp  = %s\n", (winfo->is_transparent[word]) ? "yes" : "no");
00064   printf("wlen    = %d\n",winfo->wlen[word]);
00065   printf("wseq    =");
00066   for (i=0;i<winfo->wlen[word];i++) {
00067     printf(" %s",winfo->wseq[word][i]->name);
00068   }
00069   printf("\nwseq_def=");
00070   for (i=0;i<winfo->wlen[word];i++) {
00071     if (winfo->wseq[word][i]->is_pseudo) {
00072       printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name);
00073     } else {
00074       printf(" %s",winfo->wseq[word][i]->body.defined->name);
00075     }
00076   }
00077   if (ngram_exist) {
00078     printf("\nwton    = %d\n",winfo->wton[word]);
00079 #ifdef CLASS_NGRAM
00080     printf("cprob   = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word]));
00081 #endif
00082   }
00083   
00084 }
00085 
00100 static void
00101 print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word)
00102 {
00103   int i;
00104   if (word >= wchmm->winfo->num) return;
00105   printf("--wchmm (word)\n");
00106   printf("offset  =");
00107   for (i=0;i<wchmm->winfo->wlen[word];i++) {
00108     printf(" %d",wchmm->offset[word][i]);
00109   }
00110   printf("\n");
00111   if (wchmm->hmminfo->multipath) {
00112     printf("wordbegin = %d\n",wchmm->wordbegin[word]);
00113   }
00114   printf("wordend = %d\n",wchmm->wordend[word]);
00115 }
00116 
00131 static void
00132 print_wchmm_s(WCHMM_INFO *wchmm, int node)
00133 {
00134   printf("--wchmm (node)\n");
00135   printf("stend   = %d\n",wchmm->stend[node]);
00136   if (wchmm->hmminfo->multipath) {
00137     if (wchmm->state[node].out.state == NULL) {
00138       printf("NO OUTPUT\n");
00139       return;
00140     }
00141   }
00142 #ifdef PASS1_IWCD
00143   printf("outstyle= ");
00144   switch(wchmm->outstyle[node]) {
00145   case AS_STATE:
00146     printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id);
00147     break;
00148   case AS_LSET:
00149     printf("AS_LSET  (%d variants)\n", (wchmm->state[node].out.lset)->num);
00150     break;
00151   case AS_RSET:
00152     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00153       printf("AS_RSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00154                (wchmm->state[node].out.rset)->hmm->name,
00155                (wchmm->state[node].out.rset)->hmm->body.pseudo->name,
00156                (wchmm->state[node].out.rset)->state_loc);
00157     } else {
00158       printf("AS_RSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00159                (wchmm->state[node].out.rset)->hmm->name,
00160                (wchmm->state[node].out.rset)->hmm->body.defined->name,
00161                (wchmm->state[node].out.rset)->state_loc);
00162     }
00163     break;
00164   case AS_LRSET:
00165     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00166       printf("AS_LRSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00167                (wchmm->state[node].out.lrset)->hmm->name,
00168                (wchmm->state[node].out.lrset)->hmm->body.pseudo->name,
00169                (wchmm->state[node].out.lrset)->state_loc);
00170     } else {
00171       printf("AS_LRSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00172                (wchmm->state[node].out.lrset)->hmm->name,
00173                (wchmm->state[node].out.lrset)->hmm->body.defined->name,
00174                (wchmm->state[node].out.lrset)->state_loc);
00175     }
00176     break;
00177   default:
00178     printf("UNKNOWN???\n");
00179   }
00180 #endif /* PASS1_IWCD */
00181 }
00182 
00197 static void
00198 print_wchmm_s_arc(WCHMM_INFO *wchmm, int node)
00199 {
00200   A_CELL2 *ac;
00201   int i = 0;
00202   int j;
00203   printf("arcs:\n");
00204   if (wchmm->self_a[node] != LOG_ZERO) {
00205     printf(" %d %f(%f)\n", node, wchmm->self_a[node], pow(10.0, wchmm->self_a[node]));
00206     i++;
00207   }
00208   if (wchmm->next_a[node] != LOG_ZERO) {
00209     printf(" %d %f(%f)\n", node + 1, wchmm->next_a[node], pow(10.0, wchmm->next_a[node]));
00210     i++;
00211   }
00212   for(ac = wchmm->ac[node]; ac; ac = ac->next) {
00213     for (j=0;j<ac->n;j++) {
00214       printf(" %d %f(%f)\n",ac->arc[j],ac->a[j],pow(10.0, ac->a[j]));
00215       i++;
00216     }
00217   }
00218   printf(" total %d arcs\n",i);
00219 }
00220 
00235 static void
00236 print_wchmm_s_successor(WCHMM_INFO *wchmm, int node)
00237 {
00238   int i = 0, j;
00239   int scid;
00240 
00241   scid = wchmm->state[node].scid;
00242   if (scid == 0) {
00243     printf("no successors\n");
00244   } else if (scid < 0) {
00245     printf("successor id: %d\n", scid);
00246 #ifdef UNIGRAM_FACTORING
00247     if (wchmm->lmtype == LM_PROB) {
00248       printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]);
00249     }
00250 #endif
00251   } else {
00252 #ifdef UNIGRAM_FACTORING
00253     printf("successor id: %d\n", scid);
00254     printf(" %d\n", wchmm->scword[scid]);
00255 #else
00256     printf("successor id: %d\n", scid);
00257     for (j = 0; j < wchmm->sclen[scid]; j++) {
00258       printf(" %d\n", wchmm->sclist[scid][j]);
00259       i++;
00260     }
00261     printf(" total %d successors\n",i);
00262 #endif
00263   }
00264 }
00265 
00280 static void
00281 print_hmminfo(char *name, HTK_HMM_INFO *hmminfo)
00282 {
00283   HMM_Logical *l;
00284 
00285   l = htk_hmmdata_lookup_logical(hmminfo, name);
00286   if (l == NULL) {
00287     printf("no HMM named \"%s\"\n", name);
00288   } else {
00289     put_logical_hmm(stdout, l);
00290   }
00291 }
00292 
00307 static void
00308 print_ngraminfo(NGRAM_INFO *ngram, int nid)
00309 {
00310   printf("-- N-gram entry --\n");
00311   printf("nid  = %d\n", nid);
00312   printf("name = %s\n", ngram->wname[nid]);
00313 }
00314 
00315 
00331 void
00332 wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */
00333 {
00334 #define MAXNAMELEN 24
00335   char buf[MAXNAMELEN], *name;
00336   int arg, newline;
00337   WORD_ID argw;
00338   boolean endflag;
00339 
00340   printf("\n\n");
00341   printf("********************************************\n");
00342   printf("********  LM & LEXICON CHECK MODE  *********\n");
00343   printf("********************************************\n");
00344   printf("\n");
00345 
00346   for (endflag = FALSE; endflag == FALSE;) {
00347     printf("===== syntax: command arg (\"H\" for help) > ");
00348     if (fgets(buf, MAXNAMELEN, stdin) == NULL) break;
00349     name = "";
00350     arg = 0;
00351     if (isalpha(buf[0]) != 0 && buf[1] == ' ') {
00352       newline = strlen(buf)-1;
00353       if (buf[newline] == '\n') {
00354         buf[newline] = '\0';
00355       }
00356       if (buf[2] != '\0') {
00357         name = buf + 2;
00358         arg = atoi(name);
00359       }
00360     }
00361     switch(buf[0]) {
00362     case 'w':                   /* word info */
00363       argw = arg;
00364       print_winfo_w(wchmm->winfo, argw, (wchmm->ngram) ? TRUE : FALSE);
00365       print_wchmm_w(wchmm, argw);
00366       break;
00367     case 'n':                   /* node info */
00368       print_wchmm_s(wchmm, arg);
00369       break;
00370     case 'a':                   /* arc list */
00371       print_wchmm_s_arc(wchmm, arg);
00372       break;
00373 #if 0
00374     case 'r':                   /* reverse arc list */
00375       print_wchmm_r_arc(arg);
00376       break;
00377 #endif
00378     case 's':                   /* successor word list */
00379       if (wchmm->category_tree) {
00380         printf("Error: this is category tree (no successor list)\n");
00381       } else {
00382         print_wchmm_s_successor(wchmm, arg);
00383       }
00384       break;
00385     case 't':                   /* node total info of above */
00386       print_wchmm_s(wchmm, arg);
00387       print_wchmm_s_arc(wchmm, arg);
00388 #if 0
00389       print_wchmm_r_arc(arg);
00390 #endif
00391       if (!wchmm->category_tree) {
00392         print_wchmm_s_successor(wchmm, arg);
00393       }
00394       break;
00395     case 'h':                   /* hmm state info */
00396       print_hmminfo(name, wchmm->hmminfo);
00397       break;
00398     case 'l':                   /* N-gram language model info */
00399       if (wchmm->lmtype == LM_PROB) {
00400         print_ngraminfo(wchmm->ngram, arg);
00401       } else {
00402         printf("Error: this is not an N-gram model\n");
00403       }
00404       break;
00405     case 'q':                   /* quit */
00406       endflag = TRUE;
00407       break;
00408     default:                    /* help */
00409       printf("syntax: [command_character] [number(#)]\n");
00410       printf("  w [word_id] ... show word info\n");
00411       printf("  n [state]   ... show wchmm state info\n");
00412       printf("  a [state]   ... show arcs from the state\n");
00413 #if 0
00414       printf("  r [state]   ... show arcs  to  the state\n");
00415 #endif
00416       printf("  s [state]   ... show successor list of the state\n");
00417       printf("  h [hmmname] ... show HMM info of the name\n");
00418       printf("  l [nwid]    ... N-gram entry info\n");
00419       printf("  H           ... print this help\n");
00420       printf("  q           ... quit\n");
00421       break;
00422     }
00423   }
00424   printf("\n");
00425   printf("********************************************\n");
00426   printf("*****  END OF LM & LEXICON CHECK MODE  *****\n");
00427   printf("********************************************\n");
00428   printf("\n");
00429 }
00430 
00431 
00446 void
00447 check_wchmm(WCHMM_INFO *wchmm)
00448 {
00449   int i;
00450   boolean ok_flag;
00451   int node;
00452   WORD_ID w;
00453 
00454   ok_flag = TRUE;
00455 
00456   if (wchmm->hmminfo->multipath) {
00457   
00458     /* check word-beginning nodes */
00459     for(i=0;i<wchmm->startnum;i++) {
00460       node = wchmm->startnode[i];
00461       if (wchmm->state[node].out.state != NULL) {
00462         printf("Error: word-beginning node %d has output function!\n", node);
00463         ok_flag = FALSE;
00464       }
00465     }
00466     /* examine if word->state and state->word mapping is correct */
00467     for(w=0;w<wchmm->winfo->num;w++) {
00468       if (wchmm->stend[wchmm->wordend[w]] != w) {
00469         printf("Error: no match of word end for word %d!!\n", w);
00470         ok_flag = FALSE;
00471       }
00472     }
00473     
00474   } else {
00475   
00476     /* examine if word->state and state->word mapping is correct */
00477     for (i=0;i<wchmm->winfo->num;i++) {
00478       if (wchmm->stend[wchmm->wordend[i]]!=i) {
00479         printf("end ga awanai!!!: word=%d, node=%d, value=%d\n",
00480                i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]);
00481         ok_flag = FALSE;
00482       }
00483     }
00484   }
00485 
00486 #if 0
00487   /* check if the last state is unique and has only one output arc */
00488   {
00489     int n;
00490     A_CELL *ac;
00491 
00492     i = 0;
00493     for (n=0;n<wchmm->n;n++) {
00494       if (wchmm->stend[n] != WORD_INVALID) {
00495         i++;
00496         for (ac=wchmm->state[n].ac; ac; ac=ac->next) {
00497           if (ac->arc == n) continue;
00498           if (!wchmm->hmminfo->multipath && wchmm->ststart[ac->arc] != WORD_INVALID) continue;
00499           break;
00500         }
00501         if (ac != NULL) {
00502           printf("node %d is shared?\n",n);
00503           ok_flag = FALSE;
00504         }
00505       }
00506     }
00507     if (i != wchmm->winfo->num ) {
00508       printf("num of heads of words in wchmm not match word num!!\n");
00509       printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num);
00510       ok_flag = FALSE;
00511     }
00512   }
00513 #endif
00514 
00515   /* if check failed, go into interactive mode */
00516   if (!ok_flag) {
00517     wchmm_check_interactive(wchmm);
00518   }
00519 
00520   jlog("STAT: coordination check passed\n");
00521 }
00522 
00523 /* end of file */