Julius: julius-simple/julius-simple.c ソースファイル

Julius 4.2
00001 
00030 /* include top Julius library header */
00031 #include <julius/juliuslib.h>
00032 
00037 static void
00038 status_recready(Recog *recog, void *dummy)
00039 {
00040   if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00041     fprintf(stderr, "<<< please speak >>>");
00042   }
00043 }
00044 
00049 static void
00050 status_recstart(Recog *recog, void *dummy)
00051 {
00052   if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00053     fprintf(stderr, "\r                    \r");
00054   }
00055 }
00056 
00061 static void
00062 put_hypo_phoneme(WORD_ID *seq, int n, WORD_INFO *winfo)
00063 {
00064   int i,j;
00065   WORD_ID w;
00066   static char buf[MAX_HMMNAME_LEN];
00067 
00068   if (seq != NULL) {
00069     for (i=0;i<n;i++) {
00070       if (i > 0) printf(" |");
00071       w = seq[i];
00072       for (j=0;j<winfo->wlen[w];j++) {
00073         center_name(winfo->wseq[w][j]->name, buf);
00074         printf(" %s", buf);
00075       }
00076     }
00077   }
00078   printf("\n");  
00079 }
00085 static void
00086 output_result(Recog *recog, void *dummy)
00087 {
00088   int i, j;
00089   int len;
00090   WORD_INFO *winfo;
00091   WORD_ID *seq;
00092   int seqnum;
00093   int n;
00094   Sentence *s;
00095   RecogProcess *r;
00096   HMM_Logical *p;
00097   SentenceAlign *align;
00098 
00099   /* all recognition results are stored at each recognition process
00100      instance */
00101   for(r=recog->process_list;r;r=r->next) {
00102 
00103     /* skip the process if the process is not alive */
00104     if (! r->live) continue;
00105 
00106     /* result are in r->result.  See recog.h for details */
00107 
00108     /* check result status */
00109     if (r->result.status < 0) {      /* no results obtained */
00110       /* outout message according to the status code */
00111       switch(r->result.status) {
00112       case J_RESULT_STATUS_REJECT_POWER:
00113         printf("<input rejected by power>\n");
00114         break;
00115       case J_RESULT_STATUS_TERMINATE:
00116         printf("<input teminated by request>\n");
00117         break;
00118       case J_RESULT_STATUS_ONLY_SILENCE:
00119         printf("<input rejected by decoder (silence input result)>\n");
00120         break;
00121       case J_RESULT_STATUS_REJECT_GMM:
00122         printf("<input rejected by GMM>\n");
00123         break;
00124       case J_RESULT_STATUS_REJECT_SHORT:
00125         printf("<input rejected by short input>\n");
00126         break;
00127       case J_RESULT_STATUS_FAIL:
00128         printf("<search failed>\n");
00129         break;
00130       }
00131       /* continue to next process instance */
00132       continue;
00133     }
00134 
00135     /* output results for all the obtained sentences */
00136     winfo = r->lm->winfo;
00137 
00138     for(n = 0; n < r->result.sentnum; n++) { /* for all sentences */
00139 
00140       s = &(r->result.sent[n]);
00141       seq = s->word;
00142       seqnum = s->word_num;
00143 
00144       /* output word sequence like Julius */
00145       printf("sentence%d:", n+1);
00146       for(i=0;i<seqnum;i++) printf(" %s", winfo->woutput[seq[i]]);
00147       printf("\n");
00148       /* LM entry sequence */
00149       printf("wseq%d:", n+1);
00150       for(i=0;i<seqnum;i++) printf(" %s", winfo->wname[seq[i]]);
00151       printf("\n");
00152       /* phoneme sequence */
00153       printf("phseq%d:", n+1);
00154       put_hypo_phoneme(seq, seqnum, winfo);
00155       printf("\n");
00156       /* confidence scores */
00157       printf("cmscore%d:", n+1);
00158       for (i=0;i<seqnum; i++) printf(" %5.3f", s->confidence[i]);
00159       printf("\n");
00160       /* AM and LM scores */
00161       printf("score%d: %f", n+1, s->score);
00162       if (r->lmtype == LM_PROB) { /* if this process uses N-gram */
00163         printf(" (AM: %f  LM: %f)", s->score_am, s->score_lm);
00164       }
00165       printf("\n");
00166       if (r->lmtype == LM_DFA) { /* if this process uses DFA grammar */
00167         /* output which grammar the hypothesis belongs to
00168            when using multiple grammars */
00169         if (multigram_get_all_num(r->lm) > 1) {
00170           printf("grammar%d: %d\n", n+1, s->gram_id);
00171         }
00172       }
00173       
00174       /* output alignment result if exist */
00175       for (align = s->align; align; align = align->next) {
00176         printf("=== begin forced alignment ===\n");
00177         switch(align->unittype) {
00178         case PER_WORD:
00179           printf("-- word alignment --\n"); break;
00180         case PER_PHONEME:
00181           printf("-- phoneme alignment --\n"); break;
00182         case PER_STATE:
00183           printf("-- state alignment --\n"); break;
00184         }
00185         printf(" id: from  to    n_score    unit\n");
00186         printf(" ----------------------------------------\n");
00187         for(i=0;i<align->num;i++) {
00188           printf("[%4d %4d]  %f  ", align->begin_frame[i], align->end_frame[i], align->avgscore[i]);
00189           switch(align->unittype) {
00190           case PER_WORD:
00191             printf("%s\t[%s]\n", winfo->wname[align->w[i]], winfo->woutput[align->w[i]]);
00192             break;
00193           case PER_PHONEME:
00194             p = align->ph[i];
00195             if (p->is_pseudo) {
00196               printf("{%s}\n", p->name);
00197             } else if (strmatch(p->name, p->body.defined->name)) {
00198               printf("%s\n", p->name);
00199             } else {
00200               printf("%s[%s]\n", p->name, p->body.defined->name);
00201             }
00202             break;
00203           case PER_STATE:
00204             p = align->ph[i];
00205             if (p->is_pseudo) {
00206               printf("{%s}", p->name);
00207             } else if (strmatch(p->name, p->body.defined->name)) {
00208               printf("%s", p->name);
00209             } else {
00210               printf("%s[%s]", p->name, p->body.defined->name);
00211             }
00212             if (r->am->hmminfo->multipath) {
00213               if (align->is_iwsp[i]) {
00214                 printf(" #%d (sp)\n", align->loc[i]);
00215               } else {
00216                 printf(" #%d\n", align->loc[i]);
00217               }
00218             } else {
00219               printf(" #%d\n", align->loc[i]);
00220             }
00221             break;
00222           }
00223         }
00224         
00225         printf("re-computed AM score: %f\n", align->allscore);
00226 
00227         printf("=== end forced alignment ===\n");
00228       }
00229     }
00230   }
00231 
00232   /* flush output buffer */
00233   fflush(stdout);
00234 }
00235 
00236 
00241 int
00242 main(int argc, char *argv[])
00243 {
00248   Jconf *jconf;
00249 
00254   Recog *recog;
00255 
00260   static char speechfilename[MAXPATHLEN];
00261 
00262   int ret;
00263 
00264   /* by default, all messages will be output to standard out */
00265   /* to disable output, uncomment below */
00266   //jlog_set_output(NULL);
00267 
00268   /* output log to a file */
00269   //FILE *fp; fp = fopen("log.txt", "w"); jlog_set_output(fp);
00270 
00271   /* if no argument, output usage and exit */
00272   if (argc == 1) {
00273     fprintf(stderr, "Julius rev.%s - based on ", JULIUS_VERSION);
00274     j_put_version(stderr);
00275     fprintf(stderr, "Try '-setting' for built-in engine configuration.\n");
00276     fprintf(stderr, "Try '-help' for run time options.\n");
00277     return -1;
00278   }
00279 
00280   /************/
00281   /* Start up */
00282   /************/
00283   /* 1. load configurations from command arguments */
00284   jconf = j_config_load_args_new(argc, argv);
00285   /* else, you can load configurations from a jconf file */
00286   //jconf = j_config_load_file_new(jconf_filename);
00287   if (jconf == NULL) {          /* error */
00288     fprintf(stderr, "Try `-help' for more information.\n");
00289     return -1;
00290   }
00291   
00292   /* 2. create recognition instance according to the jconf */
00293   /* it loads models, setup final parameters, build lexicon
00294      and set up work area for recognition */
00295   recog = j_create_instance_from_jconf(jconf);
00296   if (recog == NULL) {
00297     fprintf(stderr, "Error in startup\n");
00298     return -1;
00299   }
00300 
00301   /*********************/
00302   /* Register callback */
00303   /*********************/
00304   /* register result callback functions */
00305   callback_add(recog, CALLBACK_EVENT_SPEECH_READY, status_recready, NULL);
00306   callback_add(recog, CALLBACK_EVENT_SPEECH_START, status_recstart, NULL);
00307   callback_add(recog, CALLBACK_RESULT, output_result, NULL);
00308 
00309   /**************************/
00310   /* Initialize audio input */
00311   /**************************/
00312   /* initialize audio input device */
00313   /* ad-in thread starts at this time for microphone */
00314   if (j_adin_init(recog) == FALSE) {    /* error */
00315     return -1;
00316   }
00317 
00318   /* output system information to log */
00319   j_recog_info(recog);
00320 
00321   /***********************************/
00322   /* Open input stream and recognize */
00323   /***********************************/
00324 
00325   if (jconf->input.speech_input == SP_MFCFILE) {
00326     /* MFCC file input */
00327 
00328     while (get_line_from_stdin(speechfilename, MAXPATHLEN, "enter MFCC filename->") != NULL) {
00329       if (verbose_flag) printf("\ninput MFCC file: %s\n", speechfilename);
00330       /* open the input file */
00331       ret = j_open_stream(recog, speechfilename);
00332       switch(ret) {
00333       case 0:                   /* succeeded */
00334         break;
00335       case -1:                  /* error */
00336         /* go on to the next input */
00337         continue;
00338       case -2:                  /* end of recognition */
00339         return;
00340       }
00341       /* recognition loop */
00342       ret = j_recognize_stream(recog);
00343       if (ret == -1) return -1; /* error */
00344       /* reach here when an input ends */
00345     }
00346 
00347   } else {
00348     /* raw speech input (microphone etc.) */
00349 
00350     switch(j_open_stream(recog, NULL)) {
00351     case 0:                     /* succeeded */
00352       break;
00353     case -1:                    /* error */
00354       fprintf(stderr, "error in input stream\n");
00355       return;
00356     case -2:                    /* end of recognition process */
00357       fprintf(stderr, "failed to begin input stream\n");
00358       return;
00359     }
00360     
00361     /**********************/
00362     /* Recognization Loop */
00363     /**********************/
00364     /* enter main loop to recognize the input stream */
00365     /* finish after whole input has been processed and input reaches end */
00366     ret = j_recognize_stream(recog);
00367     if (ret == -1) return -1;   /* error */
00368     
00369     /*******/
00370     /* End */
00371     /*******/
00372   }
00373 
00374   /* calling j_close_stream(recog) at any time will terminate
00375      recognition and exit j_recognize_stream() */
00376   j_close_stream(recog);
00377 
00378   j_recog_free(recog);
00379 
00380   /* exit program */
00381   return(0);
00382 }