Julius 4.2
|
00001 00030 /* include top Julius library header */ 00031 #include <julius/juliuslib.h> 00032 00037 static void 00038 status_recready(Recog *recog, void *dummy) 00039 { 00040 if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) { 00041 fprintf(stderr, "<<< please speak >>>"); 00042 } 00043 } 00044 00049 static void 00050 status_recstart(Recog *recog, void *dummy) 00051 { 00052 if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) { 00053 fprintf(stderr, "\r \r"); 00054 } 00055 } 00056 00061 static void 00062 put_hypo_phoneme(WORD_ID *seq, int n, WORD_INFO *winfo) 00063 { 00064 int i,j; 00065 WORD_ID w; 00066 static char buf[MAX_HMMNAME_LEN]; 00067 00068 if (seq != NULL) { 00069 for (i=0;i<n;i++) { 00070 if (i > 0) printf(" |"); 00071 w = seq[i]; 00072 for (j=0;j<winfo->wlen[w];j++) { 00073 center_name(winfo->wseq[w][j]->name, buf); 00074 printf(" %s", buf); 00075 } 00076 } 00077 } 00078 printf("\n"); 00079 } 00085 static void 00086 output_result(Recog *recog, void *dummy) 00087 { 00088 int i, j; 00089 int len; 00090 WORD_INFO *winfo; 00091 WORD_ID *seq; 00092 int seqnum; 00093 int n; 00094 Sentence *s; 00095 RecogProcess *r; 00096 HMM_Logical *p; 00097 SentenceAlign *align; 00098 00099 /* all recognition results are stored at each recognition process 00100 instance */ 00101 for(r=recog->process_list;r;r=r->next) { 00102 00103 /* skip the process if the process is not alive */ 00104 if (! r->live) continue; 00105 00106 /* result are in r->result. See recog.h for details */ 00107 00108 /* check result status */ 00109 if (r->result.status < 0) { /* no results obtained */ 00110 /* outout message according to the status code */ 00111 switch(r->result.status) { 00112 case J_RESULT_STATUS_REJECT_POWER: 00113 printf("<input rejected by power>\n"); 00114 break; 00115 case J_RESULT_STATUS_TERMINATE: 00116 printf("<input teminated by request>\n"); 00117 break; 00118 case J_RESULT_STATUS_ONLY_SILENCE: 00119 printf("<input rejected by decoder (silence input result)>\n"); 00120 break; 00121 case J_RESULT_STATUS_REJECT_GMM: 00122 printf("<input rejected by GMM>\n"); 00123 break; 00124 case J_RESULT_STATUS_REJECT_SHORT: 00125 printf("<input rejected by short input>\n"); 00126 break; 00127 case J_RESULT_STATUS_FAIL: 00128 printf("<search failed>\n"); 00129 break; 00130 } 00131 /* continue to next process instance */ 00132 continue; 00133 } 00134 00135 /* output results for all the obtained sentences */ 00136 winfo = r->lm->winfo; 00137 00138 for(n = 0; n < r->result.sentnum; n++) { /* for all sentences */ 00139 00140 s = &(r->result.sent[n]); 00141 seq = s->word; 00142 seqnum = s->word_num; 00143 00144 /* output word sequence like Julius */ 00145 printf("sentence%d:", n+1); 00146 for(i=0;i<seqnum;i++) printf(" %s", winfo->woutput[seq[i]]); 00147 printf("\n"); 00148 /* LM entry sequence */ 00149 printf("wseq%d:", n+1); 00150 for(i=0;i<seqnum;i++) printf(" %s", winfo->wname[seq[i]]); 00151 printf("\n"); 00152 /* phoneme sequence */ 00153 printf("phseq%d:", n+1); 00154 put_hypo_phoneme(seq, seqnum, winfo); 00155 printf("\n"); 00156 /* confidence scores */ 00157 printf("cmscore%d:", n+1); 00158 for (i=0;i<seqnum; i++) printf(" %5.3f", s->confidence[i]); 00159 printf("\n"); 00160 /* AM and LM scores */ 00161 printf("score%d: %f", n+1, s->score); 00162 if (r->lmtype == LM_PROB) { /* if this process uses N-gram */ 00163 printf(" (AM: %f LM: %f)", s->score_am, s->score_lm); 00164 } 00165 printf("\n"); 00166 if (r->lmtype == LM_DFA) { /* if this process uses DFA grammar */ 00167 /* output which grammar the hypothesis belongs to 00168 when using multiple grammars */ 00169 if (multigram_get_all_num(r->lm) > 1) { 00170 printf("grammar%d: %d\n", n+1, s->gram_id); 00171 } 00172 } 00173 00174 /* output alignment result if exist */ 00175 for (align = s->align; align; align = align->next) { 00176 printf("=== begin forced alignment ===\n"); 00177 switch(align->unittype) { 00178 case PER_WORD: 00179 printf("-- word alignment --\n"); break; 00180 case PER_PHONEME: 00181 printf("-- phoneme alignment --\n"); break; 00182 case PER_STATE: 00183 printf("-- state alignment --\n"); break; 00184 } 00185 printf(" id: from to n_score unit\n"); 00186 printf(" ----------------------------------------\n"); 00187 for(i=0;i<align->num;i++) { 00188 printf("[%4d %4d] %f ", align->begin_frame[i], align->end_frame[i], align->avgscore[i]); 00189 switch(align->unittype) { 00190 case PER_WORD: 00191 printf("%s\t[%s]\n", winfo->wname[align->w[i]], winfo->woutput[align->w[i]]); 00192 break; 00193 case PER_PHONEME: 00194 p = align->ph[i]; 00195 if (p->is_pseudo) { 00196 printf("{%s}\n", p->name); 00197 } else if (strmatch(p->name, p->body.defined->name)) { 00198 printf("%s\n", p->name); 00199 } else { 00200 printf("%s[%s]\n", p->name, p->body.defined->name); 00201 } 00202 break; 00203 case PER_STATE: 00204 p = align->ph[i]; 00205 if (p->is_pseudo) { 00206 printf("{%s}", p->name); 00207 } else if (strmatch(p->name, p->body.defined->name)) { 00208 printf("%s", p->name); 00209 } else { 00210 printf("%s[%s]", p->name, p->body.defined->name); 00211 } 00212 if (r->am->hmminfo->multipath) { 00213 if (align->is_iwsp[i]) { 00214 printf(" #%d (sp)\n", align->loc[i]); 00215 } else { 00216 printf(" #%d\n", align->loc[i]); 00217 } 00218 } else { 00219 printf(" #%d\n", align->loc[i]); 00220 } 00221 break; 00222 } 00223 } 00224 00225 printf("re-computed AM score: %f\n", align->allscore); 00226 00227 printf("=== end forced alignment ===\n"); 00228 } 00229 } 00230 } 00231 00232 /* flush output buffer */ 00233 fflush(stdout); 00234 } 00235 00236 00241 int 00242 main(int argc, char *argv[]) 00243 { 00248 Jconf *jconf; 00249 00254 Recog *recog; 00255 00260 static char speechfilename[MAXPATHLEN]; 00261 00262 int ret; 00263 00264 /* by default, all messages will be output to standard out */ 00265 /* to disable output, uncomment below */ 00266 //jlog_set_output(NULL); 00267 00268 /* output log to a file */ 00269 //FILE *fp; fp = fopen("log.txt", "w"); jlog_set_output(fp); 00270 00271 /* if no argument, output usage and exit */ 00272 if (argc == 1) { 00273 fprintf(stderr, "Julius rev.%s - based on ", JULIUS_VERSION); 00274 j_put_version(stderr); 00275 fprintf(stderr, "Try '-setting' for built-in engine configuration.\n"); 00276 fprintf(stderr, "Try '-help' for run time options.\n"); 00277 return -1; 00278 } 00279 00280 /************/ 00281 /* Start up */ 00282 /************/ 00283 /* 1. load configurations from command arguments */ 00284 jconf = j_config_load_args_new(argc, argv); 00285 /* else, you can load configurations from a jconf file */ 00286 //jconf = j_config_load_file_new(jconf_filename); 00287 if (jconf == NULL) { /* error */ 00288 fprintf(stderr, "Try `-help' for more information.\n"); 00289 return -1; 00290 } 00291 00292 /* 2. create recognition instance according to the jconf */ 00293 /* it loads models, setup final parameters, build lexicon 00294 and set up work area for recognition */ 00295 recog = j_create_instance_from_jconf(jconf); 00296 if (recog == NULL) { 00297 fprintf(stderr, "Error in startup\n"); 00298 return -1; 00299 } 00300 00301 /*********************/ 00302 /* Register callback */ 00303 /*********************/ 00304 /* register result callback functions */ 00305 callback_add(recog, CALLBACK_EVENT_SPEECH_READY, status_recready, NULL); 00306 callback_add(recog, CALLBACK_EVENT_SPEECH_START, status_recstart, NULL); 00307 callback_add(recog, CALLBACK_RESULT, output_result, NULL); 00308 00309 /**************************/ 00310 /* Initialize audio input */ 00311 /**************************/ 00312 /* initialize audio input device */ 00313 /* ad-in thread starts at this time for microphone */ 00314 if (j_adin_init(recog) == FALSE) { /* error */ 00315 return -1; 00316 } 00317 00318 /* output system information to log */ 00319 j_recog_info(recog); 00320 00321 /***********************************/ 00322 /* Open input stream and recognize */ 00323 /***********************************/ 00324 00325 if (jconf->input.speech_input == SP_MFCFILE) { 00326 /* MFCC file input */ 00327 00328 while (get_line_from_stdin(speechfilename, MAXPATHLEN, "enter MFCC filename->") != NULL) { 00329 if (verbose_flag) printf("\ninput MFCC file: %s\n", speechfilename); 00330 /* open the input file */ 00331 ret = j_open_stream(recog, speechfilename); 00332 switch(ret) { 00333 case 0: /* succeeded */ 00334 break; 00335 case -1: /* error */ 00336 /* go on to the next input */ 00337 continue; 00338 case -2: /* end of recognition */ 00339 return; 00340 } 00341 /* recognition loop */ 00342 ret = j_recognize_stream(recog); 00343 if (ret == -1) return -1; /* error */ 00344 /* reach here when an input ends */ 00345 } 00346 00347 } else { 00348 /* raw speech input (microphone etc.) */ 00349 00350 switch(j_open_stream(recog, NULL)) { 00351 case 0: /* succeeded */ 00352 break; 00353 case -1: /* error */ 00354 fprintf(stderr, "error in input stream\n"); 00355 return; 00356 case -2: /* end of recognition process */ 00357 fprintf(stderr, "failed to begin input stream\n"); 00358 return; 00359 } 00360 00361 /**********************/ 00362 /* Recognization Loop */ 00363 /**********************/ 00364 /* enter main loop to recognize the input stream */ 00365 /* finish after whole input has been processed and input reaches end */ 00366 ret = j_recognize_stream(recog); 00367 if (ret == -1) return -1; /* error */ 00368 00369 /*******/ 00370 /* End */ 00371 /*******/ 00372 } 00373 00374 /* calling j_close_stream(recog) at any time will terminate 00375 recognition and exit j_recognize_stream() */ 00376 j_close_stream(recog); 00377 00378 j_recog_free(recog); 00379 00380 /* exit program */ 00381 return(0); 00382 }