Julius 4.1.5
|
00001 00019 /* 00020 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University 00021 * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan 00022 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00023 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology 00024 * All rights reserved 00025 */ 00161 #define GLOBAL_VARIABLE_DEFINE ///< Actually make global vars in global.h 00162 #include <julius/julius.h> 00163 #include <signal.h> 00164 #if defined(_WIN32) && !defined(__CYGWIN32__) 00165 #include <mbctype.h> 00166 #include <mbstring.h> 00167 #endif 00168 00169 /* ---------- utility functions -----------------------------------------*/ 00170 #ifdef REPORT_MEMORY_USAGE 00171 00181 static void 00182 print_mem() 00183 { 00184 char buf[200]; 00185 sprintf(buf,"ps -o vsz,rss -p %d",getpid()); 00186 system(buf); 00187 fflush(stdout); 00188 fflush(stderr); 00189 } 00190 #endif 00191 00192 00209 SentenceAlign * 00210 result_align_new() 00211 { 00212 SentenceAlign *new; 00213 new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign)); 00214 new->w = NULL; 00215 new->ph = NULL; 00216 new->loc = NULL; 00217 new->begin_frame = NULL; 00218 new->end_frame = NULL; 00219 new->avgscore = NULL; 00220 new->is_iwsp = NULL; 00221 new->next = NULL; 00222 return new; 00223 } 00224 00241 void 00242 result_align_free(SentenceAlign *a) 00243 { 00244 if (a->w) free(a->w); 00245 if (a->ph) free(a->ph); 00246 if (a->loc) free(a->loc); 00247 if (a->begin_frame) free(a->begin_frame); 00248 if (a->end_frame) free(a->end_frame); 00249 if (a->avgscore) free(a->avgscore); 00250 if (a->is_iwsp) free(a->is_iwsp); 00251 free(a); 00252 } 00253 00269 void 00270 result_sentence_malloc(RecogProcess *r, int num) 00271 { 00272 int i; 00273 r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num); 00274 for(i=0;i<num;i++) r->result.sent[i].align = NULL; 00275 r->result.sentnum = 0; 00276 } 00277 00291 void 00292 result_sentence_free(RecogProcess *r) 00293 { 00294 int i; 00295 SentenceAlign *a, *atmp; 00296 if (r->result.sent) { 00297 for(i=0;i<r->result.sentnum;i++) { 00298 a = r->result.sent[i].align; 00299 while(a) { 00300 atmp = a->next; 00301 result_align_free(a); 00302 a = atmp; 00303 } 00304 } 00305 free(r->result.sent); 00306 r->result.sent = NULL; 00307 } 00308 } 00309 00323 void 00324 clear_result(RecogProcess *r) 00325 { 00326 #ifdef WORD_GRAPH 00327 /* clear 1st pass word graph output */ 00328 wordgraph_clean(&(r->result.wg1)); 00329 #endif 00330 00331 if (r->lmvar == LM_DFA_WORD) { 00332 if (r->result.status == J_RESULT_STATUS_SUCCESS) { 00333 /* clear word recog result of first pass as in final result */ 00334 free(r->result.sent); 00335 } 00336 } else { 00337 if (r->graphout) { 00338 if (r->config->graph.confnet) { 00339 /* free confusion network clusters */ 00340 cn_free_all(&(r->result.confnet)); 00341 } else if (r->config->graph.lattice) { 00342 } 00343 /* clear all wordgraph */ 00344 wordgraph_clean(&(r->result.wg)); 00345 } 00346 result_sentence_free(r); 00347 } 00348 } 00349 00350 /* --------------------- speech buffering ------------------ */ 00351 00384 int 00385 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog) 00386 { 00387 if (recog->speechlen == 0) { /* first part of a segment */ 00388 if (!recog->process_active) { 00389 return(1); 00390 } 00391 } 00392 00393 if (recog->speechlen + len > recog->speechalloclen) { 00394 while (recog->speechlen + len > recog->speechalloclen) { 00395 recog->speechalloclen += MAX_SPEECH_ALLOC_STEP; 00396 } 00397 if (recog->speech == NULL) { 00398 recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen); 00399 } else { 00400 if (debug2_flag) { 00401 jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen); 00402 } 00403 recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen); 00404 } 00405 } 00406 00407 /* store now[0..len] to recog->speech[recog->speechlen] */ 00408 memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16)); 00409 recog->speechlen += len; 00410 return(0); /* tell adin_go to continue reading */ 00411 } 00412 00413 00414 /* --------------------- adin check callback --------------- */ 00442 static int 00443 callback_check_in_adin(Recog *recog) 00444 { 00445 /* module: check command and terminate recording when requested */ 00446 callback_exec(CALLBACK_POLL, recog); 00447 /* With audio input via adinnet, TERMINATE command will issue terminate 00448 command to the adinnet client. The client then stops recording 00449 immediately and return end-of-segment ack. Then it will cause this 00450 process to stop recognition as normal. So we need not to 00451 perform immediate termination at this callback, but just ignore the 00452 results in the main.c. */ 00453 #if 1 00454 if (recog->process_want_terminate) { /* TERMINATE ... force termination */ 00455 return(-2); 00456 } 00457 if (recog->process_want_reload) { 00458 return(-1); 00459 } 00460 #else 00461 if (recog->process_want_terminate /* TERMINATE ... force termination */ 00462 && recog->jconf->input.speech_input != SP_ADINNET) { 00463 return(-2); 00464 } 00465 if (recog->process_want_reload) { 00466 return(-1); 00467 } 00468 #endif 00469 return(0); 00470 } 00471 00472 /*********************/ 00473 /* open input stream */ 00474 /*********************/ 00492 int 00493 j_open_stream(Recog *recog, char *file_or_dev_name) 00494 { 00495 Jconf *jconf; 00496 char *p; 00497 00498 jconf = recog->jconf; 00499 00500 if (jconf->input.type == INPUT_WAVEFORM) { 00501 /* begin A/D input */ 00502 if (adin_begin(recog->adin, file_or_dev_name) == FALSE) { 00503 return -2; 00504 } 00505 /* create A/D-in thread here */ 00506 #ifdef HAVE_PTHREAD 00507 if (recog->adin->enable_thread && ! recog->adin->input_side_segment) { 00508 if (adin_thread_create(recog) == FALSE) { 00509 return -2; 00510 } 00511 } 00512 #endif 00513 /* when using adin func, input name should be obtained when called */ 00514 } else { 00515 switch(jconf->input.speech_input) { 00516 case SP_MFCMODULE: 00517 param_init_content(recog->mfcclist->param); 00518 if (mfc_module_begin(recog->mfcclist) == FALSE) return -2; 00519 /* when using mfc module func, input name should be obtained when called */ 00520 break; 00521 case SP_MFCFILE: 00522 /* read parameter file */ 00523 param_init_content(recog->mfcclist->param); 00524 if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) { 00525 jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name); 00526 return -1; 00527 } 00528 /* check and strip invalid frames */ 00529 if (jconf->preprocess.strip_zero_sample) { 00530 param_strip_zero(recog->mfcclist->param); 00531 } 00532 /* output frame length */ 00533 callback_exec(CALLBACK_STATUS_PARAM, recog); 00534 /* store the input filename here */ 00535 strncpy(recog->adin->current_input_name, file_or_dev_name, MAXPATHLEN); 00536 break; 00537 default: 00538 jlog("ERROR: j_open_stream: none of SP_MFC_*??\n"); 00539 return -1; 00540 } 00541 } 00542 00543 if (jconf->input.speech_input != SP_MFCFILE) { 00544 /* store current input name using input source specific function */ 00545 p = j_get_current_filename(recog); 00546 if (p) { 00547 strncpy(recog->adin->current_input_name, p, MAXPATHLEN); 00548 } else { 00549 recog->adin->current_input_name[0] = '\0'; 00550 } 00551 } 00552 00553 return 0; 00554 00555 } 00556 00574 int 00575 j_close_stream(Recog *recog) 00576 { 00577 Jconf *jconf; 00578 00579 jconf = recog->jconf; 00580 00581 if (jconf->input.type == INPUT_WAVEFORM) { 00582 #ifdef HAVE_PTHREAD 00583 /* close A/D-in thread here */ 00584 if (! recog->adin->input_side_segment) { 00585 if (recog->adin->enable_thread) { 00586 if (adin_thread_cancel(recog) == FALSE) { 00587 return -2; 00588 } 00589 } else { 00590 recog->adin->end_of_stream = TRUE; 00591 } 00592 } 00593 #else 00594 if (! recog->adin->input_side_segment) { 00595 recog->adin->end_of_stream = TRUE; 00596 } 00597 #endif 00598 /* end A/D input */ 00599 if (adin_end(recog->adin) == FALSE) { 00600 return -2; 00601 } 00602 } else { 00603 switch(jconf->input.speech_input) { 00604 case SP_MFCMODULE: 00605 if (mfc_module_end(recog->mfcclist) == FALSE) return -2; 00606 break; 00607 case SP_MFCFILE: 00608 /* nothing to do */ 00609 break; 00610 default: 00611 jlog("ERROR: j_close_stream: none of SP_MFC_*??\n"); 00612 return -1; 00613 } 00614 } 00615 00616 return 0; 00617 00618 } 00619 00620 /**********************************************************************/ 00621 /**********************************************************************/ 00622 /**********************************************************************/ 00623 00636 static void 00637 result_error(Recog *recog, int status) 00638 { 00639 MFCCCalc *mfcc; 00640 RecogProcess *r; 00641 boolean ok_p; 00642 00643 for(r=recog->process_list;r;r=r->next) r->result.status = status; 00644 00645 ok_p = FALSE; 00646 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00647 if (mfcc->f > 0) { 00648 ok_p = TRUE; 00649 break; 00650 } 00651 } 00652 if (ok_p) { /* had some input */ 00653 /* output as rejected */ 00654 callback_exec(CALLBACK_RESULT, recog); 00655 #ifdef ENABLE_PLUGIN 00656 plugin_exec_process_result(recog); 00657 #endif 00658 } 00659 } 00660 00696 static int 00697 j_recognize_stream_core(Recog *recog) 00698 { 00699 Jconf *jconf; 00700 int ret; 00701 float seclen, mseclen; 00702 RecogProcess *r; 00703 MFCCCalc *mfcc; 00704 PROCESS_AM *am; 00705 PROCESS_LM *lm; 00706 boolean ok_p; 00707 boolean process_segment_last; 00708 boolean on_the_fly; 00709 boolean pass2_p; 00710 00711 jconf = recog->jconf; 00712 00713 /* determine whether on-the-fly decoding should be done */ 00714 on_the_fly = FALSE; 00715 switch(jconf->input.type) { 00716 case INPUT_VECTOR: 00717 switch(jconf->input.speech_input) { 00718 case SP_MFCFILE: 00719 on_the_fly = FALSE; 00720 break; 00721 case SP_MFCMODULE: 00722 on_the_fly = TRUE; 00723 break; 00724 } 00725 break; 00726 case INPUT_WAVEFORM: 00727 if (jconf->decodeopt.realtime_flag) { 00728 on_the_fly = TRUE; 00729 } else { 00730 on_the_fly = FALSE; 00731 } 00732 break; 00733 } 00734 00735 if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) { 00736 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00737 param_init_content(mfcc->param); 00738 } 00739 } 00740 00741 /* if no process instance exist, start with terminated */ 00742 if (recog->process_list == NULL) { 00743 jlog("STAT: no recog process, engine inactive\n"); 00744 j_request_pause(recog); 00745 } 00746 00747 /* update initial recognition process status */ 00748 for(r=recog->process_list;r;r=r->next) { 00749 if (r->active > 0) { 00750 r->live = TRUE; 00751 } else if (r->active < 0) { 00752 r->live = FALSE; 00753 } 00754 r->active = 0; 00755 } 00756 00757 /******************************************************************/ 00758 /* do recognition for each incoming segment from the input stream */ 00759 /******************************************************************/ 00760 while (1) { 00761 00762 start_recog: 00763 00764 /*************************************/ 00765 /* Update recognition process status */ 00766 /*************************************/ 00767 for(r=recog->process_list;r;r=r->next) { 00768 if (r->active > 0) { 00769 r->live = TRUE; 00770 jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name); 00771 } else if (r->active < 0) { 00772 r->live = FALSE; 00773 jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name); 00774 } 00775 r->active = 0; 00776 } 00777 if (debug2_flag) { 00778 for(r=recog->process_list;r;r=r->next) { 00779 jlog("DEBUG: %s: SR%02d %s\n", r->live ? "live" : "dead", r->config->id, r->config->name); 00780 } 00781 } 00782 /* check if any process is live */ 00783 if (recog->process_active) { 00784 ok_p = FALSE; 00785 for(r=recog->process_list;r;r=r->next) { 00786 if (r->live) ok_p = TRUE; 00787 } 00788 if (!ok_p) { /* no process is alive */ 00789 /* make whole process as inactive */ 00790 jlog("STAT: all recog process inactive, pause engine now\n"); 00791 j_request_pause(recog); 00792 } 00793 } 00794 00795 /* Check whether process status was changed while in the last run */ 00796 if (recog->process_online != recog->process_active) { 00797 recog->process_online = recog->process_active; 00798 if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); 00799 else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); 00800 } 00801 /* execute poll callback */ 00802 if (recog->process_active) { 00803 callback_exec(CALLBACK_POLL, recog); 00804 } 00805 /* reset reload flag here */ 00806 j_reset_reload(recog); 00807 00808 if (!recog->process_active) { 00809 /* now sleeping, return */ 00810 /* in the next call, we will resume from here */ 00811 return 1; 00812 } 00813 /* update process status */ 00814 if (recog->process_online != recog->process_active) { 00815 recog->process_online = recog->process_active; 00816 if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); 00817 else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); 00818 } 00819 00820 /*********************************************************/ 00821 /* check for grammar to change, and rebuild if necessary */ 00822 /*********************************************************/ 00823 for(lm=recog->lmlist;lm;lm=lm->next) { 00824 if (lm->lmtype == LM_DFA) { 00825 multigram_update(lm); /* some modification occured if return TRUE*/ 00826 } 00827 } 00828 for(r=recog->process_list;r;r=r->next) { 00829 if (!r->live) continue; 00830 if (r->lmtype == LM_DFA && r->lm->global_modified) { 00831 multigram_build(r); 00832 } 00833 } 00834 for(lm=recog->lmlist;lm;lm=lm->next) { 00835 if (lm->lmtype == LM_DFA) lm->global_modified = FALSE; 00836 } 00837 00838 ok_p = FALSE; 00839 for(r=recog->process_list;r;r=r->next) { 00840 if (!r->live) continue; 00841 if (r->lmtype == LM_DFA) { 00842 if (r->lm->winfo == NULL || 00843 (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) { 00844 /* make this instance inactive */ 00845 r->active = -1; 00846 ok_p = TRUE; 00847 } 00848 } 00849 } 00850 if (ok_p) { /* at least one instance has no grammar */ 00851 goto start_recog; 00852 } 00853 00854 00855 /******************/ 00856 /* start 1st pass */ 00857 /******************/ 00858 if (on_the_fly) { 00859 00860 /********************************************/ 00861 /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */ 00862 /********************************************/ 00863 /* store, analysis and search in a pipeline */ 00864 /* main function is RealTimePipeLine() at realtime-1stpass.c, and 00865 it will be periodically called for each incoming input segment 00866 from the AD-in function adin_go(). RealTimePipeLine() will be 00867 called as a callback function from adin_go() */ 00868 /* after this part, directly jump to the beginning of the 2nd pass */ 00869 00870 if (recog->process_segment) { 00871 /*****************************************************************/ 00872 /* short-pause segmentation: process last remaining frames first */ 00873 /*****************************************************************/ 00874 /* last was segmented by short pause */ 00875 /* the margin segment in the last input will be re-processed first, 00876 and then the speech input will be processed */ 00877 /* process the last remaining parameters */ 00878 ret = RealTimeResume(recog); 00879 if (ret < 0) { /* error end in the margin */ 00880 jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! */ 00881 return -1; 00882 } 00883 if (ret != 1) { /* if segmented again in the margin, not process the rest */ 00884 /* last parameters has been processed, so continue with the 00885 current input as normal */ 00886 /* process the incoming input */ 00887 if (jconf->input.type == INPUT_WAVEFORM) { 00888 /* get speech and process it on real-time */ 00889 ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); 00890 } else { 00891 /* get feature vector and process it */ 00892 ret = mfcc_go(recog, callback_check_in_adin); 00893 } 00894 if (ret < 0) { /* error end in adin_go */ 00895 if (ret == -2 || recog->process_want_terminate) { 00896 /* terminated by callback */ 00897 RealTimeTerminate(recog); 00898 /* reset param */ 00899 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00900 param_init_content(mfcc->param); 00901 } 00902 /* execute callback at end of pass1 */ 00903 if (recog->triggered) { 00904 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00905 /* output result terminate */ 00906 result_error(recog, J_RESULT_STATUS_TERMINATE); 00907 } 00908 goto end_recog; /* cancel this recognition */ 00909 } 00910 jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! */ 00911 return(-1); 00912 } 00913 } 00914 00915 } else { 00916 00917 /***********************************************************/ 00918 /* last was not segmented, process the new incoming input */ 00919 /***********************************************************/ 00920 /* end of this input will be determined by either end of stream 00921 (in case of file input), or silence detection by adin_go(), or 00922 'TERMINATE' command from module (if module mode) */ 00923 /* prepare work area for on-the-fly processing */ 00924 if (RealTimePipeLinePrepare(recog) == FALSE) { 00925 jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n"); 00926 return (-1); 00927 } 00928 /* process the incoming input */ 00929 if (jconf->input.type == INPUT_WAVEFORM) { 00930 /* get speech and process it on real-time */ 00931 ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); 00932 } else { 00933 /* get feature vector and process it */ 00934 ret = mfcc_go(recog, callback_check_in_adin); 00935 } 00936 00937 if (ret < 0) { /* error end in adin_go */ 00938 if (ret == -2 || recog->process_want_terminate) { 00939 /* terminated by callback */ 00940 RealTimeTerminate(recog); 00941 /* reset param */ 00942 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 00943 param_init_content(mfcc->param); 00944 } 00945 /* execute callback at end of pass1 */ 00946 if (recog->triggered) { 00947 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00948 /* output result terminate */ 00949 result_error(recog, J_RESULT_STATUS_TERMINATE); 00950 } 00951 goto end_recog; 00952 } 00953 jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! */ 00954 return(-1); 00955 } 00956 } 00957 /******************************************************************/ 00958 /* speech stream has been processed on-the-fly, and 1st pass ends */ 00959 /******************************************************************/ 00960 /* last procedure of 1st-pass */ 00961 if (RealTimeParam(recog) == FALSE) { 00962 jlog("ERROR: fatal error occured, program terminates now\n"); 00963 return -1; 00964 } 00965 00966 #ifdef BACKEND_VAD 00967 /* if not triggered, skip this segment */ 00968 if (recog->jconf->decodeopt.segment && ! recog->triggered) { 00969 goto end_recog; 00970 } 00971 #endif 00972 00973 /* execute callback for 1st pass result */ 00974 /* result.status <0 must be skipped inside callback */ 00975 callback_exec(CALLBACK_RESULT_PASS1, recog); 00976 #ifdef WORD_GRAPH 00977 /* result.wg1 == NULL should be skipped inside callback */ 00978 callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); 00979 #endif 00980 /* execute callback at end of pass1 */ 00981 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 00982 /* output frame length */ 00983 callback_exec(CALLBACK_STATUS_PARAM, recog); 00984 /* if terminate signal has been received, discard this input */ 00985 if (recog->process_want_terminate) { 00986 result_error(recog, J_RESULT_STATUS_TERMINATE); 00987 goto end_recog; 00988 } 00989 00990 /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */ 00991 00992 } else { 00993 00994 /******************/ 00995 /* buffered input */ 00996 /******************/ 00997 00998 if (jconf->input.type == INPUT_VECTOR) { 00999 /***********************/ 01000 /* feature vector input */ 01001 /************************/ 01002 if (jconf->input.speech_input == SP_MFCFILE) { 01003 /************************/ 01004 /* parameter file input */ 01005 /************************/ 01006 /* parameter type check --- compare the type to that of HMM, 01007 and adjust them if necessary */ 01008 if (jconf->input.paramtype_check_flag) { 01009 for(am=recog->amlist;am;am=am->next) { 01010 /* return param itself or new malloced param */ 01011 if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) { /* failed */ 01012 01013 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01014 param_init_content(mfcc->param); 01015 } 01016 /* tell failure */ 01017 result_error(recog, J_RESULT_STATUS_FAIL); 01018 goto end_recog; 01019 } 01020 } 01021 } 01022 /* whole input is already read, so set input status to end of stream */ 01023 /* and jump to the start point of 1st pass */ 01024 ret = 0; 01025 } 01026 } else { 01027 /*************************/ 01028 /* buffered speech input */ 01029 /*************************/ 01030 if (!recog->process_segment) { /* no segment left */ 01031 01032 /****************************************/ 01033 /* store raw speech samples to speech[] */ 01034 /****************************************/ 01035 recog->speechlen = 0; 01036 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01037 param_init_content(mfcc->param); 01038 } 01039 /* tell module to start recording */ 01040 /* the "adin_cut_callback_store_buffer" simply stores 01041 the input speech to a buffer "speech[]" */ 01042 /* end of this input will be determined by either end of stream 01043 (in case of file input), or silence detection by adin_go(), or 01044 'TERMINATE' command from module (if module mode) */ 01045 ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog); 01046 if (ret < 0) { /* error end in adin_go */ 01047 if (ret == -2 || recog->process_want_terminate) { 01048 /* terminated by module */ 01049 /* output fail */ 01050 result_error(recog, J_RESULT_STATUS_TERMINATE); 01051 goto end_recog; 01052 } 01053 jlog("ERROR: an error occured while recording input\n"); 01054 return -1; 01055 } 01056 01057 /* output recorded length */ 01058 seclen = (float)recog->speechlen / (float)jconf->input.sfreq; 01059 jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen); 01060 01061 /* -rejectshort 指定時, 入力が指定時間以下であれば 01062 ここで入力を棄却する */ 01063 /* when using "-rejectshort", and input was shorter than 01064 specified, reject the input here */ 01065 if (jconf->reject.rejectshortlen > 0) { 01066 if (seclen * 1000.0 < jconf->reject.rejectshortlen) { 01067 result_error(recog, J_RESULT_STATUS_REJECT_SHORT); 01068 goto end_recog; 01069 } 01070 } 01071 01072 /**********************************************/ 01073 /* acoustic analysis and encoding of speech[] */ 01074 /**********************************************/ 01075 jlog("STAT: ### speech analysis (waveform -> MFCC)\n"); 01076 /* CMN will be computed for the whole buffered input */ 01077 if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) { 01078 /* error end, end stream */ 01079 ret = -1; 01080 /* tell failure */ 01081 result_error(recog, J_RESULT_STATUS_FAIL); 01082 goto end_recog; 01083 } 01084 01085 /* if terminate signal has been received, cancel this input */ 01086 if (recog->process_want_terminate) { 01087 result_error(recog, J_RESULT_STATUS_TERMINATE); 01088 goto end_recog; 01089 } 01090 01091 /* output frame length */ 01092 callback_exec(CALLBACK_STATUS_PARAM, recog); 01093 } 01094 } 01095 01096 #ifdef ENABLE_PLUGIN 01097 /* call post-process plugin if exist */ 01098 plugin_exec_vector_postprocess_all(recog->mfcclist->param); 01099 #endif 01100 01101 /******************************************************/ 01102 /* 1st-pass --- backward search to compute heuristics */ 01103 /******************************************************/ 01104 if (!jconf->decodeopt.realtime_flag) { 01105 /* prepare for outprob cache for each HMM state and time frame */ 01106 /* assume all MFCCCalc has params of the same sample num */ 01107 for(am=recog->amlist;am;am=am->next) { 01108 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); 01109 } 01110 } 01111 01112 /* if terminate signal has been received, cancel this input */ 01113 if (recog->process_want_terminate) { 01114 result_error(recog, J_RESULT_STATUS_TERMINATE); 01115 goto end_recog; 01116 } 01117 01118 /* execute computation of left-to-right backtrellis */ 01119 if (get_back_trellis(recog) == FALSE) { 01120 jlog("ERROR: fatal error occured, program terminates now\n"); 01121 return -1; 01122 } 01123 #ifdef BACKEND_VAD 01124 /* if not triggered, skip this segment */ 01125 if (recog->jconf->decodeopt.segment && ! recog->triggered) { 01126 goto end_recog; 01127 } 01128 #endif 01129 01130 /* execute callback for 1st pass result */ 01131 /* result.status <0 must be skipped inside callback */ 01132 callback_exec(CALLBACK_RESULT_PASS1, recog); 01133 #ifdef WORD_GRAPH 01134 /* result.wg1 == NULL should be skipped inside callback */ 01135 callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); 01136 #endif 01137 01138 /* execute callback at end of pass1 */ 01139 if (recog->triggered) { 01140 callback_exec(CALLBACK_EVENT_PASS1_END, recog); 01141 } 01142 01143 /* END OF BUFFERED 1ST PASS */ 01144 01145 } 01146 01147 /**********************************/ 01148 /* end processing of the 1st-pass */ 01149 /**********************************/ 01150 /* on-the-fly 1st pass processing will join here */ 01151 01152 /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */ 01153 /* 第2パスを実行せずにここで終了する */ 01154 /* when using "-rejectshort", and input was shorter than the specified 01155 length, terminate search here and output recognition failure */ 01156 if (jconf->reject.rejectshortlen > 0) { 01157 mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0; 01158 if (mseclen < jconf->reject.rejectshortlen) { 01159 result_error(recog, J_RESULT_STATUS_REJECT_SHORT); 01160 goto end_recog; 01161 } 01162 } 01163 #ifdef POWER_REJECT 01164 if (power_reject(recog)) { 01165 result_error(recog, J_RESULT_STATUS_REJECT_POWER); 01166 goto end_recog; 01167 } 01168 #endif 01169 01170 /* if terminate signal has been received, cancel this input */ 01171 if (recog->process_want_terminate) { 01172 result_error(recog, J_RESULT_STATUS_TERMINATE); 01173 goto end_recog; 01174 } 01175 01176 /* if GMM is specified and result are to be rejected, terminate search here */ 01177 if (jconf->reject.gmm_reject_cmn_string != NULL) { 01178 if (! gmm_valid_input(recog)) { 01179 result_error(recog, J_RESULT_STATUS_REJECT_GMM); 01180 goto end_recog; 01181 } 01182 } 01183 01184 /* for instances with "-1pass", copy 1st pass result as final */ 01185 /* execute stack-decoding search */ 01186 /* they will be skipepd in the next pass */ 01187 for(r=recog->process_list;r;r=r->next) { 01188 if (!r->live) continue; 01189 /* skip if 1st pass was failed */ 01190 if (r->result.status < 0) continue; 01191 /* already stored on word recognition, so skip this */ 01192 if (r->lmvar == LM_DFA_WORD) continue; 01193 if (r->config->compute_only_1pass) { 01194 if (verbose_flag) { 01195 jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name); 01196 } 01197 /* prepare result storage */ 01198 result_sentence_malloc(r, 1); 01199 /* finalize result when no hypothesis was obtained */ 01200 pass2_finalize_on_no_result(r, TRUE); 01201 } 01202 } 01203 01204 /***********************************************/ 01205 /* 2nd-pass --- forward search with heuristics */ 01206 /***********************************************/ 01207 pass2_p = FALSE; 01208 for(r=recog->process_list;r;r=r->next) { 01209 if (!r->live) continue; 01210 /* if [-1pass] is specified, skip 2nd pass */ 01211 if (r->config->compute_only_1pass) continue; 01212 /* if search already failed on 1st pass, skip 2nd pass */ 01213 if (r->result.status < 0) continue; 01214 pass2_p = TRUE; 01215 } 01216 if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog); 01217 01218 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE) 01219 /* adjust trellis score not to contain outprob of the last frames */ 01220 for(r=recog->process_list;r;r=r->next) { 01221 if (!r->live) continue; 01222 /* if [-1pass] is specified, skip 2nd pass */ 01223 if (r->config->compute_only_1pass) continue; 01224 /* if search already failed on 1st pass, skip 2nd pass */ 01225 if (r->result.status < 0) continue; 01226 if (! r->am->hmminfo->multipath) { 01227 bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param); 01228 } 01229 #ifdef LM_FIX_DOUBLE_SCORING 01230 if (r->lmtype == LM_PROB) { 01231 bt_discount_lm(r->backtrellis); 01232 } 01233 #endif 01234 } 01235 #endif 01236 01237 /* execute stack-decoding search */ 01238 for(r=recog->process_list;r;r=r->next) { 01239 if (!r->live) continue; 01240 /* if [-1pass] is specified, just copy from 1st pass result */ 01241 if (r->config->compute_only_1pass) continue; 01242 /* if search already failed on 1st pass, skip 2nd pass */ 01243 if (r->result.status < 0) continue; 01244 /* prepare result storage */ 01245 if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) { 01246 result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm)); 01247 } else { 01248 result_sentence_malloc(r, r->config->output.output_hypo_maxnum); 01249 } 01250 /* do 2nd pass */ 01251 if (r->lmtype == LM_PROB) { 01252 wchmm_fbs(r->am->mfcc->param, r, 0, 0); 01253 } else if (r->lmtype == LM_DFA) { 01254 if (r->config->output.multigramout_flag) { 01255 /* execute 2nd pass multiple times for each grammar sequencially */ 01256 /* to output result for each grammar */ 01257 MULTIGRAM *m; 01258 boolean has_success = FALSE; 01259 for(m = r->lm->grammars; m; m = m->next) { 01260 if (m->active) { 01261 jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id); 01262 wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num); 01263 if (r->result.status == J_RESULT_STATUS_SUCCESS) { 01264 has_success = TRUE; 01265 } 01266 } 01267 } 01268 r->result.status = (has_success == TRUE) ? J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL; 01269 } else { 01270 /* only the best among all grammar will be output */ 01271 wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num); 01272 } 01273 } 01274 } 01275 01276 /* do forced alignment if needed */ 01277 for(r=recog->process_list;r;r=r->next) { 01278 if (!r->live) continue; 01279 /* if search failed on 2nd pass, skip this */ 01280 if (r->result.status < 0) continue; 01281 /* do needed alignment */ 01282 do_alignment_all(r, r->am->mfcc->param); 01283 } 01284 01285 /* output result */ 01286 callback_exec(CALLBACK_RESULT, recog); 01287 #ifdef ENABLE_PLUGIN 01288 plugin_exec_process_result(recog); 01289 #endif 01290 /* output graph */ 01291 /* r->result.wg == NULL should be skipped inside the callback */ 01292 ok_p = FALSE; 01293 for(r=recog->process_list;r;r=r->next) { 01294 if (!r->live) continue; 01295 if (r->config->compute_only_1pass) continue; 01296 if (r->result.status < 0) continue; 01297 if (r->config->graph.lattice) ok_p = TRUE; 01298 } 01299 if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog); 01300 /* output confnet */ 01301 /* r->result.confnet == NULL should be skipped inside the callback */ 01302 ok_p = FALSE; 01303 for(r=recog->process_list;r;r=r->next) { 01304 if (!r->live) continue; 01305 if (r->config->compute_only_1pass) continue; 01306 if (r->result.status < 0) continue; 01307 if (r->config->graph.confnet) ok_p = TRUE; 01308 } 01309 if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog); 01310 01311 /* clear work area for output */ 01312 for(r=recog->process_list;r;r=r->next) { 01313 if (!r->live) continue; 01314 clear_result(r); 01315 } 01316 01317 /* output end of 2nd pass */ 01318 if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog); 01319 01320 #ifdef DEBUG_VTLN_ALPHA_TEST 01321 if (r->am->mfcc->para->vtln_alpha == 1.0) { 01322 /* if vtln parameter remains default, search for VTLN parameter */ 01323 vtln_alpha(recog, r); 01324 } 01325 #endif 01326 01327 end_recog: 01328 /**********************/ 01329 /* end of recognition */ 01330 /**********************/ 01331 01332 /* update CMN info for next input (in case of realtime wave input) */ 01333 if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) { 01334 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01335 if (mfcc->param->samplenum > 0) { 01336 RealTimeCMNUpdate(mfcc, recog); 01337 } 01338 } 01339 } 01340 01341 process_segment_last = recog->process_segment; 01342 if (jconf->decodeopt.segment) { /* sp-segment mode */ 01343 /* param is now shrinked to hold only the processed input, and */ 01344 /* the rests are holded in (newly allocated) "rest_param" */ 01345 /* if this is the last segment, rest_param is NULL */ 01346 /* assume all segmentation are synchronized */ 01347 recog->process_segment = FALSE; 01348 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { 01349 if (mfcc->rest_param != NULL) { 01350 /* process the rest parameters in the next loop */ 01351 recog->process_segment = TRUE; 01352 free_param(mfcc->param); 01353 mfcc->param = mfcc->rest_param; 01354 mfcc->rest_param = NULL; 01355 } 01356 } 01357 } 01358 01359 /* callback of recognition end */ 01360 if (jconf->decodeopt.segment) { 01361 #ifdef BACKEND_VAD 01362 if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); 01363 if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01364 #else 01365 callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); 01366 if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01367 #endif 01368 } else { 01369 callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); 01370 } 01371 01372 01373 if (verbose_flag) jlog("\n"); 01374 jlog_flush(); 01375 01376 if (jconf->decodeopt.segment) { /* sp-segment mode */ 01377 if (recog->process_segment == TRUE) { 01378 if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n"); 01379 } else { 01380 /* input has reached end of stream, terminate program */ 01381 if (ret <= 0 && ret != -2) break; 01382 } 01383 } else { /* not sp-segment mode */ 01384 /* input has reached end of stream, terminate program */ 01385 if (ret <= 0 && ret != -2) break; 01386 } 01387 01388 /* recognition continues for next (silence-aparted) segment */ 01389 01390 } /* END OF STREAM LOOP */ 01391 01392 /* close the stream */ 01393 if (jconf->input.type == INPUT_WAVEFORM) { 01394 if (adin_end(recog->adin) == FALSE) return -1; 01395 } 01396 if (jconf->input.speech_input == SP_MFCMODULE) { 01397 if (mfc_module_end(recog->mfcclist) == FALSE) return -1; 01398 } 01399 01400 /* return to the opening of input stream */ 01401 01402 return(0); 01403 01404 } 01405 01450 int 01451 j_recognize_stream(Recog *recog) 01452 { 01453 int ret; 01454 01455 do { 01456 01457 ret = j_recognize_stream_core(recog); 01458 01459 switch(ret) { 01460 case 1: /* paused by a callback (stream will continue) */ 01461 /* call pause event callbacks */ 01462 callback_exec(CALLBACK_EVENT_PAUSE, recog); 01463 /* call pause functions */ 01464 /* block until all pause functions exits */ 01465 if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) { 01466 jlog("WARNING: pause requested but no pause function specified\n"); 01467 jlog("WARNING: engine will resume now immediately\n"); 01468 } 01469 callback_exec(CALLBACK_PAUSE_FUNCTION, recog); 01470 /* after here, recognition will restart for the rest input */ 01471 /* call resume event callbacks */ 01472 callback_exec(CALLBACK_EVENT_RESUME, recog); 01473 break; 01474 case 0: /* end of stream */ 01475 /* go on to the next input */ 01476 break; 01477 case -1: /* error */ 01478 jlog("ERROR: an error occured while recognition, terminate stream\n"); 01479 return -1; 01480 } 01481 } while (ret == 1); /* loop when paused by callback */ 01482 01483 return 0; 01484 } 01485 01486 /* end of file */