Julius: libjulius/src/recogmain.c ソースファイル

Julius 4.2
00001 
00019 /*
00020  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00021  * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan
00022  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00023  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00024  * All rights reserved
00025  */
00161 #define GLOBAL_VARIABLE_DEFINE  ///< Actually make global vars in global.h
00162 #include <julius/julius.h>
00163 #include <signal.h>
00164 #if defined(_WIN32) && !defined(__CYGWIN32__)
00165 #include <mbctype.h>
00166 #include <mbstring.h>
00167 #endif
00168 
00169 /* ---------- utility functions -----------------------------------------*/
00170 #ifdef REPORT_MEMORY_USAGE
00171 
/**
 * Print the memory usage of the current process (debug aid).
 *
 * Builds the command line "ps -o vsz,rss -p <pid>" for this process, runs it
 * through system(), and then flushes stdout and stderr.  Only compiled when
 * REPORT_MEMORY_USAGE is defined.
 */
static void
print_mem(void)
{
  char buf[200];
  int n;

  /* bounded formatting; pid_t is cast explicitly so that it matches "%d" */
  n = snprintf(buf, sizeof(buf), "ps -o vsz,rss -p %d", (int)getpid());
  if (n > 0 && (size_t)n < sizeof(buf)) {
    /* best-effort diagnostic: the exit status of "ps" is deliberately unused */
    (void)system(buf);
  }
  fflush(stdout);
  fflush(stderr);
}
00190 #endif
00191           
00192 
00209 SentenceAlign *
00210 result_align_new()
00211 {
00212   SentenceAlign *new;
00213   new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign));
00214   new->w = NULL;
00215   new->ph = NULL;
00216   new->loc = NULL;
00217   new->begin_frame = NULL;
00218   new->end_frame = NULL;
00219   new->avgscore = NULL;
00220   new->is_iwsp = NULL;
00221   new->next = NULL;
00222   return new;
00223 }
00224 
00241 void
00242 result_align_free(SentenceAlign *a)
00243 {
00244   if (a->w) free(a->w);
00245   if (a->ph) free(a->ph);
00246   if (a->loc) free(a->loc);
00247   if (a->begin_frame) free(a->begin_frame);
00248   if (a->end_frame) free(a->end_frame);
00249   if (a->avgscore) free(a->avgscore);
00250   if (a->is_iwsp) free(a->is_iwsp);
00251   free(a);
00252 }
00253 
00269 void
00270 result_sentence_malloc(RecogProcess *r, int num)
00271 {
00272   int i;
00273   r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num);
00274   for(i=0;i<num;i++) r->result.sent[i].align = NULL;
00275   r->result.sentnum = 0;
00276 }
00277 
00291 void
00292 result_sentence_free(RecogProcess *r)
00293 {  
00294   int i;
00295   SentenceAlign *a, *atmp;
00296   if (r->result.sent) {
00297     for(i=0;i<r->result.sentnum;i++) {
00298       a = r->result.sent[i].align;
00299       while(a) {
00300         atmp = a->next;
00301         result_align_free(a);
00302         a = atmp;
00303       }
00304     }
00305     free(r->result.sent);
00306     r->result.sent = NULL;
00307   }
00308 }
00309 
00323 void
00324 clear_result(RecogProcess *r)
00325 {
00326 #ifdef WORD_GRAPH
00327   /* clear 1st pass word graph output */
00328   wordgraph_clean(&(r->result.wg1));
00329 #endif
00330 
00331   if (r->lmvar == LM_DFA_WORD) {
00332     if (r->result.status == J_RESULT_STATUS_SUCCESS) {
00333       /* clear word recog result of first pass as in final result */
00334       free(r->result.sent);
00335     }
00336   } else {
00337     if (r->graphout) {
00338       if (r->config->graph.confnet) {
00339         /* free confusion network clusters */
00340         cn_free_all(&(r->result.confnet));
00341       } else if (r->config->graph.lattice) {
00342       }
00343       /* clear all wordgraph */
00344       wordgraph_clean(&(r->result.wg));
00345     }
00346     result_sentence_free(r);
00347   }
00348 }
00349 
00350 /* --------------------- speech buffering ------------------ */
00351 
00384 int
00385 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog)
00386 {
00387   if (recog->speechlen == 0) {          /* first part of a segment */
00388     if (!recog->process_active) {
00389       return(1);
00390     }
00391   }
00392 
00393   if (recog->speechlen + len > recog->speechalloclen) {
00394     while (recog->speechlen + len > recog->speechalloclen) {
00395       recog->speechalloclen += MAX_SPEECH_ALLOC_STEP;
00396     }
00397     if (recog->speech == NULL) {
00398       recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen);
00399     } else {
00400       if (debug2_flag) {
00401         jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen);
00402       }
00403       recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen);
00404     }
00405   }
00406 
00407   /* store now[0..len] to recog->speech[recog->speechlen] */
00408   memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16));
00409   recog->speechlen += len;
00410   return(0);                    /* tell adin_go to continue reading */
00411 }
00412 
00413 
00414 /* --------------------- adin check callback --------------- */
00442 static int
00443 callback_check_in_adin(Recog *recog)
00444 {
00445   /* module: check command and terminate recording when requested */
00446   callback_exec(CALLBACK_POLL, recog);
00447   /* With audio input via adinnet, TERMINATE command will issue terminate
00448      command to the adinnet client.  The client then stops recording
00449      immediately and return end-of-segment ack.  Then it will cause this
00450      process to stop recognition as normal.  So we need not to
00451      perform immediate termination at this callback, but just ignore the
00452      results in the main.c.  */
00453 #if 1
00454   if (recog->process_want_terminate) { /* TERMINATE ... force termination */
00455     return(-2);
00456   }
00457   if (recog->process_want_reload) {
00458     return(-1);
00459   }
00460 #else
00461   if (recog->process_want_terminate /* TERMINATE ... force termination */
00462       && recog->jconf->input.speech_input != SP_ADINNET) {
00463     return(-2);
00464   }
00465   if (recog->process_want_reload) {
00466     return(-1);
00467   }
00468 #endif
00469   return(0);
00470 }
00471 
00472 /*********************/
00473 /* open input stream */
00474 /*********************/
00492 int
00493 j_open_stream(Recog *recog, char *file_or_dev_name)
00494 {
00495   Jconf *jconf;
00496   char *p;
00497 
00498   jconf = recog->jconf;
00499 
00500   if (jconf->input.type == INPUT_WAVEFORM) {
00501     /* begin A/D input */
00502     if (adin_begin(recog->adin, file_or_dev_name) == FALSE) {
00503       return -2;
00504     }
00505     /* create A/D-in thread here */
00506 #ifdef HAVE_PTHREAD
00507     if (recog->adin->enable_thread && ! recog->adin->input_side_segment) {
00508       if (adin_thread_create(recog) == FALSE) {
00509         return -2;
00510       }
00511     }
00512 #endif
00513     /* when using adin func, input name should be obtained when called */
00514   } else {
00515     switch(jconf->input.speech_input) {
00516     case SP_MFCMODULE:
00517       param_init_content(recog->mfcclist->param);
00518       if (mfc_module_begin(recog->mfcclist) == FALSE) return -2;
00519       /* when using mfc module func, input name should be obtained when called */
00520       break;
00521     case SP_MFCFILE:
00522       /* read parameter file */
00523       param_init_content(recog->mfcclist->param);
00524       if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) {
00525         jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name);
00526         return -1;
00527       }
00528       /* check and strip invalid frames */
00529       if (jconf->preprocess.strip_zero_sample) {
00530         param_strip_zero(recog->mfcclist->param);
00531       }
00532       /* output frame length */
00533       callback_exec(CALLBACK_STATUS_PARAM, recog);
00534       /* store the input filename here */
00535       strncpy(recog->adin->current_input_name, file_or_dev_name, MAXPATHLEN);
00536       break;
00537     default:
00538       jlog("ERROR: j_open_stream: none of SP_MFC_*??\n");
00539       return -1;
00540     }
00541   }
00542 
00543   if (jconf->input.speech_input != SP_MFCFILE) {
00544     /* store current input name using input source specific function */
00545     p = j_get_current_filename(recog);
00546     if (p) {
00547       strncpy(recog->adin->current_input_name, p, MAXPATHLEN);
00548     } else {
00549       recog->adin->current_input_name[0] = '\0';
00550     }
00551   }
00552       
00553   return 0;
00554 
00555 }
00556 
00574 int
00575 j_close_stream(Recog *recog)
00576 {
00577   Jconf *jconf;
00578 
00579   jconf = recog->jconf;
00580 
00581   if (jconf->input.type == INPUT_WAVEFORM) {
00582 #ifdef HAVE_PTHREAD
00583     /* close A/D-in thread here */
00584     if (! recog->adin->input_side_segment) {
00585       if (recog->adin->enable_thread) {
00586         if (adin_thread_cancel(recog) == FALSE) {
00587           return -2;
00588         }
00589       } else {
00590         recog->adin->end_of_stream = TRUE;
00591       }
00592     }
00593 #else
00594     if (! recog->adin->input_side_segment) {
00595       recog->adin->end_of_stream = TRUE;
00596     }
00597 #endif
00598   } else {
00599     switch(jconf->input.speech_input) {
00600     case SP_MFCMODULE:
00601       if (mfc_module_end(recog->mfcclist) == FALSE) return -2;
00602       break;
00603     case SP_MFCFILE:
00604       /* nothing to do */
00605       break;
00606     default:
00607       jlog("ERROR: j_close_stream: none of SP_MFC_*??\n");
00608       return -1;
00609     }
00610   }
00611       
00612   return 0;
00613 
00614 }
00615 
00616 /**********************************************************************/
00617 /**********************************************************************/
00618 /**********************************************************************/
00619 
00632 static void
00633 result_error(Recog *recog, int status)
00634 {
00635   MFCCCalc *mfcc;
00636   RecogProcess *r;
00637   boolean ok_p;
00638 
00639   for(r=recog->process_list;r;r=r->next) r->result.status = status;
00640 
00641   ok_p = FALSE;
00642   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00643     if (mfcc->f > 0) {
00644       ok_p = TRUE;
00645       break;
00646     }
00647   }
00648   if (ok_p) {                   /* had some input */
00649     /* output as rejected */
00650     callback_exec(CALLBACK_RESULT, recog);
00651 #ifdef ENABLE_PLUGIN
00652     plugin_exec_process_result(recog);
00653 #endif
00654   }
00655 }
00656 
00692 static int
00693 j_recognize_stream_core(Recog *recog)
00694 {
00695   Jconf *jconf;
00696   int ret;
00697   float seclen, mseclen;
00698   RecogProcess *r;
00699   MFCCCalc *mfcc;
00700   PROCESS_AM *am;
00701   PROCESS_LM *lm;
00702   boolean ok_p;
00703   boolean process_segment_last;
00704   boolean on_the_fly;
00705   boolean pass2_p;
00706 
00707   jconf = recog->jconf;
00708 
00709   /* determine whether on-the-fly decoding should be done */
00710   on_the_fly = FALSE;
00711   switch(jconf->input.type) {
00712   case INPUT_VECTOR:
00713     switch(jconf->input.speech_input) {
00714     case SP_MFCFILE: 
00715       on_the_fly = FALSE;
00716       break;
00717     case SP_MFCMODULE:
00718       on_the_fly = TRUE;
00719       break;
00720     }
00721     break;
00722   case INPUT_WAVEFORM:
00723     if (jconf->decodeopt.realtime_flag) {
00724       on_the_fly = TRUE;
00725     } else {
00726       on_the_fly = FALSE;
00727     }
00728     break;
00729   }
00730 
00731   if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) {
00732     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00733       param_init_content(mfcc->param);
00734     }
00735   }
00736 
00737   /* if no process instance exist, start with terminated */
00738   if (recog->process_list == NULL) {
00739     jlog("STAT: no recog process, engine inactive\n");
00740     j_request_pause(recog);
00741   }
00742 
00743   /* update initial recognition process status */
00744   for(r=recog->process_list;r;r=r->next) {
00745     if (r->active > 0) {
00746       r->live = TRUE;
00747     } else if (r->active < 0) {
00748       r->live = FALSE;
00749     }
00750     r->active = 0;
00751   }
00752 
00753   /******************************************************************/
00754   /* do recognition for each incoming segment from the input stream */
00755   /******************************************************************/
00756   while (1) {
00757     
00758   start_recog:
00759 
00760     /*************************************/
00761     /* Update recognition process status */
00762     /*************************************/
00763     for(r=recog->process_list;r;r=r->next) {
00764       if (r->active > 0) {
00765         r->live = TRUE;
00766         jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name);
00767       } else if (r->active < 0) {
00768         r->live = FALSE;
00769         jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name);
00770       }
00771       r->active = 0;
00772     }
00773     if (debug2_flag) {
00774       for(r=recog->process_list;r;r=r->next) {
00775         jlog("DEBUG: %s: SR%02d %s\n", r->live ? "live" : "dead", r->config->id, r->config->name);
00776       }
00777     }
00778     /* check if any process is live */
00779     if (recog->process_active) {
00780       ok_p = FALSE;
00781       for(r=recog->process_list;r;r=r->next) {
00782         if (r->live) ok_p = TRUE;
00783       }
00784       if (!ok_p) {              /* no process is alive */
00785         /* make whole process as inactive */
00786         jlog("STAT: all recog process inactive, pause engine now\n");
00787         j_request_pause(recog);
00788       }
00789     }
00790 
00791     /* Check whether process status was changed while in the last run */
00792     if (recog->process_online != recog->process_active) {
00793       recog->process_online = recog->process_active;
00794       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00795       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00796     }
00797     /* execute poll callback */
00798     if (recog->process_active) {
00799       callback_exec(CALLBACK_POLL, recog);
00800     }
00801     /* reset reload flag here */
00802     j_reset_reload(recog);
00803 
00804     if (!recog->process_active) {
00805       /* now sleeping, return */
00806       /* in the next call, we will resume from here */
00807       return 1;
00808     }
00809     /* update process status */
00810     if (recog->process_online != recog->process_active) {
00811       recog->process_online = recog->process_active;
00812       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00813       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00814     }
00815 
00816     /*********************************************************/
00817     /* check for grammar to change, and rebuild if necessary */
00818     /*********************************************************/
00819     for(lm=recog->lmlist;lm;lm=lm->next) {
00820       if (lm->lmtype == LM_DFA) {
00821         multigram_update(lm); /* some modification occured if return TRUE*/
00822       }
00823     }
00824     for(r=recog->process_list;r;r=r->next) {
00825       if (!r->live) continue;
00826       if (r->lmtype == LM_DFA && r->lm->global_modified) {
00827         multigram_build(r);
00828       }
00829     }
00830     for(lm=recog->lmlist;lm;lm=lm->next) {
00831       if (lm->lmtype == LM_DFA) lm->global_modified = FALSE;
00832     }
00833 
00834     ok_p = FALSE;
00835     for(r=recog->process_list;r;r=r->next) {
00836       if (!r->live) continue;
00837       if (r->lmtype == LM_DFA) {
00838         if (r->lm->winfo == NULL ||
00839             (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) {
00840           /* make this instance inactive */
00841           r->active = -1;
00842           ok_p = TRUE;
00843         }
00844       }
00845     }
00846     if (ok_p) {                 /* at least one instance has no grammar */
00847       goto start_recog;
00848     }
00849 
00850 
00851     /******************/
00852     /* start 1st pass */
00853     /******************/
00854     if (on_the_fly) {
00855 
00856       /********************************************/
00857       /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */
00858       /********************************************/
00859       /* store, analysis and search in a pipeline  */
00860       /* main function is RealTimePipeLine() at realtime-1stpass.c, and
00861          it will be periodically called for each incoming input segment
00862          from the AD-in function adin_go().  RealTimePipeLine() will be
00863          called as a callback function from adin_go() */
00864       /* after this part, directly jump to the beginning of the 2nd pass */
00865       
00866       if (recog->process_segment) {
00867         /*****************************************************************/
00868         /* short-pause segmentation: process last remaining frames first */
00869         /*****************************************************************/
00870         /* last was segmented by short pause */
00871         /* the margin segment in the last input will be re-processed first,
00872            and then the speech input will be processed */
00873         /* process the last remaining parameters */
00874         ret = RealTimeResume(recog);
00875         if (ret < 0) {          /* error end in the margin */
00876           jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! */
00877           return -1;
00878         }
00879         if (ret != 1) { /* if segmented again in the margin, not process the rest */
00880           /* last parameters has been processed, so continue with the
00881              current input as normal */
00882           /* process the incoming input */
00883           if (jconf->input.type == INPUT_WAVEFORM) {
00884             /* get speech and process it on real-time */
00885             ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00886           } else {
00887             /* get feature vector and process it */
00888             ret = mfcc_go(recog, callback_check_in_adin);
00889           }
00890           if (ret < 0) {                /* error end in adin_go */
00891             if (ret == -2 || recog->process_want_terminate) {
00892               /* terminated by callback */
00893               RealTimeTerminate(recog);
00894               /* reset param */
00895               for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00896                 param_init_content(mfcc->param);
00897               }
00898               /* execute callback at end of pass1 */
00899               if (recog->triggered) {
00900                 callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00901                 /* output result terminate */
00902                 result_error(recog, J_RESULT_STATUS_TERMINATE);
00903               }
00904               goto end_recog; /* cancel this recognition */
00905             }
00906             jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00907             return(-1);
00908           }
00909         }
00910         
00911       } else {
00912 
00913         /***********************************************************/
00914         /* last was not segmented, process the new incoming input  */
00915         /***********************************************************/
00916         /* end of this input will be determined by either end of stream
00917            (in case of file input), or silence detection by adin_go(), or
00918            'TERMINATE' command from module (if module mode) */
00919         /* prepare work area for on-the-fly processing */
00920         if (RealTimePipeLinePrepare(recog) == FALSE) {
00921           jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n");
00922           return (-1);
00923         }
00924         /* process the incoming input */
00925         if (jconf->input.type == INPUT_WAVEFORM) {
00926           /* get speech and process it on real-time */
00927           ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00928         } else {
00929           /* get feature vector and process it */
00930           ret = mfcc_go(recog, callback_check_in_adin);
00931         }
00932         
00933         if (ret < 0) {          /* error end in adin_go */
00934           if (ret == -2 || recog->process_want_terminate) {     
00935             /* terminated by callback */
00936             RealTimeTerminate(recog);
00937             /* reset param */
00938             for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00939               param_init_content(mfcc->param);
00940             }
00941             /* execute callback at end of pass1 */
00942             if (recog->triggered) {
00943               callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00944               /* output result terminate */
00945               result_error(recog, J_RESULT_STATUS_TERMINATE);
00946             }
00947             goto end_recog;
00948           }
00949           jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00950           return(-1);
00951         }
00952       }
00953       /******************************************************************/
00954       /* speech stream has been processed on-the-fly, and 1st pass ends */
00955       /******************************************************************/
00956       if (ret == 1 || ret == 2) {               /* segmented */
00957 #ifdef HAVE_PTHREAD
00958         if (recog->adin->adinthread_buffer_overflowed) {
00959           jlog("Warning: input buffer overflow, disgard the input\n");
00960           result_error(recog, J_RESULT_STATUS_BUFFER_OVERFLOW);
00961           /* skip 2nd pass */
00962           goto end_recog;
00963         }
00964 #endif
00965         /* check for audio overflow */
00966         for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00967           if (mfcc->f >= recog->real.maxframelen) {
00968             jlog("Warning: input buffer overflow, disgard the input\n");
00969             result_error(recog, J_RESULT_STATUS_BUFFER_OVERFLOW);
00970             /* skip 2nd pass */
00971             goto end_recog;
00972           }
00973         }
00974       }
00975       /* last procedure of 1st-pass */
00976       if (RealTimeParam(recog) == FALSE) {
00977         jlog("ERROR: fatal error occured, program terminates now\n");
00978         return -1;
00979       }
00980       
00981 #ifdef BACKEND_VAD
00982       /* if not triggered, skip this segment */
00983       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
00984         goto end_recog;
00985       }
00986 #endif
00987 
00988       /* execute callback for 1st pass result */
00989       /* result.status <0 must be skipped inside callback */
00990       callback_exec(CALLBACK_RESULT_PASS1, recog);
00991 #ifdef WORD_GRAPH
00992       /* result.wg1 == NULL should be skipped inside callback */
00993       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
00994 #endif
00995       /* execute callback at end of pass1 */
00996       callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00997       /* output frame length */
00998       callback_exec(CALLBACK_STATUS_PARAM, recog);
00999       /* if terminate signal has been received, discard this input */
01000       if (recog->process_want_terminate) {
01001         result_error(recog, J_RESULT_STATUS_TERMINATE);
01002         goto end_recog;
01003       }
01004 
01005       /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */
01006 
01007     } else {
01008 
01009       /******************/
01010       /* buffered input */
01011       /******************/
01012 
01013       if (jconf->input.type == INPUT_VECTOR) {
01014         /***********************/
01015         /* feature vector input */
01016         /************************/
01017         if (jconf->input.speech_input == SP_MFCFILE) {
01018           /************************/
01019           /* parameter file input */
01020           /************************/
01021           /* parameter type check --- compare the type to that of HMM,
01022              and adjust them if necessary */
01023           if (jconf->input.paramtype_check_flag) {
01024             for(am=recog->amlist;am;am=am->next) {
01025               /* return param itself or new malloced param */
01026               if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) {   /* failed */
01027                 
01028                 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01029                   param_init_content(mfcc->param);
01030                 }
01031                 /* tell failure */
01032                 result_error(recog, J_RESULT_STATUS_FAIL);
01033                 goto end_recog;
01034               }
01035             }
01036           }
01037           /* whole input is already read, so set input status to end of stream */
01038           /* and jump to the start point of 1st pass */
01039           ret = 0;
01040         }
01041       } else {
01042         /*************************/
01043         /* buffered speech input */
01044         /*************************/
01045         if (!recog->process_segment) { /* no segment left */
01046 
01047           /****************************************/
01048           /* store raw speech samples to speech[] */
01049           /****************************************/
01050           recog->speechlen = 0;
01051           for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01052             param_init_content(mfcc->param);
01053           }
01054           /* tell module to start recording */
01055           /* the "adin_cut_callback_store_buffer" simply stores
01056              the input speech to a buffer "speech[]" */
01057           /* end of this input will be determined by either end of stream
01058              (in case of file input), or silence detection by adin_go(), or
01059              'TERMINATE' command from module (if module mode) */
01060           ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog);
01061           if (ret < 0) {                /* error end in adin_go */
01062             if (ret == -2 || recog->process_want_terminate) {
01063               /* terminated by module */
01064               /* output fail */
01065               result_error(recog, J_RESULT_STATUS_TERMINATE);
01066               goto end_recog;
01067             }
01068             jlog("ERROR: an error occured while recording input\n");
01069             return -1;
01070           }
01071           
01072           /* output recorded length */
01073           seclen = (float)recog->speechlen / (float)jconf->input.sfreq;
01074           jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen);
01075           
01076           /* -rejectshort 指定時, 入力が指定時間以下であれば
01077              ここで入力を棄却する */
01078           /* when using "-rejectshort", and input was shorter than
01079              specified, reject the input here */
01080           if (jconf->reject.rejectshortlen > 0) {
01081             if (seclen * 1000.0 < jconf->reject.rejectshortlen) {
01082               result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01083               goto end_recog;
01084             }
01085           }
01086         
01087           /**********************************************/
01088           /* acoustic analysis and encoding of speech[] */
01089           /**********************************************/
01090           jlog("STAT: ### speech analysis (waveform -> MFCC)\n");
01091           /* CMN will be computed for the whole buffered input */
01092           if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) {
01093             /* error end, end stream */
01094             ret = -1;
01095             /* tell failure */
01096             result_error(recog, J_RESULT_STATUS_FAIL);
01097             goto end_recog;
01098           }
01099           
01100           /* if terminate signal has been received, cancel this input */
01101           if (recog->process_want_terminate) {
01102             result_error(recog, J_RESULT_STATUS_TERMINATE);
01103             goto end_recog;
01104           }
01105           
01106           /* output frame length */
01107           callback_exec(CALLBACK_STATUS_PARAM, recog);
01108         }
01109       }
01110 
01111 #ifdef ENABLE_PLUGIN
01112       /* call post-process plugin if exist */
01113       plugin_exec_vector_postprocess_all(recog->mfcclist->param);
01114 #endif
01115 
01116       /******************************************************/
01117       /* 1st-pass --- backward search to compute heuristics */
01118       /******************************************************/
01119       if (!jconf->decodeopt.realtime_flag) {
01120         /* prepare for outprob cache for each HMM state and time frame */
01121         /* assume all MFCCCalc has params of the same sample num */
01122         for(am=recog->amlist;am;am=am->next) {
01123           outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
01124         }
01125       }
01126       
01127       /* if terminate signal has been received, cancel this input */
01128       if (recog->process_want_terminate) {
01129         result_error(recog, J_RESULT_STATUS_TERMINATE);
01130         goto end_recog;
01131       }
01132     
01133       /* execute computation of left-to-right backtrellis */
01134       if (get_back_trellis(recog) == FALSE) {
01135         jlog("ERROR: fatal error occured, program terminates now\n");
01136         return -1;
01137       }
01138 #ifdef BACKEND_VAD
01139       /* if not triggered, skip this segment */
01140       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
01141         goto end_recog;
01142       }
01143 #endif
01144       
01145       /* execute callback for 1st pass result */
01146       /* result.status <0 must be skipped inside callback */
01147       callback_exec(CALLBACK_RESULT_PASS1, recog);
01148 #ifdef WORD_GRAPH
01149       /* result.wg1 == NULL should be skipped inside callback */
01150       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
01151 #endif
01152       
01153       /* execute callback at end of pass1 */
01154       if (recog->triggered) {
01155         callback_exec(CALLBACK_EVENT_PASS1_END, recog);
01156       }
01157 
01158       /* END OF BUFFERED 1ST PASS */
01159 
01160     }
01161 
01162     /**********************************/
01163     /* end processing of the 1st-pass */
01164     /**********************************/
01165     /* on-the-fly 1st pass processing will join here */
01166     
01167     /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */
01168     /* 第２パスを実行せずにここで終了する */
01169     /* when using "-rejectshort", and input was shorter than the specified
01170        length, terminate search here and output recognition failure */
01171     if (jconf->reject.rejectshortlen > 0) {
01172       mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0;
01173       if (mseclen < jconf->reject.rejectshortlen) {
01174         result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01175         goto end_recog;
01176       }
01177     }
01178 #ifdef POWER_REJECT
01179     if (power_reject(recog)) {
01180       result_error(recog, J_RESULT_STATUS_REJECT_POWER);
01181       goto end_recog;
01182     }
01183 #endif
01184     
01185     /* if terminate signal has been received, cancel this input */
01186     if (recog->process_want_terminate) {
01187       result_error(recog, J_RESULT_STATUS_TERMINATE);
01188       goto end_recog;
01189     }
01190     
01191     /* if GMM is specified and result are to be rejected, terminate search here */
01192     if (jconf->reject.gmm_reject_cmn_string != NULL) {
01193       if (! gmm_valid_input(recog)) {
01194         result_error(recog, J_RESULT_STATUS_REJECT_GMM);
01195         goto end_recog;
01196       }
01197     }
01198 
01199     /* for instances with "-1pass", copy 1st pass result as final */
01200     /* execute stack-decoding search */
01201     /* they will be skipepd in the next pass */
01202     for(r=recog->process_list;r;r=r->next) {
01203       if (!r->live) continue;
01204       /* skip if 1st pass was failed */
01205       if (r->result.status < 0) continue;
01206       /* already stored on word recognition, so skip this */
01207       if (r->lmvar == LM_DFA_WORD) continue;
01208       if (r->config->compute_only_1pass) {
01209         if (verbose_flag) {
01210           jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name);
01211         }
01212         /* prepare result storage */
01213         result_sentence_malloc(r, 1);
01214         /* finalize result when no hypothesis was obtained */
01215         pass2_finalize_on_no_result(r, TRUE);
01216       }
01217     }
01218 
01219     /***********************************************/
01220     /* 2nd-pass --- forward search with heuristics */
01221     /***********************************************/
01222     pass2_p = FALSE;
01223     for(r=recog->process_list;r;r=r->next) {
01224       if (!r->live) continue;
01225       /* if [-1pass] is specified, skip 2nd pass */
01226       if (r->config->compute_only_1pass) continue;
01227       /* if search already failed on 1st pass, skip 2nd pass */
01228       if (r->result.status < 0) continue;
01229       pass2_p = TRUE;
01230     }
01231     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog);
01232 
01233 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
01234     /* adjust trellis score not to contain outprob of the last frames */
01235     for(r=recog->process_list;r;r=r->next) {
01236       if (!r->live) continue;
01237       /* if [-1pass] is specified, skip 2nd pass */
01238       if (r->config->compute_only_1pass) continue;
01239       /* if search already failed on 1st pass, skip 2nd pass */
01240       if (r->result.status < 0) continue;
01241       if (! r->am->hmminfo->multipath) {
01242         bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param);
01243       }
01244 #ifdef LM_FIX_DOUBLE_SCORING
01245       if (r->lmtype == LM_PROB) {
01246         bt_discount_lm(r->backtrellis);
01247       }
01248 #endif
01249     }
01250 #endif
01251     
01252     /* execute stack-decoding search */
01253     for(r=recog->process_list;r;r=r->next) {
01254       if (!r->live) continue;
01255       /* if [-1pass] is specified, just copy from 1st pass result */
01256       if (r->config->compute_only_1pass) continue;
01257       /* if search already failed on 1st pass, skip 2nd pass */
01258       if (r->result.status < 0) continue;
01259       /* prepare result storage */
01260       if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) {
01261         result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm));
01262       } else {
01263         result_sentence_malloc(r, r->config->output.output_hypo_maxnum);
01264       }
01265       /* do 2nd pass */
01266       if (r->lmtype == LM_PROB) {
01267         wchmm_fbs(r->am->mfcc->param, r, 0, 0);
01268       } else if (r->lmtype == LM_DFA) {
01269         if (r->config->output.multigramout_flag) {
01270           /* execute 2nd pass multiple times for each grammar sequencially */
01271           /* to output result for each grammar */
01272           MULTIGRAM *m;
01273           boolean has_success = FALSE;
01274           for(m = r->lm->grammars; m; m = m->next) {
01275             if (m->active) {
01276               jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id);
01277               wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num);
01278               if (r->result.status == J_RESULT_STATUS_SUCCESS) {
01279                 has_success = TRUE;
01280               }
01281             }
01282           }
01283           r->result.status = (has_success == TRUE) ? J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL;
01284         } else {
01285           /* only the best among all grammar will be output */
01286           wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num);
01287         }
01288       }
01289     }
01290 
01291     /* do forced alignment if needed */
01292     for(r=recog->process_list;r;r=r->next) {
01293       if (!r->live) continue;
01294       /* if search failed on 2nd pass, skip this */
01295       if (r->result.status < 0) continue;
01296       /* do needed alignment */
01297       do_alignment_all(r, r->am->mfcc->param);
01298     }
01299 
01300     /* output result */
01301     callback_exec(CALLBACK_RESULT, recog);
01302 #ifdef ENABLE_PLUGIN
01303     plugin_exec_process_result(recog);
01304 #endif
01305     /* output graph */
01306     /* r->result.wg == NULL should be skipped inside the callback */
01307     ok_p = FALSE;
01308     for(r=recog->process_list;r;r=r->next) {
01309       if (!r->live) continue;
01310       if (r->config->compute_only_1pass) continue;
01311       if (r->result.status < 0) continue;
01312       if (r->config->graph.lattice) ok_p = TRUE;
01313     }
01314     if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog);
01315     /* output confnet */
01316     /* r->result.confnet == NULL should be skipped inside the callback */
01317     ok_p = FALSE;
01318     for(r=recog->process_list;r;r=r->next) {
01319       if (!r->live) continue;
01320       if (r->config->compute_only_1pass) continue;
01321       if (r->result.status < 0) continue;
01322       if (r->config->graph.confnet) ok_p = TRUE;
01323     }
01324     if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog);
01325 
01326     /* clear work area for output */
01327     for(r=recog->process_list;r;r=r->next) {
01328       if (!r->live) continue;
01329       clear_result(r);
01330     }
01331     
01332     /* output end of 2nd pass */
01333     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog);
01334 
01335 #ifdef DEBUG_VTLN_ALPHA_TEST
01336     if (r->am->mfcc->para->vtln_alpha == 1.0) {
01337       /* if vtln parameter remains default, search for VTLN parameter */
01338       vtln_alpha(recog, r);
01339     }
01340 #endif
01341 
01342   end_recog:
01343     /**********************/
01344     /* end of recognition */
01345     /**********************/
01346 
01347     /* update CMN info for next input (in case of realtime wave input) */
01348     if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) {
01349       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01350         if (mfcc->param->samplenum > 0) {
01351           RealTimeCMNUpdate(mfcc, recog);
01352         }
01353       }
01354     }
01355     
01356     process_segment_last = recog->process_segment;
01357     if (jconf->decodeopt.segment) { /* sp-segment mode */
01358       /* param is now shrinked to hold only the processed input, and */
01359       /* the rests are holded in (newly allocated) "rest_param" */
01360       /* if this is the last segment, rest_param is NULL */
01361       /* assume all segmentation are synchronized */
01362       recog->process_segment = FALSE;
01363       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01364         if (mfcc->rest_param != NULL) {
01365           /* process the rest parameters in the next loop */
01366           recog->process_segment = TRUE;
01367           free_param(mfcc->param);
01368           mfcc->param = mfcc->rest_param;
01369           mfcc->rest_param = NULL;
01370         }
01371       }
01372     }
01373 
01374     /* callback of recognition end */
01375     if (jconf->decodeopt.segment) {
01376 #ifdef BACKEND_VAD
01377       if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01378       if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01379 #else
01380       callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01381       if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01382 #endif
01383     } else {
01384       callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01385     }
01386 
01387 
01388     if (verbose_flag) jlog("\n");
01389     jlog_flush();
01390 
01391     if (jconf->decodeopt.segment) { /* sp-segment mode */
01392       if (recog->process_segment == TRUE) {
01393         if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n");
01394       } else {
01395         /* input has reached end of stream, terminate program */
01396         if (ret <= 0 && ret != -2) break;
01397       }
01398     } else {                    /* not sp-segment mode */
01399       /* input has reached end of stream, terminate program */
01400       if (ret <= 0 && ret != -2) break;
01401     }
01402 
01403     /* recognition continues for next (silence-aparted) segment */
01404       
01405   } /* END OF STREAM LOOP */
01406     
01407   /* close the stream */
01408   if (jconf->input.type == INPUT_WAVEFORM) {
01409     if (adin_end(recog->adin) == FALSE) return -1;
01410   }
01411   if (jconf->input.speech_input == SP_MFCMODULE) {
01412     if (mfc_module_end(recog->mfcclist) == FALSE) return -1;
01413   }
01414 
01415   /* return to the opening of input stream */
01416 
01417   return(0);
01418 
01419 }
01420 
01465 int
01466 j_recognize_stream(Recog *recog)
01467 {
01468   int ret;
01469 
01470   do {
01471     
01472     ret = j_recognize_stream_core(recog);
01473 
01474     switch(ret) {
01475     case 1:           /* paused by a callback (stream will continue) */
01476       /* call pause event callbacks */
01477       callback_exec(CALLBACK_EVENT_PAUSE, recog);
01478       /* call pause functions */
01479       /* block until all pause functions exits */
01480       if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) {
01481         jlog("WARNING: pause requested but no pause function specified\n");
01482         jlog("WARNING: engine will resume now immediately\n");
01483       }
01484       callback_exec(CALLBACK_PAUSE_FUNCTION, recog);
01485       /* after here, recognition will restart for the rest input */
01486       /* call resume event callbacks */
01487       callback_exec(CALLBACK_EVENT_RESUME, recog);
01488       break;
01489     case 0:                     /* end of stream */
01490       /* go on to the next input */
01491       break;
01492     case -1:            /* error */
01493       jlog("ERROR: an error occured while recognition, terminate stream\n");
01494       return -1;
01495     }
01496   } while (ret == 1);           /* loop when paused by callback */
01497 
01498   return 0;
01499 }
01500 
01501 /* end of file */