00001
00017
00018
00019
00020
00021
00022
00023
00024
00025 #define GLOBAL_VARIABLE_DEFINE
00026 #include <julius.h>
00027 #include <signal.h>
00028 #if defined(_WIN32) && !defined(__CYGWIN32__)
00029 #include <mbctype.h>
00030 #include <mbstring.h>
00031 #endif
00032
00033
00034
00035 #ifdef REPORT_MEMORY_USAGE
00036
/**
 * Print the memory usage of the current process (debug aid; only compiled
 * when REPORT_MEMORY_USAGE is defined).
 *
 * Builds the command line "ps -o vsz,rss -p <pid>" for this process and runs
 * it through system(), then flushes the program's own output via
 * j_flushprint() and stderr.  The result of system() is ignored: this is a
 * best-effort diagnostic.
 */
static void
print_mem(void)
{
  char buf[200];

  /* bounded formatting; pid_t is cast explicitly to match "%d" */
  snprintf(buf, sizeof(buf), "ps -o vsz,rss -p %d", (int)getpid());
  system(buf);
  j_flushprint();
  fflush(stderr);
}
00055 #endif
00056
00057
00058
00066 static SP16 *overflowed_samples = NULL;
00071 static int overflowed_samplenum;
00102 int
00103 adin_cut_callback_store_buffer(SP16 *now, int len)
00104 {
00105 if (module_mode) {
00106
00107 msock_check_and_process_command();
00108 }
00109 if (speechlen == 0) {
00110
00111 status_recstart();
00112 if (module_mode) {
00113
00114 if (module_wants_terminate() ||
00115 !module_is_active()) {
00116 return(-2);
00117 }
00118 }
00119 if (overflowed_samples) {
00120
00121 memcpy(&(speech[0]), overflowed_samples, sizeof(SP16)*overflowed_samplenum);
00122 speechlen += overflowed_samplenum;
00123 free(overflowed_samples);
00124 overflowed_samples = NULL;
00125 }
00126 }
00127 if (speechlen + len > MAXSPEECHLEN) {
00128
00129 j_printerr("Warning: too long input (> %d samples), segmented now\n", MAXSPEECHLEN);
00130
00131 {
00132 int getlen, restlen;
00133 getlen = MAXSPEECHLEN - speechlen;
00134 restlen = len - getlen;
00135 overflowed_samples = (SP16 *)mymalloc(sizeof(SP16)*restlen);
00136 memcpy(overflowed_samples, &(now[getlen]), restlen * sizeof(SP16));
00137 if (record_dirname != NULL) {
00138 record_sample_write(&(now[getlen]), restlen);
00139 }
00140 overflowed_samplenum = restlen;
00141 memcpy(&(speech[speechlen]), now, getlen * sizeof(SP16));
00142 if (record_dirname != NULL) {
00143 record_sample_write(now, getlen);
00144 }
00145 speechlen += getlen;
00146 }
00147 return(1);
00148 }
00149 if (module_mode) {
00150
00151 if (module_wants_terminate()) {
00152 speechlen = 0;
00153 return(-2);
00154 }
00155 }
00156
00157 memcpy(&(speech[speechlen]), now, len * sizeof(SP16));
00158 if (record_dirname != NULL) {
00159 record_sample_write(now, len);
00160 }
00161 speechlen += len;
00162 return(0);
00163 }
00164
00165
00201 static char *
00202 mfcfilelist_nextfile()
00203 {
00204 static FILE *mfclist = NULL;
00205 static char *buf;
00206
00207 if (mfclist == NULL) {
00208 if ((mfclist = fopen(inputlist_filename, "r")) == NULL) {
00209 j_error("inputlist open error\n");
00210 }
00211 }
00212 buf = mymalloc(MAXLINELEN);
00213 while(getl_fp(buf, MAXLINELEN, mfclist) != NULL) {
00214 if (buf[0] == '\0') continue;
00215 if (buf[0] == '#') continue;
00216
00217 return buf;
00218 }
00219
00220 free(buf);
00221 fclose(mfclist);
00222 mfclist = NULL;
00223 return NULL;
00224 }
00225
00226
00227
00228
00229
00230
00252 void
00253 main_recognition_loop()
00254 {
00255 char *speechfilename;
00256 HTK_Param *param = NULL;
00257 HTK_Param *selected_param;
00258 int ret;
00259 int file_counter;
00260 float seclen, mseclen;
00261 boolean process_online = FALSE;
00262
00263
00264
00265
00266
00267 final_fusion();
00268
00269
00270 adin_initialize();
00271
00272
00273 print_info();
00274
00275
00276 file_counter = 0;
00277
00278 #ifdef VISUALIZE
00279
00280 visual_init();
00281 #endif
00282
00283
00284
00286
00287
00288 for (;;) {
00289
00290 j_printf("\n");
00291 if (verbose_flag) j_printf("------\n");
00292 j_flushprint();
00293
00294
00295
00296
00297 if (speech_input == SP_MFCFILE) {
00298
00299
00300
00301 VERMES("### read analyzed parameter\n");
00302
00303 if (inputlist_filename != NULL) {
00304 speechfilename = mfcfilelist_nextfile();
00305 } else {
00306 speechfilename = get_line("enter MFCC filename->");
00307 }
00308 if (speechfilename == NULL) {
00309
00310 j_printerr("%d files processed\n", file_counter);
00311 #ifdef REPORT_MEMORY_USAGE
00312 print_mem();
00313 #endif
00314 j_exit();
00315 }
00316 if (verbose_flag) j_printf("\ninput MFCC file: %s\n",speechfilename);
00317
00318 param = new_param();
00319 if (rdparam(speechfilename, param) == FALSE) {
00320 j_printerr("error in reading parameter file: %s\n",speechfilename);
00321 free(speechfilename);
00322 free_param(param);
00323 continue;
00324 }
00325
00326 if (strip_zero_sample) {
00327 param_strip_zero(param);
00328 }
00329 free(speechfilename);
00330
00331 status_param(param);
00332
00333 file_counter++;
00334 } else {
00335
00336
00337
00338 VERMES("### read waveform input\n");
00339
00340 if (adin_begin() == FALSE) {
00341
00342 if (speech_input == SP_RAWFILE) {
00343 j_printerr("%d files processed\n", file_counter);
00344 j_exit();
00345 } else if (speech_input == SP_STDIN) {
00346 j_exit();
00347 } else {
00348 j_error("failed to begin input stream\n");
00349 }
00350 }
00351
00352 if (speech_input == SP_RAWFILE) {
00353 file_counter++;
00354 }
00355 }
00356
00357 #ifdef USE_DFA
00358
00359 if (module_mode) {
00360 if (dfa == NULL || winfo == NULL) {
00361 msock_exec_command("PAUSE");
00362 }
00363 }
00364 #endif
00365
00366 if (!module_mode) {
00367
00368 process_online = TRUE;
00369 status_process_online();
00370 }
00371
00372
00373
00374
00375 while (1) {
00376
00377 start_recog:
00378
00379 if (module_mode) {
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390 if (process_online != module_is_active()) {
00391 process_online = module_is_active();
00392 if (process_online) status_process_online();
00393 else status_process_offline();
00394 }
00395 if (module_is_active()) {
00396
00397 msock_check_and_process_command();
00398 }
00399 module_reset_reload();
00400 while (! module_is_active()) {
00401
00402
00403
00404 msock_process_command();
00405 }
00406
00407 if (process_online != module_is_active()) {
00408 process_online = module_is_active();
00409 if (process_online) status_process_online();
00410 else status_process_offline();
00411 }
00412 #ifdef USE_DFA
00413
00414
00415
00416 multigram_exec();
00417 if (dfa == NULL || winfo == NULL) {
00418 msock_exec_command("PAUSE");
00419 goto start_recog;
00420 }
00421 #endif
00422 }
00423
00424 if (speech_input == SP_MFCFILE) {
00425
00426
00427
00428
00429
00430
00431
00432
00433 if (paramtype_check_flag) {
00434
00435 selected_param = new_param_check_and_adjust(hmminfo, param, verbose_flag);
00436 if (selected_param == NULL) {
00437 free_param(param);
00438 param = NULL;
00439 goto end_recog;
00440 }
00441 param = selected_param;
00442 }
00443
00444
00445 ret = 0;
00446 } else {
00447
00448
00449
00450 if (realtime_flag) {
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460 #ifdef SP_BREAK_CURRENT_FRAME
00461 if (rest_param) {
00462
00463
00464
00465
00466
00467
00468 ret = RealTimeResume();
00469 if (ret < 0) {
00470 j_error("error in resuming last fragment\n");
00471 }
00472 if (ret != 1) {
00473
00474
00475
00476 status_recready();
00477 if (module_mode) {
00478
00479 ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00480 } else {
00481
00482 ret = adin_go(RealTimePipeLine, NULL);
00483 }
00484 if (ret < 0) {
00485 if (module_mode && (ret == -2 || module_wants_terminate())) {
00486 RealTimeTerminate();
00487 param = NULL;
00488 goto end_recog;
00489 }
00490 j_error("error in adin_go\n");
00491 }
00492 }
00493
00494 } else {
00495
00496 #endif
00497
00498
00499
00500
00501
00502
00503
00504 RealTimePipeLinePrepare();
00505
00506 status_recready();
00507
00508 if (module_mode) {
00509 ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00510 } else {
00511 ret = adin_go(RealTimePipeLine, NULL);
00512 }
00513 if (ret < 0) {
00514 if (module_mode && (ret == -2 || module_wants_terminate())) {
00515 RealTimeTerminate();
00516 param = NULL;
00517 goto end_recog;
00518 }
00519 j_error("error in adin_go\n");
00520 }
00521 #ifdef SP_BREAK_CURRENT_FRAME
00522 }
00523 #endif
00524
00525
00526
00527
00528 param = RealTimeParam(&backmax);
00529
00530 status_recend();
00531
00532 status_param(param);
00533 if (module_mode) {
00534
00535 if (module_wants_terminate()) goto end_recog;
00536 }
00537
00538 goto end_1pass;
00539
00540 }
00541
00542
00543
00544
00545 #ifdef SP_BREAK_CURRENT_FRAME
00546 if (rest_param == NULL) {
00547 #endif
00548
00549
00550
00551 speechlen = 0;
00552 param = NULL;
00553
00554 if (record_dirname != NULL) {
00555 record_sample_open();
00556 }
00557
00558 status_recready();
00559 if (module_mode) {
00560
00561
00562
00563
00564
00565
00566 ret = adin_go(adin_cut_callback_store_buffer, msock_check_in_adin);
00567 } else {
00568 ret = adin_go(adin_cut_callback_store_buffer, NULL);
00569 }
00570 if (ret < 0) {
00571 if (module_mode && (ret == -2 || module_wants_terminate())) {
00572 goto end_recog;
00573 }
00574 j_error("error in adin_go\n");
00575 }
00576
00577 status_recend();
00578
00579
00580 seclen = (float)speechlen / (float)para.smp_freq;
00581 j_printf("%d samples (%.2f sec.)\n", speechlen, seclen);
00582
00583
00584
00585
00586
00587 if (rejectshortlen > 0) {
00588 if (seclen * 1000.0 < rejectshortlen) {
00589 result_rejected("too short input");
00590 goto end_recog;
00591 }
00592 }
00593
00594
00595
00596
00597 VERMES("### speech analysis (waveform -> MFCC)\n");
00598
00599 param = new_wav2mfcc(speech, speechlen);
00600 if (param == NULL) {
00601 ret = -1;
00602 goto end_recog;
00603 }
00604
00605
00606 if (module_mode && module_wants_terminate()) goto end_recog;
00607
00608
00609 status_param(param);
00610
00611 #ifdef SP_BREAK_CURRENT_FRAME
00612 }
00613 #endif
00614 }
00615
00616
00617
00618
00619
00620
00621 #ifdef USE_NGRAM
00622 VERMES("### Recognition: 1st pass (LR beam with 2-gram)\n");
00623 #else
00624 VERMES("### Recognition: 1st pass (LR beam with word-pair grammar)\n");
00625 #endif
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640 if (!realtime_flag) {
00641
00642 outprob_prepare(param->samplenum);
00643 }
00644
00645 if (module_mode) {
00646
00647 if (module_wants_terminate()) goto end_recog;
00648 }
00649
00650
00651 get_back_trellis(param, wchmm, &backtrellis, &backmax);
00652
00653 end_1pass:
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664 if (rejectshortlen > 0) {
00665 mseclen = (float)param->samplenum * (float)para.smp_period * (float)para.frameshift / 10000.0;
00666 if (mseclen < rejectshortlen) {
00667 result_rejected("too short input");
00668 goto end_recog;
00669 }
00670 }
00671
00672
00673 if (compute_only_1pass) {
00674 goto end_recog;
00675 }
00676
00677
00678 if (backmax == LOG_ZERO) {
00679
00680 result_pass2_failed(wchmm->winfo);
00681 ret = -1;
00682 goto end_recog;
00683 }
00684
00685
00686 if (module_mode && module_wants_terminate()) goto end_recog;
00687
00688
00689 if (gmm_reject_cmn_string != NULL) {
00690 if (! gmm_valid_input()) {
00691 result_rejected("by GMM");
00692 goto end_recog;
00693 }
00694 }
00695
00696
00697
00698 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)
00699
00700 bt_discount_pescore(wchmm, &backtrellis, param);
00701 #endif
00702
00703 #ifdef USE_NGRAM
00704 VERMES("### Recognition: 2nd pass (RL heuristic best-first with 3-gram)\n");
00705 #else
00706 VERMES("### Recognition: 2nd pass (RL heuristic best-first with DFA)\n");
00707 #endif
00708
00709
00710 #ifdef USE_NGRAM
00711 wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, 0);
00712 #else
00713 if (multigramout_flag) {
00714
00715
00716 MULTIGRAM *m;
00717 for(m = gramlist; m; m = m->next) {
00718 if (m->active) {
00719 j_printf("## search for gram #%d\n", m->id);
00720 wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, m->cate_begin, m->dfa->term_num);
00721 }
00722 }
00723 } else {
00724
00725 wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, dfa->term_num);
00726 }
00727 #endif
00728
00729 end_recog:
00730
00731
00732
00733
00734
00735 if (speech_input != SP_MFCFILE && realtime_flag && param != NULL) {
00736 RealTimeCMNUpdate(param);
00737 }
00738
00739 #ifdef VISUALIZE
00740
00741 visual_show(&backtrellis);
00742 #endif
00743
00744
00745 if (param != NULL) free_param(param);
00746
00747
00748 if (record_dirname != NULL) {
00749 record_sample_close();
00750 }
00751
00752 VERMES("\n");
00753
00754 #ifdef SP_BREAK_CURRENT_FRAME
00755
00756
00757
00758 if (rest_param != NULL) {
00759
00760 VERMES("<<<restart the rest>>>\n\n");
00761 param = rest_param;
00762 } else {
00763
00764 if (ret <= 0 && ret != -2) break;
00765 }
00766 #else
00767
00768 if (ret <= 0 && ret != -2) break;
00769 #endif
00770
00771
00772
00773 }
00774
00775
00776
00777
00778
00779
00780
00781
00782 if (speech_input != SP_MFCFILE) {
00783
00784 adin_end();
00785 }
00786
00787 }
00788
00789 }
00790
00791
00792
00793
00794
00821 int
00822 main(int argc, char *argv[])
00823 {
00824
00825
00826
00827
00828 system_bootup();
00829
00830 opt_parse(argc,argv,NULL);
00831
00832 #ifdef CHARACTER_CONVERSION
00833 if (j_printf_set_charconv(from_code, to_code) == FALSE) {
00834 j_error("Error: character set conversion setup failed\n");
00835 }
00836 #endif
00837
00838
00839 check_specs();
00840
00841
00842
00843
00844 if (module_mode) {
00845
00846
00847 main_module_loop();
00848 } else {
00849
00850 main_recognition_loop();
00851 }
00852
00853
00854 opt_release();
00855
00856 return 0;
00857 }