/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

/*
 * Compile-time configuration switches.
 *
 * This header only defines / undefines preprocessor symbols; it declares no
 * functions or objects.  Symbols tested here but not defined here (USE_DFA,
 * LOWMEM, LOWMEM2, PASS1_IWCD, MULTIPATH_VERSION, CONFIDENCE_MEASURE,
 * CM_NBEST, GRAPHOUT, ...) are expected to come from the build configuration.
 */

/*****************************************************************************/
/*****************************************************************************/

/* switch N-gram mode (julius) <-> grammar mode (julian) */
#ifdef USE_DFA
#undef USE_NGRAM
#else
#define USE_NGRAM
#endif

/* delete incoherent option:
   grammar mode forces CATEGORY_TREE and drops UNIGRAM_FACTORING,
   N-gram mode drops CATEGORY_TREE */
#ifdef USE_DFA
#ifdef UNIGRAM_FACTORING
#undef UNIGRAM_FACTORING
#endif
#define CATEGORY_TREE
#else  /* USE_NGRAM */
#ifdef CATEGORY_TREE
#undef CATEGORY_TREE
#endif
#endif /* USE_DFA */

/* abbreviation for verbose message output.
   Usage: VERMES("format", args...);
   The arguments are passed to j_printerr only when verbose_flag is non-zero.
   The test is written as "if (!flag) {} else call" rather than
   "if (flag) call" so that the macro cannot capture an "else" that follows
   it, e.g. in "if (c) VERMES(...); else other();". */
#define VERMES if (!verbose_flag) {} else j_printerr

/* define this to report memory usage on exit (Linux only) */
#undef REPORT_MEMORY_USAGE

#ifdef USE_NGRAM
/*** tree construction ***/
/* With 1-best approximation, constructing a single tree from all words
   causes much error by factoring.  Listing each word flatly with no
   tree-organization will not cause this error, but the network becomes
   much larger and, especially, the inter-word LM handling becomes much more
   complex (O(n^2)).  The cost may be eased by LM caching, but it needs much
   memory. */
/* This is a trade-off of accuracy and cost */
#define SHORT_WORD_LEN 2
#ifdef LOWMEM
/* don't separate, construct a single tree from all words */
/* root nodes are about 50 in monophone, cache size will be 5MB on max */
#define NO_SEPARATE_SHORT_WORD
#else
#ifdef LOWMEM2
/* experimental: separate words that frequently appear in corpus (1-gram) */
/* root nodes will be "-sepnum num" + 50, cache size will be 10MB or so */
#define NO_SEPARATE_SHORT_WORD
#define SEPARATE_BY_UNIGRAM
#define DEFAULT_SEPARATE_WNUM 150
#else
/* separate all short words (<= 2 phonemes) */
/* root nodes are about 1100 in 20k (proportional to vocabulary),
   cache size will be about 100MB on max */
#endif /* LOWMEM2 */
#endif /* LOWMEM */

/*#define HASH_CACHE_IW*/
/* "./configure --enable-lowmem" defines NO_SEPARATE_SHORT_WORD instead */

#endif /* USE_NGRAM */

#ifdef USE_NGRAM
/* default language model weight and insertion penalty for pass1 and pass2 */
/* these values come from the best parameters in IPA evaluation result */
#define DEFAULT_LM_WEIGHT_MONO_PASS1   5.0
#define DEFAULT_LM_PENALTY_MONO_PASS1 -1.0
#define DEFAULT_LM_WEIGHT_MONO_PASS2   6.0
#define DEFAULT_LM_PENALTY_MONO_PASS2  0.0
#ifdef PASS1_IWCD
#define DEFAULT_LM_WEIGHT_TRI_PASS1    8.0
#define DEFAULT_LM_PENALTY_TRI_PASS1  -2.0
#define DEFAULT_LM_WEIGHT_TRI_PASS2    8.0
#define DEFAULT_LM_PENALTY_TRI_PASS2  -2.0
#else
#define DEFAULT_LM_WEIGHT_TRI_PASS1    9.0
#define DEFAULT_LM_PENALTY_TRI_PASS1   8.0
#define DEFAULT_LM_WEIGHT_TRI_PASS2   11.0
#define DEFAULT_LM_PENALTY_TRI_PASS2  -2.0
#endif /* PASS1_IWCD */
#endif /* USE_NGRAM */

/* Switch head/tail word insertion penalty to be inserted */
#undef FIX_PENALTY

/* some definitions for short-pause segmentation */
#ifdef SP_BREAK_CURRENT_FRAME
#undef SP_BREAK_EVAL              /* output messages for evaluation */
#undef SP_BREAK_DEBUG             /* output messages for debug */
#undef SP_BREAK_RESUME_WORD_BEGIN /* resume word = maxword at beginning of sp area */
#endif

/* '01/10/18 by ri: enable fix for trellis lookup order */
#define PREFER_CENTER_ON_TRELLIS_LOOKUP

#ifdef MULTIPATH_VERSION
/* '01/11/28 by ri: malloc step for startnode */
# define STARTNODE_STEP 300
/* default value of iwsp penalty */
# define IWSP_PENALTY_DEFAULT -1.0
#endif

/* default dict entry for IW-sp word that will be added to dict with -iwspword */
#ifdef USE_NGRAM
#define IWSPENTRY_DEFAULT "<UNK> [sp] sp sp"
#endif

/* confidence scoring method */
#ifdef CONFIDENCE_MEASURE
# ifndef CM_NBEST /* use conventional N-best CM, will be defined if "--enable-cm-nbest" specified */
#  define CM_SEARCH /* otherwise, use on-the-fly CM scoring */
# endif
#endif

/* dynamic word graph generation */
#ifdef GRAPHOUT /* output result in word-graph format */

#undef GRAPHOUT_SEARCH_CONSIDER_RIGHT /* if defined, only hypothesis whose
                                         left/right contexts is already
                                         included in popped hypo will be merged.
                                         EXPERIMENTAL, should not be defined.
                                       */
/* NOTE(review): CM_SEARCH_LIMIT is not defined anywhere in this view, and
   "above" in the first comment below has no visible referent -- confirm
   against the build configuration. */
#ifdef CM_SEARCH_LIMIT
#undef CM_SEARCH_LIMIT_AFTER /* enable above only after 1 sentence found */
#undef CM_SEARCH_LIMIT_POP   /* terminate hypo of low CM on pop */
#endif

/* compute exact boundary instead of using 1st pass result */
/* also propagate exact time boundary to the right context after generation */
/* this may produce precise word boundary, but cause bigger word graph output */
#define GRAPHOUT_PRECISE_BOUNDARY

#undef GDEBUG /* enable debug message in graphout.c */

#endif /* GRAPHOUT */

/* some decoding fix candidates */
#undef FIX_35_PASS2_STRICT_SCORE /* fix hypothesis scores by enabling
                                    bt_discount_pescore() in standard mode
                                    with PASS2_STRICT_IWCD,
                                  */
#define FIX_35_INHIBIT_SAME_WORD_EXPANSION /* prevent connecting the same trellis word in 2nd pass */


/* below are new since 3.5.2 */
#ifdef GRAPHOUT

/* NOTE(review): the descriptions of most switches in this section are not
   visible in this view; the short notes below are inferred from the macro
   names only and should be confirmed against the code that tests them. */

#define GRAPHOUT_OVERWRITE

/* with GRAPHOUT_OVERWRITE, use gscore_head instead of fscore_head */
#undef GRAPHOUT_OVERWRITE_GSCORE

#define GRAPHOUT_LIMIT_BOUNDARY_LOOP

/* presumably the default count for GRAPHOUT_LIMIT_BOUNDARY_LOOP -- confirm */
#define GRAPHOUT_LIMIT_BOUNDARY_LOOP_NUM_DEFAULT 20

#define GRAPHOUT_SEARCH_DELAY_TERMINATION

#define GRAPHOUT_DEPTHCUT

/* presumably the default depth for GRAPHOUT_DEPTHCUT -- confirm */
#define GRAPHOUT_DEPTHCUT_DEFAULT 80

#endif /* GRAPHOUT */