Julius 4.2
|
00001 00018 /* 00019 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00020 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00021 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00022 * All rights reserved 00023 */ 00024 00025 #include <sent/stddefs.h> 00026 #include <sent/ngram2.h> 00027 00038 boolean 00039 ngram_compact_context(NGRAM_INFO *ndata, int n) 00040 { 00041 NNID i; 00042 NNID c; 00043 NNID dst; 00044 NNID ntmp; 00045 NGRAM_TUPLE_INFO *this, *up; 00046 00047 this = &(ndata->d[n-1]); 00048 up = &(ndata->d[n]); 00049 00050 /* count number of valid context */ 00051 c = 0; 00052 for(i=0;i<up->bgnlistlen;i++) { 00053 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER) 00054 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) { 00055 c++; 00056 } else { 00057 if (up->num[i] != 0) { 00058 jlog("Error: ngram_compact_context: internal error\n"); 00059 return FALSE; 00060 } 00061 if (this->bo_wt[i] != 0.0) { 00062 jlog("Warning: ngram_compact_context: found a %d-gram that has non-zero back-off weight but not a context of upper N-gram (%f)\n", n, this->bo_wt[i]); 00063 jlog("Warning: ngram_compact_context: context compaction disabled\n"); 00064 ndata->d[n-1].ct_compaction = FALSE; 00065 return TRUE; /* no op */ 00066 } 00067 } 00068 } 00069 00070 if (this->totalnum == c) { 00071 jlog("Stat: ngram_compact_context: %d-gram has full bo_wt, compaction disabled\n", n); 00072 ndata->d[n-1].ct_compaction = FALSE; 00073 return TRUE; /* no op */ 00074 } 00075 00076 if (c >= NNID_MAX_24) { 00077 jlog("Stat: ngram_compact_context: %d-gram bo_wt exceeds 24bit, compaction diabled\n", n); 00078 ndata->d[n-1].ct_compaction = FALSE; 00079 return TRUE; /* no op */ 00080 } 00081 00082 this->context_num = c; 00083 jlog("Stat: ngram_compact_context: %d-gram back-off weight compaction: %d -> %d\n", n, this->totalnum, this->context_num); 00084 00085 /* allocate index buffer */ 00086 this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum); 00087 this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum); 00088 /* make index and do compaction of context informations */ 00089 dst = 0; 00090 for(i=0;i<up->bgnlistlen;i++) { 00091 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER) 00092 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) { 00093 this->bo_wt[dst] = this->bo_wt[i]; 00094 if (up->is24bit) { 00095 up->bgn_upper[dst] = up->bgn_upper[i]; 00096 up->bgn_lower[dst] = up->bgn_lower[i]; 00097 } else { 00098 up->bgn[dst] = up->bgn[i]; 00099 } 00100 up->num[dst] = up->num[i]; 00101 ntmp = dst & 0xffff; 00102 this->nnid2ctid_lower[i] = ntmp; 00103 ntmp = dst >> 16; 00104 this->nnid2ctid_upper[i] = ntmp; 00105 dst++; 00106 } else { 00107 this->nnid2ctid_upper[i] = NNID_INVALID_UPPER; 00108 this->nnid2ctid_lower[i] = 0; 00109 } 00110 } 00111 up->bgnlistlen = this->context_num; 00112 00113 /* shrink the memory area */ 00114 this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num); 00115 if (up->is24bit) { 00116 up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen); 00117 up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen); 00118 } else { 00119 up->bgn = (NNID *)myrealloc(up->bgn, sizeof(NNID) * up->bgnlistlen); 00120 } 00121 up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen); 00122 00123 /* finished compaction */ 00124 ndata->d[n-1].ct_compaction = TRUE; 00125 00126 return TRUE; 00127 00128 }