Julius 4.2
libsent/src/ngram/ngram_compact_context.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027 
00038 boolean
00039 ngram_compact_context(NGRAM_INFO *ndata, int n)
00040 {
00041   NNID i;
00042   NNID c;
00043   NNID dst;
00044   NNID ntmp;
00045   NGRAM_TUPLE_INFO *this, *up;
00046 
00047   this = &(ndata->d[n-1]);
00048   up   = &(ndata->d[n]);
00049 
00050   /* count number of valid context */
00051   c = 0;
00052   for(i=0;i<up->bgnlistlen;i++) {
00053     if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00054         || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00055       c++;
00056     } else {
00057       if (up->num[i] != 0) {
00058         jlog("Error: ngram_compact_context: internal error\n");
00059         return FALSE;
00060       }
00061       if (this->bo_wt[i] != 0.0) {
00062         jlog("Warning: ngram_compact_context: found a %d-gram that has non-zero back-off weight but not a context of upper N-gram (%f)\n", n, this->bo_wt[i]);
00063         jlog("Warning: ngram_compact_context: context compaction disabled\n");
00064         ndata->d[n-1].ct_compaction = FALSE;
00065         return TRUE;            /* no op */
00066       }
00067     }
00068   }
00069   
00070   if (this->totalnum == c) {
00071     jlog("Stat: ngram_compact_context: %d-gram has full bo_wt, compaction disabled\n", n);
00072     ndata->d[n-1].ct_compaction = FALSE;
00073     return TRUE;                /* no op */
00074   }
00075 
00076   if (c >= NNID_MAX_24) {
00077     jlog("Stat: ngram_compact_context: %d-gram bo_wt exceeds 24bit, compaction diabled\n", n);
00078     ndata->d[n-1].ct_compaction = FALSE;
00079     return TRUE;                /* no op */
00080   }    
00081 
00082   this->context_num = c;
00083   jlog("Stat: ngram_compact_context: %d-gram back-off weight compaction: %d -> %d\n", n, this->totalnum, this->context_num);
00084   
00085   /* allocate index buffer */
00086   this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum);
00087   this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum);
00088   /* make index and do compaction of context informations */
00089   dst = 0;
00090   for(i=0;i<up->bgnlistlen;i++) {
00091     if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00092         || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00093       this->bo_wt[dst] = this->bo_wt[i];
00094       if (up->is24bit) {
00095         up->bgn_upper[dst] = up->bgn_upper[i];
00096         up->bgn_lower[dst] = up->bgn_lower[i];
00097       } else {
00098         up->bgn[dst] = up->bgn[i];
00099       }
00100       up->num[dst] = up->num[i];
00101       ntmp = dst & 0xffff;
00102       this->nnid2ctid_lower[i] = ntmp;
00103       ntmp = dst >> 16;
00104       this->nnid2ctid_upper[i] = ntmp;
00105       dst++;
00106     } else {
00107       this->nnid2ctid_upper[i] = NNID_INVALID_UPPER;
00108       this->nnid2ctid_lower[i] = 0;
00109     }
00110   }
00111   up->bgnlistlen = this->context_num;
00112 
00113   /* shrink the memory area */
00114   this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num);
00115   if (up->is24bit) {
00116     up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen);
00117     up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen);
00118   } else {
00119     up->bgn = (NNID *)myrealloc(up->bgn, sizeof(NNID) * up->bgnlistlen);
00120   }
00121   up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen);
00122 
00123   /* finished compaction */
00124   ndata->d[n-1].ct_compaction = TRUE;
00125 
00126   return TRUE;
00127 
00128 }