Julius 4.2
|
00001 00018 /* 00019 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00020 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00021 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00022 * All rights reserved 00023 */ 00024 00025 #include <sent/stddefs.h> 00026 #include <sent/ngram2.h> 00027 00036 static unsigned int 00037 get_ngram_tuple_bytes(NGRAM_TUPLE_INFO *t) 00038 { 00039 unsigned int size, unit; 00040 00041 size = 0; 00042 if (t->num != NULL) { /* other than 1-gram */ 00043 /* bgn */ 00044 if (t->is24bit) { 00045 unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER); 00046 } else { 00047 unit = sizeof(NNID); 00048 } 00049 /* num */ 00050 unit += sizeof(WORD_ID); 00051 size += unit * t->bgnlistlen; 00052 } 00053 /* prob */ 00054 unit = sizeof(LOGPROB); 00055 /* nnid2wid */ 00056 if (t->nnid2wid) unit += sizeof(WORD_ID); 00057 size += unit * t->totalnum; 00058 00059 if (t->bo_wt) { 00060 if (t->ct_compaction) { 00061 /* nnid2ctid */ 00062 unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER); 00063 size += unit * t->totalnum; 00064 } 00065 /* bo_wt */ 00066 size += sizeof(LOGPROB) * t->context_num; 00067 } 00068 00069 return size; 00070 } 00071 00078 void 00079 print_ngram_info(FILE *fp, NGRAM_INFO *ndata) 00080 { 00081 int i; 00082 fprintf(fp, " N-gram info:\n"); 00083 //fprintf(fp, "\t struct version = %d\n", ndata->version); 00084 00085 fprintf(fp, "\t spec = %d-gram", ndata->n); 00086 if (ndata->dir == DIR_RL) { 00087 fprintf(fp, ", backward (right-to-left)\n"); 00088 } else { 00089 fprintf(fp, ", forward (left-to-right)\n"); 00090 } 00091 if (ndata->isopen) { 00092 fprintf(fp, "\t OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id); 00093 fprintf(fp, "\t OOV size = %d words in dict\n", ndata->unk_num); 00094 } else { 00095 fprintf(fp, "\t OOV word = none (assume close vocabulary)\n"); 00096 } 00097 fprintf(fp, "\t wordset size = %d\n", ndata->max_word_num); 00098 for(i=0;i<ndata->n;i++) { 00099 fprintf(fp, "\t %d-gram entries = %10lu (%5.1f MB)", i+1, ndata->d[i].totalnum, get_ngram_tuple_bytes(&(ndata->d[i])) / 1048576.0); 00100 if (ndata->d[i].bo_wt != NULL && ndata->d[i].totalnum != ndata->d[i].context_num) { 00101 fprintf(fp, " (%d%% are valid contexts)", ndata->d[i].context_num * 100 / ndata->d[i].totalnum); 00102 } 00103 fprintf(fp, "\n"); 00104 } 00105 00106 if (ndata->bo_wt_1) { 00107 fprintf(fp, "\tLR 2-gram entries= %10lu (%5.1f MB)\n", ndata->d[1].totalnum, 00108 (sizeof(LOGPROB) * ndata->d[1].totalnum + sizeof(LOGPROB) * ndata->d[0].context_num) / 1048576.0); 00109 } 00110 fprintf(fp, "\t pass1 = "); 00111 if (ndata->dir == DIR_RL) { 00112 if (ndata->bo_wt_1) { 00113 fprintf(fp, "given additional forward 2-gram\n"); 00114 } else { 00115 fprintf(fp, "estimate 2-gram from the backward 2-gram\n"); 00116 } 00117 } else { 00118 fprintf(fp, "2-gram in the forward n-gram\n"); 00119 } 00120 }