Julius: libsent/src/ngram/ngram_util.c ソースファイル

Julius 4.2
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027 
00036 static unsigned int
00037 get_ngram_tuple_bytes(NGRAM_TUPLE_INFO *t)
00038 {
00039   unsigned int size, unit;
00040 
00041   size = 0;
00042   if (t->num != NULL) {         /* other than 1-gram */
00043     /* bgn */
00044     if (t->is24bit) {
00045       unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00046     } else {
00047       unit = sizeof(NNID);
00048     }
00049     /* num */
00050     unit += sizeof(WORD_ID);
00051     size += unit * t->bgnlistlen;
00052   }
00053   /* prob */
00054   unit = sizeof(LOGPROB);
00055   /* nnid2wid */
00056   if (t->nnid2wid) unit += sizeof(WORD_ID);
00057   size += unit * t->totalnum;
00058 
00059   if (t->bo_wt) {
00060     if (t->ct_compaction) {
00061       /* nnid2ctid */
00062       unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00063       size += unit * t->totalnum;
00064     }
00065     /* bo_wt */
00066     size += sizeof(LOGPROB) * t->context_num;
00067   }
00068 
00069   return size;
00070 }
00071 
00078 void
00079 print_ngram_info(FILE *fp, NGRAM_INFO *ndata)
00080 {
00081   int i;
00082   fprintf(fp, " N-gram info:\n");
00083   //fprintf(fp, "\t  struct version = %d\n", ndata->version);
00084 
00085   fprintf(fp, "\t            spec = %d-gram", ndata->n);
00086   if (ndata->dir == DIR_RL) {
00087     fprintf(fp, ", backward (right-to-left)\n");
00088   } else {
00089     fprintf(fp, ", forward (left-to-right)\n");
00090   }
00091   if (ndata->isopen) {
00092     fprintf(fp, "\t        OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id);
00093     fprintf(fp, "\t        OOV size = %d words in dict\n", ndata->unk_num);
00094   } else {
00095     fprintf(fp, "\t        OOV word = none (assume close vocabulary)\n");
00096   }
00097   fprintf(fp, "\t    wordset size = %d\n", ndata->max_word_num);
00098   for(i=0;i<ndata->n;i++) {
00099     fprintf(fp, "\t  %d-gram entries = %10lu  (%5.1f MB)", i+1, ndata->d[i].totalnum, get_ngram_tuple_bytes(&(ndata->d[i])) / 1048576.0);
00100     if (ndata->d[i].bo_wt != NULL && ndata->d[i].totalnum != ndata->d[i].context_num) {
00101       fprintf(fp, " (%d%% are valid contexts)", ndata->d[i].context_num * 100 / ndata->d[i].totalnum);
00102     }
00103     fprintf(fp, "\n");
00104   }
00105 
00106   if (ndata->bo_wt_1) {
00107     fprintf(fp, "\tLR 2-gram entries= %10lu  (%5.1f MB)\n", ndata->d[1].totalnum,
00108             (sizeof(LOGPROB) * ndata->d[1].totalnum + sizeof(LOGPROB) * ndata->d[0].context_num) / 1048576.0);
00109   }
00110   fprintf(fp, "\t           pass1 = ");
00111   if (ndata->dir == DIR_RL) {
00112     if (ndata->bo_wt_1) {
00113       fprintf(fp, "given additional forward 2-gram\n");
00114     } else {
00115       fprintf(fp, "estimate 2-gram from the backward 2-gram\n");
00116     }
00117   } else {
00118     fprintf(fp, "2-gram in the forward n-gram\n");
00119   }
00120 }