00001
00017
00018
00019
00020
00021
00022
00023
00024 #include <sent/stddefs.h>
00025 #include <sent/ngram2.h>
00026
00035 static int
00036 get_unigram_size(NGRAM_INFO *ndata)
00037 {
00038 int unitsize;
00039 unitsize = sizeof(LOGPROB) * 3 + sizeof(NNID) + sizeof(WORD_ID);
00040 return(unitsize * ndata->ngram_num[0]);
00041 }
00042
00051 static int
00052 get_bigram_size(NGRAM_INFO *ndata)
00053 {
00054 int unitsize;
00055 int size;
00056
00057 switch(ndata->version) {
00058 case 4:
00059 unitsize = sizeof(WORD_ID) + sizeof(LOGPROB) * 2 + sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00060 size = unitsize * ndata->ngram_num[1];
00061 unitsize = sizeof(LOGPROB) + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID);
00062 size += unitsize * ndata->bigram_bo_num;
00063 break;
00064 case 3:
00065 unitsize = sizeof(WORD_ID) * 2 + sizeof(LOGPROB) * 3 + sizeof(NNID);
00066 size = unitsize * ndata->ngram_num[1];
00067 break;
00068 }
00069
00070 return(size);
00071 }
00072
00081 static int
00082 get_trigram_size(NGRAM_INFO *ndata)
00083 {
00084 int unitsize;
00085
00086 unitsize = sizeof(WORD_ID) + sizeof(LOGPROB);
00087 return(unitsize * ndata->ngram_num[2]);
00088 }
00089
00090
00096 void
00097 print_ngram_info(NGRAM_INFO *ndata)
00098 {
00099 j_printf("N-gram info:\n");
00100 j_printf("\t struct version = %d\n", ndata->version);
00101 if (ndata->isopen) {
00102 j_printf("\t OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id);
00103 j_printf("\t OOV size = %d words in dict\n", ndata->unk_num);
00104 } else {
00105 j_printf("\t OOV word = none\n");
00106 }
00107 j_printf("\t wordset size = %8d\n", ndata->max_word_num);
00108 j_printf("\tuni-gram entries = %8d (%8d bytes)\n",
00109 ndata->ngram_num[0], get_unigram_size(ndata));
00110 j_printf("\t bi-gram tuples = %8d (%8d bytes)\n",
00111 ndata->ngram_num[1], get_bigram_size(ndata));
00112 j_printf("\t tri-gram tuples = %8d (%8d bytes)\n",
00113 ndata->ngram_num[2], get_trigram_size(ndata));
00114 }