00001
00017
00018
00019
00020
00021
00022
00023
00024 #include <sent/stddefs.h>
00025 #include <sent/ngram2.h>
00026 #include <sent/vocabulary.h>
00027
00034 void
00035 init_ngram_bin(NGRAM_INFO *ndata, char *bin_ngram_file)
00036 {
00037 FILE *fp;
00038
00039 j_printerr("Reading in word n-gram...");
00040 if ((fp = fopen_readfile(bin_ngram_file)) == NULL) {
00041 j_error("open error for %s\n", bin_ngram_file);
00042 }
00043 if (ngram_read_bin(fp, ndata) == FALSE) {
00044 j_error("read error for %s\n", bin_ngram_file);
00045 }
00046 if (fclose_readfile(fp) == -1) {
00047 j_error("close error\n");
00048 }
00049 j_printerr("done\n");
00050 }
00051
00059 void
00060 init_ngram_arpa(NGRAM_INFO *ndata, char *ngram_lr_file, char *ngram_rl_file)
00061 {
00062 FILE *fp;
00063
00064 ndata->root = NULL;
00065 j_printerr("Reading in LR 2-gram...\n");
00066
00067 if ((fp = fopen_readfile(ngram_lr_file)) == NULL) {
00068 j_error("open error for %s\n", ngram_lr_file);
00069 }
00070 if (ngram_read_arpa(fp, ndata, DIR_LR) == FALSE) {
00071 j_error("read error for %s\n", ngram_lr_file);
00072 }
00073 if (fclose_readfile(fp) == -1) {
00074 j_error("close error\n");
00075 }
00076 if (ngram_rl_file != NULL) {
00077 j_printerr("done\nReading in RL 3-gram...\n");
00078
00079 if ((fp = fopen_readfile(ngram_rl_file)) == NULL) {
00080 j_error("open error for %s\n", ngram_rl_file);
00081 }
00082 if (ngram_read_arpa(fp, ndata, DIR_RL) == FALSE) {
00083 j_error("read error for %s\n", ngram_rl_file);
00084 }
00085 if (fclose_readfile(fp) == -1) {
00086 j_error("close error\n");
00087 }
00088 }
00089
00090 j_printerr("done\n");
00091 }
00092
00099 void
00100 make_voca_ref(NGRAM_INFO *ndata, WORD_INFO *winfo)
00101 {
00102 int i;
00103
00104 j_printerr("Mapping dictonary words to n-gram entries...");
00105 ndata->unk_num = 0;
00106 for (i = 0; i < winfo->num; i++) {
00107 winfo->wton[i] = make_ngram_ref(ndata, winfo->wname[i]);
00108 if (winfo->wton[i] == ndata->unk_id) {
00109 (ndata->unk_num)++;
00110 }
00111 }
00112 if (ndata->unk_num == 0) {
00113 ndata->unk_num_log = 0.0;
00114 } else {
00115 ndata->unk_num_log = (float)log10(ndata->unk_num);
00116 }
00117 j_printerr("done\n");
00118 }
00119