00001
00017
00018
00019
00020
00021
00022
00023
00024 #include <sent/stddefs.h>
00025 #include <sent/ngram2.h>
00026 #include <sent/ptree.h>
00027
00033 void
00034 ngram_make_lookup_tree(NGRAM_INFO *ndata)
00035 {
00036 int i;
00037 int *windex;
00038 char **wnameindex;
00039
00040 windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num);
00041 for (i=0;i<ndata->max_word_num;i++) {
00042 windex[i] = i;
00043 }
00044 wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num);
00045 for (i=0;i<ndata->max_word_num;i++) {
00046 wnameindex[i] = ndata->wname[i];
00047 }
00048
00049 ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0);
00050
00051 free(windex);
00052 free(wnameindex);
00053 }
00054
00063 WORD_ID
00064 ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr)
00065 {
00066 int data;
00067 data = ptree_search_data(wordstr, ndata->root);
00068 if (strcmp(wordstr, ndata->wname[data]) != 0) {
00069 return WORD_INVALID;
00070 } else {
00071 return(data);
00072 }
00073 }
00074
00083 WORD_ID
00084 make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
00085 {
00086 WORD_ID nw;
00087
00088 nw = ngram_lookup_word(ndata, wstr);
00089 if (nw == WORD_INVALID) {
00090 j_printf("word %s not exist in N-gram, treat as <UNK>\n", wstr);
00091 return(ndata->unk_id);
00092 } else {
00093 return(nw);
00094 }
00095 }