Julius 4.1.5
libsent/src/util/ptree.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ptree.h>
00027 
00028 static unsigned char mbit[] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
00029   
00030 
00039 int
00040 testbit(char *str, int slen, int bitplace)
00041 {
00042   int maskptr;
00043   
00044   if ((maskptr = bitplace >> 3) > slen) return 0;
00045   return(str[maskptr] & mbit[bitplace & 7]);
00046 }
00047 
00057 int
00058 testbit_max(char *str, int bitplace, int maxbitplace)
00059 {
00060   if (bitplace >= maxbitplace) return 0;
00061   return(str[bitplace >> 3] & mbit[bitplace & 7]);
00062 }
00063 
00072 int
00073 where_the_bit_differ(char *str1, char *str2)
00074 {
00075   int p = 0;
00076   int bitloc;
00077   int slen1, slen2;
00078 
00079   /* step: char, bit */
00080   while(str1[p] == str2[p]) p++;
00081   bitloc = p * 8;
00082   slen1 = strlen(str1);
00083   slen2 = strlen(str2);
00084   while(testbit(str1, slen1, bitloc) == testbit(str2, slen2, bitloc)) bitloc++;
00085 
00086   return(bitloc);
00087 }
00088 
00095 static PATNODE *
00096 new_node()
00097 {
00098   PATNODE *tmp;
00099 
00100   tmp = (PATNODE *)mymalloc(sizeof(PATNODE));
00101   tmp->left0 = NULL;
00102   tmp->right1 = NULL;
00103 
00104   return(tmp);
00105 }
00106 
00118 PATNODE *
00119 make_ptree(char **words, int *data, int wordsnum, int bitplace)
00120 {
00121   int i,j, tmp;
00122   char *p;
00123   int newnum;
00124   PATNODE *ntmp;
00125 
00126 #if 0
00127   printf("%d:", wordsnum);
00128   for (i=0;i<wordsnum;i++) {
00129     printf(" %s",words[i]);
00130   }
00131   printf("\n");
00132   printf("test bit = %d\n", bitplace);
00133 #endif
00134 
00135   if (wordsnum == 1) {
00136     /* word identified: this is leaf node */
00137     ntmp = new_node();
00138     ntmp->value.data = data[0];
00139     return(ntmp);
00140   }
00141 
00142   newnum = 0;
00143   for (i=0;i<wordsnum;i++) {
00144     if (testbit(words[i], strlen(words[i]), bitplace) != 0) {
00145       newnum++;
00146     }
00147   }
00148   if (newnum == 0 || newnum == wordsnum) {
00149     /* all words has same bit, continue to descend */
00150     return(make_ptree(words, data, wordsnum, bitplace + 1));
00151   } else {
00152     /* sort word pointers by tested bit */
00153     j = wordsnum-1;
00154     for (i=0; i<newnum; i++) {
00155       if (testbit(words[i], strlen(words[i]), bitplace) == 0) {
00156         for (; j>=newnum; j--) {
00157           if (testbit(words[j], strlen(words[j]), bitplace) != 0) {
00158             p = words[i]; words[i] = words[j]; words[j] = p;
00159             tmp = data[i]; data[i] = data[j]; data[j] = tmp;
00160             break;
00161           }
00162         }
00163       }
00164     }
00165     /* create node and descend for each node */
00166     ntmp = new_node();
00167     ntmp->value.thres_bit = bitplace;
00168     ntmp->right1 = make_ptree(words, data, newnum, bitplace+1);
00169     ntmp->left0  = make_ptree(&(words[newnum]), &(data[newnum]), wordsnum-newnum, bitplace+1);
00170     return(ntmp);
00171   }
00172 }
00173 
00174 
00181 void
00182 disp_ptree(PATNODE *node, int level)
00183 {
00184   int i;
00185 
00186   for (i=0;i<level;i++) {
00187     printf("-");
00188   }
00189   if (node->left0 == NULL && node->right1 == NULL) {
00190     printf("LEAF:%d\n", node->value.data);
00191   } else {
00192     printf("%d\n", node->value.thres_bit);
00193     if (node->left0 != NULL) {
00194       disp_ptree(node->left0, level+1);
00195     }
00196     if (node->right1 != NULL) {
00197       disp_ptree(node->right1, level+1);
00198     }
00199   }
00200 }
00201 
00211 static int
00212 ptree_search_data_r(PATNODE *node, char *str, int maxbitplace)
00213 {
00214   if (node->left0 == NULL && node->right1 == NULL) {
00215     return(node->value.data);
00216   } else {
00217     if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) {
00218       return(ptree_search_data_r(node->right1, str, maxbitplace));
00219     } else {
00220       return(ptree_search_data_r(node->left0, str, maxbitplace));
00221     }
00222   }
00223 }
00224 
00233 int
00234 ptree_search_data(char *str, PATNODE *node)
00235 {
00236   if (node == NULL) {
00237     //("Error: ptree_search_data: no node, search for \"%s\" failed\n", str);
00238     return -1;
00239   }
00240   return(ptree_search_data_r(node, str, strlen(str) * 8 + 8));
00241 }
00242 
00253 static int
00254 ptree_replace_data_r(PATNODE *node, char *str, int val, int maxbitplace)
00255 {
00256   if (node->left0 == NULL && node->right1 == NULL) {
00257     node->value.data = val;
00258     return(node->value.data);
00259   } else {
00260     if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) {
00261       return(ptree_replace_data_r(node->right1, str, val, maxbitplace));
00262     } else {
00263       return(ptree_replace_data_r(node->left0, str, val, maxbitplace));
00264     }
00265   }
00266 }
00267 
00278 int
00279 ptree_replace_data(char *str, int val, PATNODE *node)
00280 {
00281   if (node == NULL) {
00282     //("Error: ptree_search_data: no node, search for \"%s\" failed\n", str);
00283     return -1;
00284   }
00285   return(ptree_replace_data_r(node, str, val, strlen(str) * 8 + 8));
00286 }
00287 
00288 
00289 /*******************************************************************/
00290 /* add 1 node to given ptree */
00291 
00299 PATNODE *
00300 ptree_make_root_node(int data)
00301 {
00302   PATNODE *nnew;
00303   /* make new leaf node for newstr */
00304   nnew = new_node();
00305   nnew->value.data = data;
00306   return(nnew);
00307 }
00308 
00317 static void
00318 ptree_add_entry_at(char *str, int slen, int bitloc, int data, PATNODE **parentlink)
00319 {
00320   PATNODE *node;
00321   node = *parentlink;
00322   if (node->value.thres_bit > bitloc ||
00323       (node->left0 == NULL && node->right1 == NULL)) {
00324     PATNODE *newleaf, *newbranch;
00325     /* insert between [parent] and [node] */
00326     newleaf = new_node();
00327     newleaf->value.data = data;
00328     newbranch = new_node();
00329     newbranch->value.thres_bit = bitloc;
00330     *parentlink = newbranch;
00331     if (testbit(str, slen, bitloc) ==0) {
00332       newbranch->left0  = newleaf;
00333       newbranch->right1 = node;
00334     } else {
00335       newbranch->left0  = node;
00336       newbranch->right1 = newleaf;
00337     }
00338     return;
00339   } else {
00340     if (testbit(str, slen, node->value.thres_bit) != 0) {
00341       ptree_add_entry_at(str, slen, bitloc, data, &(node->right1));
00342     } else {
00343       ptree_add_entry_at(str, slen, bitloc, data, &(node->left0));
00344     }
00345   }
00346 }
00347 
00357 void
00358 ptree_add_entry(char *str, int data, char *matchstr, PATNODE **rootnode)
00359 {
00360   int bitloc;
00361 
00362   bitloc = where_the_bit_differ(str, matchstr);
00363   if (*rootnode == NULL) {
00364     *rootnode = ptree_make_root_node(data);
00365   } else {
00366     ptree_add_entry_at(str, strlen(str), bitloc, data, rootnode);
00367   }
00368 
00369 }
00370 
00376 void
00377 free_ptree(PATNODE *node)
00378 {
00379   if (node == NULL) return;
00380   if (node->left0 != NULL) free_ptree(node->left0);
00381   if (node->right1 != NULL) free_ptree(node->right1);
00382   free(node);
00383 }