Julius 4.2
libsent/src/util/ptree.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ptree.h>
00027 
00028 static unsigned char mbit[] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
00029   
00030 
00039 int
00040 testbit(char *str, int slen, int bitplace)
00041 {
00042   int maskptr;
00043   
00044   if ((maskptr = bitplace >> 3) > slen) return 0;
00045   return(str[maskptr] & mbit[bitplace & 7]);
00046 }
00047 
00057 int
00058 testbit_max(char *str, int bitplace, int maxbitplace)
00059 {
00060   if (bitplace >= maxbitplace) return 0;
00061   return(str[bitplace >> 3] & mbit[bitplace & 7]);
00062 }
00063 
00072 int
00073 where_the_bit_differ(char *str1, char *str2)
00074 {
00075   int p = 0;
00076   int bitloc;
00077   int slen1, slen2;
00078 
00079   /* step: char, bit */
00080   while(str1[p] == str2[p]) p++;
00081   bitloc = p * 8;
00082   slen1 = strlen(str1);
00083   slen2 = strlen(str2);
00084   while(testbit(str1, slen1, bitloc) == testbit(str2, slen2, bitloc)) bitloc++;
00085 
00086   return(bitloc);
00087 }
00088 
00096 static PATNODE *
00097 new_node(BMALLOC_BASE **mroot)
00098 {
00099   PATNODE *tmp;
00100 
00101   tmp = (PATNODE *)mybmalloc2(sizeof(PATNODE), mroot);
00102   tmp->left0 = NULL;
00103   tmp->right1 = NULL;
00104 
00105   return(tmp);
00106 }
00107 
00120 PATNODE *
00121 make_ptree(char **words, int *data, int wordsnum, int bitplace, BMALLOC_BASE **mroot)
00122 {
00123   int i,j, tmp;
00124   char *p;
00125   int newnum;
00126   PATNODE *ntmp;
00127 
00128 #if 0
00129   printf("%d:", wordsnum);
00130   for (i=0;i<wordsnum;i++) {
00131     printf(" %s",words[i]);
00132   }
00133   printf("\n");
00134   printf("test bit = %d\n", bitplace);
00135 #endif
00136 
00137   if (wordsnum == 1) {
00138     /* word identified: this is leaf node */
00139     ntmp = new_node(mroot);
00140     ntmp->value.data = data[0];
00141     return(ntmp);
00142   }
00143 
00144   newnum = 0;
00145   for (i=0;i<wordsnum;i++) {
00146     if (testbit(words[i], strlen(words[i]), bitplace) != 0) {
00147       newnum++;
00148     }
00149   }
00150   if (newnum == 0 || newnum == wordsnum) {
00151     /* all words has same bit, continue to descend */
00152     return(make_ptree(words, data, wordsnum, bitplace + 1, mroot));
00153   } else {
00154     /* sort word pointers by tested bit */
00155     j = wordsnum-1;
00156     for (i=0; i<newnum; i++) {
00157       if (testbit(words[i], strlen(words[i]), bitplace) == 0) {
00158         for (; j>=newnum; j--) {
00159           if (testbit(words[j], strlen(words[j]), bitplace) != 0) {
00160             p = words[i]; words[i] = words[j]; words[j] = p;
00161             tmp = data[i]; data[i] = data[j]; data[j] = tmp;
00162             break;
00163           }
00164         }
00165       }
00166     }
00167     /* create node and descend for each node */
00168     ntmp = new_node(mroot);
00169     ntmp->value.thres_bit = bitplace;
00170     ntmp->right1 = make_ptree(words, data, newnum, bitplace+1, mroot);
00171     ntmp->left0  = make_ptree(&(words[newnum]), &(data[newnum]), wordsnum-newnum, bitplace+1, mroot);
00172     return(ntmp);
00173   }
00174 }
00175 
00176 
00183 void
00184 disp_ptree(PATNODE *node, int level)
00185 {
00186   int i;
00187 
00188   for (i=0;i<level;i++) {
00189     printf("-");
00190   }
00191   if (node->left0 == NULL && node->right1 == NULL) {
00192     printf("LEAF:%d\n", node->value.data);
00193   } else {
00194     printf("%d\n", node->value.thres_bit);
00195     if (node->left0 != NULL) {
00196       disp_ptree(node->left0, level+1);
00197     }
00198     if (node->right1 != NULL) {
00199       disp_ptree(node->right1, level+1);
00200     }
00201   }
00202 }
00203 
00213 static int
00214 ptree_search_data_r(PATNODE *node, char *str, int maxbitplace)
00215 {
00216   if (node->left0 == NULL && node->right1 == NULL) {
00217     return(node->value.data);
00218   } else {
00219     if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) {
00220       return(ptree_search_data_r(node->right1, str, maxbitplace));
00221     } else {
00222       return(ptree_search_data_r(node->left0, str, maxbitplace));
00223     }
00224   }
00225 }
00226 
00235 int
00236 ptree_search_data(char *str, PATNODE *node)
00237 {
00238   if (node == NULL) {
00239     //("Error: ptree_search_data: no node, search for \"%s\" failed\n", str);
00240     return -1;
00241   }
00242   return(ptree_search_data_r(node, str, strlen(str) * 8 + 8));
00243 }
00244 
00255 static int
00256 ptree_replace_data_r(PATNODE *node, char *str, int val, int maxbitplace)
00257 {
00258   if (node->left0 == NULL && node->right1 == NULL) {
00259     node->value.data = val;
00260     return(node->value.data);
00261   } else {
00262     if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) {
00263       return(ptree_replace_data_r(node->right1, str, val, maxbitplace));
00264     } else {
00265       return(ptree_replace_data_r(node->left0, str, val, maxbitplace));
00266     }
00267   }
00268 }
00269 
00280 int
00281 ptree_replace_data(char *str, int val, PATNODE *node)
00282 {
00283   if (node == NULL) {
00284     //("Error: ptree_search_data: no node, search for \"%s\" failed\n", str);
00285     return -1;
00286   }
00287   return(ptree_replace_data_r(node, str, val, strlen(str) * 8 + 8));
00288 }
00289 
00290 
00291 /*******************************************************************/
00292 /* add 1 node to given ptree */
00293 
00302 PATNODE *
00303 ptree_make_root_node(int data, BMALLOC_BASE **mroot)
00304 {
00305   PATNODE *nnew;
00306   /* make new leaf node for newstr */
00307   nnew = new_node(mroot);
00308   nnew->value.data = data;
00309   return(nnew);
00310 }
00311 
00321 static void
00322 ptree_add_entry_at(char *str, int slen, int bitloc, int data, PATNODE **parentlink, BMALLOC_BASE **mroot)
00323 {
00324   PATNODE *node;
00325   node = *parentlink;
00326   if (node->value.thres_bit > bitloc ||
00327       (node->left0 == NULL && node->right1 == NULL)) {
00328     PATNODE *newleaf, *newbranch;
00329     /* insert between [parent] and [node] */
00330     newleaf = new_node(mroot);
00331     newleaf->value.data = data;
00332     newbranch = new_node(mroot);
00333     newbranch->value.thres_bit = bitloc;
00334     *parentlink = newbranch;
00335     if (testbit(str, slen, bitloc) ==0) {
00336       newbranch->left0  = newleaf;
00337       newbranch->right1 = node;
00338     } else {
00339       newbranch->left0  = node;
00340       newbranch->right1 = newleaf;
00341     }
00342     return;
00343   } else {
00344     if (testbit(str, slen, node->value.thres_bit) != 0) {
00345       ptree_add_entry_at(str, slen, bitloc, data, &(node->right1), mroot);
00346     } else {
00347       ptree_add_entry_at(str, slen, bitloc, data, &(node->left0), mroot);
00348     }
00349   }
00350 }
00351 
00362 void
00363 ptree_add_entry(char *str, int data, char *matchstr, PATNODE **rootnode, BMALLOC_BASE **mroot)
00364 {
00365   int bitloc;
00366 
00367   bitloc = where_the_bit_differ(str, matchstr);
00368   if (*rootnode == NULL) {
00369     *rootnode = ptree_make_root_node(data, mroot);
00370   } else {
00371     ptree_add_entry_at(str, strlen(str), bitloc, data, rootnode, mroot);
00372   }
00373 
00374 }