Julius 4.2
libsent/src/util/aptree.c
説明を見る。
00001 
00018 /*
00019  * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ptree.h>
00027 
00034 static APATNODE *
00035 new_node(BMALLOC_BASE **mroot)
00036 {
00037   APATNODE *tmp;
00038 
00039   tmp = (APATNODE *)mybmalloc2(sizeof(APATNODE), mroot);
00040   tmp->left0 = NULL;
00041   tmp->right1 = NULL;
00042 
00043   return(tmp);
00044 }
00045 
00053 static void *
00054 aptree_search_data_r(APATNODE *node, char *str, int maxbitplace)
00055 {
00056 #if 1
00057   /* non-recursive */
00058   APATNODE *n;
00059   APATNODE *branch = NULL;
00060   n = node;
00061   while(n->left0 != NULL || n->right1 != NULL) {
00062     branch = n;
00063     if (testbit_max(str, n->value.thres_bit, maxbitplace) != 0) {
00064       n = n->right1;
00065     } else {
00066       n = n->left0;
00067     }
00068   } 
00069   return(n->value.data);
00070 #else
00071   if (node->left0 == NULL && node->right1 == NULL) {
00072     return(node->value.data);
00073   } else {
00074     if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) {
00075       return(aptree_search_data_r(node->right1, str, maxbitplace));
00076     } else {
00077       return(aptree_search_data_r(node->left0, str, maxbitplace));
00078     }
00079   }
00080 #endif
00081 }
00082 
00091 void *
00092 aptree_search_data(char *str, APATNODE *node)
00093 {
00094   if (node == NULL) {
00095     //("Error: aptree_search_data: no node, search for \"%s\" failed\n", str);
00096     return NULL;
00097   }
00098   return(aptree_search_data_r(node, str, strlen(str) * 8 + 8));
00099 }
00100 
00101 
00102 /*******************************************************************/
00103 /* add 1 node to given ptree */
00104 
00112 APATNODE *
00113 aptree_make_root_node(void *data, BMALLOC_BASE **mroot)
00114 {
00115   APATNODE *nnew;
00116   /* make new leaf node for newstr */
00117   nnew = new_node(mroot);
00118   nnew->value.data = data;
00119   return(nnew);
00120 }
00121 
00130 static void
00131 aptree_add_entry_at(char *str, int slen, int bitloc, void *data, APATNODE **parentlink, BMALLOC_BASE **mroot)
00132 {
00133 #if 1
00134   /* non-recursive */
00135   APATNODE **p;
00136   APATNODE *newleaf, *newbranch, *node;
00137 
00138   p = parentlink;
00139   node = *p;
00140   while(node->value.thres_bit <= bitloc &&
00141         (node->left0 != NULL || node->right1 != NULL)) {
00142     if (testbit(str, slen, node->value.thres_bit) != 0) {
00143       p = &(node->right1);
00144     } else {
00145       p = &(node->left0);
00146     }
00147     node = *p;
00148   }
00149          
00150   /* insert between [parent] and [node] */
00151   newleaf = new_node(mroot);
00152   newleaf->value.data = data;
00153   newbranch = new_node(mroot);
00154   newbranch->value.thres_bit = bitloc;
00155   *p = newbranch;
00156   if (testbit(str, slen, bitloc) ==0) {
00157     newbranch->left0  = newleaf;
00158     newbranch->right1 = node;
00159   } else {
00160     newbranch->left0  = node;
00161     newbranch->right1 = newleaf;
00162   }
00163 
00164 #else
00165 
00166   APATNODE *node;
00167   APATNODE *newleaf, *newbranch;
00168 
00169   node = *parentlink;
00170   if (node->value.thres_bit > bitloc ||
00171       (node->left0 == NULL && node->right1 == NULL)) {
00172     /* insert between [parent] and [node] */
00173     newleaf = new_node(mroot);
00174     newleaf->value.data = data;
00175     newbranch = new_node(mroot);
00176     newbranch->value.thres_bit = bitloc;
00177     *parentlink = newbranch;
00178     if (testbit(str, slen, bitloc) ==0) {
00179       newbranch->left0  = newleaf;
00180       newbranch->right1 = node;
00181     } else {
00182       newbranch->left0  = node;
00183       newbranch->right1 = newleaf;
00184     }
00185     return;
00186   } else {
00187     if (testbit(str, slen, node->value.thres_bit) != 0) {
00188       aptree_add_entry_at(str, slen, bitloc, data, &(node->right1), mroot);
00189     } else {
00190       aptree_add_entry_at(str, slen, bitloc, data, &(node->left0), mroot);
00191     }
00192   }
00193 #endif
00194 }
00195 
00205 void
00206 aptree_add_entry(char *str, void *data, char *matchstr, APATNODE **rootnode, BMALLOC_BASE **mroot)
00207 {
00208   int bitloc;
00209 
00210   bitloc = where_the_bit_differ(str, matchstr);
00211   if (*rootnode == NULL) {
00212     *rootnode = aptree_make_root_node(data, mroot);
00213   } else {
00214     aptree_add_entry_at(str, strlen(str), bitloc, data, rootnode, mroot);
00215   }
00216 
00217 }
00218 
00219 /*******************************************************************/
00220 
00228 static void
00229 aptree_remove_entry_r(APATNODE *now, APATNODE *up, APATNODE *up2, char *str, int maxbitplace, APATNODE **root)
00230 {
00231   APATNODE *b;
00232 
00233   if (now->left0 == NULL && now->right1 == NULL) {
00234     /* assume this is exactly the node of data that has specified key string */
00235     /* make sure the data of your removal request already exists before call this */
00236     /* execute removal */
00237     if (up == NULL) {
00238       //free(now);
00239       *root = NULL;
00240       return;
00241     }
00242     b = (up->right1 == now) ? up->left0 : up->right1;
00243     if (up2 == NULL) {
00244       //free(now);
00245       //free(up);
00246       *root = b;
00247       return;
00248     }
00249     if (up2->left0 == up) {
00250       up2->left0 = b;
00251     } else {
00252       up2->right1 = b;
00253     }
00254     //free(now);
00255     //free(up);
00256     return;
00257   } else {
00258     /* traverse */
00259     if (testbit_max(str, now->value.thres_bit, maxbitplace) != 0) {
00260       aptree_remove_entry_r(now->right1, now, up, str, maxbitplace, root);
00261     } else {
00262       aptree_remove_entry_r(now->left0, now, up, str, maxbitplace, root);
00263     }
00264   }
00265 }
00266     
00274 void
00275 aptree_remove_entry(char *str, APATNODE **rootnode)
00276 {
00277   if (*rootnode == NULL) {
00278     jlog("Warning: aptree: no node, deletion for \"%s\" failed\n", str);
00279     return;
00280   }
00281   aptree_remove_entry_r(*rootnode, NULL, NULL, str, strlen(str)*8+8, rootnode);
00282 }
00283 
00284 /*******************************************************************/
00285 
00293 void
00294 aptree_traverse_and_do(APATNODE *node, void (*callback)(void *))
00295 {
00296   if (node->left0 == NULL && node->right1 == NULL) {
00297     (*callback)(node->value.data);
00298   } else {
00299     if (node->left0 != NULL) {
00300       aptree_traverse_and_do(node->left0, callback);
00301     }
00302     if (node->right1 != NULL) {
00303       aptree_traverse_and_do(node->right1, callback);
00304     }
00305   }
00306 }
00307 
00308 /*************************************************************/
00309 
00310 static void
00311 aptree_count(APATNODE *node, int *count_branch, int *count_data, int *maxbit)
00312 {
00313   if (node->left0 == NULL && node->right1 == NULL) {
00314     (*count_data)++;
00315   } else {
00316     if (node->value.thres_bit > *maxbit) {
00317       *maxbit = node->value.thres_bit;
00318     }
00319     (*count_branch)++;
00320     if (node->left0 != NULL) {
00321       aptree_count(node->left0, count_branch, count_data, maxbit);
00322     }
00323     if (node->right1 != NULL) {
00324       aptree_count(node->right1, count_branch, count_data, maxbit);
00325     }
00326   }
00327 }  
00328 
00329 static int
00330 aptree_build_index(APATNODE *node, int *num, int *data_id, int *left, int *right, int *data)
00331 {
00332   int id;
00333 
00334   id = *num;
00335   (*num)++;
00336   if (node->left0 == NULL && node->right1 == NULL) {
00337     left[id] = -1;
00338     right[id] = -1;
00339     data[id] = *data_id;
00340     /* node->value.data を保存 */
00341     (*data_id)++;
00342   } else {
00343     data[id] = node->value.thres_bit;
00344     if (node->left0 != NULL) {
00345       left[id] = aptree_build_index(node->left0, num, data_id, left, right, data);
00346     } else {
00347       left[id] = -1;
00348     }
00349     if (node->right1 != NULL) {
00350       right[id] = aptree_build_index(node->right1, num, data_id, left, right, data);
00351     } else {
00352       right[id] = -1;
00353     }
00354   }
00355   return id;
00356 }  
00357 
00358 static void
00359 aptree_write_leaf(FILE *fp, APATNODE *node, boolean (*callback)(void *, FILE *fp), boolean *error_p)
00360 {
00361   if (node->left0 == NULL && node->right1 == NULL) {
00362     if ((*callback)(node->value.data, fp) == FALSE) {
00363       *error_p = TRUE;
00364     }
00365   } else {
00366     if (node->left0 != NULL) {
00367       aptree_write_leaf(fp, node->left0, callback, error_p);
00368     }
00369     if (node->right1 != NULL) {
00370       aptree_write_leaf(fp, node->right1, callback, error_p);
00371     }
00372   }
00373 }
00374 
00375 
00376 boolean
00377 aptree_write(FILE *fp, APATNODE *root, boolean (*save_data_func)(void *, FILE *fp))
00378 {
00379   int count_node, count_branch, count_data, maxbit;
00380   int *left, *right, *value;
00381   int num, did;
00382   boolean err;
00383 
00384   if (root == NULL) return TRUE;
00385 
00386   /* count statistics */
00387   count_branch = count_data = 0;
00388   maxbit = 0;
00389   aptree_count(root, &count_branch, &count_data, &maxbit);
00390   count_node = count_branch + count_data;
00391   jlog("Stat: aptree_write: %d nodes (%d branch + %d data), maxbit=%d\n", count_node, count_branch, count_data, maxbit);
00392   /* allocate */
00393   left = (int *)mymalloc(sizeof(int) * count_node);
00394   right = (int *)mymalloc(sizeof(int) * count_node);
00395   value = (int *)mymalloc(sizeof(int) * count_node);
00396   /* make index */
00397   did = num = 0;
00398   aptree_build_index(root, &num, &did, left, right, value);
00399 #if 0
00400   {
00401     int i;
00402     for(i=0;i<count_node;i++) {
00403       printf("%d: %d %d %d\n", i, left[i], right[i], value[i]);
00404     }
00405   }
00406 #endif
00407   /* write tree to file */
00408   if (myfwrite(&count_node, sizeof(int), 1, fp) < 1) {
00409     jlog("Error: aptree_write: fail to write header\n");
00410     return FALSE;
00411   }
00412   if (myfwrite(&count_data, sizeof(int), 1, fp) < 1) {
00413     jlog("Error: aptree_write: fail to write header\n");
00414     return FALSE;
00415   }
00416   if (myfwrite(left, sizeof(int), count_node, fp) < count_node) {
00417     jlog("Error: aptree_write: fail to write %d bytes\n", sizeof(int) * count_node);
00418     return FALSE;
00419   }
00420   if (myfwrite(right, sizeof(int), count_node, fp) < count_node) {
00421     jlog("Error: aptree_write: fail to write %d bytes\n", sizeof(int) * count_node);
00422     return FALSE;
00423   }
00424   if (myfwrite(value, sizeof(int), count_node, fp) < count_node) {
00425     jlog("Error: aptree_write: fail to write %d bytes\n", sizeof(int) * count_node);
00426     return FALSE;
00427   }
00428   if (save_data_func != NULL) {
00429     /* write leaf node data */
00430     err = FALSE;
00431     aptree_write_leaf(fp, root, save_data_func, &err);
00432   }
00433   if (err) {
00434     jlog("Error: aptree_write: error occured when writing tree leaf data\n");
00435     return FALSE;
00436   }
00437 
00438   free(value);
00439   free(right);
00440   free(left);
00441 
00442   return TRUE;
00443 }
00444 
00445 
00446 boolean
00447 aptree_read(FILE *fp, APATNODE **root, BMALLOC_BASE **mroot, void *data, boolean (*load_data_func)(void **, void *, FILE *))
00448 {
00449   int count_node, count_branch, count_data, maxbit;
00450   int *left, *right, *value;
00451   int num, did;
00452   boolean err;
00453   APATNODE *nodelist;
00454   APATNODE *node;
00455   int i;
00456 
00457   if (*root != NULL) {
00458     jlog("Error: aptree_read: root node != NULL!\n");
00459     return FALSE;
00460   }
00461 
00462   /* read header */
00463   if (myfread(&count_node, sizeof(int), 1, fp) < 1) {
00464     jlog("Error: aptree_read: fail to read header\n");
00465     return FALSE;
00466   }
00467   if (myfread(&count_data, sizeof(int), 1, fp) < 1) {
00468     jlog("Error: aptree_read: fail to read header\n");
00469     return FALSE;
00470   }
00471   jlog("Stat: aptree_read: %d nodes (%d branch + %d data)\n",
00472        count_node, count_node - count_data, count_data);
00473   /* prepare buffer */
00474   left = (int *)mymalloc(sizeof(int) * count_node);
00475   right = (int *)mymalloc(sizeof(int) * count_node);
00476   value = (int *)mymalloc(sizeof(int) * count_node);
00477   /* read data */
00478   if (myfread(left, sizeof(int), count_node, fp) < count_node) {
00479     jlog("Error: aptree_read: fail to read %d bytes\n", sizeof(int) * count_node);
00480     return FALSE;
00481   }
00482   if (myfread(right, sizeof(int), count_node, fp) < count_node) {
00483     jlog("Error: aptree_read: fail to read %d bytes\n", sizeof(int) * count_node);
00484     return FALSE;
00485   }
00486   if (myfread(value, sizeof(int), count_node, fp) < count_node) {
00487     jlog("Error: aptree_read: fail to read %d bytes\n", sizeof(int) * count_node);
00488     return FALSE;
00489   }
00490   /* allocate nodes */
00491   nodelist = (APATNODE *)mybmalloc2(sizeof(APATNODE) * count_node, mroot);
00492   for(i=0;i<count_node;i++) {
00493     node = &(nodelist[i]);
00494     if (left[i] == -1) {
00495       node->left0 = NULL;
00496     } else {
00497       node->left0 = &(nodelist[left[i]]);
00498     }
00499     if (right[i] == -1) {
00500       node->right1 = NULL;
00501     } else {
00502       node->right1 = &(nodelist[right[i]]);
00503     }
00504     if (left[i] == -1 && right[i] == -1) {
00505       /* load leaf data node */
00506       if ((*load_data_func)(&(node->value.data), data, fp) == FALSE) {
00507         jlog("Error: aptree_read: failed to load leaf data entity\n");
00508         return FALSE;
00509       }
00510     } else {
00511       /* set thres bit */
00512         node->value.thres_bit = value[i];
00513     }
00514   }
00515   /* set root node */
00516   *root = &(nodelist[0]);
00517   
00518   free(value);
00519   free(right);
00520   free(left);
00521 
00522   return TRUE;
00523 }