/* * * * An implementation of set operations using treaps * Margaret Reid-Miller * June 1998 * * * The code here contains the set operations union, itersection, * and delete, where sets are maintained as randomized * search trees (treaps)[1]. There are four versions: * 1) serial persistent (the input sets remain unchanged) * 2) serial destructive (the input sets modified to make the results) * 3) parallel persistent * 4) parallel destructive. * * The parallel functions use MIT's Cilk runtime system for shared * memory multiprocessors (see ). * * The code below includes no memory management nor explicit * deallocation or garbage collection. For a memory managment * implementation for the parallel persistent code see * . See [2] * for the analysis and experiments on the parallel persistent version * * [1] R. Seidel and C.R. Aragon. Randomized Search Trees. Algorithmica, * 16:464-497, 1996. * * [2] G.E. Blelloch and M. Reid-Miller. Fast Set Operations Using * Treaps. In Proceedings of the 10th Annual ACM Symposium on * Parallel Algorithms and Architectures. June 1998. */ #include #include /* for random, malloc */ #include /* redifines names of cilk and c unsafe * library functions */ #define TALLOC() malloc(NODE_SIZE) #define COPY_DATA(_new, _old) \ ( \ _new = TALLOC(), \ _new->key = (_old->key), \ _new->priority = (_old->priority), \ _new->left = (_old->left), \ _new->right = (_old->right), \ _new \ ) #define COPY_LFT(_new, _old) \ ( \ _new = TALLOC(), \ _new->key = (_old->key), \ _new->priority = (_old->priority), \ _new->left = (_old->left), \ _new \ ) #define COPY_RGT(_new, _old) \ ( \ _new = TALLOC(), \ _new->key = (_old->key), \ _new->priority = (_old->priority), \ _new->right = (_old->right), \ _new \ ) #define less_than(i,j) ((i) < (j)) #define less_equal(i,j) ((i) <= (j)) #define equal(i,j) ((i) == (j)) #define greater_than(i,j) ((i) > (j)) #define greater_equal(i,j) ((i) >= (j)) #define not_equal(i,j) ((i) != (j)) #define TRUE 1 #define FALSE 0 typedef int bool; typedef int key_type; typedef struct key_set { int n; key_type *data; } Key_set; typedef struct tnode *Tree_ptr; typedef struct tnode { Tree_ptr left; /* left subtree (keys less than key) */ key_type key; /* key (kept in in-order) */ Tree_ptr right; /* right subtree (keys greater than key) */ int priority; /* priority (kept in heap-order) */ }Tree_node; size_t NODE_SIZE = sizeof(Tree_node); /**************************** SERIAL CODE ******************************/ /* Destructively splits the treap "t" into two treaps: the "left_tree" * is a treap with key values less than "key" and the "right_tree" is * a treap with key values greater than "key". Searches down the tree * in key order to find the root of the subtree that contains keys * that are too large (small) to be to in the left (right) subtree * (then changes direction of the search). Makes this subtree the * other branch of right (left) subtree. This subtree in turn has * nodes that are too small (large) and needs to be moved back to * where the subtree was taken, and so on until it reaches a leaf or a * node with the same key as "key". Returns either the leaf (NULL) or * this node. */ Tree_ptr destruct_set_split(Tree_ptr *left_tree, Tree_ptr *right_tree, Tree_ptr t, key_type key) { Tree_node N; /* Place to hold roots of left_tree and right_tree */ Tree_ptr l; /* follows right spine of left subtree */ Tree_ptr r; /* follows left spine of right subtree */ int on_left; if (t == NULL) { *left_tree = *right_tree = NULL; return NULL; } on_left = greater_than (key, t->key); if (on_left){ N.right = t; /* save left_tree root */ N.left = NULL; } else { N.left = t; /* save right_tree root */ N.right = NULL; } r = &N; /* r->left will get next piece of right subtree */ l = &N; /* l->right will get next piece of left subtree */ while ((t != NULL) && (not_equal(key, t->key))) { if (on_left) { do { /* Search down right spine of left subtree. */ l = t; t = t->right; } while ((t != NULL) && (greater_than(key, t->key))); r->left = t; /* Next piece of right subtree. */ on_left = FALSE; } else { do { /* Search down left spine of right subtree. */ r = t; t = t->left; } while ((t != NULL) && (less_than(key, t->key))); l->right = t; /* Next piece of left subtree. */ on_left = TRUE; } } if (t != NULL) { /* found a node with equal key */ l->right = t->left; /* t->left is in left subtree */ r->left = t->right; /* t->right is in right subtree */ } *left_tree = N.right; *right_tree = N.left; return t; } /* Splits treap "t" by "key" into "left_tree" and "right_tree" without * destroying "t". That is, the nodes that become the right spine of * the left tree and the nodes that become the left spine of the right * tree are new and are copies of the nodes that were on the path * followed during the split except with new children. If "t" has a * node with the same key as "key"then returns that node, otherwise * returns NULL. */ Tree_ptr set_split(Tree_ptr *left_tree, Tree_ptr *right_tree, Tree_ptr t, key_type key) { Tree_node N; /* Place to hold roots of left_tree and right_tree */ Tree_ptr l; /* follows right spine of left subtree */ Tree_ptr r; /* follows left spine of right subtree */ Tree_ptr new; /* new node in which to write new data */ int on_left; /* whether current node is in left_tree */ if (t == NULL) { *left_tree = *right_tree = NULL; return NULL; } on_left = greater_than (key, t->key); r = &N; /* r->left will get next piece of right subtree */ l = &N; /* l->right will get next piece of left subtree */ while ((t != NULL) && (not_equal(key, t->key))) { if (on_left) { do { /* Copy right spine of left subtree. */ new = COPY_LFT(new, t); l->right = new; l = new; t = t->right; } while ((t != NULL) && (greater_than(key, t->key))); on_left = FALSE; } else { do { /* Copy left spine of right subtree. */ new = COPY_RGT(new, t); r->left = new; r = new; t = t->left; } while ((t != NULL) && (less_than(key, t->key))); on_left = TRUE; } } if (t == NULL) { l->right = NULL; r->left = NULL; } else { /* found a node with equal key */ l->right = t->left; /* t->left is in left subtree */ r->left = t->right; /* t->right is in right subtree */ } *left_tree = N.right; *right_tree = N.left; return t; } /* Destructive form of join because destroys "l" and "r". Takes two * treap "l" and "r" where largest key in "l" is less than the * smallest key in "r" and returns a treap that is the join of the two * treaps. Traversing the right spine of "l" and the left spine of * "r" interleaving the spines so that the path has decreasing * priority values. */ Tree_ptr destruct_set_join(Tree_ptr l, Tree_ptr r) { Tree_ptr root; /* root of joined tree */ Tree_ptr p; /* parent node */ int on_left; /* whether root of left tree has higher priority */ if (l == NULL) return r; if (r == NULL) return l; root = (on_left = (l->priority >= r->priority)) ? l : r; while ((l != NULL) && (r != NULL)) { if (on_left) { do { /* follow right path of left tree */ p = l; l = l->right; } while ((l != NULL) && (l->priority >= r->priority)); on_left = FALSE; /* switch to right tree */ p->right = r; } else { do { /* follow left path of right tree */ p = r; r = r->left; } while ((r != NULL) && (l->priority < r->priority)); on_left = TRUE; /* switch to left tree */ p->left = l; } } return root; } /* Takes two treap "l" and "r" where largest key in "l" is less than * the smallest key in "r" and returns a treap that is the join of the * two treaps. Traverses the right spine of "l" and the left spine of * "r" interleaving the spines so that the path has decreasing * priority values. Does not modify "l" or "r". */ Tree_ptr set_join(Tree_ptr l, Tree_ptr r) { Tree_ptr root; /* root of joined tree */ Tree_ptr p; /* parent node */ Tree_ptr new; /* new node in which to write new data */ bool on_left; /* whether left has higher priority */ bool p_on_left; /* whether parent was from left */ if (l == NULL) return r; if (r == NULL) return l; p_on_left = (l->priority >= r->priority); if (p_on_left) { p = COPY_LFT(p, l); l = l->right; if (l != NULL) on_left = (l->priority >= r->priority); } else { p = COPY_RGT(p, r); r = r->left; if (r != NULL) on_left = (l->priority >= r->priority); } root = p; while ((l != NULL) && (r != NULL)) { if (on_left) { do { /* follow right path of left tree */ new = COPY_LFT(new, l); if (p_on_left) p->right = new; else p->left = new; p = new; p_on_left = TRUE; l = l->right; } while ((l != NULL) && (l->priority >= r->priority)); on_left = FALSE; /* switch to right tree */ } else { do { /* follow left path of right tree */ new = COPY_RGT(new, r); if (p_on_left) p->right = new; else p->left = new; p = new; p_on_left = FALSE; r = r->left; } while ((r != NULL) && (l->priority < r->priority)); on_left = TRUE; /* switch to left tree */ } } if (p_on_left) p->right = r; else p->left = l; return root; } /* Takes two treaps, "r1" and "r2", and returns a treap that is the union * of the two of them keeping the original treaps. Assumes each treap * has no duplicates and removes any duplicates between them. */ Tree_ptr set_union_ser(Tree_ptr r1, Tree_ptr r2) { Tree_ptr root; Tree_ptr left, right; Tree_ptr duplicate; if (r1 == NULL) return r2; if (r2 == NULL) return r1; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = set_split(&left, &right, r2, r1->key); root = COPY_DATA(root, r1); root->left = set_union_ser(r1->left, left); root->right = set_union_ser(r1->right, right); return root; } /* Destructive version of set_union because it destroys r1 and * r2. Doesn't free duplicates. */ Tree_ptr destruct_set_union_ser(Tree_ptr r1, Tree_ptr r2) { Tree_ptr left, right, root; Tree_ptr duplicate; if (r1 == NULL) return r2; if (r2 == NULL) return r1; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = destruct_set_split(&left, &right, r2, r1->key); r1->left = destruct_set_union_ser(r1->left, left); r1->right = destruct_set_union_ser(r1->right, right); return r1; } /* Takes two treaps, "r1" and "r2", and returns a treap that is the set * intersection of the two of them while keeping the original treaps. * Assumes each treap has no duplicates within it. */ Tree_ptr set_intersect_ser(Tree_ptr r1, Tree_ptr r2) { Tree_ptr duplicate; Tree_ptr root, left, right; if ((r2 == NULL) || (r1 == NULL)) return NULL; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = set_split(&left, &right, r2, r1->key); left = set_intersect_ser(r1->left, left); right = set_intersect_ser(r1->right, right); if (duplicate == NULL) { return destruct_set_join(left, right); } else { root = COPY_DATA(root, r1); root->left = left; root->right = right; return root; } } /* Destructive version of set_intersect_ser because it destroys r1 and * r2. Doesn't free nodes not in the intersection. */ Tree_ptr destruct_set_intersect_ser(Tree_ptr r1, Tree_ptr r2) { Tree_ptr duplicate; Tree_ptr left, right; if ((r2 == NULL) || (r1 == NULL)) return NULL; if (r1->priority < r2->priority) { duplicate = r1; r1 = r2; r2 = duplicate; } duplicate = destruct_set_split(&left, &right, r2, r1->key); left = destruct_set_intersect_ser(r1->left, left); right = destruct_set_intersect_ser(r1->right, right); if (duplicate == NULL) { return destruct_set_join(left, right); } else { r1->left = left; r1->right = right; return r1; } } /* Takes two treaps, "r1" and "r2", and returns a treap that is the set * difference of the two of them while keeping the original treaps. * Assumes each treap has no duplicates within it. If "r2_is_subtr" is * false it reverses the role of "r1" and "r2". */ Tree_ptr set_diff_ser(Tree_ptr r1, Tree_ptr r2, bool r2_is_subtr) { Tree_ptr root, left, right, duplicate; if ((r1 == NULL) || (r2 == NULL)) return r2_is_subtr?r1:r2; if (r1->priority < r2->priority) { r2_is_subtr = !r2_is_subtr; left = r1; r1 = r2; r2 = left; } duplicate = set_split(&left, &right, r2, r1->key); left = set_diff_ser(r1->left, left, r2_is_subtr); right = set_diff_ser(r1->right, right, r2_is_subtr); /* Keep r1 if no duplicate and subtracting r2 otherwise delete r1. */ if ((duplicate == NULL) && (r2_is_subtr)) { root = COPY_DATA(root, r1); root->left = left; root->right = right; return root; } else { return set_join(left, right); } } Tree_ptr set_difference_ser(Tree_ptr r1, Tree_ptr r2) { return set_diff_ser(r1, r2, TRUE); } /* Destructive version of set_diff_ser because it destroys r1 and r2. * Does not free deleted nodes. */ Tree_ptr destruct_set_diff_ser(Tree_ptr r1, Tree_ptr r2, bool r2_is_subtr) { Tree_ptr left, right, duplicate; if ((r1 == NULL) || (r2 == NULL)) return r2_is_subtr?r1:r2; if (r1->priority < r2->priority) { r2_is_subtr = !r2_is_subtr; left = r1; r1 = r2; r2 = left; } duplicate = destruct_set_split(&left, &right, r2, r1->key); left = destruct_set_diff_ser(r1->left, left, r2_is_subtr); right = destruct_set_diff_ser(r1->right, right, r2_is_subtr); /* Keep r1 if no duplicate and subtracting r2 otherwise delete r1. */ if ((duplicate == NULL) && (r2_is_subtr)) { r1->left = left; r1->right = right; return r1; } else { return destruct_set_join(left, right); } } Tree_ptr destruct_set_difference_ser(Tree_ptr r1, Tree_ptr r2) { return destruct_set_diff_ser(r1, r2, TRUE); } /* Returns a new leaf node with key "key" and random priority. */ Tree_ptr create_node(key_type *key) { Tree_ptr new = TALLOC(); new->key = *key; new->priority = rand(); new->left = NULL; new->right = NULL; return new; } /* Creates a treap from the key set "keys". Removes any duplicate * keys. */ Tree_ptr set_create_ser(Key_set *keys) { Tree_ptr root, r1, r2; int half = (keys->n)/2; Key_set keys1; Key_set keys2; key_type *data = keys->data; if (keys->n == 1) { return create_node(data); } /* split the data in half */ keys1.data = data; keys1.n = half; keys2.data = data + half; keys2.n = keys->n - half; r1 = set_create_ser(&keys1); r2 = set_create_ser(&keys2); root = destruct_set_union_ser(r1, r2); return root; } /**************************** PARALLEL CODE ****************************/ /* Takes two treaps, "r1" and "r2", and in parallel returns a treap that * is the set union of the two of them while keeping the original * treaps. Assumes each treap has no duplicates and removes any * duplicates between them. */ cilk Tree_ptr set_union(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr root; Tree_ptr left, right; Tree_ptr duplicate; if (r1 == NULL) return r2; if (r2 == NULL) return r1; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = set_split(&left, &right, r2, r1->key); root = COPY_DATA(root, r1); if (depth == 0) { root->left = set_union_ser(r1->left, left); root->right = set_union_ser(r1->right, right); } else { root->left = spawn set_union(r1->left, left, depth-1); root->right = spawn set_union(r1->right, right, depth-1); sync; } return root; } /* Destructive version of set_union becuase it destroys r1 and * r2. Doesn't free duplicates. */ cilk Tree_ptr destruct_set_union(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr left, right, root; Tree_ptr duplicate; if (r1 == NULL) return r2; if (r2 == NULL) return r1; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = destruct_set_split(&left, &right, r2, r1->key); if (depth == 0) { r1->left = destruct_set_union_ser(r1->left, left); r1->right = destruct_set_union_ser(r1->right, right); } else { r1->left = spawn destruct_set_union(r1->left, left, depth-1); r1->right = spawn destruct_set_union(r1->right, right, depth-1); sync; } return r1; } /* Takes two treaps, "r1" and "r2", and in parallel returns a treap that * is the set intersection of the two of them while keeping the original * treaps. Assumes each treap has no duplicates within it. */ cilk Tree_ptr set_intersect(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr duplicate; Tree_ptr root, left, right; if ((r2 == NULL) || (r1 == NULL)) return NULL; if (r1->priority < r2->priority) { root = r1; r1 = r2; r2 = root; } duplicate = set_split(&left, &right, r2, r1->key); if (depth == 0) { left = set_intersect_ser(r1->left, left); right = set_intersect_ser(r1->right, right); } else { left = spawn set_intersect(r1->left, left, depth-1); right = spawn set_intersect(r1->right, right, depth-1); sync; } if (duplicate == NULL) { return destruct_set_join(left, right); } else { root = COPY_DATA(root, r1); root->left = left; root->right = right; return root; } } /* Destructive version of set_intersect because it destroys r1 and * r2. Doesn't free nodes not in the intersection. */ cilk Tree_ptr destruct_set_intersect(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr duplicate; Tree_ptr left, right; if ((r2 == NULL) || (r1 == NULL)) return NULL; if (r1->priority < r2->priority) { duplicate = r1; r1 = r2; r2 = duplicate; } duplicate = destruct_set_split(&left, &right, r2, r1->key); if (depth == 0) { left = destruct_set_intersect_ser(r1->left, left); right = destruct_set_intersect_ser(r1->right, right); } else { left = spawn destruct_set_intersect(r1->left, left, depth-1); right = spawn destruct_set_intersect(r1->right, right, depth-1); sync; } if (duplicate == NULL) { return destruct_set_join(left, right); } else { r1->left = left; r1->right = right; return r1; } } /* Takes two treaps, "r1" and "r2", and in parallel returns a treap that * is the set difference of them while keeping the original treaps. * Assumes each treap has no duplicates within it. If "r2_is_subtr" is * false reverses the role of "r1" and "r2". */ cilk Tree_ptr set_diff(Tree_ptr r1, Tree_ptr r2, bool r2_is_subtr, int depth) { Tree_ptr root, left, right, duplicate; if ((r1 == NULL) || (r2 == NULL)) return r2_is_subtr?r1:r2; if (r1->priority < r2->priority) { r2_is_subtr = !r2_is_subtr; left = r1; r1 = r2; r2 = left; } duplicate = set_split(&left, &right, r2, r1->key); if (depth == 0) { left = set_diff_ser(r1->left, left, r2_is_subtr); right = set_diff_ser(r1->right, right, r2_is_subtr); } else { left = spawn set_diff(r1->left, left, r2_is_subtr, depth-1); right = spawn set_diff(r1->right, right, r2_is_subtr, depth-1); sync; } /* Keep r1 if no duplicate and subtracting r2 otherwise delete r1. */ if ((duplicate == NULL) && (r2_is_subtr)) { root = COPY_DATA(root, r1); root->left = left; root->right = right; return root; } else { /* left and right are copies of nodes in r1 */ return set_join(left, right); } } cilk Tree_ptr set_difference(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr t; t = spawn set_diff(r1, r2, TRUE, depth); sync; return t; } /* Destructive version of set_diff because it destroys r1 and r2. * Does not free deleted nodes. */ cilk Tree_ptr destruct_set_diff(Tree_ptr r1, Tree_ptr r2, bool r2_is_subtr, int depth) { Tree_ptr left, right, duplicate; if ((r1 == NULL) || (r2 == NULL)) return r2_is_subtr?r1:r2; if (r1->priority < r2->priority) { r2_is_subtr = !r2_is_subtr; left = r1; r1 = r2; r2 = left; } duplicate = destruct_set_split(&left, &right, r2, r1->key); if (depth == 0) { left = destruct_set_diff_ser(r1->left, left, r2_is_subtr); right = destruct_set_diff_ser(r1->right, right, r2_is_subtr); } else { left = spawn destruct_set_diff(r1->left, left, r2_is_subtr, depth-1); right = spawn destruct_set_diff(r1->right, right, r2_is_subtr, depth-1); sync; } /* Keep r1 if no duplicate and subtracting r2 otherwise delete r1. */ if ((duplicate == NULL) && (r2_is_subtr)) { r1->left = left; r1->right = right; return r1; } else { return destruct_set_join(left, right); } } cilk Tree_ptr destruct_set_difference(Tree_ptr r1, Tree_ptr r2, int depth) { Tree_ptr t; t = spawn destruct_set_diff(r1, r2, TRUE, depth); sync; return t; } /* Creates a treap from the key set "keys" in parallel. Removes any * duplicate keys. */ cilk Tree_ptr set_create(Key_set *keys, int depth) { Tree_ptr root, r1, r2; int half = (keys->n)/2; Key_set keys1; Key_set keys2; key_type *data = keys->data; if (keys->n == 1) { return create_node(data); } /* split the data in half */ keys1.data = data; keys1.n = half; keys2.data = data + half; keys2.n = keys->n - half; if (depth == 0) { r1 = set_create_ser(&keys1); r2 = set_create_ser(&keys2); root = destruct_set_union_ser(r1, r2); } else { r1 = spawn set_create(&keys1, depth-1); r2 = spawn set_create(&keys2, depth-1); sync; root = spawn destruct_set_union(r1, r2, depth-1); sync; } return root; } cilk int main(int argc, char *argv[]) { int n = 40; int m = 20; int i; Tree_ptr r1, r2, r3, r4, r5, r6, r7; Key_set keys; key_type *data; data = (key_type *) malloc(n*sizeof(key_type)); init_memory(); alloc_memory(10000); clear_memory(); for (i = 0; i < n; i++) { data[i] = lrand48(); } keys.n = n; keys.data = data; r1 = spawn set_create(&keys, 3); sync; for (i = 0; i < m; i++) { data[i] = lrand48(); } keys.n = m; keys.data = data; r2 = spawn set_create(&keys, 3); sync; r3 = spawn set_union(r1, r2, 3); sync; r4 = spawn set_intersect(r3, r2, 3); sync; r5 = spawn set_difference(r3, r2, 3); sync; r6 = spawn destruct_set_union(r4, r5, 3); sync; r7 = spawn destruct_set_intersect(r6, r2, 3); sync; r7 = spawn destruct_set_difference(r3, r1, 3); sync; return 0; }