#ifndef CA_UWATERLOO_ALUMNI_DWHARDER_AVERAGE_TREE #define CA_UWATERLOO_ALUMNI_DWHARDER_AVERAGE_TREE #include #include #include #include #include #include // Author: Douglas Wilhelm Harder // Copyright (c) 2011 by Douglas Wilhelm Harder. All rights reserved. // Construct, modify, query and display a binary search tree // with N pseudo-random entries. namespace Data_structures { /**************************************************** * ************************************************ * * * Random Tree Facts * * * ************************************************ * ****************************************************/ // Note: this class uses mrand48/srand48. // If this is not available, you can always use rand/srand. class Random_tree { public: class statistics; Random_tree( int ); ~Random_tree(); // Constants const int N; const double LOG_SIZE; const double SQRT_SIZE; const double EXPECTED_AVERAGE_DEPTH; void regenerate(); void sample(); void erase_insert( bool = true, int = 1 ); void list() const; void draw() const; void svg( char * ) const; statistics current() const; statistics initial() const; statistics between() const; // Nested 'statistics' class class statistics { public: double height; double height_std; double average_depth; double average_depth_std; statistics( double, double, double, double ); void print() const; }; private: class node; static double const GAMMA = 0.57721566490153286061; node *array; int *stack; int *depth; int root; // Statistics int within_count; int between_count; double sum_of_initial_heights; double sum_of_squares_of_initial_heights; double sum_of_initial_average_depths; double sum_of_squares_of_initial_average_depths; double sum_of_heights; double sum_of_squares_of_heights; double sum_of_average_depths; double sum_of_squares_of_average_depths; double sum_of_heights_between_trees; double sum_of_variances_of_heights_between_trees; double sum_of_average_depths_between_trees; double sum_of_variances_of_average_depths_between_trees; class node { public: int value; int parent; int left; int right; }; int front( int ) const; int back( int ) const; void draw( int, int ) const; void svg( std::ofstream&, int&, int, int ) const; void generate_random_leaf( int ); }; /* * Random_tree Constructor * Random_tree( int n ): * * Allocate memory for three arrays of size N = max( 1, n ): * 1. The array 'array' will store N nodes, * 2. The arrays 'stack' and 'depth' will be used in the functions current() and svg() to * traverse the tree in a pre-order depth-first traversal. * * The constructor calls regenerate() which constructs a new * pseudo-randomly constructed tree. * * Run time: O(N^2) with an average Th(N*ln(N)) * Memory: Th(N) */ Random_tree::Random_tree( int n ): N( std::max( 1, n ) ), LOG_SIZE( std::log( static_cast( N ) ) ), SQRT_SIZE( std::sqrt( static_cast( N ) ) ), EXPECTED_AVERAGE_DEPTH( 2.0*(N + 1.0)/(1.0*N)*( GAMMA + std::log( static_cast( N ) ) ) - 4.0 ), array( new node[N] ), stack( new int[N] ), depth( new int[N] ), within_count( 0 ), between_count( -1 ), sum_of_initial_heights( 0.0 ), sum_of_squares_of_initial_heights( 0.0 ), sum_of_heights( 0.0 ), sum_of_squares_of_heights( 0.0 ), sum_of_heights_between_trees( 0.0 ), sum_of_variances_of_heights_between_trees( 0.0 ), sum_of_initial_average_depths( 0.0 ), sum_of_squares_of_initial_average_depths( 0.0 ), sum_of_average_depths( 0.0 ), sum_of_squares_of_average_depths( 0.0 ), sum_of_average_depths_between_trees( 0.0 ), sum_of_variances_of_average_depths_between_trees( 0.0 ) { regenerate(); } /* * Random_tree Destructor * ~Random_tree(): * * Deallocate the memory for the three arrays. * * Run time: Th(1) * Memory: Th(1) */ Random_tree::~Random_tree() { delete [] array; delete [] stack; delete [] depth; } Random_tree::statistics::statistics( double a, double b, double c, double d ): height( a ), height_std( b ), average_depth( c ), average_depth_std( d ) { // Does nothing } /* * stastistics current() const * * Perform a pre-order depth-first traversal of the * pseudo-randomly generated tree and gather statistics * about it, including: * * 1. The height of the tree, * 2. The standard deviation of the height of the tree, * 3. The average depth of the tree. * 4. The standard deviation of the average depth of the tree. * * This function uses the arrays 'stack' and 'depth'. * * Run time: Th(N) * Memory: Th(N) */ Random_tree::statistics Random_tree::current() const { int n = 0; int height = 0; double tipl = 0.0; // The following does a pre-order depth-first traversal: // The root node is pushed onto a stack, and // while the stack is not empty: // the top is popped off the stack, stack[0] = root; depth[0] = 0; int st = 1; while ( st != 0 ) { // Pop the top off the stack --st; int t = stack[st]; int d = depth[st]; // Update the statistics ++n; height = std::max( height, d ); tipl += d; // If the right tree is not empty, push it onto the stack if ( array[t].right >= 0 ) { stack[st] = array[t].right; depth[st] = d + 1; ++st; } // If the left tree is not empty, push it onto the stack if ( array[t].left >= 0 ) { stack[st] = array[t].left; depth[st] = d + 1; ++st; } } if ( n != N ) { exit( -1 ); } return statistics( height, 0.0, tipl/static_cast( n ), 0.0 ); } Random_tree::statistics Random_tree::initial() const { double D = static_cast( between_count ) + 1.0; return statistics( sum_of_initial_heights / D, std::sqrt( ( sum_of_squares_of_initial_heights - sum_of_initial_heights*sum_of_initial_heights/D ) / ( D + 1.0 ) ), sum_of_initial_average_depths / D, std::sqrt( ( sum_of_squares_of_initial_average_depths - sum_of_initial_average_depths*sum_of_initial_average_depths/D ) / ( D + 1.0 ) ) ); } Random_tree::statistics Random_tree::between() const { double D = static_cast( between_count ); return statistics( sum_of_heights_between_trees/D, std::sqrt( sum_of_variances_of_heights_between_trees / D ), sum_of_average_depths_between_trees/D, std::sqrt( sum_of_variances_of_average_depths_between_trees / D ) ); } /* * void statistics::print() * * Print the statistics in a human-readable format. * * Run time: Th(1) * Memory: Th(1) */ void Random_tree::statistics::print() const { // Print out the relevant statistics // std::cout << "N: " << N << std::endl; // std::cout << "ln( N ): " << LOG_SIZE << std::endl; // std::cout << "sqrt( N ): " << SQRT_SIZE << std::endl; // std::cout << "2*(N + 1)/N*(gamma + ln(N)) - 4: " << EXPECTED_AVERAGE_DEPTH << std::endl; std::cout << "Height: " << height << std::endl; if ( height_std != 0 ) { std::cout << "Standard Deveation of Height: " << height_std << std::endl; } std::cout << "Average Depth of a Node: " << average_depth << std::endl; if ( average_depth_std != 0 ) { std::cout << "Standard Deveation of the Average Ddepth: " << average_depth_std << std::endl; } } /* * void generate_random_leaf( int posn ) * * At the indicated index position, generate a new * leaf node (left and right subtrees set to -1) * storing a pseudo-randomly generated integer. * * Run time: Th(1) * Memory: Th(1) */ void Random_tree::generate_random_leaf( int posn ) { array[posn].value = mrand48(); array[posn].left = -1; array[posn].right = -1; } /* * void erase_insert( bool left, int repetitions ) * * Remove a pseudo-randomly chosen node from the tree and then * insert a new pseudo-randomly generated leaf 'repetitions' times. * * The argument indicates whether a full node should be * replace by: * 1. The minimum node of the right subtree ( only_right == true, the default ), or * 2. The maximum node of the left subtree. * * Once the removals and insertions are performed, the statistics of the * tree are added to accummulating statistics of this tree. * * Run time: O(rep*h) * Memory: Th(1) */ void Random_tree::erase_insert( bool only_right, int repetitions ) { for ( int i = 0; i < repetitions; ++i ) { ///////////////////////////////////// // Pop a Random Node from the Tree // ///////////////////////////////////// // Pick a position within the tree to erase int posn = mrand48() % N; posn = ( posn >= 0 ) ? posn : posn + N; // Find the parent of the node to be erased int parent = array[posn].parent; // If the node is full, copy the front value from the // the right sub-tree and then reframe that node as the // one to be erased if ( array[posn].left >= 0 && array[posn].right >= 0 ) { // By default and if left == false, // Choose the minimum from the right subtree, else // Choose the maximum from the left subtree. int fb = ( only_right || ( ( i & 1 ) == 1 ) ) ? front( array[posn].right ) : back( array[posn].left ); array[posn].value = array[fb].value; posn = fb; parent = array[posn].parent; } // Now we are in one of three cases: // 1. The left sub-tree is not empty, // 2. The right sub-tree is not empty, // 3. The node being deleted is a leaf node. if ( array[posn].left >= 0 ) { // The left sub-tree is not empty: // Set the parent of the left node to be the parent of the current node, and // Set the parent to point to the left sub-tree array[array[posn].left].parent = parent; if ( parent == -1 ) { // We are at the root root = array[posn].left; } else if ( array[parent].left == posn ) { array[parent].left = array[posn].left; } else { array[parent].right = array[posn].left; } } else if ( array[posn].right >= 0 ) { // The left sub-tree is not empty: // Set the parent of the right node to be the parent of the current node, and // Set the parent to point to the right sub-tree array[array[posn].right].parent = parent; if ( parent == -1 ) { // We are at the root root = array[posn].right; } else if ( array[parent].left == posn ) { array[parent].left = array[posn].right; } else { array[parent].right = array[posn].right; } } else { // Set the parent to point to an empty node (-1) if ( array[parent].left == posn ) { array[parent].left = -1; } else { array[parent].right = -1; } } /////////////////////////////////////// // Push a Random Value into the Tree // /////////////////////////////////////// // Place the new value into the node previously erased generate_random_leaf( posn ); // Starting at the root, determine where that new node should go; // once that position is found: // 1. Update the node to point back to the parent, and // 2. Update the parent to get the new node. int k = root; while ( true ) { if ( array[posn].value < array[k].value ) { // If the left child is empty (-1), // place the new node at that location, else // move to that location. if ( array[k].left == -1 ) { array[k].left = posn; array[posn].parent = k; break; } else { k = array[k].left; } } else { // If the right child is empty (-1), // place the new node at that location, else // move to that location. if ( array[k].right == -1 ) { array[k].right = posn; array[posn].parent = k; break; } else { k = array[k].right; } } } } // Add the information about the final state of the tree // to the statistics for this tree. // // Only once the tree is regenerated will the average of these // statistics be included in the average between trees. ++within_count; statistics data = current(); sum_of_heights += data.height; sum_of_squares_of_heights += data.height*data.height; sum_of_average_depths += data.average_depth; sum_of_squares_of_average_depths += data.average_depth*data.average_depth; } /* * Find the minimum/front element of a sub-tree * rooted at the node stored at array[n]. * * The minimum/front of the entire tree is front( root ). * * Run time: O(h) * Memory: Th(1) */ int Random_tree::front( int n ) const { while ( array[n].left >= 0 ) { n = array[n].left; } return n; } /* * Find the maximum/back element of a sub-tree * rooted at the node stored at array[n]. * * The maximum/back of the entire tree is back( root ). * * Run time: O(h) * Memory: Th(1) */ int Random_tree::back( int n ) const { while ( array[n].right >= 0 ) { n = array[n].right; } return n; } /* * Regenerate a new new pseudo-random tree. * * The root node is placed at array[0]. * * Average run time: O(N ln(N)) * Worst-case run time: O(N^2) * Memory: Th(1) */ void Random_tree::regenerate() { // Save the statistics for the previous tree ++between_count; if ( between_count != 0 ) { double D = static_cast( within_count ); sum_of_heights_between_trees += sum_of_heights/D; sum_of_variances_of_heights_between_trees += (sum_of_squares_of_heights - sum_of_heights*sum_of_heights/D)/(D + 1.0); sum_of_average_depths_between_trees += sum_of_average_depths/D; sum_of_variances_of_average_depths_between_trees += (sum_of_squares_of_average_depths - sum_of_average_depths*sum_of_average_depths/D)/(D + 1.0); sum_of_heights = 0.0; sum_of_squares_of_heights = 0; sum_of_average_depths = 0.0; sum_of_squares_of_average_depths = 0; } within_count = 0; // Reset the root to be at 0 and // create a new node at array[0] root = 0; generate_random_leaf( root ); array[root].parent = -1; // For each i from 1 to N - 1, create a // new node node at array[i] and insert it // into the tree at the appropriate location. for ( int i = 1; i < N; ++i ) { // Create a new leaf node. generate_random_leaf( i ); // Starting at the root, search for where the // node should be placed and break once it is // found. int k = root; while ( true ) { if ( array[i].value < array[k].value ) { if ( array[k].left == -1 ) { array[k].left = i; array[i].parent = k; break; } else { k = array[k].left; } } else { if ( array[k].right == -1 ) { array[k].right = i; array[i].parent = k; break; } else { k = array[k].right; } } } } // Save the statistics for the initial sizes statistics data = current(); sum_of_initial_heights += data.height; sum_of_squares_of_initial_heights += data.height * data.height; sum_of_initial_average_depths += data.average_depth; sum_of_squares_of_initial_average_depths += data.average_depth * data.average_depth; } /* * Print the nodes of the tree including: * The value, * The location of the left child, * The location of the right child, and * The location of the parent node. * * Run time: O(N) * Memory: Th(1) */ void Random_tree::list() const { for ( int k = 0; k < N; ++k ) { std::cout << '(' << array[k].value << ",\t" << array[k].left << ",\t" << array[k].right << ",\t" << array[k].parent << ")" << std::endl; } } /* * Perform an in-order traversal of the tree in order to print it. * * The routine is recursive. * The public routine calls the private recursive routine with the default arguements. * The output should be looked at by tilting the head to the left: * * * * * * * * * * * * * * * * * * * * * * * ^ * | * root * * is the tree * * * <- root * \ * * * / \ * * * * / \ * * * * / / * * * * / \ * * * * * The connections must be interpreted; however, there is no possibility * of ambiguitity. * * Run time: O(N) * Memory: Th(h) */ void Random_tree::draw() const { draw( root, 0 ); } void Random_tree::draw( int node, int depth ) const { if ( array[node].left >= 0 ) { draw( array[node].left, depth + 1 ); } for ( int i = 0; i < depth; ++i ) { std::cout << ' '; } std::cout << '*' << std::endl; if ( array[node].right >= 0 ) { draw( array[node].right, depth + 1 ); } } /* * Print the tree as a scalable vector graphic. * This still requires work. Currently, it only prints the nodes. * At this point, it performs an in-order traversal. * * The routine is recursive. * The public routine calls the recursive routine with the default arguements. * * Run time: O(N) * Memory: Th(h) */ void Random_tree::svg( char *filename ) const { int n = 0; // Keep track of the order in which they are printed // Open the output file stream std::ofstream fout( filename, std::ios::out ); // Print the svg opening tag to the svg file fout << "" << std::endl; svg( fout, n, root, 0 ); // Print the svg closing tag to the file fout << "" << std::endl; // This closes the file fout.close(); } void Random_tree::svg( std::ofstream &fout, int &n, int node, int depth ) const { if ( array[node].left >= 0 ) { svg( fout, n, array[node].left, depth + 1 ); } ++n; fout << "\t" << std::endl; if ( array[node].right >= 0 ) { svg( fout, n, array[node].right, depth + 1 ); } } } #endif