树的遍历及应用

树的一个常见用途是表示目录结构: 包括 Unix 和 DOS 在内的许多常用操作系统都采用树状的目录结构.

伪代码

// Pseudocode (not compilable C++): print this entry first, then, when the
// entry is a directory, list each of its children one level deeper.
void FileSystem::listAll( int depth = 0 ) const
{
    printName( depth );  // Print the name of the object
    if( isDirectory( ) )
        // The loop header below is informal prose, not C++ syntax.
        for each file c in this directory (for each child)
            c.listAll( depth + 1 );  // each child is listed with depth + 1
}

实验(CodeBlocks-mingw)

将下面的程序保存为 showdir.c

#include <stdio.h>
#include <dirent.h>

// List the entries of one directory, one name per line.
// argv[1] names the directory; when it is omitted the current directory is
// listed. Returns 0 on success and 1 when the directory cannot be opened.
int main(int argc, char *argv[])
{
    DIR *directory_pointer;
    struct dirent *entry;
    // With no argument argv[1] is NULL, and passing NULL to opendir()/printf("%s")
    // is undefined, so fall back to ".".
    const char *directory_name = (argc > 1) ? argv[1] : ".";

    if((directory_pointer=opendir(directory_name))==NULL)
    {
        printf("Error opening %s\n",directory_name);
        return 1;
    }

    // readdir() returns NULL once every entry has been delivered.
    while( (entry=readdir(directory_pointer)) != NULL )
    {
        printf("%s\n",entry->d_name);
    }
    closedir(directory_pointer);

    return 0;
}

showdir c:\windows

实验(CodeBlocks-mingw)

下面的例子递归地显示目录列表中的文件, 不过有问题, 请修正.

#include <stdio.h>
#include "Dirent.h" //需要更改库中的 dirent.h, 加入下面几行, 然后不妨保存为 Dirent.h
/*
#define FA_ANY 0xff
#undef FA_DIREC
#define FA_DIREC 0x10
*/
#include <dos.h>
#include <io.h>
#include <direct.h>
#include <string.h>

#define MAXPATH 100

// Recursively list directory_name.
// Plain entries are printed one per line; each sub-directory gets a
// "-----name-----" banner and is then listed in turn.
// The function changes into the directory while it works (entry names are
// relative to it) and leaves again with chdir(".."), so for a multi-component
// path the caller's original working directory is not restored.
void show_directory(char *directory_name)
{
    DIR *directory_pointer;
    struct dirent *entry;
    unsigned attributes;

    if(directory_name == NULL || (directory_pointer=opendir(directory_name)) == NULL)
    {
        printf("Error opening %s\n", directory_name ? directory_name : "(null)");
        return;
    }

    // Only continue when we really are inside the directory; otherwise the
    // trailing chdir("..") would move the process one level too far up.
    if(chdir(directory_name) != 0)
    {
        printf("Error opening %s\n", directory_name);
        closedir(directory_pointer);
        return;
    }

    while((entry=readdir(directory_pointer)) != NULL)
    {
        attributes=_chmod(entry->d_name,0);
        // Recurse only into real sub-directories. "." and ".." must be skipped
        // (they would recurse forever), but a name that merely starts with a
        // dot is still an ordinary directory, so compare the whole name.
        if((attributes & FA_DIREC)
           && strcmp(entry->d_name,".")!=0
           && strcmp(entry->d_name,"..")!=0)
        {
            printf("\n\n-----%s-----\n",entry->d_name);
            show_directory(entry->d_name);
        }else
            printf("%s\n",entry->d_name);
    }

    closedir(directory_pointer);
    chdir("..");
}


// Entry point: recursively list the directory named by argv[1], or the
// current directory when no argument is given.
int main(int argc, char *argv[])
{
    //char buffer[MAXPATH];

    // show_directory() takes a writable char*, so keep the default in an array.
    char current_dir[] = ".";
    // argv[1] is NULL when the program is started without arguments.
    char *start_dir = (argc > 1) ? argv[1] : current_dir;

    //Save current directory so you can restore it later
    //getcwd(buffer,sizeof(buffer));
    show_directory(start_dir);
    //chdir(buffer);

    return 0;
}

实验: 改进下面的程序

#include <stdio.h>
#include <stdlib.h>
#include <io.h>
#include <time.h>
#include <sys/stat.h>

#include <dirent.h> 
#include <string.h>//如果去掉, 则编译会提示: warning: implicit declaration of function 'strncmp' [-Wimplicit-function-declaration]

#define MAXPATH 100


int isFolder(char* fileName);
void show_directory(char *directory_name, int depth);
void printTabString(int depth);

// Entry point: print the directory tree below argv[1], or below the current
// directory when no argument is given. More than one argument is a usage
// error (exit status 1).
int main(int argc, char *argv[])
{
    //char buffer[MAXPATH];

    //Save current directory so you can restore it later
    //getcwd(buffer,sizeof(buffer));

    int depth=0;                // depth of the starting directory
    char current_dir[]=".";     // writable default; avoids storing into argv[]
    char *start_dir=current_dir;

    if(argc > 2)
    {
        // Too many arguments: report it and stop instead of listing anyway.
        printf("\nUsage: showdir.exe \n");
        return 1;
    }
    if(argc == 2)
    {
        start_dir=argv[1];
    }

    show_directory(start_dir, depth);

    //chdir(buffer);

    return 0;
}

//return value: 1: folder, 0: file
int isFolder(char* fileName)
{
    //char* fileName = "aa.txt";
    struct _stat buf;
    int value;
    //int result;
    //result = _stat( fileName, &buf );
    _stat( fileName, &buf );
    if(_S_IFDIR & buf.st_mode){
        //printf("folder\n");
        value=1;

    }else if(_S_IFREG & buf.st_mode){
        //printf("file\n");
        value=0;
    }
    return value;
}

// Print the tree below directory_name, indenting by nesting level.
// depth is the level of directory_name itself; its entries are printed at
// depth + 1. Sub-directories are shown as "[name]" and then listed in turn;
// every other entry is shown as "|-name". Entries whose name starts with '.'
// are printed but never descended into.
// A directory that cannot be opened or entered is skipped silently.
void show_directory(char *directory_name, int depth)
{
    DIR *directory_pointer;
    struct dirent *entry;
    int child_depth=depth+1;

    if((directory_pointer=opendir(directory_name)) == NULL)
    {
        // Nothing was opened and the working directory is unchanged, so there
        // is nothing to close and no chdir("..") to undo.
        return;
    }
    if(chdir(directory_name) != 0)
    {
        closedir(directory_pointer);
        return;
    }

    while( (entry=readdir(directory_pointer)) != NULL )
    {
        // _chmod() is not used to read attributes here: it fails on the
        // "System Volume Information" folder.
        if( (strncmp(entry->d_name,".",1)!=0) && isFolder(entry->d_name) )
        {
            printTabString(child_depth);
            printf("[%s]\n",entry->d_name);
            show_directory(entry->d_name,child_depth);
        }
        else
        {
            // plain file, or a name starting with '.'
            printTabString(child_depth);
            printf("%s","|-");
            printf("%s\n",entry->d_name);
        }
    }

    // Reached only after a successful opendir() + chdir(): undo both.
    closedir(directory_pointer);
    chdir("..");
}


// Emit the indentation prefix for one tree line: a '|' followed by two
// spaces per level. Nothing is written when depth is zero or negative.
void printTabString(int depth)
{
    int level;

    if(depth > 0)
    {
        putchar('|');
        for(level = 0; level < depth; ++level)
        {
            putchar(' ');
            putchar(' ');
        }
    }
}

使用Visual Studio C++编程

dirent.h

适合Windows Visual Studio 编程的头文件 dirent.h 位于 https://github.com/tronkko/dirent

实验(Visual Studio C++)

下载此文件, 并将其加入到工程中 (比如我们将工程命名为 ls, 给 Windows 也提供一个 ls 命令).

此外还需要加载 direct.h 这个头文件.

#include <direct.h>

并且要将之前代码中的 chdir() 函数改为 _chdir(). 类似的, direct.h 头文件还提供了 _mkdir(), _rmdir() 等函数.

参考

https://blog.csdn.net/10km/article/details/51004888

前序遍历

前序遍历(preorder traversal)

在前序遍历中, 对结点的处理工作是在它的诸 child 结点被处理之前进行的.

后序遍历

后序遍历(postorder traversal)

在后序遍历中, 对一个结点的处理工作是在它的诸 child 结点被处理之后进行的.

二叉查找树

二叉树的一个重要应用是它们在查找中的应用. 树的每个结点存储一项数据. 这里为简单起见, 我们假设是整数, 并且所有项是互异的.

如果对于二叉树的每个结点 $X$, 它的左子树中的所有值都小于 $X$ 中的值, 并且它的右子树中的所有值都大于 $X$ 中的值, 则称此二叉树为二叉查找树.

二叉查找树的平均深度是 $O(\log N)$, 所以一般不必担心栈空间被用尽.

/**
 * Binary search tree interface.
 * Comparable must provide operator<; all stored items are assumed distinct.
 * Every public operation forwards to a private recursive overload that takes
 * the root of the subtree to work on.
 */
template <typename Comparable>
class BinarySearchTree
{
  public:
    BinarySearchTree( );
    BinarySearchTree( const BinarySearchTree & rhs );
    ~BinarySearchTree( );

    // Queries: none of these change the tree.
    const Comparable & findMin( ) const;
    const Comparable & findMax( ) const;
    bool contains( const Comparable & x ) const;
    bool isEmpty( ) const;
    void printTree( ) const;

    // Modifiers.
    void makeEmpty( );
    void insert( const Comparable & x );
    void remove( const Comparable & x );

    const BinarySearchTree & operator=( const BinarySearchTree & rhs );

  private:
    // One tree node: the stored item plus links to the two subtrees.
    struct BinaryNode
    {
       Comparable element;
       BinaryNode *left;
       BinaryNode *right;

       BinaryNode( const Comparable & theElement, BinaryNode *lt, BinaryNode *rt )
         : element( theElement ), left( lt ), right( rt ) { }
    };

    BinaryNode *root;   // NULL for an empty tree

    // insert/remove rewrite the link t refers to, so they are not const
    // (this matches their non-const definitions).
    void insert( const Comparable & x, BinaryNode * & t );
    void remove( const Comparable & x, BinaryNode * & t );
    BinaryNode * findMin( BinaryNode *t ) const;
    BinaryNode * findMax( BinaryNode *t ) const;
    bool contains( const Comparable & x, BinaryNode *t ) const;
    void makeEmpty( BinaryNode * & t );
    void printTree( BinaryNode *t ) const;
    BinaryNode * clone( BinaryNode *t ) const;
};

数据成员是指向根结点 root 的指针, 对于空树该指针为 NULL.

public 成员函数采用常规技术: 调用对应的 private 递归函数.

公有成员函数调用私有递归成员函数的示例

    /**
     * Returns true if x is found in the tree.
     */
    bool contains( const Comparable & x ) const
    {
        // Public entry point: start the recursive search at the root.
        return contains( x, root );
    }
    	
    /**
     * Insert x into the tree; duplicates are ignored.
     */
    void insert( const Comparable & x )
    {
        // Public entry point: hand the root link to the recursive overload.
        insert( x, root );
    }
    
    /**
     * Remove x from the tree. Nothing is done if x is not found.
     */
    void remove( const Comparable & x )
    {
        // Public entry point: hand the root link to the recursive overload.
        remove( x, root );
    }

    /**
     * Internal method to test if an item is in a subtree.
     * x is item to search for.
     * t is the node that roots the subtree.
     */
    bool contains( const Comparable & x, BinaryNode *t ) const
    {
        // The empty-subtree test must come first so t is never dereferenced
        // while it is NULL.
        if( t == NULL )
            return false;

        if( x < t->element )
            return contains( x, t->left );     // x can only be on the left
        if( t->element < x )
            return contains( x, t->right );    // x can only be on the right

        return true;    // neither smaller nor larger: match
    }

注意测试的顺序, 首先要对是否为空树进行测试, 因为如果不那么做就会产生一个企图通过 NULL 指针访问数据成员的运行错误.(做一下实验)

其余的测试应该使得最不可能的情况安排在最后进行.

这里的两个递归调用事实上都是“尾递归”, 可以用一个 while 循环来代替(做一下实验).

尾递归的使用是合理的, 因为算法表达式的简明性是以速度的降低为代价的, 而这里所使用的栈空间的量也只不过是 $O(\log N)$ 而已.

使用函数对象实现二叉查找树的示例

图 4-19 给出了使用函数对象而不是使用 Comparable 项所需要做的微小修改. 这模拟了 1.6 节的常用例程.

/**
 * Skeleton of the search tree parameterised on a comparison function object
 * instead of relying on operator< of the stored type.
 */
template <typename Object, typename Comparator=less<Object> >
class BinarySearchTree
{
  public:

    // Same methods, with Object replacing Comparable

  private:

    BinaryNode *root;
    Comparator isLessThan;      // strict ordering used for every comparison

    // Same methods, with Object replacing Comparable

    /**
     * Internal method to test if an item is in a subtree.
     * x is the item to look for.
     * t is the root of the subtree being searched.
     */
    bool contains( const Object & x, BinaryNode *t ) const
    {
        if( t == NULL )
            return false;                       // empty subtree: not found

        if( isLessThan( x, t->element ) )
            return contains( x, t->left );
        if( isLessThan( t->element, x ) )
            return contains( x, t->right );

        return true;    // Match
    }
};

findMin 和 findMax

这两个 private 例程分别返回指向树中包含最小元和最大元的结点的指针.

为执行 findMin, 从根开始并且只要有左儿子就向左进行. 终止点就是最小的元素. findMax 例程除了分支朝向右儿子外其余过程相同.

    /**
     * Internal method to find the smallest item in a subtree t.
     * Return node containing the smallest item.
     */
    BinaryNode * findMin( BinaryNode *t ) const
    {
        // An empty subtree yields NULL; a node without a left child is
        // itself the minimum. In both cases t is the value to return.
        if( t == NULL || t->left == NULL )
            return t;

        return findMin( t->left );
    }

这种递归是如此简单, 以至于许多程序员不屑于使用它. 思考一下, 能否不用递归? 怎么实现.

if(t==NULL) return NULL;
else{
	while(t->left!=NULL)
	{
		t=t->left;
	}
	return t;
}

二叉查找树的 findMax 的非递归实现

    /**
     * Internal method to find the largest item in a subtree t.
     * Return node containing the largest item.
     */
    BinaryNode * findMax( BinaryNode *t ) const
    {
        if( t == NULL )
            return NULL;            // empty subtree has no maximum

        // Walk right until the node with no right child is reached.
        BinaryNode *rightmost = t;
        while( rightmost->right != NULL )
            rightmost = rightmost->right;
        return rightmost;
    }

二叉查找树的 insert

进行插入操作的例程在概念上是很简单的. 为了将 x 插入到树 T 中, 可以像使用 contains 那样沿着树查找. 如果找到 x, 则什么也不用做(这里假定不允许有重复元)或做一些“更新”. 否则, 将 x 插入到遍历的路径上的最后一点上.

    /**
     * Internal method to insert into a subtree.
     * x is the item to insert.
     * t is the node that roots the subtree.
     * Set the new root of the subtree.
     */
    void insert( const Comparable & x, BinaryNode * & t )
    {
        if( t == NULL )
        {
            // t refers to the parent's link (or the root), so assigning to it
            // hooks the new leaf into the tree.
            t = new BinaryNode( x, NULL, NULL );
            return;
        }

        if( x < t->element )
            insert( x, t->left );
        else if( t->element < x )
            insert( x, t->right );
        // otherwise x is already present: duplicates are ignored
    }

注意, 在递归例程中, 只有当一个新树叶生成时, t 才改变.

当这种情况发生时, 就说明递归例程被其他结点 p 调用了, 该结点 p 是新生成树叶 t 的父亲. 调用将是 insert(x,p->left) 或 insert(x,p->right). 在任何一种方法中, t 是到 p->left 或 p->right 的引用. 这意味着 p->left 或 p->right 将会改变为指向新结点.

重复元的插入可以通过在结点记录中保留一个附加字段以指示此数据元出现的频率来处理. 这使整棵树增加了某些附加空间, 但是, 却比将重复信息放到树中要好(它将使树的深度变得很大).

当然, 如果 < 操作符使用的键只是一个更大的结构的一部分, 那么这种方法行不通. 此时可以把具有相同键的所有结构保留在一个辅助数据结构中, 如表或是另一棵查找树.

二叉查找树的 remove

同许多数据结构一样, 最困难的操作是删除.

结点是树叶, 则直接删除.
结点有一个儿子, 则将其父亲结点到自身的链接调整为到自己儿子的链接即可.

    /**
     * Internal method to remove from a subtree.
     * x is the item to remove.
     * t is the node that roots the subtree.
     * Set the new root of the subtree.
     */
    void remove( const Comparable & x, BinaryNode * & t )
    {
        if( t == NULL )
            return;   // Item not found; do nothing
        if( x < t->element )
            remove( x, t->left );
        else if( t->element < x )
            remove( x, t->right );
        else if( t->left != NULL && t->right != NULL ) // Two children
        {
            t->element = findMin( t->right )->element;
            remove( t->element, t->right );

        }
        else
        {
            BinaryNode *oldNode = t;
            t = ( t->left != NULL ) ? t->left : t->right;
            delete oldNode;
        }
    }

这里的 remove 函数设定为总是删除右子树中的最小元. 长此以往, 二叉树会变得越来越不平衡. 我们可以设定删除时交替删除左右子树的内容(删除左子树中的最大元). 此时需要在类中增加一个变量, 比如 long long del_count=0; del_count 可以记录删除的总次数. 根据 del_count 的奇偶性决定要删除左子树还是右子树中的内容. 当然这样做也是有缺点的, 也即是不具有随机性.

若使用C语言中的rand(), 则需要加载 stdlib.h ; 若使用 C++ 的 random 库, 则可以更好地产生随机数. 但无论怎样, 都需要额外的处理成本. 此时, 我们不妨考虑当前指针 t, 其地址本身具有一定的随机性. 不妨考虑它的最后一位（或其他位置）的值, 若是0则删除右子树中的最小元; 若是1则删除左子树中的最大元. (注意: 由于内存对齐, 堆上结点地址的最低几位通常恒为 0, 因此实际中应选取稍高的一位.)

    /**
     * Internal method to remove from a subtree, choosing the replacement side
     * from one bit of the node's address.
     * x is the item to remove.
     * t is the link to the root of the subtree; it is rewritten when the
     * subtree root changes.
     * For a node with two children the replacement is either the largest item
     * of the left subtree ("left delete") or the smallest item of the right
     * subtree ("right delete"), so deletions do not always thin the same side.
     */
    void removeByAddr( const Comparable & x, BinaryNode * & t )
    {
        if( t == NULL )
            return;   // Item not found; do nothing
        if( x < t->element )
            removeByAddr( x, t->left );
        else if( t->element < x )
            removeByAddr( x, t->right );
        else if( t->left != NULL && t->right != NULL ) // Two children
        {
            // A pointer cannot be used with '&' directly; convert it to an
            // integer first. Bit 4 is tested instead of bit 0 because the
            // lowest address bits are normally all zero for heap-allocated
            // nodes (alignment) -- NOTE(review): the best bit depends on the
            // allocator; adjust if the choice turns out lopsided.
            const unsigned long long address = reinterpret_cast<unsigned long long>( t );
            if( ( ( address >> 4 ) & 1 ) != 0 )
            {
                // Left delete: copy the largest item of the left subtree into
                // t, then remove that item from the left subtree. The node
                // found by findMax cannot simply be deleted in place because
                // it may still have a left child.
                t->element = findMax( t->left )->element;
                removeByAddr( t->element, t->left );
            }
            else
            {
                // Right delete: copy the smallest item of the right subtree
                // into t, then remove that item from the RIGHT subtree.
                t->element = findMin( t->right )->element;
                removeByAddr( t->element, t->right );
            }
        }
        else
        {
            // Zero or one child: splice the node out through the reference t.
            BinaryNode *oldNode = t;
            t = ( t->left != NULL ) ? t->left : t->right;
            delete oldNode;
        }
    }

如果删除的次数不多, 则通常使用的策略是懒惰删除(lazy deletion). 即当一个元素要被删除时, 它仍留在树中, 而只是做了一个被删除的记号. 这种做法在有重复项时很流行, 因为此时记录出现频率数的数据成员可以减 1.

注: (有重复项的二叉查找树的remove方法本应该就这么做.)

析构函数和复制赋值操作符

位于 public 部分的析构函数 ~BinarySearchTree().

public:
    /**
	 * Destructor for the tree
	 */
    ~BinarySearchTree( )
    {
        // Release every node by emptying the tree.
        makeEmpty( );
    }
	/**
     * Make the tree logically empty.
     */
    void makeEmpty( )
    {
        // Public entry point: hand the root link to the recursive overload.
        makeEmpty( root );
    }

位于 private 部分的清空函数 void makeEmpty( BinaryNode * & t).

private:
    /**
     * Internal method to make subtree empty.
     * (recursive method)
     */
    void makeEmpty( BinaryNode * & t )
    {
        if( t == NULL )
            return;             // already empty; the link is NULL as required

        // Children first, then the node itself.
        makeEmpty( t->left );
        makeEmpty( t->right );
        delete t;
        t = NULL;               // clear the caller's link as well
    }

public:
    /**
     * Deep copy.
     */
    const BinarySearchTree & operator=( const BinarySearchTree & rhs )
    {
        // Self-assignment: nothing to copy, and emptying first would destroy
        // the very tree that is about to be cloned.
        if( this == &rhs )
            return *this;

        makeEmpty( );
        root = clone( rhs.root );
        return *this;
    }

    /**
     * Internal method to clone subtree.
     *
     * 思考, 如何输出子树克隆过程中各个节点复制的次序?
     */
    BinaryNode * clone( BinaryNode *t ) const
    {
        if( t == NULL )
            return NULL;        // an empty subtree clones to an empty subtree

        // Print node t at this point to observe the order in which nodes
        // are copied.
        return new BinaryNode( t->element, clone( t->left ), clone( t->right ) );
    }

线索树

线索树(Threaded tree)

我们知道二叉查找树中的元素是有一定顺序的, 但是要将其中的元素排序, 需要有额外的高效方法. 在二叉树中对非满结点添加一些前后相继结点的信息, 得到所谓的线索树, 可以用来将二叉树中的结点进行排序.

在二叉树中, 每个叶子有两个空链接(left和right都是nullptr), 只有一个儿子的结点也有一个空链接. 一般的, 可以证明, 具有 $N$ 个结点的二叉树有 $N+1$ 个 nullptr 指针.

因此, 二叉查找树中分配的这些指针有一半被浪费了. 现在设指针 pt 指向某结点 $P$. 若 pt->left==nullptr, 则令其指向 $P$ 的(中序)前驱结点(inorder predecessor); 若 pt->right==nullptr, 则令其指向 $P$ 的(中序)后继结点(inorder successor). 这样所构建的二叉查找树被称为线索树(threaded tree), 附加的链就叫做线索(thread).

线索树

线索树(threaded tree)

书本 P.134, Ex 4.49.

由于具有 $N$ 个结点的二叉查找树有 $N+1$ 个 NULL 指针, 因此在二叉查找树中指定给链接信息的空间有一半被浪费了. 这部分为 NULL 的空间可以利用起来, 存储一些有用的信息, 比如存储该节点的中序前驱元(inorder predecessor)或中序后继元(inorder successor).

具体的, 若一个结点(我们用指针 t 表示它)的左儿子为 NULL, 则将这个 NULL 改存 t 的中序前驱元; 若它的右儿子为 NULL, 则将指向右儿子这个 NULL 指针改存 t 的中序后继元.

这些附加的链就是所谓的线索(thread).

实现线索树之前, 必须考虑的几个问题

如何从实际的结点(t), 区分其两个 link 是线索还是指向儿子的链接?
编写线索树的插入例程(insert)和删除例程(remove)
使用线索树的优点是什么?

要添加线索, 应在 insert 例程中添加.

线索树的优点

不需要额外的栈或向量. 但是每个结点中新增了一个bool变量rightThread, 用以标识是否是线索结点. 否则无法判断结点中的链接(link)是真实的指向儿子的链接还是指向下一个元素的链接.

线索树可以是“升序排列”, 如这里采用了rightThread, 当右儿子为空时, 存储其下一个结点的地址. 也可以是“降序排列”.

线索树的插入例程

从 BinarySearchTree 的 insert 修改而来.

    void insert( const Comparable & x, BinaryNode * & t, BinaryNode * nextThreadNode)
    {
        if( t == NULL )
        {
            t = new BinaryNode( x, NULL, nextThreadNode , true);
        }

        else if( x < t->element )
        {
            //现在的线索树是从小到大的顺序, 因此, 此时直接调用 insert()
            insert( x, t->left, t );
        }
        else if( t->element < x )
        {
            //需要判断t是否是线索结点, 如果是, 则其右儿子为空, 可以插入新的结点.
            if(t->rightThread)
            {
                //!!!
                //此时 t 真实的右儿子为空, 因此第二个参数是 NULL,
                //而 t 是线索结点, t->right 指向下一个结点, 因此作为第三个参数传递
                insert( x, NULL, t->right );
            }

        }
        else
            ;  // Duplicate; do nothing
    }

线索树的应用

线索树可以用于多项式的按幂排列.

AVL 树

AVL(Adelson-Velskii and Landis) 树

AVL 树是指带有平衡条件(balance condition)的二叉查找树.

这个平衡条件必须要容易保持,
而且必须保证树的深度是 $O(\log N)$.

最简单的想法是要求根结点的左右子树具有相同高度. (但这并不能保证树的深度小.)

另一种平衡条件是要求每个结点都必须有相同高度的左子树和右子树.

如果空树的高度定义为 $-1$(通常就是这么定义的), 那么只有具有 $2^k-1$ 个结点的理想平衡树满足这个条件.

设这样的树的总的结点数为 $x$, 则 $x+1$ 可以被 $2$ 整除, 因为左子树和右子树的结点数目相同.
对于左子树、右子树分别类似讨论, 可知 $x+1$ 是 2 的次幂.

因此, 虽然这种平衡条件保证了树的深度小, 但是它太严格, 使用面极小. 为此我们需要放宽条件.

AVL 树

AVL 树是指每个结点的左子树和右子树的高度最多相差 $1$ 的二叉查找树.

上面的两棵二叉查找树中, 哪一个是 AVL 树?

给定高度的最少结点的 AVL 树

在高度为 $h$ 的 AVL 树中, 最少结点数 $S(h)$ 由下面的递推关系给出

\[ S(h)=S(h-1)+S(h-2)+1,\quad S(0)=1,\ S(1)=2. \]

原因在于要求 AVL 树的结点最少, 则其左子树和右子树的结点也要要求最少, 加上一个根结点就得到上面的递推公式.
从上面的递推关系式看出, 函数 $S(h)$ 与斐波那契数列(Fibonacci)密切相关.

试求出 $S(h)$ 的具体表达式. (见问题2125 http://www.atzjg.net/admin/do/view_question.php?qid=2125)

AVL 树的插入操作

当进行插入操作时, 需要更新通向根结点路径上那些结点的所有平衡信息, 而插入操作隐含着困难的原因在于, 插入一个结点可能破坏 AVL 树的特性.

如果发生这种情况, 那么就要恢复平衡的性质后才认为这一步插入操作完成.

事实上, 这总可以通过对树进行简单的修正来做到, 我们称其为旋转(rotation).

在插入之后, 只有那些从插入点到根结点之路径上的结点的平衡性可能被改变, 因为只有这些结点的子树可能发生变化.
沿着这条路径, 上行至根, 在更新平衡信息时, 可以发现一个结点, 它的新平衡破坏了 AVL 条件.
我们将指出如何在第一个这样的结点（即最深的结点）重新平衡这棵树, 并证明这一重新平衡保证整个树满足 AVL 条件.

必须重新平衡的结点

假设 $a$ 是必须重新平衡的结点. 由于任意结点最多有两个儿子, 因此高度不平衡时, $a$ 结点的两棵子树的高度相差 $2$. 容易看出, 这种不平衡可能出现在下面的四种情况中:

对 $a$ 的左儿子 $L$ 的左子树进行一次插入.
对 $a$ 的左儿子 $L$ 的右子树进行一次插入.
对 $a$ 的右儿子 $R$ 的左子树进行一次插入.
对 $a$ 的右儿子 $R$ 的右子树进行一次插入.

第一种情况(左左——右右)

单旋转(single rotation)

第二种情况(左右——右左)

双旋转(double rotation)

AVL树的代码

给出 AVL树代码的一些参考文献.

https://www.cnblogs.com/skywang12345/p/3577360.html#a2
https://blog.csdn.net/qq1263292336/article/details/49936485
https://www.geeksforgeeks.org/avl-tree-set-1-insertion/

书本中给出的关于AVL树的源代码

#ifndef AVL_TREE_H
#define AVL_TREE_H

#include "dsexceptions.h"
#include <iomanip>
#include <iostream>    // For NULL
using namespace std;

// AvlTree class
//
// CONSTRUCTION: with ITEM_NOT_FOUND object used to signal failed finds
//
// ******************PUBLIC OPERATIONS*********************
// int height()           --> get Height of the tree
// void insert( x )       --> Insert x
// void remove( x )       --> Remove x
// bool contains( x )     --> Return true if x is present
// Comparable findMin( )  --> Return smallest item
// Comparable findMax( )  --> Return largest item
// boolean isEmpty( )     --> Return true if empty; else false
// void makeEmpty( )      --> Remove all items
// void printTree( )      --> Print tree in preorder (root, left, right)
// ******************ERRORS********************************
// Throws UnderflowException as warranted

//template <class Comparable> //这里写class或typename都可以
/**
 * AVL tree: a binary search tree in which, for every node, the heights of
 * the left and right subtrees differ by at most ALLOWED_IMBALANCE.
 * Comparable must provide operator<; duplicates are ignored on insertion.
 */
template <typename Comparable>
class AvlTree
{
  public:
    AvlTree( ) : root( NULL )
      { }
    AvlTree( const AvlTree & rhs ) : root( NULL )
    {
        *this = rhs;    // deep copy through operator=
    }

    ~AvlTree( )
    {
        makeEmpty( );
    }

    /**
     * Return the height of the tree (-1 for an empty tree).
     */
     int height() const
     {
         return height(root);
     }


    // Preorder traversal of the AVL tree: node, left subtree, right subtree.
    void preOrderErgodic()
    {
        preOrderErgodic(root);
    }

    // In-order traversal of the AVL tree: left subtree, node, right subtree.
    void midOrderErgodic()
    {
        midOrderErgodic(root);
    }

    // Postorder traversal of the AVL tree: left subtree, right subtree, node.
    void postOrderErgodic()
    {
        postOrderErgodic(root);
    }


    /**
     * Find the smallest item in the tree.
     * Throw UnderflowException if empty.
     */
    const Comparable & findMin( ) const
    {
        if( isEmpty( ) )
            throw UnderflowException( );
        return findMin( root )->element;
    }

    /**
     * Find the largest item in the tree.
     * Throw UnderflowException if empty.
     */
    const Comparable & findMax( ) const
    {
        if( isEmpty( ) )
            throw UnderflowException( );
        return findMax( root )->element;
    }


    /**
     * Returns true if x is found in the tree.
     */
    bool contains( const Comparable & x ) const
    {
        return contains( x, root );
    }


    /**
     * Test if the tree is logically empty.
     * Return true if empty, false otherwise.
     */
    bool isEmpty( ) const
    {
        return root == NULL;
    }

    /**
     * Print the tree contents in preorder (node, left subtree, right
     * subtree), or "Empty tree" when there is nothing to print.
     */
    void printTree( ) const
    {
        if( isEmpty( ) )
            cout << "Empty tree" << endl;
        else
            printTree( root );
    }

    /**
     * Make the tree logically empty.
     */
    void makeEmpty( )
    {
        makeEmpty( root );
    }

    /**
     * Insert x into the tree; duplicates are ignored.
     */
    void insert( const Comparable & x )
    {
        insert( x, root );
    }

    /**
     * Remove x from the tree. Nothing is done if x is not found.
     * Delegates to the textbook remove( x, t ), which rebalances every node
     * on the path back to the root in a single descent.
     * Exercise: non-lazy deletion is comparatively hard; try implementing
     * lazy deletion instead.
     */
    void remove( const Comparable & x )
    {
        remove( x, root );
    }


    /**
     * Deep copy.
     */
    const AvlTree & operator=( const AvlTree & rhs )
    {
        if( this != &rhs )
        {
            makeEmpty( );
            root = clone( rhs.root );
        }
        return *this;
    }

  private:
    // A struct is a class whose members are public by default.
    struct AvlNode
    {
        Comparable element;
        AvlNode   *left;
        AvlNode   *right;
        int       height;     // height of the subtree rooted at this node

        AvlNode( const Comparable & theElement, AvlNode *lt,
                                                AvlNode *rt, int h = 0 )
          : element( theElement ), left( lt ), right( rt ), height( h ) { }
    };

    AvlNode *root;

    // Largest height difference between the two subtrees of a node that is
    // still considered balanced.
    static const int ALLOWED_IMBALANCE = 1;

  public:
    /**
     * Return the node holding x, or NULL if x is not in the tree.
     */
    // Recursive lookup of the node whose value is x.
    AvlNode * recursiveSearch(const Comparable & x)
    {
        return recursiveSearch(root, x);
    }
    // Iterative lookup of the node whose value is x.
    AvlNode * search(const Comparable & x)
    {
        return search(root, x);
    }

  private:
    // Preorder traversal: print the node, then its left and right subtrees.
    void preOrderErgodic(AvlNode *t) const
    {
        if(t != NULL)
        {
            cout<< t->element << " ";
            preOrderErgodic(t->left);
            preOrderErgodic(t->right);
        }
    }

    // In-order traversal: left subtree, node, right subtree (sorted order).
    // The recursion must stay in-order all the way down.
    void midOrderErgodic(AvlNode *t) const
    {
        if(t != NULL)
        {
            midOrderErgodic(t->left);
            cout<< t->element << " ";
            midOrderErgodic(t->right);
        }
    }

    // Postorder traversal: left subtree, right subtree, then the node.
    // The recursion must stay postorder all the way down.
    void postOrderErgodic(AvlNode *t) const
    {
        if(t != NULL)
        {
            postOrderErgodic(t->left);
            postOrderErgodic(t->right);
            cout<< t->element << " ";
        }
    }

    /**
     * Internal method to insert into a subtree.
     * x is the item to insert.
     * t is the node that roots the subtree.
     * Set the new root of the subtree.
     */
    void insert( const Comparable & x, AvlNode * & t )
    {
        if( t == NULL )
        {
            t = new AvlNode( x, NULL, NULL );
        }
        else if( x < t->element )
        {
            // t->left is passed by reference, so the call may replace it.
            insert( x, t->left );
            if( height( t->left ) - height( t->right ) == 2 )
            {
                if( x < t->left->element )
                {// left-left case: single right rotation
                    rotateWithLeftChild( t );
                }
                else
                {// left-right case: left rotation, then right rotation
                    doubleWithLeftChild( t );
                }
            }
        }
        else if( t->element < x )
        {
            insert( x, t->right );
            if( height( t->right ) - height( t->left ) == 2 )
            {
                if( t->right->element < x )
                {// right-right case: single left rotation
                    rotateWithRightChild( t );
                }
                else
                {// right-left case: right rotation, then left rotation
                    doubleWithRightChild( t );
                }
            }
        }
        else
            ;  // Duplicate; do nothing
        // Refresh the height stored at t.
        t->height = max( height( t->left ), height( t->right ) ) + 1;
    }


    /**
     * Internal method to remove the node ptX from the subtree rooted at t.
     * Returns the new root of that subtree (NULL when either argument is
     * NULL or the subtree becomes empty).
     * Adapted from skywang12345's implementation:
     * https://www.cnblogs.com/skywang12345/p/3577360.html#a2
     * Differences from that version: the replacement taken from the right
     * subtree is its minimum, and the height of t is refreshed before
     * returning so the balance checks higher up see current values.
     */
    AvlNode * remove(AvlNode * & t, AvlNode * ptX)
    {
        // Empty subtree, or nothing to delete.
        if (t==NULL || ptX==NULL)
            return NULL;

        if (ptX->element < t->element)
        {// the node to delete is in the left subtree of t
            t->left = remove(t->left, ptX);

            // The left side may have become shorter; rebalance as if a node
            // had been inserted into t->right.
            if (height(t->right) - height(t->left) == 2)
            {
                AvlNode * r =  t->right;
                if (height(r->left) > height(r->right))
                {
                    t = doubleWithRightChild(t);
                }
                else
                {
                    t = rotateWithRightChild(t);
                }
            }
        }
        else if (t->element < ptX->element)
        {// the node to delete is in the right subtree of t
            t->right = remove(t->right, ptX);

            // The right side may have become shorter; rebalance as if a node
            // had been inserted into t->left.
            if (height(t->left) - height(t->right) == 2)
            {
                AvlNode *l =  t->left;
                if (height(l->right) > height(l->left))
                {
                    t = doubleWithLeftChild(t);
                }
                else
                {
                    t = rotateWithLeftChild(t);
                }
            }
        }
        else
        {// t is the node to delete
            if ((t->left!=NULL) && (t->right!=NULL))
            {// two children
                if (height(t->left) > height(t->right))
                {
                    // Left subtree is taller: copy its largest item into t,
                    // then delete that node from the left subtree.
                    AvlNode * _maxNode = findMax(t->left);
                    t->element = _maxNode->element;
                    t->left = remove(t->left, _maxNode);
                }
                else
                {
                    // Left subtree is not taller (equal, or right is taller
                    // by one): copy the smallest item of the right subtree
                    // into t, then delete that node from the right subtree.
                    // Taking from the side that is at least as tall keeps t
                    // balanced after the deletion.
                    AvlNode * _minNode = findMin(t->right);
                    t->element = _minNode->element;
                    t->right = remove(t->right, _minNode);
                }
            }
            else
            {// at most one child
                AvlNode * tmp = t;
                // t is a reference to the parent's link, so this assignment
                // makes the parent point at the surviving child (or NULL).
                t = (t->left!=NULL) ? t->left : t->right;
                delete tmp;
            }
        }

        // Keep the stored height current for the callers further up.
        if (t != NULL)
            t->height = max( height( t->left ), height( t->right ) ) + 1;

        return t;
    }

    // remove as given in the textbook (fourth edition).
    /**
     * Internal method to remove from a subtree.
     * x is the item to remove.
     * t is the node that roots the subtree.
     * Set the new root of the subtree.
     */
     void remove(const Comparable & x, AvlNode * & t)
     {
         if(t==nullptr)
            return; // empty subtree: nothing to do

         if(x < t->element)
            remove(x, t->left);
         else if(t->element < x)
            remove(x, t->right);
         else if(t->left!=nullptr && t->right != nullptr)// two children
         {
             t->element=findMin(t->right)->element;
             remove(t->element, t->right);
         }else
         {
             AvlNode * oldNode =t;
             t=(t->left !=nullptr)?t->left:t->right;
             delete oldNode;
         }

         balance(t);
     }

     // Restore the AVL property at t and refresh its height.
     // Assumes the subtrees of t are themselves balanced and that t is out of
     // balance by at most one level beyond ALLOWED_IMBALANCE.
     void balance(AvlNode * & t)
     {
         if(t==nullptr)
            return ;

         if(height(t->left)-height(t->right)>ALLOWED_IMBALANCE)
         {
             if(height(t->left->left)>=height(t->left->right))
                rotateWithLeftChild(t);
             else
                doubleWithLeftChild(t);
         }else
         if(height(t->right)-height(t->left)>ALLOWED_IMBALANCE)
         {
             if(height(t->right->right)>=height(t->right->left))
                rotateWithRightChild(t);
             else
                doubleWithRightChild(t);
         }

         t->height=max(height(t->left), height(t->right))+1;
     }


    /**
     * Internal method to find the smallest item in a subtree t.
     * Return node containing the smallest item.
     */
    AvlNode * findMin( AvlNode *t ) const
    {
        if( t == NULL )
            return NULL;
        if( t->left == NULL )
            return t;
        return findMin( t->left );
    }

    /**
     * Internal method to find the largest item in a subtree t.
     * Return node containing the largest item.
     */
    AvlNode * findMax( AvlNode *t ) const
    {
        if( t != NULL )
            while( t->right != NULL )
                t = t->right;
        return t;
    }


    /**
     * Internal method to test if an item is in a subtree.
     * x is item to search for.
     * t is the node that roots the tree.
     */
    bool contains( const Comparable & x, AvlNode *t ) const
    {
        if( t == NULL )
            return false;
        else if( x < t->element )
            return contains( x, t->left );
        else if( t->element < x )
            return contains( x, t->right );
        else
            return true;    // Match
    }
/****** NONRECURSIVE VERSION*************************
    bool contains( const Comparable & x, AvlNode *t ) const
    {
        while( t != NULL )
            if( x < t->element )
                t = t->left;
            else if( t->element < x )
                t = t->right;
            else
                return true;    // Match

        return false;   // No match
    }
*****************************************************/
    // Recursive lookup; only operator< is required of Comparable.
    AvlNode * recursiveSearch(AvlNode * t, const Comparable & x) const
    {
        if (t==NULL)
            return NULL;

        if (x < t->element)
            return recursiveSearch(t->left, x);
        if (t->element < x)
            return recursiveSearch(t->right, x);
        return t;   // neither smaller nor larger: found
    }

    // Iterative lookup; only operator< is required of Comparable.
    AvlNode * search(AvlNode * t, const Comparable & x) const
    {
        while (t!=NULL)
        {
            if (x < t->element)
                t = t->left;
            else if (t->element < x)
                t = t->right;
            else
                break;      // found
        }
        return t;
    }

    /**
     * Internal method to make subtree empty.
     */
    void makeEmpty( AvlNode * & t )
    {
        if( t != NULL )
        {
            makeEmpty( t->left );
            makeEmpty( t->right );
            delete t;
        }
        t = NULL;
    }

    /**
     * Internal method to print a subtree rooted at t in sorted order
     * (kept for reference; the active version below prints in preorder).
     */
    /*
    void printTree( AvlNode *t ) const
    {
        if( t != NULL )
        {
            printTree( t->left );
            cout << t->element << endl;
            printTree( t->right );
        }
    }
    */

    /**
     * Internal method to print a subtree rooted at t in preorder,
     * items separated by a single space.
     */
    void printTree(AvlNode * t) const
    {
        if(t != NULL)
        {
            cout << t->element << " ";
            printTree(t->left);
            printTree(t->right);
        }
    }

    /**
     * Internal method to clone subtree.
     */
    AvlNode * clone( AvlNode *t ) const
    {
        if( t == NULL )
            return NULL;
        else
            return new AvlNode( t->element, clone( t->left ), clone( t->right ), t->height );
    }
        // Avl manipulations
    /**
     * Return the height of node t or -1 if NULL.
     */
    int height( AvlNode *t ) const
    {
        return t == NULL ? -1 : t->height;
    }

    int max( int lhs, int rhs ) const
    {
        return lhs > rhs ? lhs : rhs;
    }

    /**
     * Rotate binary tree node with left child (a right rotation).
     * For AVL trees, this is a single rotation for case 1.
     * Update heights, then set new root.
     * The textbook version returns void; this one also returns the new root.
     */
    AvlNode * rotateWithLeftChild( AvlNode * & k2 )
    {
        AvlNode *k1 = k2->left;
        k2->left = k1->right;
        k1->right = k2;
        k2->height = max( height( k2->left ), height( k2->right ) ) + 1;
        k1->height = max( height( k1->left ), k2->height ) + 1;
        k2 = k1;

        return k1;
    }

    /**
     * Rotate binary tree node with right child (a left rotation).
     * For AVL trees, this is a single rotation for case 4.
     * Update heights, then set new root.
     * The textbook version returns void; this one also returns the new root.
     */
    AvlNode * rotateWithRightChild( AvlNode * & k1 )
    {
        AvlNode *k2 = k1->right;
        k1->right = k2->left;
        k2->left = k1;
        k1->height = max( height( k1->left ), height( k1->right ) ) + 1;
        k2->height = max( height( k2->right ), k1->height ) + 1;
        k1 = k2;

        return k2;
    }

    /**
     * Double rotate binary tree node: first left child
     * with its right child; then node k3 with new left child.
     * For AVL trees, this is a double rotation for case 2.
     * Update heights, then set new root.
     * The textbook version returns void; this one also returns the new root.
     */
    AvlNode * doubleWithLeftChild( AvlNode * & k3 )
    {
        rotateWithRightChild( k3->left );
        return rotateWithLeftChild( k3 );
    }

    /**
     * Double rotate binary tree node: first right child
     * with its left child; then node k1 with new right child.
     * For AVL trees, this is a double rotation for case 3.
     * Update heights, then set new root.
     * The textbook version returns void; this one also returns the new root.
     */
    AvlNode * doubleWithRightChild( AvlNode * & k1 )
    {
        rotateWithLeftChild( k1->right );
        return rotateWithRightChild( k1 );
    }
};

#endif

伸展树(splay tree)

本节描述一种相对简单的数据结构, 叫做伸展树(splay tree). 它保证从空树开始任意连续 $M$ 次对树的操作最多花费 $O(M\log N)$ 的时间. 这里 $N$ 是指该树的结点数.

摊还(amortized)运行时间

一般的, 当 $M$ 次操作的序列总的最坏情形运行时间为 $O(M\cdot f(N))$ 时, 就称它的摊还(amortized)运行时间为 $O(f(N))$.

因此, 一棵伸展树每次操作的摊还代价是 $O(\log(N))$. 经过一系列操作, 有的操作可能花费时间多一些, 有的可能要少一些.

摊还分析 (amortized analysis) 是一种分析一个操作序列中所执行的所有操作的平均时间分析方法。

伸展树所基于的事实

对于二叉查找树来说, 每次操作最坏情形时间 $O(N)$ 并非不好, 只要它相对不常发生就行.

任何一次访问, 即使花费 $O(N)$ 时间, 仍然可能非常快.

二叉查找树的问题在于, 一整个序列的访问全都是不好的(耗时的)操作, 这种情况不仅有可能发生, 而且并不罕见. 此时, 累积的运行时间就变得很重要了.

如果任意特定操作可以有最坏时间界 $O(N)$, 而我们仍然要求一个 $O(\log N)$ 的摊还时间界, 那么很清楚, 只要有一个结点被访问, 它就必须被移动. 否则, 一旦我们发现一个深层结点, 就有可能不断地对它进行访问. 如果这个结点不改变位置, 而每次访问又花费 $O(N)$, 那么 $M$ 次访问将花费 $M\cdot N$ 的时间.

一个简单的想法(不能直接使用)

比如要访问一棵树的某个结点 $k_1$, 在它到根结点的路径上依次进行单旋转, 直到将其推向树根.

这种做法的好处在于对结点 $k_1$ 的访问变得容易了(暂时的),
但不足之处是它把另外一个结点几乎推向和 $k_1$ 以前同样的深度. 而对那个结点的访问又将把另外的结点向深处推进, 如此等等.

伸展(splaying)

伸展(splaying)的方法类似于上面介绍的旋转的想法, 不过在旋转如何实施上稍微有些选择的余地.

我们仍然从底向上沿着访问路径旋转. 令 $X$ 是在访问路径上的一个（非根）结点, 我们将在这个路径上实施旋转操作.

如果 $X$ 的父结点是树根, 那么只要旋转 $X$ 和树根. 这就是沿着访问路径上的最后的旋转.
否则, $X$ 就有父亲 $P$ 和祖父 $G$, 存在两种情况以及对称的情形需要考虑.
- 第一种之字形(zig-zag): $X$ 是 $P$ 的右儿子, 而 $P$ 是 $G$ 的左儿子. 此时需要执行类似 AVL 树那样的双旋转.
- 第二种一字形: $X$ 和 $P$ 都是左儿子或者都是右儿子. 此时执行杠杆操作, 即以父结点为支点, 提升 $X$, 降低 $G$.

伸展操作不仅将访问的结点移动到根处, 而且还有把访问路径上的大部分结点的深度大致减少一半的效果. 某些浅的结点最多向下推两个层次.

一个特殊的例子

这些图着重强调了伸展树基本的和关键的性质.

当访问路径长而导致超出正常查找时间的时候, 这些旋转将对未来的操作有益.
当访问耗时很少的时候, 这些旋转则不那么有益, 甚至有害.

伸展树有几种变体. 参见第12章.

树的遍历

按顺序打印二叉查找树的例程

    /**
     * Print the tree contents in sorted order.
     */
    void printTree( ostream & out = cout ) const
    {
        // Non-empty tree: delegate to the recursive helper starting at root.
        if( !isEmpty( ) )
        {
            printTree( root, out );
            return;
        }

        // Empty tree: emit a fixed placeholder line instead.
        out << "Empty tree" << endl;
    }
    
    /**
     * Internal method to print a subtree rooted at t in sorted order.
     */
    void printTree( BinaryNode *t, ostream & out ) const
    {
        // In-order walk: recurse into the left subtree, print the node's
        // element on its own line, then continue with the right subtree.
        // The right-hand descent is written as a loop; a NULL subtree
        // prints nothing.
        for( ; t != NULL; t = t->right )
        {
            printTree( t->left, out );
            out << t->element << endl;
        }
    }

使用后序遍历计算树的高度的例程

    /**
     * Internal method to compute the height of a subtree rooted at t.
     */
    int height( BinaryNode *t )
    {
        // An empty subtree has height -1, so a single node gets height 0.
        if( t == NULL )
            return -1;

        // Post-order: both children are measured before this node.
        const int leftHeight  = height( t->left );
        const int rightHeight = height( t->right );
        return max( leftHeight, rightHeight ) + 1;
    }

B 树

B 树主要用于数据库和文件系统.

迄今为止, 我们始终假设可以把整个数据结构存储到计算机的主存中. 但如果数据太多, 主存装不下时, 那么意味着必须把数据结构放到磁盘上.

此时时间计算中大O模型不再适用, 因为在大O分析中, 我们假设所有的操作花费同等的时间. 但现在这样假设不再合适, 特别是涉及到磁盘I/O的时候.

例如, 一台 500 MIPS 的机器每秒执行 5 亿条指令. MIPS (Million Instructions Per Second, 每秒百万条指令) 是衡量处理器速度的一个指标. 比如: 1996 年, Intel Pentium Pro, 541 MIPS at 200 MHz. 这是相当快的, 主要是因为速度主要依赖于电的特性. 另一方面, 磁盘是机械运动的, 它的速度主要依赖于转动磁盘和移动磁头的时间. 许多磁盘以 7200 RPM 旋转, 也就是每分钟 7200 转, 1转占用1/120秒, 即 8.3ms. 平均认为磁盘转到一半的时候发现要寻找的信息; 因此如果忽略其他因素, 那么可以得到访问时间为 8.3ms(这是非常宽松的估计; 9-11ms 的访问时间更为常见). 因此, 每秒大约可以进行120次磁盘访问. 若不和处理器的速度比较, 那么这听起来还是相当不错的.

B 树的定义

阶为 M 的 B 树是一棵具有下列结构特性的树.

数据项存储在树叶上.
非叶子结点存储直到 M-1 个键, 以指示搜索的方向; 键 i 代表子树 i+1 中的最小的键.
树的根或者是一片树叶, 或者其儿子数在 2 到 M 之间.
除根外, 所有非树叶结点的儿子数在 $\lceil M/2\rceil$ 和 $M$ 之间.
所有的树叶都在相同的深度上, 并有 $\lceil L/2\rceil$ 和 $L$ 之间个数据项.

$L$ 的确定稍后描述.

B 树的例子

考虑一棵 5 阶 B 树. 除根外, 所有的非叶子结点的儿子数都在 3 和 5 之间(从而有 2 到 4 个关键字). 根可以只有两个儿子(根的儿子数在 2 到 5 之间). 这里, 我们让 $L=5$. 每片树叶有 3 到 5 个数据项.

B 树的叶子结点数和非叶子结点数之间的关系

设 T 是一棵 $M$ 阶 B 树(即除根之外的所有非叶子结点, 至多有 $M$ 个儿子). 有 $n$ 层. (这里设根在第0层, 第1层是根的儿子, 至少有两个结点.) 考虑每个非叶子结点都恰有 $M$ 个儿子的最满情形, 此时第 $i$ 层有 $M^i$ 个结点, 树叶在第 $n-1$ 层, 则总的结点数 $N$ 为 \[ N= M^0+M^1+M^2+\cdots+M^{n-1}=\frac{M^n-1}{M-1} \] 其中叶子结点数为 $M^{n-1}$, 故非叶子结点数与叶子结点数的比值为 \[ \frac{\frac{M^n-1}{M-1}-M^{n-1}}{M^{n-1}}=\frac{M^{n-1}-1}{M^{n-1}(M-1)}\approx\frac{1}{M-1} \]

外存使用的B 树

每个结点代表一个磁盘区块, 根据所存储的项数决定 $M$ 和 $L$. 例如, 设一个区块可容纳8192($=2^{13}$)字节. 每个关键字使用 32 个字节. 在一棵 $M$ 阶 B 树中, 每个非叶子结点有 $M-1$ 个关键字, 共占 $32M-32$ 字节; 此外还有 $M$ 个分支, 每个分支本质上是另一个磁盘区块的编号, 假设占 4 个字节, 共 $4M$ 字节. 因此一个非叶子结点总共需要 $36M-32$ 字节. 由于 \[ 36*228-32 = 8176 < 8192 < 8212=36*229-32 \] 故取 $M=228$.

由于每个数据记录是 256 字节, 因此我们能够把 32 个记录装入一个区块中(8192/256=32). 于是选择 $L=32$. 这样就保证每片树叶有 16--32个数据记录以及除根之外每个内部结点至少以114(=228/2)种方式分叉. 如果有 1000万个记录, 则至多需要 625000(=10000000/16)片叶子结点. $114^3=1481544$, 故在最坏的情形, 也就是以114种方式分叉, 则树叶在第4层上(注意最坏情形下根节点只有2个分叉). 若总的结点数是 $N$, 则最坏情形的访问次数近似地由 $\log_{M/2}N$ 给出. 这里叶子结点数是 625000, 可大致推出总的结点数 \[N=(625000*228-1)/227\approx 627753.30\] 于是 $\log_{M/2}N\approx\log_{114}627753\approx 2.8187$

B 树的操作图示

BTree.pdf

B 树的代码

代码来自https://www.programiz.com/dsa/b-tree

// Searching a key on a B-tree in C++

#include <iostream>
using namespace std;

// One node of the B-tree. BTree is a friend, so it can reach the private
// fields directly.
// NOTE(review): no destructor is declared here, so the arrays behind `keys`
// and `C` are never released by this class - confirm whether that is intended.
class TreeNode {
    int *keys;      // array holding the keys stored in this node
    int t;          // 2t is the order, i.e. a node has at most 2t children
    TreeNode **C;   // array of TreeNode* child pointers
    int n;          // number of keys currently stored in this node
    bool leaf;      // true when this node is a leaf

    friend class BTree;

public:
    TreeNode(int temp, bool bool_leaf);

    // Lookup and in-order output.
    TreeNode *search(int k);
    void traverse();

    // Insertion helpers.
    void insertNonFull(int k);
    void splitChild(int i, TreeNode *y);
};

class BTree {
  TreeNode *root;
  int t;

   public:
  BTree(int temp) {
    root = NULL;
    t = temp;
  }

  void traverse() {
    if (root != NULL)
      root->traverse();
  }

  TreeNode *search(int k) {
    return (root == NULL) ? NULL : root->search(k);
  }

  void insert(int k);
};

/*
* 插入新元素 k
*/
void BTree::insert(int k)
{
    if (root == NULL)
    {//如果B树是空树, 则直接新建一结点(当然是叶子结点)
        root = new TreeNode(t, true);
        root->keys[0] = k;//将此元素k设置为第0个键(key)
        root->n = 1;//结点中元素个数为1.
    } else
    {//根非空
        //如果root结点中元素个数等于 2*deg-1, 则可以直接插入
        if (root->n == 2 * t - 1)
        {
            TreeNode *s = new TreeNode(t, false);

            s->C[0] = root;//root作为第一个儿子

            s->splitChild(0, root);//分裂root结点

            int i = 0;
            if (s->keys[0] < k)
                i++;
            s->C[i]->insertNonFull(k);

            root = s;//将新生成的结点作为root结点
        } else
        {
            root->insertNonFull(k);
        }
    }
}


/*  每个结点形如
*     [v|key1|v|key2|v|key3|v]
* 这里有 2t-1 个key, 2t个指针(v代表指针)
*/
// Builds an empty node. Capacity is fixed up front: 2*t1-1 key slots and
// 2*t1 child-pointer slots. The slots are left uninitialized; n == 0 marks
// the node as holding no keys yet.
TreeNode::TreeNode(int t1, bool leaf1)
    : keys(new int[2 * t1 - 1]),
      t(t1),
      C(new TreeNode *[2 * t1]),
      n(0),
      leaf(leaf1)
{
}

/*
* 遍历此结点中所有元素
*/
// Prints every key of the subtree rooted at this node to std::cout, each
// preceded by a single space.
void TreeNode::traverse()
{
    const bool hasChildren = (leaf == false);

    int idx = 0;
    while (idx < n)
    {
        // Child idx is visited before key idx is printed.
        if (hasChildren)
            C[idx]->traverse();

        std::cout << " " << keys[idx];
        ++idx;
    }

    // idx == n here: the last child comes after the last key.
    if (hasChildren)
        C[idx]->traverse();
}


/*
*  在此结点中寻找 k=79, 这里 keys[]={72,78,83}
*  [v|72|v|78|v|83|v| | ]
*   |    |    |    |
*  66    72   78   83
*  68    73   79   84
*  69    74   81   85
*  70    76
*/
// Searches the subtree rooted at this node for key k.
// Returns a pointer to the node that contains k, or NULL if k is absent.
TreeNode *TreeNode::search(int k)
{
    // Advance to the first key that is not smaller than k.
    int i = 0;
    while (i < n && k > keys[i])
        i++;

    // Compare only when i names a stored key. When the loop ran off the end
    // (i == n), keys[i] is an unused slot - or one past the end of the array
    // when the node is full - and must not be read.
    if (i < n && keys[i] == k)
        return this;

    // Not in this node, and a leaf has no children to descend into.
    if (leaf == true)
        return NULL;

    // k can only be in child i (between keys[i-1] and keys[i]).
    return C[i]->search(k);
}


/*
* 在非满的结点中插入元素k
*/
// Inserts k into the subtree rooted at this node.
// Assumes this node currently holds fewer than 2t-1 keys; the callers in
// this file check or split before calling.
void TreeNode::insertNonFull(int k)
{
    if (leaf == true)
    {
        // Shift the keys greater than k one slot to the right, scanning from
        // the back, then drop k into the gap.
        int slot = n;
        while (slot > 0 && keys[slot - 1] > k)
        {
            keys[slot] = keys[slot - 1];
            --slot;
        }
        keys[slot] = k;
        ++n;
        return;
    }

    // Internal node: find the child whose key range covers k, i.e. the one
    // right after the last key that is <= k.
    int child = n;
    while (child > 0 && keys[child - 1] > k)
        --child;

    // A full child is split first so that the recursive call again receives
    // a node with room.
    if (C[child]->n == 2 * t - 1)
    {
        splitChild(child, C[child]);

        // The split moved a key up into keys[child]; k may now belong to the
        // new right-hand half.
        if (keys[child] < k)
            ++child;
    }
    C[child]->insertNonFull(k);
}


/**
* 分裂当前结点中的儿子结点y
* =========================
*/
void TreeNode::splitChild(int i, TreeNode *y)
{
    //既然从y处分裂一个结点, 那么新的结点与 y 同处于一层. 所以若 y是树叶, 则 z 也是树叶.
    //因此传递的参数与y一致. y->t, 2t指y结点的最多儿子数.
    TreeNode *z = new TreeNode(y->t, y->leaf);
    //每个结点至多具有2t个指针,至多可以存放2t-1个key.
    //分裂结点时, key 已经满, 因此有 2t-1 个元素, 算上即将插入的元素, 则有2t个元素
    //将这2t个元素放到两个结点中, 每个结点有t个元素.
    z->n = t - 1;

    // [v|0|v|1|v|2|v|3|v|...|t-1|v|t|v|t+1|v|...|2t-2|v]  v 代表指针
    //                             [拷贝到 z           ]
    // 将y指向的结点中 keys[t], keys[t+1], ..., keys[2t-2] 都拷贝到 z 中
    // 共计拷贝 t-1 个元素 (2t-2-t+1=t-1).
    for (int j = 0; j < t - 1; j++)
        z->keys[j] = y->keys[j + t];

    //如果y结点不是叶子, 则需要将其一般的儿子拷贝到新的结点.
    //即j=t,t+1,...,2t-1的儿子C[j]拷贝到 z->C[j]
    if (y->leaf == false)
    {
        for (int j = 0; j < t; j++)
            z->C[j] = y->C[j + t];
    }

    y->n = t - 1;//更新被分裂结点的关键字的个数.

    //这里的C[] 是当前 TreeNode 中的C, 由此推出这里的函数 splitChild(i,y)
    //的确是分裂的是当前结点的儿子y.
    //将index为i+1,i+2,...,n的儿子右移一个位置.
    for (int j = n; j >= i + 1; j--)
        C[j + 1] = C[j];

    C[i + 1] = z;//将新的结点z插入到正确的位置 i+1 上.

    //将当前结点的指标为i,i+1,...,n-1的关键字也右移一个位置
    for (int j = n - 1; j >= i; j--)
        keys[j + 1] = keys[j];

    keys[i] = y->keys[t - 1];//分裂后, 被分裂的结点最后一个关键字不需要了, 存放到其父结点(也就是当前结点)的keys[]数组中.
    n++;//当前结点分裂成功, 关键字数目加1.
}


// Demo: builds a B-tree with t = 3, prints it, then looks up one key that
// was inserted (10) and one that was not (2).
int main() {
  BTree t(3);

  const int inserted[] = {8, 9, 10, 11, 15, 16, 17, 18, 20, 23};
  const int count = sizeof(inserted) / sizeof(inserted[0]);
  for (int idx = 0; idx < count; ++idx)
    t.insert(inserted[idx]);

  cout << "The B-tree is: ";
  t.traverse();

  int k = 10;
  if (t.search(k) != NULL)
    cout << endl << k << " is found";
  else
    cout << endl << k << " is not Found";

  k = 2;
  if (t.search(k) != NULL)
    cout << endl << k << " is found";
  else
    cout << endl << k << " is not Found\n";
}

B 树的可视化

https://dichchankinh.com/~galles/visualization/BTree.html

https://cs.csub.edu/~msarr/visualizations/BTree.html

https://roy2220.github.io/bptree/visualization

B+ 树

B+ 树的结构

标准库中的 set 和 map

之前讨论了 STL 中的容器 vector 和 list, 这两者对于查找来说是不够用的. 相应地, STL 提供了两个附加的容器 set 和 map. 事实上, STL 还提供了 multiset 和 multimap. C++ 11标准中加入了unordered系列的容器. 与 set 相对的有 unordered_set, 与 map 相对的有 unordered_map.

set

set 是一个排序后的容器, 该容器不允许重复. 而 multiset 允许元素重复.

在使用 set 或 multiset 之前, 必须先含入头文件 <set>.
#include <set>

// Shape of the set and multiset class templates that <set> provides.
namespace std{
	// T is the element type; Compare is the ordering (less<T> by default);
	// Allocator is the allocator type (allocator<T> by default).
	template <class T,
		class Compare=less<T>,
		class Allocator=allocator<T> >
	class set;
	
	// multiset takes the same three template parameters as set.
	template <class T,
		class Compare=less<T>,
		class Allocator=allocator<T> >
	class multiset;
}

许多用于访问 vector 和 list 中的项的例程也适用于 set.

允许遍历 set 的类型是嵌套于 set 的 iterator 和 const_iterator 类型.
vector 和 list 的几个方法在 set 中有完全相同的名字, 包括 begin, end, size 和 empty.

图 3-6 中的 printCollection 函数模板在传递的参数为 set 时也可以工作.

// Writes the elements of c to out.
// An empty container prints "(empty)" with no trailing newline; otherwise the
// elements are printed as "[ a, b, c ]" followed by a newline.
template <typename Container>
void printCollection( const Container & c, ostream & out = cout )
{
    if( c.empty( ) )
    {
        out << "(empty)";
        return;
    }

    typename Container::const_iterator pos = c.begin( );
    const typename Container::const_iterator last = c.end( );

    // The first item has no leading separator; every later one does.
    out << "[ " << *pos;
    for( ++pos; pos != last; ++pos )
        out << ", " << *pos;
    out << " ]" << endl;
}

set 特有的操作是高效的插入、删除和执行基本查找.

因为 set 不允许重复, 因此对于插入(insert)来说, 可能会出现失败的情况. 因此, 我们希望返回类型是一个可以指示是否成功的布尔变量, 但有时往往还需要知道插入时的位置. 以前常用的做法是返回指针: 当插入失败, 返回空指针.

事实上, set 中的 insert 返回的是一个比 bool 类型复杂得多的类型. 这是因为 insert 也返回一个 iterator 来给出当前插入时的位置.

这个 iterator 或者指向新插入的项, 或者指向导致 insert 失败的已存在的项.
有了这个返回的位置, 我们可以快捷地执行删除（避免了查找操作）, 也可以直接获得包含该项的结点.

要使得返回内容包含上面复杂的内容, 我们可以考虑 STL 的 pair 这个类型.

pair

STL 定义了一个名为 pair 的类模板. 它采用 struct 来封装两个成员数据. 这两个成员数据分别名为 first 和 second, 都是公有的数据成员(不是成员函数), 可以直接访问.

下面是两个不同的 insert 例程:

单参数 insert, 使用常规的插入算法.

pair<iterator,bool> insert( const Object & x);

双参数 insert, hint 是即将插入的位置.

pair<iterator,bool> insert( iterator hint, const Object & x);

双参数 insert 如果提供的 hint 是精确的, 则插入会很快, 通常是 O(1) 的时间.
如果 hint 不准确, 则使用常规的插入算法, 与单参数一样.

set<int> s;
for(int i=0; i<1000000;i++)
	s.insert(s.end(),i);

int erase(const Object & x);
iterator erase(iterator itr);
iterator erase(iterator start, iterator end);

第一个单参数 erase 函数删除 x, 返回删除元素的个数. 当删除失败时, 返回0; 删除成功时, 返回1.
第二个单参数 erase 函数接受迭代器参数 itr, 删除由 itr 所指定位置的元素, 返回原itr所指对象下一个位置, 然后使itr失效, 因为此时 itr 已经没用了. 其执行方式与 vector 和 list 是一样的.
双参数 erase 函数的执行与 vector 和 list 的 erase 函数一样, 删除从 start 开始, 到 end 终止的所有项, 不包括 end.

set 和 multiset 在元素快速搜寻方面有优化设计, 所以提供了特殊的搜寻函数, 这些函数是同名的 STL 算法的特殊版本. 面对 set 和 multiset, 你应该优先采用这些优化算法, 如此可获得对数复杂度, 而非 STL 算法的线性复杂度.

set 和 multiset 通常以平衡二叉树(balanced binary tree)实现. 有一个限制, 不能直接改变元素的值. 要改变元素的值, 必须先删除旧元素, 再插入新元素.

对于查找, set 提供了一个优于返回布尔变量的 contains 例程的 find 例程. 这个想法也可以用于改进之前的 AVL 树.

iterator find(const Object & x) const;

内在二叉树所基于的排序操作

默认情况下, 排序操作使用 less<Object> 函数对象实现, 而该函数对象是通过对 Object 调用 operator< 来实现的.

另一种可替代的排序方案可以通过具有函数对象类型的 set 模板来举例说明. 例如, 可以生成一个存储 string 对象的 set, 通过使用 CaseInsensitiveCompare 函数对象来忽略字符的大小写. 在下面的代码中, set s 的大小是 1.

set<string, CaseInsensitiveCompare> s;
s.insert("Hello");
s.insert("HeLLo");
cout << "The size is: " << s.size() << endl;

这里的 CaseInsensitiveCompare 函数对象在第一章(图1-22)讲过.

// Ordering function object that compares strings while ignoring the case of
// ASCII letters. It can be used as the Compare argument of set/map, and the
// original isLessThan member is kept for existing callers.
class CaseInsensitiveCompare
{
  public:
    // Call operator: the ordered containers invoke the comparator as
    // cmp(a, b), so a named member alone is not enough.
    bool operator()( const string & lhs, const string & rhs ) const
    {
        return isLessThan( lhs, rhs );
    }

    // Returns true when lhs orders before rhs, comparing character by
    // character after folding 'A'..'Z' to lower case. Written by hand
    // because stricmp is not part of standard C or C++.
    bool isLessThan( const string & lhs, const string & rhs) const
    {
        const string::size_type common =
            lhs.size( ) < rhs.size( ) ? lhs.size( ) : rhs.size( );

        for( string::size_type i = 0; i < common; ++i )
        {
            const unsigned char a = foldCase( lhs[ i ] );
            const unsigned char b = foldCase( rhs[ i ] );
            if( a != b )
                return a < b;
        }

        // Equal over the common prefix: the shorter string orders first.
        return lhs.size( ) < rhs.size( );
    }

  private:
    // Maps 'A'..'Z' to 'a'..'z' and leaves every other character unchanged.
    static unsigned char foldCase( char c )
    {
        return ( c >= 'A' && c <= 'Z' )
            ? static_cast<unsigned char>( c - 'A' + 'a' )
            : static_cast<unsigned char>( c );
    }
};

其他操作


操作	效果
count(elem)	返回元素值为 elem 的元素个数
find(elem)	返回元素值为 elem 的第一个元素, 如果找不到就返回 end()
lower_bound(elem)	返回 elem 第一个可安插位置, 也就是“元素值>=elem” 的第一个元素的位置
upper_bound(elem)	返回 elem 最后一个可安插位置, 也就是“元素值>elem” 的第一个元素的位置
equal_range(elem)	返回 elem 可安插的第一个位置和最后一个位置, 也就是“元素值==elem” 的元素区间

// set::lower_bound/upper_bound
#include <iostream>
#include <set>

// Demo of set::lower_bound / set::upper_bound: fills a set with 10..90 and
// erases the half-open range [lower_bound(30), upper_bound(60)).
int main ()
{
  std::set<int> myset;
  for (int value=10; value<=90; value+=10)   // 10 20 30 40 50 60 70 80 90
    myset.insert(value);

  // First element >= 30, i.e. the 30 itself.
  const std::set<int>::iterator itlow = myset.lower_bound (30);
  // First element > 60, i.e. the 70; it is not part of the erased range.
  const std::set<int>::iterator itup = myset.upper_bound (60);

  myset.erase(itlow,itup);                   // 10 20 70 80 90

  std::cout << "myset contains:";
  std::set<int>::iterator it = myset.begin();
  while (it != myset.end())
  {
    std::cout << ' ' << *it;
    ++it;
  }
  std::cout << '\n';

  return 0;
}

map

map 用来存储排序后的由键和值组成的项的集合. 键必须唯一, 不同的键可以对应同一个值. 在 map 中, 键保持逻辑排序后的顺序.

map 的执行类似于用 pair 例示的 set. 其中的比较函数仅仅涉及键. 因此, map 含有 begin, end, size 和 empty 方法, 但是基本的迭代器是一个键值对.

换句话说, 对于 iterator itr, *itr 是 pair<KeyType, ValueType> 类型.

map 也支持 insert, find 和 erase. 对于 insert, 必须提供 pair<KeyType, ValueType> 对象. 虽然 find 仅需要一个键, 返回的 iterator 还是指向一个 pair<KeyType, ValueType> 类型的对象. 通常使用这些操作都是不值得的, 因为这会导致昂贵的语法累赘.

map 的索引操作

幸运的是, map 有一个重要的额外操作可以获得简单的语法. 下面是 map 的索引操作符重载

ValueType & operator[] ( const KeyType & key );

operator[] 的语义如下.

如果在 map 中存在 key, 就返回指向相应的值的引用.
如果在 map 中不存在 key, 就在 map 中插入一个默认的值, 然后返回指向这个插入的默认值的引用. 这个默认值通过应用零参数构造函数获得, 如果是基本类型的话就是 0.

由于这些语义(找不到键时会插入新项), operator[] 不可能有访问函数(accessor, 即 const 成员函数)版本, 因此 operator[] 不能用于常量的 map. 例如, 如果在例程中 map 是通过常量引用来传递的, 那么 operator[] 就不可用.

例子

这里举了两个访问 map 的项的技术.

    // Two ways of accessing the items of a map: operator[] and find.
    map<string,double> salaries;
    	
    // The left-hand side calls operator[]: because "Pat" is not yet present, an
    // item with key "Pat" and value 0 is inserted and a reference to that double
    // is returned. The assignment then changes that double to 75000.
    salaries[ "Pat" ] = 75000.00;
    
    cout << salaries[ "Pat" ] << endl; // prints the stored double, 75000
    
    // "Jan" is not present, so this inserts the pair ("Jan", 0.0) and prints
    // that value; with default stream formatting the text is `0`, not `0.0`.
    cout << salaries[ "Jan" ] << endl;
    

    /* When it matters whether an item is already in the map, use find
    * instead: it does not insert anything.
    */
    map<string,double>::const_iterator itr;
    itr = salaries.find( "Chris" ); // returns salaries.end() when the key "Chris" is not found
    if( itr == salaries.end( ) )
        cout << "Not an employee of this company!" << endl;
    else
        cout << itr->second << endl; // prints the double itr refers to; itr is a const_iterator, so the value cannot be assigned through it

map

map是c++的一个标准容器

map是c++的一个标准容器，它提供了很好的一对一的关系，在一些程序中建立一个map可以起到事半功倍的效果. 这里总结了map 的一些基本简单实用的操作！

1. map最基本的构造函数；

   // Default-constructed (empty) maps for several key/value type pairs.
   // Each variable has its own name so that all six declarations can share
   // one scope.
   map<string,int> mapstring;        map<int,string> mapint;
   map<string,char> mapstringchar;   map<char,string> mapchar;
   map<char,int> mapcharint;         map<int,char> mapintchar;

2. map添加数据；

   map<int,string> maplive;  
   maplive.insert(pair<int,string>(102,"aclive"));
   maplive.insert(map<int,string>::value_type(321,"hai"));
   maplive[112]="April";//map中最简单最常用的插入添加！

3. map中元素的查找：

find()函数返回一个迭代器指向键值为key的元素，如果没找到就返回指向map尾部的迭代器。

map<int,string>::iterator itr; 
itr=maplive.find(112);
if(itr==maplive.end())
    cout << "we do not find 112" << endl;
else
    cout << "we find 112" << endl;

4. map中元素的删除：如果删除112；

// Look the key up first; erase only when it was found.
map<int,string>::iterator itr;
itr=maplive.find(112);
if(itr==maplive.end())
    cout<<"we do not find 112"<<endl;
else
    maplive.erase(itr);  // erase through the iterator returned by find (removes key 112)

5. map中 swap的用法： Map中的swap不是一个容器中的元素交换，而是两个容器交换； For example：

#include <map>
#include <iostream>

using namespace std;

// Prints `label`, then every mapped value of `m` in key order (each one
// preceded by a space), then "." and a newline.
static void printValues( const char *label, const map <int, int> &m )
{
    cout << label;
    for ( map <int, int>::const_iterator pos = m.begin( ); pos != m.end( ); ++pos )
        cout << " " << pos->second;
    cout << "." << endl;
}

// Demo of map swap: the member function and the non-member swap both
// exchange the complete contents of two maps.
int main( )
{
    map <int, int> m1, m2, m3;

    m1.insert ( pair <int, int>  ( 1, 10 ) );
    m1.insert ( pair <int, int>  ( 2, 20 ) );
    m1.insert ( pair <int, int>  ( 3, 30 ) );

    m2.insert ( pair <int, int>  ( 10, 100 ) );
    m2.insert ( pair <int, int>  ( 20, 200 ) );

    m3.insert ( pair <int, int>  ( 30, 300 ) );

    printValues( "The original map m1 is:", m1 );

    // Member-function version: m1 and m2 trade contents.
    m1.swap( m2 );
    printValues( "After swapping with m2, map m1 is:", m1 );
    printValues( "After swapping with m2, map m2 is:", m2 );

    // Non-member version: m1 and m3 trade contents.
    swap( m1, m3 );
    printValues( "After swapping with m3, map m1 is:", m1 );
}

6. map的sort问题： Map中的元素是自动按key升序排序,所以不能对map用sort函数： For example：

  
  #include <map>
  #include <iostream>

  using namespace std;

 // Demo: a map iterates in ascending key order regardless of the order in
 // which the pairs were inserted.
 int main( )
 {
   // (key, value) pairs, deliberately not sorted by key.
   static const int entries[][ 2 ] = {
      { 1, 20 }, { 4, 40 }, { 3, 60 }, { 2, 50 }, { 6, 40 }, { 7, 30 }
   };
   const int entryCount = sizeof( entries ) / sizeof( entries[ 0 ] );

   map <int, int> m1;
   for ( int row = 0; row < entryCount; ++row )
      m1.insert ( pair <int, int>  ( entries[ row ][ 0 ], entries[ row ][ 1 ] ) );

   cout << "The original map m1 is:" << endl;
   map <int, int>::iterator pos = m1.begin( );
   while ( pos != m1.end( ) )
   {
      cout <<  pos->first << " " << pos->second << endl;
      ++pos;
   }
 }

 
  The original map m1 is:
  1 20
  2 50
  3 60
  4 40
  6 40
  7 30
  请按任意键继续. . .

7. map的基本操作函数：
C++ Maps是一种关联式容器，包含“关键字/值”对
begin() 返回指向map头部的迭代器
clear(）删除所有元素
count() 返回指定元素出现的次数
empty() 如果map为空则返回true
end() 返回指向map末尾的迭代器
equal_range() 返回特殊条目的迭代器对
erase() 删除一个元素
find() 查找一个元素
get_allocator() 返回map的配置器
insert() 插入元素
key_comp() 返回比较元素key的函数
lower_bound() 返回键值大于等于给定元素的第一个位置
max_size() 返回可以容纳的最大元素个数
rbegin() 返回一个指向map尾部的逆向迭代器
rend() 返回一个指向map头部的逆向迭代器
size() 返回map中元素的个数
swap() 交换两个map
upper_bound() 返回键值大于给定元素的第一个位置
value_comp() 返回比较元素value的函数

首页

Haifeng Xu

(hfxu@yzu.edu.cn)

目录

预备知识

预备知识

树的实现

树的实现

树的遍历及应用