3.1 B-树节点设计
template<class K,size_t N> //关键字类型和N路B树
struct BTreeNode {
BTreeNode()
:_parent(nullptr), _n(0)
{
for (int i = 0; i < N; i++)
{
_keys[i] = K();
_subs[i] = nullptr;
}
_subs[N] = nullptr;
}
//额外开一个空间,方便节点满时进行分裂
K _keys[N];
BTreeNode* _subs[N+1];
int _n;
BTreeNode* _parent;
};
3.2 插入key的过程
按照插入排序的思想插入key,注意:在插入key的同时,可能还要插入新分裂出来的节点
//类似于插入排序的一次插入过程
void InsertKey(const K& key, Node* parent, Node* child)
{
int j = parent->_n - 1;
while (j >= 0 && parent->_keys[j] > key)
{
//不仅要挪动关键字还要挪动孩子指针
parent->_keys[j + 1] = parent->_keys[j];
parent->_subs[j + 2] = parent->_subs[j + 1];
j--;
}
parent->_keys[j + 1] = key;
parent->_subs[j + 2] = child;
if (child != nullptr)
child->_parent = parent;
parent->_n++;
}
B-树插入实现:
//在B树中查找对应关键字,找到返回节点指针和关键字下标,找不到返回叶子节点指针和-1
std::pair<Node*, int> Find(const K& key)
{
Node* parent = nullptr;
Node* cur = _root;
while (true)
{
if (cur == nullptr) break;
//二分查找
int l = 0, r = cur->_n - 1;
while (l < r)
{
int mid = (l + r + 1) / 2;
if (cur->_keys[mid] > key) r = mid-1;
else l = mid + 1;
}
if (cur->_keys[r] == key)
{
return { cur,r };
}
else if (cur->_keys[r] < key)
{
parent = cur;
cur = cur->_subs[r + 1];
}
else {
parent = cur;
cur = cur->_subs[r];
}
}
return { parent,-1 };
}
bool Insert(const K& key)
{
//如果树为空,新增根节点
if (_root == nullptr)
{
_root = new Node;
_root->_keys[0] = key;
_root->_n = 1;
return true;
}
std::pair<Node*,int> p = Find(key);
//如果关键字已存在,则不插入
if (p.second != -1) return false;
K midKey = key;
Node* parent = p.first;
Node* child = nullptr;
while (true)
{
InsertKey(midKey, parent, child);
if (parent->_n != N) //如果插入后容量没满,则插入完成
{
break;
}
//分裂
Node* brother = new Node;
int mid = parent->_n / 2;
midKey = parent->_keys[mid];
parent->_keys[mid] = K();
int j = 0;
for (size_t i = mid + 1; i < parent->_n; i++)
{
brother->_keys[j] = parent->_keys[i];
brother->_subs[j] = parent->_subs[i];
if(brother->_subs[j] != nullptr) brother->_subs[j]->_parent = brother;
parent->_keys[i] = K();
parent->_subs[i] = nullptr;
j++;
}
brother->_n = j;
brother->_subs[j] = parent->_subs[parent->_n];
parent->_subs[parent->_n] = nullptr;
if (brother->_subs[j] != nullptr) brother->_subs[j]->_parent = brother;
parent->_n -= j + 1;
//如果分裂后没有父节点,则要新增父节点
if (parent->_parent == nullptr)
{
_root = new Node;
_root->_keys[0] = midKey;
_root->_subs[0] = parent;
_root->_subs[1] = brother;
brother->_parent = _root;
parent->_parent = _root;
_root->_n = 1;
break;
}
else
{
brother->_parent = parent->_parent;
parent = parent->_parent;
child = brother;
}
}
return true;
}
3.3 B树的简单验证
对B树进行中序遍历,如果能得到一个有序的序列,说明插入正确
void _Inorder(Node* root)
{
if (root == nullptr) return;
for (size_t i = 0; i < root->_n; ++i)
{
_Inorder(root->_subs[i]);
std::cout << root->_keys[i] << ' ';
}
_Inorder(root->_subs[root->_n]);
}
3.4 B树性能分析
对于一棵节点为N度为M的B-树,查找和插入需要 l o g M − 1 N log_{M-1}N logM−1N~ l o g M / 2 N log_{M/2}N logM/2N次比较,这个很好证明:对于度为M的B-树,每一个节点的子节点个数为M/2 ~(M-1)之间,因此树的高度应该在要 l o g M − 1 N log_{M-1}N logM−1N和 l o g M / 2 N log_{M/2}N logM/2N之间,在定位到该节点后,再采用二分查找的方式可以很快的定位到该元素。
B-树的效率是很高的,对于N = 62*1000000000个节点,如果度M为1024,则 l o g M / 2 N log_{M/2}N logM/2N <=4,即在620亿个元素中,如果这棵树的度为1024,则需要小于4次即可定位到该节点,然后利用二分查找可以快速定位到该元素,大大减少了读取磁盘的次数。