b tree Algorithm
The B-tree algorithm is a highly efficient data structure and search algorithm, widely utilized in computer science and database management systems for organizing and accessing large collections of data. Developed by Rudolf Bayer and Edward M. McCreight in 1972, B-trees are designed to work optimally with secondary storage devices, such as hard disks, by optimizing the number of disk accesses required to search, insert, and delete data. A B-tree is a self-balancing, multi-level tree structure that ensures that all leaf nodes are at the same depth and maintains a sorted order of elements within each node, allowing for efficient search, insertion, and deletion operations.
The key feature of the B-tree algorithm is its ability to maintain balance and minimize the height of the tree by adjusting the number of keys stored in each node. Every B-tree has a specified minimum and maximum degree (t), which determines the number of keys that can be stored in a node. Each node in the tree, except for the root, must have at least t-1 keys and can store up to 2t-1 keys. When a node becomes full, the B-tree algorithm performs a 'split' operation, dividing the node into two separate nodes and promoting the median key to the parent node. Conversely, if a node has too few keys, the algorithm will 'merge' it with a neighboring node or 'rotate' keys among adjacent nodes to maintain the minimum degree. This dynamic restructuring ensures that the tree remains balanced and guarantees logarithmic time complexity for search, insertion, and deletion operations.
use std::convert::TryFrom;
use std::fmt::Debug;
use std::mem;
struct Node<T> {
keys: Vec<T>,
children: Vec<Node<T>>,
}
pub struct BTree<T> {
root: Node<T>,
props: BTreeProps,
}
// Why to need a different Struct for props...
// Check - http://smallcultfollowing.com/babysteps/blog/2018/11/01/after-nll-interprocedural-conflicts/#fnref:improvement
struct BTreeProps {
degree: usize,
max_keys: usize,
mid_key_index: usize,
}
impl<T> Node<T>
where
T: Ord,
{
fn new(degree: usize, _keys: Option<Vec<T>>, _children: Option<Vec<Node<T>>>) -> Self {
Node {
keys: match _keys {
Some(_keys) => _keys,
None => Vec::with_capacity(degree - 1),
},
children: match _children {
Some(_children) => _children,
None => Vec::with_capacity(degree),
},
}
}
fn is_leaf(&self) -> bool {
self.children.len() == 0
}
}
impl BTreeProps {
fn new(degree: usize) -> Self {
BTreeProps {
degree,
max_keys: degree - 1,
mid_key_index: (degree - 1) / 2,
}
}
fn is_maxed_out<T: Ord + Copy>(&self, node: &Node<T>) -> bool {
node.keys.len() == self.max_keys
}
// Split Child expects the Child Node to be full
/// Move the middle_key to parent node and split the child_node's
/// keys/chilren_nodes into half
fn split_child<T: Ord + Copy + Default>(&self, parent: &mut Node<T>, child_index: usize) {
let child = &mut parent.children[child_index];
let middle_key = child.keys[self.mid_key_index];
let right_keys = match child.keys.split_off(self.mid_key_index).split_first() {
Some((_first, _others)) => {
// We don't need _first, as it will move to parent node.
_others.to_vec()
}
None => Vec::with_capacity(self.max_keys),
};
let mut right_children = None;
if !child.is_leaf() {
right_children = Some(child.children.split_off(self.mid_key_index + 1));
}
let new_child_node: Node<T> = Node::new(self.degree, Some(right_keys), right_children);
parent.keys.insert(child_index, middle_key);
parent.children.insert(child_index + 1, new_child_node);
}
fn insert_non_full<T: Ord + Copy + Default>(&mut self, node: &mut Node<T>, key: T) {
let mut index: isize = isize::try_from(node.keys.len()).ok().unwrap() - 1;
while index >= 0 && node.keys[index as usize] >= key {
index -= 1;
}
let mut u_index: usize = usize::try_from(index + 1).ok().unwrap();
if node.is_leaf() {
// Just insert it, as we know this method will be called only when node is not full
node.keys.insert(u_index, key);
} else {
if self.is_maxed_out(&node.children[u_index]) {
self.split_child(node, u_index);
if node.keys[u_index] < key {
u_index += 1;
}
}
self.insert_non_full(&mut node.children[u_index], key);
}
}
fn traverse_node<T: Ord + Debug>(&self, node: &Node<T>, depth: usize) {
if node.is_leaf() {
print!(" {0:{<1$}{2:?}{0:}<1$} ", "", depth, node.keys);
} else {
let _depth = depth + 1;
for (index, key) in node.keys.iter().enumerate() {
self.traverse_node(&node.children[index], _depth);
// Check https://doc.rust-lang.org/std/fmt/index.html
// And https://stackoverflow.com/a/35280799/2849127
print!("{0:{<1$}{2:?}{0:}<1$}", "", depth, key);
}
self.traverse_node(&node.children.last().unwrap(), _depth);
}
}
}
impl<T> BTree<T>
where
T: Ord + Copy + Debug + Default,
{
pub fn new(branch_factor: usize) -> Self {
let degree = 2 * branch_factor;
BTree {
root: Node::new(degree, None, None),
props: BTreeProps::new(degree),
}
}
pub fn insert(&mut self, key: T) {
if self.props.is_maxed_out(&self.root) {
// Create an empty root and split the old root...
let mut new_root = Node::new(self.props.degree, None, None);
mem::swap(&mut new_root, &mut self.root);
self.root.children.insert(0, new_root);
self.props.split_child(&mut self.root, 0);
}
self.props.insert_non_full(&mut self.root, key);
}
pub fn traverse(&self) {
self.props.traverse_node(&self.root, 0);
println!("");
}
pub fn search(&self, key: T) -> bool {
let mut current_node = &self.root;
let mut index: isize;
loop {
index = isize::try_from(current_node.keys.len()).ok().unwrap() - 1;
while index >= 0 && current_node.keys[index as usize] > key {
index -= 1;
}
let u_index: usize = usize::try_from(index + 1).ok().unwrap();
if index >= 0 && current_node.keys[u_index - 1] == key {
break true;
} else if current_node.is_leaf() {
break false;
} else {
current_node = ¤t_node.children[u_index];
}
}
}
}
#[cfg(test)]
mod test {
use super::BTree;
#[test]
fn test_search() {
let mut tree = BTree::new(2);
tree.insert(10);
tree.insert(20);
tree.insert(30);
tree.insert(5);
tree.insert(6);
tree.insert(7);
tree.insert(11);
tree.insert(12);
tree.insert(15);
assert!(tree.search(15));
assert_eq!(tree.search(16), false);
}
}