|
| 1 | +""" |
| 2 | +B-Tree Implementation |
| 3 | +
|
| 4 | +A B-Tree is a self-balancing tree data structure that maintains sorted data and allows |
| 5 | +searches, sequential access, insertions, and deletions in logarithmic time. |
| 6 | +
|
| 7 | +B-Trees are commonly used in databases and file systems. |
| 8 | +
|
| 9 | +Reference: https://en.wikipedia.org/wiki/B-tree |
| 10 | +Time Complexity: |
| 11 | + - Search: O(log n) |
| 12 | + - Insert: O(log n) |
| 13 | + - Delete: O(log n) (not implemented in this module) |
| 14 | +""" |
| 15 | + |
| 16 | +from __future__ import annotations |
| 17 | + |
| 18 | + |
class BTreeNode:
    """
    A node in the B-Tree.

    Attributes:
        keys: Sorted list of keys stored in the node
        children: List of child nodes (empty for a leaf; otherwise one more
            entry than ``keys``)
        is_leaf: Boolean indicating if this is a leaf node
    """

    def __init__(self, is_leaf: bool = True) -> None:
        self.keys: list[int] = []
        self.children: list[BTreeNode] = []
        self.is_leaf = is_leaf

    def split(self, parent: BTreeNode, index: int) -> None:
        """
        Split this node and move the median key up to the parent.

        Keys left of the median stay in this node, keys right of it move to a
        new sibling that is inserted into ``parent.children`` directly after
        this node. Both halves are non-empty only when the node holds at
        least three keys, so callers should split overfull nodes.

        Args:
            parent: The parent node
            index: The index in parent's children where this node is located

        Raises:
            IndexError: If this node has no keys

        >>> parent = BTreeNode(is_leaf=False)
        >>> child = BTreeNode()
        >>> child.keys = [1, 2, 3]
        >>> parent.children.append(child)
        >>> child.split(parent, 0)
        >>> parent.keys, child.keys, parent.children[1].keys
        ([2], [1], [3])
        """
        mid_index = len(self.keys) // 2
        # Read the median BEFORE truncating self.keys; afterwards the slot at
        # mid_index no longer exists.
        median = self.keys[mid_index]

        sibling = BTreeNode(is_leaf=self.is_leaf)
        sibling.keys = self.keys[mid_index + 1 :]
        self.keys = self.keys[:mid_index]

        if not self.is_leaf:
            # A node with k keys owns k + 1 children, so the left half keeps
            # mid_index + 1 children and the sibling receives the rest.
            sibling.children = self.children[mid_index + 1 :]
            self.children = self.children[: mid_index + 1]

        parent.keys.insert(index, median)
        parent.children.insert(index + 1, sibling)


class BTree:
    """
    B-Tree data structure.

    A B-Tree of order m has the following properties:
        - Every node has at most m children
        - Every non-leaf node (except root) has at least ⌈m/2⌉ children
        - The root has at least 2 children if it is not a leaf
        - All leaves appear on the same level
        - A non-leaf node with k children contains k−1 keys

    Duplicate keys are accepted and stored once per insertion.

    Examples:
        >>> btree = BTree(order=3)
        >>> btree.insert(10)
        >>> btree.insert(20)
        >>> btree.insert(5)
        >>> btree.insert(6)
        >>> btree.insert(12)
        >>> btree.insert(30)
        >>> btree.insert(7)
        >>> btree.insert(17)
        >>> btree.search(6)
        True
        >>> btree.search(15)
        False
        >>> btree.search(12)
        True
        >>> btree.search(100)
        False
    """

    def __init__(self, order: int = 3) -> None:
        """
        Initialize a B-Tree.

        Args:
            order: The maximum number of children a node can have (must be >= 3)

        Raises:
            ValueError: If order is less than 3
        """
        if order < 3:
            msg = "Order must be at least 3"
            raise ValueError(msg)

        self.order = order
        # ceil(order / 2) children => ceil(order / 2) - 1 keys at minimum.
        self.min_keys = (order + 1) // 2 - 1
        self.max_keys = order - 1
        self.root = BTreeNode()

    def search(self, key: int, node: BTreeNode | None = None) -> bool:
        """
        Search for a key in the B-Tree.

        Args:
            key: The key to search for
            node: The node to start searching from (defaults to root)

        Returns:
            True if the key exists, False otherwise

        >>> btree = BTree(order=3)
        >>> btree.insert(50)
        >>> btree.search(50)
        True
        >>> btree.search(25)
        False
        """
        current = self.root if node is None else node

        while True:
            # Find the first key that is not smaller than the target.
            i = 0
            while i < len(current.keys) and key > current.keys[i]:
                i += 1

            if i < len(current.keys) and key == current.keys[i]:
                return True
            if current.is_leaf:
                return False

            current = current.children[i]

    def insert(self, key: int) -> None:
        """
        Insert a key into the B-Tree.

        The key is first placed in the appropriate leaf; any node that ends up
        with more than ``max_keys`` keys is split on the way back up. Splitting
        only overfull nodes (``order`` keys) guarantees both halves keep at
        least ``min_keys`` keys.

        Args:
            key: The key to insert

        >>> btree = BTree(order=3)
        >>> btree.insert(10)
        >>> btree.insert(20)
        >>> btree.insert(30)
        >>> btree.search(20)
        True
        >>> btree.traverse()
        [10, 20, 30]
        """
        old_root = self.root
        self._insert_into(old_root, key)

        if len(old_root.keys) > self.max_keys:
            # The root overflowed: grow the tree by one level.
            new_root = BTreeNode(is_leaf=False)
            new_root.children.append(old_root)
            old_root.split(new_root, 0)
            self.root = new_root

    @staticmethod
    def _upper_bound(keys: list[int], key: int) -> int:
        """Return the index after the last entry of ``keys`` that is <= key."""
        pos = len(keys)
        while pos > 0 and key < keys[pos - 1]:
            pos -= 1
        return pos

    def _insert_into(self, node: BTreeNode, key: int) -> None:
        """
        Insert a key into the subtree rooted at ``node``.

        ``node`` itself may be left with ``max_keys + 1`` keys; the caller is
        responsible for splitting it. Children that overflow are split here.

        Args:
            node: The root of the subtree to insert into
            key: The key to insert
        """
        pos = self._upper_bound(node.keys, key)

        if node.is_leaf:
            node.keys.insert(pos, key)
            return

        child = node.children[pos]
        self._insert_into(child, key)

        if len(child.keys) > self.max_keys:
            child.split(node, pos)

    def traverse(self, node: BTreeNode | None = None) -> list[int]:
        """
        Traverse the B-Tree in sorted order.

        Args:
            node: The node to start traversal from (defaults to root)

        Returns:
            List of all keys in sorted order

        >>> btree = BTree(order=3)
        >>> btree.traverse()
        []
        >>> for i in [10, 20, 5, 6, 12, 30, 7, 17]:
        ...     btree.insert(i)
        >>> btree.traverse()
        [5, 6, 7, 10, 12, 17, 20, 30]
        """
        if node is None:
            node = self.root

        if node.is_leaf:
            return list(node.keys)

        result: list[int] = []
        # zip stops after len(keys) pairs, leaving exactly the last child.
        for child, key in zip(node.children, node.keys):
            result.extend(self.traverse(child))
            result.append(key)

        # Index by key count rather than a leftover loop variable so that the
        # right-most child is correct even for a node without keys.
        result.extend(self.traverse(node.children[len(node.keys)]))
        return result

    def get_height(self, node: BTreeNode | None = None) -> int:
        """
        Get the height of the B-Tree.

        The height is the number of edges from ``node`` down to a leaf along
        the left-most path (a single leaf has height 0).

        Args:
            node: The node to start from (defaults to root)

        Returns:
            The height of the tree

        >>> btree = BTree(order=3)
        >>> btree.get_height()
        0
        >>> btree.insert(10)
        >>> btree.get_height()
        0
        >>> for i in range(20):
        ...     btree.insert(i)
        >>> btree.get_height() > 0
        True
        """
        current = self.root if node is None else node

        height = 0
        while not current.is_leaf:
            current = current.children[0]
            height += 1
        return height

    def __str__(self) -> str:
        """
        String representation of the B-Tree.

        Returns:
            String showing all keys in sorted order

        >>> btree = BTree(order=3)
        >>> btree.insert(2)
        >>> btree.insert(1)
        >>> str(btree)
        'BTree(order=3, keys=[1, 2])'
        """
        return f"BTree(order={self.order}, keys={self.traverse()})"
| 261 | + |
| 262 | + |
if __name__ == "__main__":
    import doctest

    # Run the embedded doctests first, then a small demonstration.
    doctest.testmod()

    keys = [10, 20, 5, 6, 12, 30, 7, 17, 3, 8, 15, 25, 35, 40]
    btree = BTree(order=3)

    print("Inserting keys:", keys)
    for key in keys:
        btree.insert(key)

    sorted_keys = btree.traverse()
    print("\nB-Tree traversal (sorted):", sorted_keys)

    height = btree.get_height()
    print("B-Tree height:", height)

    found_12 = btree.search(12)
    print("\nSearching for 12:", found_12)

    found_100 = btree.search(100)
    print("Searching for 100:", found_100)
0 commit comments