Below is my code for the split method of BTree class. Now, when I test the insertion and more than 1 split occurs, it doesn't reflect on the tree structure, although I see the keys list is updated in the parent node when debugging and the splitting is done correctly. As shown in the code, I do write back the changes to the DISK. Any suggestion why it's not working properly?
FYI, the split method is called in the insert_recursive method below:
def _insert_recursive(self, node_addr: Address, parent_addr: Address, key: KT, value: VT):
node = get_node(node_addr)
node.parent_addr = parent_addr
if node.is_leaf:
node.insert_data(key, value)
node.keys, node.data = self._sort_keys_data(node.keys, node.data)
if len(node.keys) > self.L:
# If the leaf node overflows, split it
self._split_leaf_node(node)
node.write_back() # Update changes made to the leaf node
else:
# Finding the appropriate child node to insert the key
idx = node.find_idx(key)
child_addr = node.children_addrs[idx]
self._insert_recursive(child_addr, node_addr, key, value)
node.write_back() # Update changes made to the internal node
def _split_leaf_node(self, node: BTreeNode) -> None:
mid = len(node.keys) // 2
# Create a new leaf node
new_node = BTreeNode(DISK.new(), node.parent_addr, None, True)
new_node.keys = node.keys[mid:]
new_node.data = node.data[mid:]
# Update the node's keys and data
node.keys = node.keys[:mid]
node.data = node.data[:mid]
# Update the parent's children addresses
if node.parent_addr is not None:
parent_node = get_node(node.parent_addr)
idx = parent_node.children_addrs.index(node.my_addr)
parent_node.children_addrs.insert(idx + 1, new_node.my_addr)
parent_node.keys.insert(idx, new_node.keys[0]) # Update the keys in the parent node
node.index_in_parent = idx
new_node.index_in_parent = idx + 1
DISK.write(parent_node.my_addr,parent_node)
if len(parent_node.keys) > self.M - 1:
self._split_internal(parent_node, node.parent_addr)
# Update the index_in_parent of node and new_node in the new parent node
if parent_node.parent_addr is not None:
new_parent_node = get_node(parent_node.parent_addr)
idx = new_parent_node.children_addrs.index(parent_node.my_addr)
new_parent_node.children_addrs.insert(idx + 1, new_node.my_addr)
new_parent_node.keys.insert(idx, parent_node.keys[-1])
node.index_in_parent = idx
new_node.index_in_parent = idx + 1
new_parent_node.write_back()
node.write_back()
new_node.write_back()
# Update the root node if necessary
if node.my_addr == self.root_addr:
root = BTreeNode(DISK.new(), None, None, False)
root.keys = [new_node.keys[0]] # Assign the mid key to the new root
root.children_addrs = [node.my_addr, new_node.my_addr] # Children are the split nodes
node.index_in_parent = root.children_addrs.index(node.my_addr)
new_node.index_in_parent = root.children_addrs.index(new_node.my_addr)
node.parent_addr = root.my_addr
new_node.parent_addr = root.my_addr
self.root_addr = root.my_addr
node.write_back()
new_node.write_back()
root.write_back()
else:
node.write_back()
new_node.write_back()
I ran the following test which require 2 splits, and the result is only showing the first split.
def test_big_tree():
M = 3
L = 3
btree = BTree(M, L)
for i in range(6):
btree.insert(i, str(i))
for i in range(6):
assert btree.find(i) == str(i)
