I am trying to build a 2-3-4 tree in python. So far, the insertion seems to be working up to nodes of height 3 or so. After that, the data seems to get dropped rather than being inserted into the tree. I'm unsure why this is happening, and have double and triple checked my code several times. I have commented near the insert code where I got the insertion algorithm from. Thanks in advance for any insight on my issue.
import re
class Node:
def __init__(self, newInfo = None, p = None, q = None,
r = None, s = None, parent = None):
#initilize parent / child links
self.children = [p, q, r, s]
self.parent = parent
#initialize data
self.info = [ newInfo, None, None ]
class TwoThreeFourTree:
def __init__(self):
self.root = Node()
# Built in sort sinks None types to front, which
# is opposite of how info was structured to work,
# Using this sort will push None values to back.
def sortNode(self, curr):
c = []
for i in curr.info:
if i is not None:
c.append(i)
c.sort()
for i in range(3-len(c)):
c.append(None)
return c
# Built in len counts None, this one doesn't.
def length(self, node):
i = 0
for info in node:
if info is not None:
i = i + 1
return i
def isLeaf(self, node):
for child in node.children:
if child is not None:
return False
return True
def isOrphan(self, node):
if node.parent is None:
return True
return False
def lookup(self, userStr, reg):
curr = self.root
while curr is not None:
#Two Node
if self.length(curr.info) is 1:
if re.match(reg, str(curr.info[0])) is not None:
return curr.info[0]
else:
if userStr < curr.info[0]:
curr = curr.children[0]
else:
curr = curr.children[1]
#Three Node
elif self.length(curr.info) is 2:
for item in curr.info:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
else:
curr = curr.children[2]
#Four Node
elif self.length(curr.info) is 3:
for item in curr.info:
if item is not None:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
elif userStr < curr.info[2]:
curr = curr.children[2]
else:
curr = curr.children[3]
def inorder(self, node, retlst = None):
if retlst is None:
retlst = []
if node.children[0]:
retlst = self.inorder(node.children[0], retlst)
retlst += [node.info[0]]
if node.children[1]:
retlst = self.inorder(node.children[1], retlst)
retlst += [node.info[1]]
if node.children[2]:
retlst = self.inorder(node.children[2], retlst)
retlst += [node.info[2]]
if node.children[3]:
retlst = self.inorder(node.children[3], retst)
return retlst
## Using Algorithm from: http://www.clear.rice.edu/comp212/01-fall/lectures/33/
def insert(self, info, node):
curr = node
if self.length(curr.info) == 0: # curr is empty
curr.info[0] = info
return True
elif self.length(curr.info) == 1: # curr is two node.
if self.isLeaf(curr):
curr.info[1] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
else:
self.insert(info, curr.children[1])
elif self.length(curr.info) == 2: # curr is 3 node
if self.isLeaf(curr):
curr.info[2] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
elif info < curr.info[1]:
self.insert(info, curr.children[1])
elif info > curr.info[2]:
self.insert(info, curr.children[2])
elif self.length(curr.info) == 3: # curr is 4 node
if self.isOrphan(curr): # curr has no parent
curr = Node(curr.info[1],
Node(curr.info[0], curr.children[0], curr.children[1]),
Node(curr.info[2], curr.children[2], curr.children[3]))
for child in curr.children:
if child is not None:
child.parent = curr
self.root = curr
self.insert(info, self.root)
else: #curr has a parent
if self.length(curr.parent.info) == 1: # Parent is Two Node:
if curr.parent.children[0] == curr:# cur is lst.
#If P = [curr, M, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M, p-rst].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is rst.
#If P = [p-lst, M, curr], then P becomes [p-lst, M, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif self.length(curr.parent.info) == 2: # Parent is Three Node:
if curr.parent.children[0] == curr: # curr is lst
#If P = [curr, M1, p-mst, M2, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M1, p-mst, M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is mst
#If P = [p-lst, M1, curr, M2, p-rst], then P becomes [p-lst, M1,[lst, X, mlst], Y, [mrst, Z, rst], M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[2] == curr: # curr is rst
#If P = [p-lst, M1, p-mst, M2, curr], then P becomes [p-lst, M1, p-mst, M2, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[2] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
As a partial answer, you can start by eliminating most of the repetition in your code. For example, you don't need to special-case the
lookup()
function for the three different node types. You could instead write something like this:Also, traversals should be implemented as generators, and again, use loops instead of repetitive code:
This kind of a cleanup will firstly make it much easier for others to diagnose the problem. Secondly, you might find that the problem evaporates in the process.