I need help in this one.
I'm trying to implement B-tree insertion but I got stuck in some part because I'm confused with this algorithm. Accordingly to B-tree pseudocode in the book Introduction to Algorithms of the autor Cormen, In my mind I'm coding B-tree insert right but when I check the B-tree in disk after run the algorithm, the data is wrong. I don't know why I'm doing wrong in this code.
The page in Introduction to Algorithms: http://staff.ustc.edu.cn/~csli/graduate/algorithms/book6/chap19.htm
my btree.c file:
#include "../include/btree.h"
#include <stdio.h>
struct BTreeNode
{
// leaf
char leaf;
// numberOfIndexedKeys
int numberOfIndexedKeys;
// RRNnode
int RRNnode;
// Set of keys
int key[4];
// Set of references to other file
ll dataRegReference[4];
// Subtree pointers
int pointers[5];
};
void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
fseek(indexFile->fp, 0, SEEK_SET);
fwrite(&header->status, sizeof(char), 1, indexFile->fp);
fwrite(&header->noderoot, sizeof(int), 1, indexFile->fp);
fwrite(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
fwrite(header->trash, sizeof(char), 68, indexFile->fp);
}
BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
fseek(indexFile->fp, 0, SEEK_SET);
fread(&header->status, sizeof(char), 1, indexFile->fp);
fread(&header->noderoot, sizeof(int), 1, indexFile->fp);
fread(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
fread(header->trash, sizeof(char), 68, indexFile->fp);
return header;
}
void BTreeWriteNode(BTreeNode_t *WriteNode, s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
fseek(indexFile->fp, 77 * (WriteNode->RRNnode + 1), SEEK_SET);
fwrite(&WriteNode->leaf, sizeof(char), 1, indexFile->fp);
fwrite(&WriteNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->RRNnode, sizeof(int), 1, indexFile->fp);
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
fwrite(&WriteNode->pointers[i], sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->key[i], sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->dataRegReference[i], sizeof(ll), 1,
indexFile->fp);
}
fwrite(&WriteNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
}
BTreeNode_t *BTreeReadNode(s_file_t *indexFile, int RRNnode)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeNode_t *ReadNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
fseek(indexFile->fp, 77 * (RRNnode + 1), SEEK_SET);
fread(&ReadNode->leaf, sizeof(char), 1, indexFile->fp);
fread(&ReadNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
fread(&ReadNode->RRNnode, sizeof(int), 1, indexFile->fp);
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
fread(&ReadNode->pointers[i], sizeof(int), 1, indexFile->fp);
fread(&ReadNode->key[i], sizeof(int), 1, indexFile->fp);
fread(&ReadNode->dataRegReference[i], sizeof(ll), 1, indexFile->fp);
}
fread(&ReadNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
return ReadNode;
}
// Creating B-Tree Header
void BTreeHeaderCreate(s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
header->RRNnextNode = 0;
header->noderoot = -1;
header->status = '0';
strcpy(
header->trash,
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");
BTreeWriteHeader(header, indexFile);
}
// INSERTION
// =========================================================================
void cleanNode(BTreeNode_t *node)
{
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
node->key[i] = node->dataRegReference[i] = node->pointers[i] = -1;
}
node->pointers[MAX_INDEXED_KEYS] = -1;
node->numberOfIndexedKeys = 0;
}
BTreeNode_t *createNode(int RRNnode, char leaf)
{
BTreeNode_t *newNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
newNode->RRNnode = RRNnode;
newNode->leaf = leaf;
newNode->numberOfIndexedKeys = 0;
cleanNode(newNode);
return newNode;
}
void InsertInNode(BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
if (root->numberOfIndexedKeys > MAX_INDEXED_KEYS)
{
printf("Aqui!");
}
/*
for(int i = 0; i < root->numberOfIndexedKeys; i++){
if(key.key == root->chaves[i].key) return;
}
*/
int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
{
// Shifting keys and dataRegReference
root->key[numberOfIndexedKeysOfNode + 1] = root->key[numberOfIndexedKeysOfNode];
root->dataRegReference[numberOfIndexedKeysOfNode + 1] = root->dataRegReference[numberOfIndexedKeysOfNode];
// Shifting pointers
root->pointers[numberOfIndexedKeysOfNode + 2] = root->pointers[numberOfIndexedKeysOfNode + 1];
numberOfIndexedKeysOfNode--;
}
numberOfIndexedKeysOfNode++;
root->pointers[numberOfIndexedKeysOfNode + 1] = root->pointers[numberOfIndexedKeysOfNode];
root->key[numberOfIndexedKeysOfNode] = key;
root->dataRegReference[numberOfIndexedKeysOfNode] = dataRegReference;
root->pointers[numberOfIndexedKeysOfNode] = pointerLeft;
root->pointers[numberOfIndexedKeysOfNode + 1] = pointerRight;
root->numberOfIndexedKeys++;
BTreeWriteNode(root, indexFile);
}
BTreeNode_t *SearchNode(s_file_t *indexFile, int RRNnode, int key)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeNode_t *searchNode = BTreeReadNode(indexFile, RRNnode);
int pos = 0;
while (key > searchNode->key[pos] && pos < searchNode->numberOfIndexedKeys)
pos++;
if (key == searchNode->key[pos])
return searchNode;
if (searchNode->leaf == '1')
return NULL;
return SearchNode(indexFile, searchNode->pointers[pos], key);
}
void SplitChild(BTreeHeader_t *header, s_file_t *indexFile, BTreeNode_t *root, BTreeNode_t *child)
{
int keys[MAX_INDEXED_KEYS];
ll dataRegReference[MAX_INDEXED_KEYS];
int pointers[MAX_INDEXED_KEYS + 1];
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
keys[i] = child->key[i];
dataRegReference[i] = child->dataRegReference[i];
pointers[i] = child->pointers[i];
}
pointers[MAX_INDEXED_KEYS] = child->pointers[MAX_INDEXED_KEYS];
cleanNode(child);
BTreeNode_t *newNode = createNode(header->RRNnextNode, child->leaf);
header->RRNnextNode++;
InsertInNode(child, indexFile, keys[0], dataRegReference[0], pointers[0], pointers[1]);
InsertInNode(child, indexFile, keys[1], dataRegReference[1], pointers[1], pointers[2]);
pointers[2] = child->RRNnode;
InsertInNode(root, indexFile, keys[2], dataRegReference[2], pointers[2], newNode->RRNnode);
InsertInNode(newNode, indexFile, keys[3], dataRegReference[3], pointers[3], pointers[4]);
}
void InsertNonFull(BTreeHeader_t *header, BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
if (root->leaf == '1')
{
InsertInNode(root, indexFile, key, dataRegReference, pointerLeft, pointerRight);
}
else
{
while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
numberOfIndexedKeysOfNode--;
numberOfIndexedKeysOfNode++;
BTreeNode_t *childNode = NULL;
childNode = BTreeReadNode(indexFile, root->pointers[numberOfIndexedKeysOfNode]);
if (childNode->numberOfIndexedKeys == MAX_INDEXED_KEYS)
{
SplitChild(header, indexFile, root, childNode);
}
if (key > root->key[numberOfIndexedKeysOfNode])
numberOfIndexedKeysOfNode++;
InsertNonFull(header, childNode, indexFile, key, dataRegReference, pointerLeft, pointerRight);
}
}
void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
char removido)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
if (removido == '0')
return;
BTreeHeader_t *header = BTreeReadHeader(indexFile);
BTreeNode_t *root = BTreeReadNode(indexFile, header->noderoot);
if (root->numberOfIndexedKeys == MAX_INDEXED_KEYS)
{
BTreeNode_t *newRoot = createNode(header->RRNnextNode + 1, '0');
SplitChild(header, indexFile, newRoot, root);
header->RRNnextNode++;
header->noderoot = newRoot->RRNnode;
InsertNonFull(header, newRoot, indexFile, codLinha, offsetRegistro, -1, -1);
}
else
InsertNonFull(header, root, indexFile, codLinha, offsetRegistro, -1, -1);
BTreeWriteHeader(header, indexFile);
}
void BtreeCreate(s_file_t *indexFile)
{
BTreeHeaderCreate(indexFile);
BTreeHeader_t *header = BTreeReadHeader(indexFile);
BTreeNode_t *newRoot = createNode(header->RRNnextNode, '1');
header->RRNnextNode++;
header->noderoot = newRoot->RRNnode;
BTreeWriteHeader(header, indexFile);
BTreeWriteNode(newRoot, indexFile);
free(header);
free(newRoot);
}
// ==========================================================================
// B-TREE TRAVERSAL
// ===========================================================================
void InOrderTraversal(int RRNroot, s_file_t *indexFile)
{
if (RRNroot == -1)
return;
BTreeNode_t *root = BTreeReadNode(indexFile, RRNroot);
//printf("node: %d ", root->RRNnode);
for (int i = 0; i < root->numberOfIndexedKeys; i++)
{
InOrderTraversal(root->pointers[i], indexFile);
//printf("(%d, %lld)", root->chaves[i].key, root->chaves[i].dataRegReference);
}
InOrderTraversal(root->pointers[root->numberOfIndexedKeys], indexFile);
}
my btree.h file:
#ifndef BTREE_H_
#define BTREE_H_
#include "funcoes-gerais.h"
#include "linha.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_INDEXED_KEYS 4
typedef struct BTreeNode BTreeNode_t;
typedef struct BTreeHeader BTreeHeader_t;
typedef long long int ll;
struct BTreeHeader {
char status;
int noderoot;
int RRNnextNode;
// This guy is not important, ignore this
char trash[69];
};
void BTreeHeaderCreate(s_file_t *indexFile);
void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
char removido);
void BtreeCreate(s_file_t *indexFile);
void InOrderTraversal(int RRNroot, s_file_t *indexFile);
BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile);
void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile);
#endif
In this B-tree, the order is 5. Then, I can get at most 4 keys and 5 pointers in a same node. One of my doubt is the splitChild algorithm. How am I split the node and promove the key if I get exactly the maximum of keys in the node accordingly with the book's algorithm? If I give one more space for insert the key for each node and split only if it has the maximum + 1 keys in the node, the algorithm will split only in the next insertion. This makes me get messy.