I have made some headway with building my engine. I want my cells to handle concatenation when '&' is in place, for cell names, and strings alike. I am using lark for my formula evaluation:
// String concatenation
?concat_expr : (concat_expr "&")? base
This is my current code dealing with it.
def set_cell_contents(self, sheet_name: str, location: str,
contents: Optional[str]) -> None:
# Set the contents of the specified cell on the specified sheet.
#
# The sheet name match is case-insensitive; the text must match but the
# case does not have to. Additionally, the cell location can be
# specified in any case.
#
# If the specified sheet name is not found, a KeyError is raised.
# If the cell location is invalid, a ValueError is raised.
#
# A cell may be set to "empty" by specifying a contents of None.
#
# Leading and trailing whitespace are removed from the contents before
# storing them in the cell. Storing a zero-length string "" (or a
# string composed entirely of whitespace) is equivalent to setting the
# cell contents to None.
#
# If the cell contents appear to be a formula, and the formula is
# invalid for some reason, this method does not raise an exception;
# rather, the cell's value will be a CellError object indicating the
# naure of the issue.
# finds if sheet inputed is found in the list of sheets
#move this to its own function FUTURE
from .interp import FormulaEvaluator
if(type(contents) == str):
contents = contents.strip()
sheetNum = self.get_sheet_num(sheet_name)
#if valid, set current sheet to sheet_name
self.currentSheet = sheet_name
location = location.upper() # we assume cell is uppercase
# if location outside of extent, expand sheet
self.increase_sheet_size(sheet_name, location)
# if cell in contents is outside of extent, expand sheet
# store sheet_name in cell
self.list_sheets_contents[sheetNum][location].set_cell_contents(contents) # set contents even if there is a error displayed
# reset cell error on calcuation
self.list_sheets_contents[sheetNum][location].cellError = None
#if cell contents is a cell error, assign the cell error to the cell
if type(contents) == CellError:
self.list_sheets_contents[sheetNum][location].set_cellError(contents)
return
elif type(contents) == str and contents[0] == '#':
if contents.lower() == "#error!":
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.PARSE_ERROR, "Error"))
elif contents.lower() == "#circref!":
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.CIRCULAR_REFERENCE, "Circular reference detected"))
elif contents.lower() == '#ref!':
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_REFERENCE, "Bad reference"))
elif contents.lower() == '#name?':
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_NAME, "Bad name"))
elif contents.lower() == '#value!':
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.TYPE_ERROR, "Type error"))
elif contents.lower() == '#div/0!':
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.DIVIDE_BY_ZERO, "Divide by zero error"))
return
# if cell contents is None, done with function
if contents == None or contents == "": # if the cell is empty, set it to None
self.list_sheets_contents[sheetNum][location].contents = None
return
#if cell contents is a float, set it to a decimal
is_decimal = False
try:
float(contents)
is_decimal = True
except ValueError:
is_decimal = False
if(is_decimal):
contents = decimal.Decimal(contents)
self.list_sheets_contents[sheetNum][location].contents = contents
return
# if the cell is not a formula, set it to the contents
if (type(contents) == str and contents[0] != '=') or type(contents) != str:
self.list_sheets_contents[sheetNum][location].set_cell_contents(contents)
return
# Check if the formula contains the '+' or '&' operator
if '&' in contents:
# Split the formula into parts
parts = re.split('[&]', contents)
evaluated_parts = []
for part in parts:
# Recursively call set_cell_contents to handle nested concatenations or other expressions
self.set_cell_contents(sheet_name, location, part.strip())
# Retrieve the evaluated contents after handling the nested expression
evaluated_part = self.list_sheets_contents[sheetNum][location].contents
# Append the evaluated part to the list of evaluated parts
evaluated_parts.append(evaluated_part)
# Concatenate the evaluated parts
contents = ''.join(evaluated_parts)
# if the cell is in bounds, first check for parse errors
evaluator = FormulaEvaluatorChecker(self)
try:
evaluator.visit(lark.Lark.open('sheets/formulas.lark', start='formula').parse(contents))
except Exception as error:
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.PARSE_ERROR, str(error)))
return
parser = lark.Lark.open('sheets/formulas.lark', start='formula')
tree = parser.parse(contents)
v = CellRefFinder()
v.visit(tree)
cellsRefrenced = v.refs
# first we set the new cells outcoming and incoming neighbors
for cell in cellsRefrenced:
cell = cell.upper()
if '!' in cell: # if the cell has an exclamation mark (!), has a sheetname
pattern = r'^(.*?)!(.*)' # Updated regex pattern to match text before and after '!'
match = re.match(pattern, cell)
if match:
currSheet = match.group(1)
#print("currSheet: ", currSheet)
cell = match.group(2)
#get the number sheet of the sheet name
currSheet_int = None
for i in range(len(self.list_of_sheets)):
if self.list_of_sheets[i].lower() == currSheet.lower():
currSheet_int = i
break
if(type(currSheet) != int):
#if sheet is not in list set error to badRefrence
self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_REFERENCE, "Sheet not found"))
else:
#add error catch here if the cell being referenced is not found bc its in another sheet not ready yet
self.increase_sheet_size(currSheet, cell)
self.list_sheets_contents[sheetNum][location].add_incoming_neighbor(self.list_sheets_contents[currSheet_int][cell])
self.list_sheets_contents[currSheet_int][cell].add_outgoing_neighbor(self.list_sheets_contents[sheetNum][location])
else:
self.increase_sheet_size(sheet_name, cell)
self.list_sheets_contents[sheetNum][location].add_incoming_neighbor(self.list_sheets_contents[sheetNum][cell])
self.list_sheets_contents[sheetNum][cell].add_outgoing_neighbor(self.list_sheets_contents[sheetNum][location])
self.list_sheets_contents[sheetNum][location].set_cell_contents(contents)
# then we check for cycles in topological sort
topoSort = TopologicalSort()
self.globalStack = topoSort.topological_sort(self.list_sheets_contents[sheetNum])
PS: If someone could help me with the formula evaluation function, I would greatly appreciate that.
This is how I handle my workbook
global_stack = [] # if this becomes a memory problem, switch get_cell_value to traverse all neighbors to get the order we want to evaluate cells
list_of_sheets: List[str] #list of sheet names
list_sheets_contents: List[dict] #list of the graphs of sheet contents
list_sheets_size: List[list] #list of the sizes of the sheets
globalIteration = 0 #global iteration counter for cycle detection
currentSheet = ""
def __init__(self):
self.list_of_sheets = []
self.list_sheets_contents = []
self.list_sheets_size = []
self.globalIteration = 0
self.currentSheet = ""
These are the tests I am trying :
# Make a new empty workbook
wb = sheets.Workbook()
(index, name) = wb.new_sheet()
# Should print: New spreadsheet "Sheet1" at index 0
print(f'New spreadsheet "{name}" at index {index}')
wb.set_cell_contents(name, 'a1', '12')
wb.set_cell_contents(name, 'b1', '34')
wb.set_cell_contents(name, 'c1', '=a1+b1')
...
# test.set_cell_contents("Sheet2", "B3", "=A5 & b2")
# print("B3 = A5 & b2 (concat cells test!)")
# test.set_cell_contents("Sheet2", "B4", "=taco & cat")
# print("B4 = taco & cat (concat strs test!)")
These are the results I am getting:
B3 value (should be helloworld): ERROR[CellErrorType.PARSE_ERROR, "Unexpected end-of-input. Expected one of:
* BANG
"]
B4 value (should be tacocat): ERROR[CellErrorType.PARSE_ERROR, "Unexpected end-of-input. Expected one of:
* BANG
"]
A15 value (should be #REF!): ERROR[CellErrorType.BAD_REFERENCE, "Sheet not found"]
I also want to work with saving and loading my workbook to and from JSON files This is my code
def load_workbook(fp: TextIO) -> 'Workbook':
"""Load a workbook from a text file or file-like object in JSON format."""
try:
data = json.load(fp)
except json.JSONDecodeError as e:
raise json.JSONDecodeError("Invalid JSON format") from e
workbook = Workbook()
try:
for sheet in data['sheets']:
if not isinstance(sheet, dict):
raise TypeError("Sheet data should be a dictionary")
if 'name' not in sheet or 'cell-contents' not in sheet:
raise KeyError("Sheet data should contain 'name' and 'cell-contents' keys")
if not isinstance(sheet['name'], str) or not isinstance(sheet['cell-contents'], dict):
raise TypeError("'name' should be a string and 'cell-contents' should be a dictionary")
workbook.list_of_sheets.append(sheet['name'])
cell_contents = {k.upper(): v for k, v in sheet['cell-contents'].items()} # Convert cell names to uppercase
workbook.list_sheets_contents.append(cell_contents)
workbook.list_sheets_size.append([len(cell_contents), len(cell_contents[0]) if cell_contents else 0])
except KeyError as e:
raise KeyError("Missing key in JSON data") from e
except TypeError as e:
raise TypeError("Invalid type in JSON data") from e
return workbook
def save_workbook(self, fp: TextIO) -> None:
"""Save the workbook to a text file or file-like object in JSON format."""
data = {
'sheets': [
{
'name': name,
'cell-contents': {k.upper(): str(v) for k, v in contents.items()} # Convert cell names to uppercase and values to strings
}
for name, contents in zip(self.list_of_sheets, self.list_sheets_contents)
]
}
But my tests are not properly acknowledging my code Off the bat, am I doing something wrong