I have coded a tokenizer and a recursive parser for a postfix expression. My code is the following:
import re
token_patterns = [
('OPERATOR', r'[+\-*/]'),
('NUMBER', r'\d+'),
('WHITESPACE', r'\s+'),
]
def tokenize(source_code):
tokens = []
source_code = source_code.strip()
while source_code:
matched = False
for token_type, pattern in token_patterns:
match = re.match(pattern, source_code)
if match:
value = match.group(0)
tokens.append((token_type, value))
source_code = source_code[len(value):].lstrip()
matched = True
break
if not matched:
raise ValueError(f"Invalid character in source code: {source_code[0]}")
return tokens
def parse_expression(tokens):
if not tokens:
return None
token_type, value = tokens.pop(0)
if token_type == 'NUMBER':
return int(value)
elif token_type == 'OPERATOR':
if value in ('+', '-', '*', '/'):
right = parse_expression(tokens)
left = parse_expression(tokens)
return (value, left, right)
else:
raise ValueError(f"Unexpected operator: {value}")
else:
raise ValueError(f"Unexpected token: {token_type}")
def evaluate_expression(expression):
if isinstance(expression, int):
return expression
elif isinstance(expression, tuple):
operator, left, right = expression
if operator == '+':
return evaluate_expression(left) + evaluate_expression(right)
elif operator == '-':
return evaluate_expression(left) - evaluate_expression(right)
elif operator == '*':
return evaluate_expression(left) * evaluate_expression(right)
elif operator == '/':
return evaluate_expression(left) / evaluate_expression(right)
else:
raise ValueError(f"Invalid expression: {expression}")
def main():
source_code = "2 3 4 * +"
tokens = tokenize(source_code)
parsed_expression = parse_expression(tokens)
print(f"Source code: {source_code}")
print(f"Parsed expression: {parsed_expression}")
result = evaluate_expression(parsed_expression)
print(f"Result: {result}")
if __name__ == "__main__":
main()
The part of the tokenize function is working correctly, giving me:
[('NUMBER', '2'), ('NUMBER', '3'), ('NUMBER', '4'), ('OPERATOR', '*'), ('OPERATOR', '+')]
but I would like to return in the
print(f"Parsed expression: {parsed_expression}")
something like this:
Parsed expression: ('+', 2, ('*', 3, 4))
however it only prints 2. Also, the result should be 14 and I also got 2. I am not able to find the mistake. Any help?
For reference, here is the recursive descent parser from the book, "Python Cookbook", by O'Reilly Media.
GitHub – writing_a_simple_recursive_descent_parser.