I have coded a tokenizer and a recursive parser for a postfix expression. My code is the following:
import re
token_patterns = [
    ('OPERATOR', r'[+\-*/]'),
    ('NUMBER', r'\d+'),
    ('WHITESPACE', r'\s+'),
]
def tokenize(source_code):
    tokens = []
    source_code = source_code.strip()
    while source_code:
        matched = False
        for token_type, pattern in token_patterns:
            match = re.match(pattern, source_code)
            if match:
                value = match.group(0)
                tokens.append((token_type, value))
                source_code = source_code[len(value):].lstrip()
                matched = True
                break
        if not matched:
            raise ValueError(f"Invalid character in source code: {source_code[0]}")
    return tokens
def parse_expression(tokens):
    if not tokens:
        return None
    token_type, value = tokens.pop(0)
    if token_type == 'NUMBER':
        return int(value)
    elif token_type == 'OPERATOR':
        if value in ('+', '-', '*', '/'):
            right = parse_expression(tokens)
            left = parse_expression(tokens)
            return (value, left, right)
        else:
            raise ValueError(f"Unexpected operator: {value}")
    else:
        raise ValueError(f"Unexpected token: {token_type}")
def evaluate_expression(expression):
    if isinstance(expression, int):
        return expression
    elif isinstance(expression, tuple):
        operator, left, right = expression
        if operator == '+':
            return evaluate_expression(left) + evaluate_expression(right)
        elif operator == '-':
            return evaluate_expression(left) - evaluate_expression(right)
        elif operator == '*':
            return evaluate_expression(left) * evaluate_expression(right)
        elif operator == '/':
            return evaluate_expression(left) / evaluate_expression(right)
    else:
        raise ValueError(f"Invalid expression: {expression}")
def main():
    source_code = "2 3 4 * +"
    tokens = tokenize(source_code)
    parsed_expression = parse_expression(tokens)
    
    print(f"Source code: {source_code}")
    print(f"Parsed expression: {parsed_expression}")
    
    result = evaluate_expression(parsed_expression)
    print(f"Result: {result}")
if __name__ == "__main__":
    main()
The part of the tokenize function is working correctly, giving me:
[('NUMBER', '2'), ('NUMBER', '3'), ('NUMBER', '4'), ('OPERATOR', '*'), ('OPERATOR', '+')]
but I would like to return in the
print(f"Parsed expression: {parsed_expression}")
something like this:
Parsed expression: ('+', 2, ('*', 3, 4))
however it only prints 2. Also, the result should be 14 and I also got 2. I am not able to find the mistake. Any help?
                        
For reference, here is the recursive descent parser from the book, "Python Cookbook", by O'Reilly Media.
GitHub – writing_a_simple_recursive_descent_parser.