I've built a function called `foo` that alters a function's code at the bytecode level and executes the altered code before returning to the regular function execution flow.
import sys
from types import CodeType
def foo():
    """Execute a patched copy of the caller's code on copies of its scopes.

    The calling frame is fetched one level up the stack, its code object is
    handed to ``do_something``, and the code object that comes back is run
    with ``exec`` against shallow copies of the caller's globals and locals.
    Nothing is returned.

    NOTE(review): the traceback recorded with this snippet shows the ``exec``
    call raising ``UnboundLocalError`` for the caller's local ``bar``.
    Presumably the function-level code object reads its locals from fast-local
    slots rather than from the mapping passed to ``exec`` -- confirm.
    """
    caller = sys._getframe(1)  # frame of whoever called foo (main in this script)
    patched: CodeType = do_something(caller.f_code)  # altered code object

    # Shallow snapshots: exec receives dicts that are separate from the
    # caller's own namespace objects.
    globals_snapshot: dict = dict(caller.f_globals)
    locals_snapshot: dict = dict(caller.f_locals)

    # Run the altered bytecode before control goes back to the caller.
    exec(patched, globals_snapshot, locals_snapshot)
# Demo caller. foo() looks up this function's frame and passes its code object
# to do_something, so the statements below are also the bytecode being patched.
def main():
    bar: list = []
    # run altered code
    foo()
    # return to regular code
    bar.append(0)
    return bar
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
However, there is a problem with the evaluation of the local variable during `exec`:
Traceback (most recent call last):
File "C:\Users\Pedro\main.py", line 31, in <module>
main()
File "C:\Users\Pedro\main.py", line 23, in main
foo()
File "C:\Users\Pedro\main.py", line 15, in foo
exec(main_code, main_globals, main_locals)
File "C:\Users\Pedro\main.py", line 26, in main
bar.append(0)
UnboundLocalError: local variable 'bar' referenced before assignment
If I print `main_locals` before the call to `exec`, it shows exactly the same contents as it would if printed before calling `foo`. I wonder if it has to do with any of the `frame.f_code.co_*` arguments passed to the `CodeType` constructor. They are pretty much the same, except for the actual bytecode, `frame.f_code.co_code`, on which I performed a few operations.
I need help understanding why the evaluation of the code under these globals and locals fails to reference `main`'s local variables.
Note: I'm pretty sure that the changes made to `main`'s bytecode prevent the process from going into unwanted recursion.
Edit: As asked in the comments, the basic behaviour of `do_something` can be summarized as removing all of `main`'s code before the call to `foo`. Some additional steps would involve applying changes to local variables, i.e. `bar`.
import copy
import dis
## dump opcodes into global scope
# Every opcode name in dis.opmap (LOAD_GLOBAL, NOP, LOAD_CONST, ...) becomes a
# module-level integer; do_something refers to these names directly.
globals().update(dis.opmap)
# Operand byte written next to NOP wherever do_something blanks an instruction.
NULL = 0
def do_something(f_code) -> CodeType:
    """Return a patched copy of ``f_code`` with the call to ``foo`` blanked.

    The first ``LOAD_GLOBAL`` whose name is ``'foo'`` is located. That
    instruction and the one after it are overwritten with ``NOP`` and
    ``LOAD_CONST None``, the last instruction of the bytecode is overwritten
    with ``NOP``, and every instruction before the call site is overwritten
    with ``NOP`` unless the scan below reaches it through a backward
    ``JUMP_ABSOLUTE``.

    The opcode names (``LOAD_GLOBAL``, ``NOP``, ``LOAD_CONST``,
    ``JUMP_ABSOLUTE``) and ``NULL`` are module-level globals, not locals.

    NOTE(review): ``co_code`` is walked in fixed 2-byte steps, operands are
    read as a single byte (no EXTENDED_ARG handling), and jump operands are
    used directly as byte offsets. Presumably this targets one specific
    CPython bytecode layout -- confirm before running on other versions.

    Args:
        f_code: Code object to patch; ``foo`` passes its caller's ``f_code``.

    Returns:
        A new ``CodeType`` whose ``co_code`` is the patched bytecode and whose
        ``co_consts`` has ``None`` appended; all other fields are copied from
        ``f_code``.

    Raises:
        NameError: If no ``LOAD_GLOBAL`` instruction names ``'foo'``.
    """
    bytecode = f_code.co_code
    f_consts = copy.deepcopy(f_code.co_consts)

    # Walk the bytecode as (opcode, operand) byte pairs.
    for i in range(0, len(bytecode), 2):
        cmd, arg = bytecode[i], bytecode[i+1]
        # watch for the first occurrence of calling 'foo'
        if cmd == LOAD_GLOBAL and f_code.co_names[arg] == 'foo':
            break # 'i' keeps the call-site offset and is used below
    else:
        # for-else: the scan finished without finding the call site
        raise NameError('foo is not defined.')

    f_bytelist = list(bytecode)
    # Overwrite the two instructions (4 bytes) at the call site.
    f_bytelist[i:i+4] = [
        NOP, NULL, ## LOAD: replaces the LOAD_GLOBAL of 'foo'
        LOAD_CONST, len(f_consts) ## CALL: replaces the instruction after it
        # Constant 'None' will be added to 'f_consts' at index len(f_consts)
    ]
    # NOTE(review): the final instruction (the RETURN, per the original
    # comment) becomes a NOP; confirm what is expected to execute after it.
    f_bytelist[-2:] = [NOP, NULL] # 'main' function RETURN

    # This piece of code removes all code before
    # calling 'foo' (except for JUMP_ABSOLUTE) so
    # it can be used inside while loops.
    null_code = [True] * i # null_code[k] stays True while offset k may be blanked
    j = i + 2
    while j < len(f_bytelist):
        if j >= i:
            # At or after the call site: follow a backward JUMP_ABSOLUTE the
            # first time its target is seen (null_code[arg] is still True).
            cmd, arg = f_bytelist[j], f_bytelist[j+1]
            if cmd == JUMP_ABSOLUTE and arg < i and null_code[arg]:
                j = arg
            else:
                j += 2
        else:
            # Before the call site, reached through a jump: keep this one.
            null_code[j] = False
            j += 2
    else:
        # while-else: the loop has no break, so this always runs afterwards.
        for j in range(0, i, 2):
            if null_code[j]:
                f_bytelist[j:j+2] = [NOP, NULL] # skip instruction
            else:
                continue

    f_bytecode = bytes(f_bytelist)
    f_consts = f_consts + (None,) ## Add constant to return

    # Rebuild the code object; only co_code and co_consts differ from f_code.
    return CodeType(
        f_code.co_argcount,
        f_code.co_kwonlyargcount,
        f_code.co_posonlyargcount, # Remove this if Python < 3.8
        f_code.co_nlocals,
        f_code.co_stacksize,
        f_code.co_flags,
        f_bytecode,
        f_consts,
        f_code.co_names,
        f_code.co_varnames,
        f_code.co_filename,
        f_code.co_name,
        f_code.co_firstlineno,
        f_code.co_lnotab,
        f_code.co_freevars,
        f_code.co_cellvars
    )