Minmax changes the current state and I could't figure out how to avoid this problem

34 Views Asked by At

I have implemented a modefied version of Dynamic Connect but I couldn't fix a problem I am facing which is apparantly the minimax alters the current state when it searches the tree. The minimax returns the right move but it doesn't apply it correctly.

def humanPolicy(game, state, player=-1):
  instructions= input('Input action:')
  action = game.instructions_to_actions(state, instructions, player)
  return actin

def minimax(game, st, depth, player):
  def recurse(st, depth):
    if game.isEnd(state, player) or depth == 0:
      return (game.utility(st, player), 0)
    choices = [(recurse(game.action_succ(st, action, player), depth - 1)[0], action) for action in game.actions(st, player)]
    if player ==1:
      return (max(choices))
    elif player == -1:
      return min(choices)
  value, action = recurse(st, depth)
  print('minimax says action={}, value = {}'.format(action, value))
  return action

# stt = {}
w_list =  [(5, 1), (1, 3), (1, 4), (7, 4), (7, 5), (3, 7)]
b_list = [(4, 1), (3, 1), (7, 2), (1, 6), (5, 7), (6, 7)]

policies = {1: humanPolicy, -1: minimax}
game = DynamicConnect(w_list, b_list)
state = game.InitialState()
st = state
player = 1
depth = 3
while True:

  board = [['.' for i in range(7)] for j in range(7)]
  
  if player == 1:
    print('Player: White: O')
  else:
    print('Player: Black: X')
  print(state)
  for (x, y) in state[0]:
    board[y-1][x-1] = 'O'
  for (x, y) in state[1]:
    board[y-1][x-1] = 'X'
  print(*(' '.join(row) for row in board), sep='\n')
  print('='*10, state)
  # policy = policies[player]
  if player == 1:
    action = humanPolicy(game, state, player)
  else:
    action = minimax(game, st,depth, player)
  print(action)
  state = game.action_succ(st, action, player)
  st = state
  player = -player
  if game.isEnd(state, player):
    print('Game over')

print('utility = {}'.format(game.utility(state, player)))
print(state)

This is the action successor function which return a new state but when it receives the action from the minimax it doesn't work properly.

def action_succ(self, state, action, player): 

    initial_pos = action[0]
    new_pos = action[1]
    if player == -1:
      if initial_pos in state[1]:
        state[1][state[1].index(initial_pos)] = new_pos
    else:
      if initial_pos in state[0]:
        state[0][state[0].index(initial_pos)] = new_pos
    return (state)
0

There are 0 best solutions below