I have implemented a modefied version of Dynamic Connect but I couldn't fix a problem I am facing which is apparantly the minimax alters the current state when it searches the tree. The minimax returns the right move but it doesn't apply it correctly.
def humanPolicy(game, state, player=-1):
instructions= input('Input action:')
action = game.instructions_to_actions(state, instructions, player)
return actin
def minimax(game, st, depth, player):
def recurse(st, depth):
if game.isEnd(state, player) or depth == 0:
return (game.utility(st, player), 0)
choices = [(recurse(game.action_succ(st, action, player), depth - 1)[0], action) for action in game.actions(st, player)]
if player ==1:
return (max(choices))
elif player == -1:
return min(choices)
value, action = recurse(st, depth)
print('minimax says action={}, value = {}'.format(action, value))
return action
# stt = {}
w_list = [(5, 1), (1, 3), (1, 4), (7, 4), (7, 5), (3, 7)]
b_list = [(4, 1), (3, 1), (7, 2), (1, 6), (5, 7), (6, 7)]
policies = {1: humanPolicy, -1: minimax}
game = DynamicConnect(w_list, b_list)
state = game.InitialState()
st = state
player = 1
depth = 3
while True:
board = [['.' for i in range(7)] for j in range(7)]
if player == 1:
print('Player: White: O')
else:
print('Player: Black: X')
print(state)
for (x, y) in state[0]:
board[y-1][x-1] = 'O'
for (x, y) in state[1]:
board[y-1][x-1] = 'X'
print(*(' '.join(row) for row in board), sep='\n')
print('='*10, state)
# policy = policies[player]
if player == 1:
action = humanPolicy(game, state, player)
else:
action = minimax(game, st,depth, player)
print(action)
state = game.action_succ(st, action, player)
st = state
player = -player
if game.isEnd(state, player):
print('Game over')
print('utility = {}'.format(game.utility(state, player)))
print(state)
This is the action successor function which return a new state but when it receives the action from the minimax it doesn't work properly.
def action_succ(self, state, action, player):
initial_pos = action[0]
new_pos = action[1]
if player == -1:
if initial_pos in state[1]:
state[1][state[1].index(initial_pos)] = new_pos
else:
if initial_pos in state[0]:
state[0][state[0].index(initial_pos)] = new_pos
return (state)