Monte Carlo Tree Search random choices

319 Views Asked by At

My IS-MCTS implementation always chooses to go all-in, and I don't know why. Maybe you can help me?

I've already tried changing the value saved in the node from wins to value (meaning the amount of chips earned), but that gave bad results too. The algorithm even loses to a random player and to a call-only player.

Is there anything wrong with the MCTS method? If not, the problem may be in the ucb1 method or the Node class.

import math
import random

def find_best_action(self, rootstate):
    """Run IS-MCTS from *rootstate* and return the most-visited root action.

    Performs ``self.max_iterations`` iterations of
    determinize / select / expand / rollout / backpropagate, then returns
    the action of the root child with the highest visit count.

    Parameters:
        rootstate: game state object providing ``reset``, ``randomize``,
            ``is_terminal``, ``get_valid_actions``, ``get_current_player``,
            ``act`` and ``get_result`` (assumed from usage — confirm against
            the state class).

    Returns:
        The action of the most-visited child of the root.
    """
    rootnode = Node()

    for _ in range(self.max_iterations):
        node = rootnode

        # Determinization: sample a concrete state consistent with the
        # current player's information set.
        state = rootstate.reset()
        state.randomize(state.get_current_player())

        # Selection / Expansion: descend while the current node is fully
        # expanded; as soon as an untried action exists, expand exactly
        # once and fall through to the rollout.  (The original expanded
        # all the way to a terminal state, which made the rollout loop
        # below dead code.)
        while not state.is_terminal():
            untried_actions = node.get_untried_actions(state.get_valid_actions())
            if untried_actions:
                # Expansion: add one child, descend to it, stop expanding.
                a = random.choice(untried_actions)
                node = node.add_child(a, state.get_current_player())
                state.act(a)
                break
            # Selection: follow the UCB1-best child.
            node = node.select_child()
            state.act(node.action)

        # Rollout: play uniformly at random until the game ends.
        while not state.is_terminal():
            state.act(random.choice(state.get_valid_actions()))

        # Backpropagation: update every node on the visited path back to
        # the root.
        while node is not None:
            node.update(state)
            node = node.parent_node

    # The most-visited child is the robust choice of final action.
    return max(rootnode.child_nodes, key=lambda n: n.visits).action

I guess it has to be something within the Node class that selects the wrong children.

import math
import random
class Node:
    """A node in the IS-MCTS search tree.

    Stores the action that led to this node, the player who took that
    action, and the win/visit statistics used by UCB1 selection.
    """

    def __init__(self, action=None, parent=None, acted_player=None):
        self.action = action              # action that led to this node (None for root)
        self.parent_node = parent         # None for the root
        self.child_nodes = []
        self.wins = 0                     # accumulated result for acted_player
        self.visits = 0
        self.acted_player = acted_player  # player who took `action`

    def get_untried_actions(self, valid_actions):
        """Return the subset of *valid_actions* that has no child node yet."""
        tried_actions = [child.action for child in self.child_nodes]
        return [action for action in valid_actions if action not in tried_actions]

    def select_child(self, exploration=0.7):
        """Return the child with the highest UCB1 score."""
        return max(self.child_nodes,
                   key=lambda node: node.calc_ucb1_score(exploration))

    def add_child(self, a, p):
        """Create, attach and return a child for action *a* taken by player *p*."""
        n = Node(action=a, parent=self, acted_player=p)
        self.child_nodes.append(n)
        return n

    def update(self, terminal_state):
        """Record one visit and credit the acting player's terminal result."""
        self.visits += 1
        if self.acted_player is not None:
            self.wins += terminal_state.get_result(self.acted_player)

    def calc_ucb1_score(self, exploration):
        """UCB1 score; unvisited nodes score +inf so they are tried first.

        Returning 0 for unvisited nodes (the original behaviour) starves
        exploration: once any sibling has a positive average it is selected
        forever, which is exactly how the search can lock onto one action.
        """
        if self.visits == 0:
            return float('inf')
        # sqrt/log must be qualified: the file does `import math`, not
        # `from math import *`, so the bare names would raise NameError.
        return self._calc_avg_wins() + exploration * math.sqrt(
            2 * math.log(self.parent_node.visits) / self.visits)

    def _calc_avg_wins(self):
        """Average result per visit (0.0 for an unvisited node)."""
        if self.visits == 0:
            return 0.0
        return float(self.wins) / float(self.visits)
0

There are 0 best solutions below