The Monte Carlo Tree Search implementation. To the Othello

235 Views Asked by At

This question is about Monte Carlo tree search - How can we improve the performance or some move that the algorithm is going to make each turn.

I have tried a few things to improve it. First, I try to make the simulation part to not just random but pick the simulation move by the weighted graph.

That didn't work and my bot is still very dumb. Even if I guess all the moves, I still Win. The second method I try is to increase the simulation number from 1000 to 4000. This didn't work neither. Increasing the number of the simulation just make the runtime slower. And the last method that I try is to change the C constant number in the UCT formula to make the bot explore more node in the tree not only the highest score one.

And I'm not very sure that I have implemented all the method or the Othello class itself all correct or not. So I'm very sorry if some of the code is wrong or if it is written badly.

So the question is can you guy suggest me some more method to try or share me some idea that I can try to implement to mine code. Thank you. Sorry for my english skill.

Here is the MCTS code part.

import math
import random
import numpy as np

class MCTSNode:
    def __init__(self, state, parent=None, move=None):
        self.state = state
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.value = 0
        
    def select(self):
        """Select a child node using UCB1 (Upper Confidence Bound)."""
        total_visits = sum(child.visits for child in self.children)
        return max(self.children, key=lambda node: node.value / (1 + node.visits) + math.sqrt(8 * math.log(total_visits) / (1 + node.visits)))

    # Expand the node one by one each iteration
    # def expand(self, moves):
    #     """Expand the current node by adding all possible moves."""
    #     for move in moves:
    #         if move not in [child.move for child in self.children]:
    #             new_state = self.state.copy()
    #             new_state.make_move(move)
    #             self.children.append(MCTSNode(new_state, parent=self, move=move))

    def expand(self, moves):
        """Expand the current node by adding one possible move."""
        for move in moves:
            if move not in [child.move for child in self.children]:
                new_state = self.state.copy()
                new_state.make_move(move)
                self.children.append(MCTSNode(new_state, parent=self, move=move))
                break


    # this is the Function Evaluate method            
    def rollout(self):
        """Perform a random simulation from this node."""
        current_state = self.state.copy()
        while not current_state.is_game_over():  # You'll need to define `is_game_over` in your game class
            all_move = current_state.get_valid_move()
            move = max(all_move, key=lambda x:current_state.evaluate(x))
            current_state.make_move(move)
        result = current_state.get_result()
        if result == 1:
            return -1
        elif result == -1:
            return 1
        else:
            return 0  # You'll need to define `get_result` in your game class, should return 1 for win, -1 for loss, and 0 for draw
    
    # This is the weight graph method
    def rollout(self):
        """Perform a random simulation from this node."""
        current_state = self.state.copy()
        while not current_state.is_game_over():  
            all_move = current_state.get_valid_move()
            move = max(all_move, key=lambda x:current_state.weights[x[0]][x[1]])
            current_state.make_move(move)
        result = current_state.get_result()
        if result == 1:
            return -1
        elif result == -1:
            return 1
        else:
            return 0
        
    # this is the random method
    # def rollout(self):
    #     """Perform a random simulation from this node."""
    #     current_state = self.state.copy()
    #     while not current_state.is_game_over(): 
    #         move = random.choice(current_state.get_valid_move())
    #         current_state.make_move(move)
    #     result = current_state.get_result()
    #     if result == 1:
    #         return -1
    #     elif result == -1:
    #         return 1
    #     else:
    #         return 0
        
    def backpropagate(self, result):
        """Backpropagate the result of a simulation up to the root."""
        self.visits += 1
        self.value += result
        if self.parent:
            self.parent.backpropagate(-result)

def MCTS(root_state, n_simulations=1000):
    # assign the whole board
    root = MCTSNode(root_state)
    for _ in range(n_simulations):
        # copy it so the node will change every loop that occur
        node = root
        # Selection
        # selection one node from what ever
        while len(node.children) > 0:
            if node == root and len(node.children) < len(root.state.get_valid_move()):
                break
            node = node.select()
        # Expansion
        # get all the move assign it to the node children then only pick one
        legal_moves = node.state.get_valid_move()
        if not node.state.is_game_over() and legal_moves:
            node.expand(legal_moves)
            node = node.children[-1]
        # Simulation
        # random move till the leaf return 1 -1 or 0
        result = node.rollout()
        # Backpropagation
        # sum all the way up to the root 
        node.backpropagate(result)
    # Return the best move from the root
    for k in root.children:
        print(k.visits, k.move)
    return max(root.children, key=lambda node: node.visits).move

And over here is the Othello (reversi) game code.

board = [[0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0]]

# 1 = white
# 2 = black


class Othello:
    def __init__(self, grid):
        self.grid = grid
        self.direction = [(0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1)]
         #top top_right right bottom_right bottom bottom_left left top_left 
        self.current_player = 1
        self.weights = [
            [120, -20, 20, 5, 5, 20, -20, 120],
            [-20, -40, -5, -5, -5, -5, -40, -20],
            [20, -5, 15, 3, 3, 15, -5, 20],
            [5, -5, 3, 3, 3, 3, -5, 5],
            [5, -5, 3, 3, 3, 3, -5, 5],
            [20, -5, 15, 3, 3, 15, -5, 20],
            [-20, -40, -5, -5, -5, -5, -40, -20],
            [120, -20, 20, 5, 5, 20, -20, 120]
        ]


    def initilize_board(self):
        self.grid[3][3] = 1
        self.grid[3][4] = 2
        self.grid[4][3] = 2
        self.grid[4][4] = 1

    def valid_coord(self, row, col):
        if 0 <= row < 8 and 0 <= col < 8:
            return True
        return False

    def valid_move(self, start):

        if start != () and self.valid_coord(start[0], start[1]) and self.grid[start[0]][start[1]] == 0:
            for direction in self.direction:
                if self.has_tile_to_flip(start, direction):
                    return True
        return False

    def get_valid_move(self):
        moves = []
        for row in range(8):
            for col in range(8):
                move = (row, col)
                if self.valid_move(move):
                    moves.append(move)
        return moves

    def has_tile_to_flip(self, start, direction):
        i = 1
        if self.valid_coord(start[0], start[1]):
            while True:
                row = start[0] + direction[0] * i
                col = start[1] + direction[1] * i
                if not self.valid_coord(row, col) or \
                    self.grid[row][col] == 0:
                    return False
                elif self.grid[row][col] == self.current_player:
                    break
                else:
                    i += 1
        return i > 1
    
    def flip_tile(self, start):
        for direction in self.direction:
            if self.has_tile_to_flip(start, direction):
                i = 1
                while True:
                    row = start[0] + direction[0] * i
                    col = start[1] + direction[1] * i
                    if self.grid[row][col] == self.current_player:
                        break
                    else:
                        self.grid[row][col] = self.current_player
                        i += 1

    def evaluate(self, move):
        fake = self.copy()
        fake.make_move(move)
        score = 0

        # Use the weight table for evaluation
        for i in range(8):
            for j in range(8):
                if fake.grid[i][j] == 2:
                    score += self.weights[i][j]
                elif fake.grid[i][j] == 1: 
                    score -= self.weights[i][j]


        return score


    def make_move(self, start):
        if self.valid_move(start):
            self.grid[start[0]][start[1]] = self.current_player
            self.flip_tile(start)
            # self.score[self.current_player] += 1
            self.current_player = 2 if self.current_player == 1 else 1
        else:
            self.current_player = 2 if self.current_player == 1 else 1

    def copy(self):
        new_game = Othello([row.copy() for row in self.grid])
        new_game.current_player = self.current_player
        return new_game

    def is_game_over(self):
        return not self.get_valid_move()  # The game is over if there are no valid moves

    def get_result(self):
        player_one_tiles = sum(row.count(1) for row in self.grid)
        player_two_tiles = sum(row.count(2) for row in self.grid)
        
        if player_one_tiles > player_two_tiles:
            # If player one has more tiles, they win
            return 1
        elif player_two_tiles > player_one_tiles:
            # If player two has more tiles, they win
            return -1
        else:
            # Draw
            return 0

And this is the final part of the code where it initilize the whole thing.

def play_game():
    game = Othello(board)
    game.initilize_board()

    while not game.is_game_over():
        print(np.array(game.grid))
        print(game.get_valid_move())
        
        if game.current_player == 1:
            # Human player's turn
            x, y = map(int, input("Enter your move as 'row col': ").split())
            move = (x, y)
            try:
                game.make_move(move)
            except ValueError:
                print("Invalid move. Try again.")
                continue
        else:
            # AI's turn
            print("AI is thinking...")
            move = MCTS(game, n_simulations=4000)
            game.make_move(move)

    print(np.array(game.grid))
    result = game.get_result()
    if result == 0:
        print("It's a draw!")
    elif result == 1:
        print("Player 1 wins!")
    else:
        print("Player 2 (AI) wins!")

if __name__ == "__main__":
    play_game()

I have already tried to change the Constant method, change the random choose to choose by weight graph and change the random to choose from the Function Evaluate by weight graph. And of course I had tried to randomize it.

What I was expecting is some kind of idea or some method that help the code choose more optimal move each iteration.

0

There are 0 best solutions below