I'm trying to test my minimax algorithm on a game of connect4. I'm currently having it play a game essentially against itself 10 times. In my gameplay, ten games are played with the thing that moves first (computer or player) switching back and forth. I have both the computer and the "player" (another computer) using the same minimax algorithm, so I'd expect them to tie in the number of games that they play. However, I keep seeing that the computer wins every single time. I don't know why my algorithm biases one player; I think it has to do with the way I call the minimax function with the input of "false", but I'm not sure how to fix this.

import numpy as np
import random
import time
import math
import operator
import random

#from numpy.core.multiarray import normalize_axis_index

# Set static variables
ROW_COUNT = 6
COLUMN_COUNT = 7

PLAYER = 0
BOT = 1

EMPTY = 0
PLAYER_PIECE = 1
BOT_PIECE = 2

WINDOW_LENGTH = 4

# Create a Connect-4 board
def create_board():
    board = np.zeros((ROW_COUNT, COLUMN_COUNT))
    return board

def pretty_print_board(board):
    flipped_board = np.flipud(board)

    print("\033[0;37;41m 0 \033[0;37;41m 1 \033[0;37;41m 2 \033[0;37;41m 3 \033[0;37;41m 4 \033[0;37;41m 5 \033[0;37;41m 6 \033[0m")
    for i in flipped_board:
        row_str = ""

        for j in i:
            # print("\033[40;38;5;82m Hello \033[30;48;5;82m World \033[0m")

            if j == 1:
                #print(yellow)
                row_str +="\033[0;37;43m 1 "
            elif j ==2:
                row_str +="\033[0;37;44m 2 "
            else:
                #print black
                row_str +="\033[0;37;45m   "
        print(row_str+"\033[0m")

# Place a piece on the board
def drop_piece(board, row, col, piece):
    board[row][col] = piece

# Check if chosen column has an empty slot
def is_valid_location(board, col):
    return board[ROW_COUNT-1][col] == 0

# Check which row the piece falls into
def get_next_open_row(board, col):
    for r in range(ROW_COUNT):
        if board[r][col] == 0:
            return r

# Get all locations that could contain a piece
def get_valid_locations(board):
    valid_locations= []
    for col in range(COLUMN_COUNT):
        if is_valid_location(board, col):
            valid_locations.append(col)
    return valid_locations

# Look at the board using a 4-piece window to evaluate the whole board & choose a move
def score_position(board, piece):
    score = 0

    # Score centre column
    centre_array = [int(i) for i in list(board[:, COLUMN_COUNT//2])]
    centre_count = centre_array.count(piece)
    score += centre_count * 3

    # Score horizontal positions
    for r in range(ROW_COUNT):
        row_array = [int(i) for i in list(board[r, :])]
        for c in range(COLUMN_COUNT-3):
            # Create a horizontal window of 4
            window = row_array[c:c+WINDOW_LENGTH]
            score += evaluate_window(window, piece)

    # Score vertical positions
    for c in range(COLUMN_COUNT):
        col_array = [int(i) for i in list(board[:, c])]
        for r in range(ROW_COUNT-3):
            # Create a vertical window of 4
            window = col_array[r:r+WINDOW_LENGTH]
            score += evaluate_window(window, piece)

    # Score positive diagonals
    for r in range(ROW_COUNT-3):
        for c in range(COLUMN_COUNT-3):
            # Create a positive diagonal window of 4
            window = [board[r+i][c+i] for i in range(WINDOW_LENGTH)]
            score += evaluate_window(window, piece)

    # Score negative diagonals
    for r in range(ROW_COUNT-3):
        for c in range(COLUMN_COUNT-3):
            # Create a negative diagonal window of 4
            window = [board[r+3-i][c+i] for i in range(WINDOW_LENGTH)]
            score += evaluate_window(window, piece)

    return score

# Set window scores based on contents
def evaluate_window(window, piece):
    score = 0
    # Switch scoring based on turn
    opp_piece = PLAYER_PIECE
    if piece == PLAYER_PIECE:
        opp_piece = BOT_PIECE

    # Prioritise a winning move
    # Minimax makes this less important
    if window.count(piece) == 4:
        score += 100
    # Make connecting 3 second priority
    elif window.count(piece) == 3 and window.count(EMPTY) == 1:
        score += 5
    # Make connecting 2 third priority
    elif window.count(piece) == 2 and window.count(EMPTY) == 2:
        score += 2
    # Prioritise blocking an opponent's winning move (but not over bot winning)
    # Minimax makes this less important
    if window.count(opp_piece) == 3 and window.count(EMPTY) == 1:
        score -= 4

    return score

# Check to see if the game has been won
def winning_move(board, piece):
    # Check valid horizontal locations for win
    for c in range(COLUMN_COUNT-3):
        for r in range(ROW_COUNT):
            if board[r][c] == piece and board [r][c+1] == piece and board[r][c+2] == piece and board[r][c+3] == piece:
                return True

    # Check valid vertical locations for win
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT-3):
            if board[r][c] == piece and board [r+1][c] == piece and board[r+2][c] == piece and board[r+3][c] == piece:
                return True

    # Check valid positive diagonal locations for win
    for c in range(COLUMN_COUNT-3):
        for r in range(ROW_COUNT-3):
            if board[r][c] == piece and board [r+1][c+1] == piece and board[r+2][c+2] == piece and board[r+3][c+3] == piece:
                return True

    # check valid negative diagonal locations for win
    for c in range(COLUMN_COUNT-3):
        for r in range(3, ROW_COUNT):
            if board[r][c] == piece and board [r-1][c+1] == piece and board[r-2][c+2] == piece and board[r-3][c+3] == piece:
                return True

# Pick the best move based on the highest-scoring possible move
# Not required if using Minimax
def pick_best_move(board, piece):
    # Retrieve valid locations from function
    valid_locations = get_valid_locations(board)
    # Start with a base-level best score
    best_score = -1000
    best_col = random.choice(valid_locations)
    for col in valid_locations:
        row = get_next_open_row(board, col)
        # Create a copy of the board to simulate moves
        temp_board = board.copy()
        # Drop a piece in the temporary board and record score
        drop_piece(temp_board, row, col, piece)
        score = score_position(temp_board, piece)
        # Update piece position based on which returns the highest weighting (score)
        if score > best_score:
            best_score = score
            best_col = col
    return best_col

# Define winning moves or no remaining valid locations as terminal nodes (end points)
def is_terminal_node(board):
    return winning_move(board, PLAYER_PIECE) or winning_move(board, BOT_PIECE) or len(get_valid_locations(board)) == 0

# Pick the best move by looking at all possible future moves and comparing their scores
def minimax(board, depth, alpha, beta, maximisingPlayer):
    valid_locations = get_valid_locations(board)

    is_terminal = is_terminal_node(board)
    if depth == 0 or is_terminal:
        if is_terminal:
            # Weight the bot winning really high
            if winning_move(board, BOT_PIECE):
                return (None, 10000000)
            # Weight the human winning really low
            elif winning_move(board, PLAYER_PIECE):
                return (None, -10000000)
            else: # No more valid moves
                return (None, 0)
        # Return the bot's score
        else:
            return (None, score_position(board, BOT_PIECE))

    if maximisingPlayer:
        value = -math.inf
        # Randomise column to start
        column = random.choice(valid_locations)
        for col in valid_locations:
            row = get_next_open_row(board, col)
            # Create a copy of the board
            b_copy = board.copy()
            # Drop a piece in the temporary board and record score
            drop_piece(b_copy, row, col, BOT_PIECE)
            new_score = minimax(b_copy, depth-1, alpha, beta, False)[1]
            if new_score > value:
                value = new_score
                # Make 'column' the best scoring column we can get
                column = col
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return column, value

    else: # Minimising player
        value = math.inf
        # Randomise column to start
        column = random.choice(valid_locations)
        for col in valid_locations:
            row = get_next_open_row(board, col)
            # Create a copy of the board
            b_copy = board.copy()
            # Drop a piece in the temporary board and record score
            drop_piece(b_copy, row, col, PLAYER_PIECE)
            new_score = minimax(b_copy, depth-1, alpha, beta, True)[1]
            if new_score < value:
                value = new_score
                # Make 'column' the best scoring column we can get
                column = col
            beta = min(beta, value)
            if alpha >= beta:
                break
        return column, value



'''
------------- Gameplay --------------
'''

# Initialise game
board = create_board()
game_over = False
turn = 1 # Human goes first

robotWins = 0
playerWins = 0

for i in range(0,10):
    game_over = False
    board = create_board()
    turn = i%2
    while not game_over:
        if turn == PLAYER and not game_over:
            col, minimax_score = minimax(board, 4, -math.inf, math.inf, False) # A higher value takes longer to run
            print("this is the player's minimax score ", minimax_score)
            if is_valid_location(board, col):
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, PLAYER_PIECE)

                if winning_move(board, PLAYER_PIECE):
                    game_over = True
                    playerWins = playerWins + 1
                    #print("Player Wins!")

            # Advance turn & alternate between Player 1 and 2
            turn += 1
            turn = turn % 2

        if turn == BOT and not game_over:

            # Ask Ro-Bot (Player 2) to pick the best move based on possible opponent future moves
            col, minimax_score = minimax(board, 4, -math.inf, math.inf, True) # A higher value takes longer to run
            print("this is the robot's minimax score ", minimax_score)
            # list1 = [0,1,2,3,4,5,6]
            # col = random.choice(list1)

            if is_valid_location(board, col):
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, BOT_PIECE)

                if winning_move(board, BOT_PIECE):
                    game_over = True
                    robotWins = robotWins + 1
                    #print("Ro-Bot Wins!")

            # Advance turn & alternate between Player 1 and 2
            turn += 1
            turn = turn % 2

        # print("")
        # pretty_print_board(board)
        # print("")

        # When game finishes, wait for 30 seconds
        if game_over:
            print('Game finished!', i)

print("The player won this many times", playerWins)
print("The robot won this many times", robotWins)

I tried running my program and I'm keeping track of the number of wins between both the player and the computer. I am expecting that they have a similar number of wins, but instead I'm seeing that the computer consistently beats the "player" even though both the "player" and the "computer" are using the same minimax algorithm.

0

There are 0 best solutions below