Skip to article frontmatter. Skip to article content.
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Chapter 3: Multiagent Search - Implementation

Complete Python implementations of game-playing algorithms.

Contents

  1. Framework

  2. Minimax

  3. Alpha-Beta Pruning

  4. MCTS

  5. Tic-Tac-Toe

  6. Connect Four

  7. Comparisons

1. Framework {#framework}

import numpy as np                    # board arrays for the example games
import math                           # sqrt/log for the UCT formula in MCTS
import random                         # rollout policy and untried-action choice in MCTS
import time                           # wall-clock timing of the demo cells
from typing import List, Optional, Tuple, Any
from abc import ABC, abstractmethod   # abstract GameState interface below
from copy import deepcopy             # snapshot a state before an MCTS rollout

print('Libraries imported')
class GameState(ABC):
    """Abstract interface for a two-player, turn-based game state.

    Players are identified by integers; the games in this notebook use
    +1 and -1 with alternating turns. The search algorithms (minimax,
    alpha-beta, MCTS) interact with a game only through these methods.
    """

    @abstractmethod
    def get_legal_actions(self, player: int) -> List[Any]:
        """Return the list of legal actions for `player` in this state."""
        pass

    @abstractmethod
    def get_successor(self, action: Any):
        """Return the new state reached by applying `action` (does not mutate self)."""
        pass

    @abstractmethod
    def is_terminal(self) -> bool:
        """Return True when the game is over (win, loss, or draw)."""
        pass

    @abstractmethod
    def utility(self, player: int) -> float:
        """Return the game value from `player`'s perspective (subclasses here use +1/0/-1)."""
        pass

    @abstractmethod
    def current_player(self) -> int:
        """Return the player whose turn it is to move."""
        pass

    def display(self):
        # Default rendering; subclasses override with a board drawing.
        print(self)

print('GameState framework defined')

2. Minimax {#minimax}

def minimax(state, depth=0, maximizing=True, eval_fn=None, max_depth=float('inf')):
    """Depth-limited minimax search.

    All values are from the MAXIMIZER's (player 1's) fixed perspective, so
    call with maximizing=True when it is player 1's turn at the root.

    Args:
        state: GameState-like object (get_legal_actions, get_successor,
            is_terminal, utility, current_player).
        depth: current recursion depth (internal bookkeeping).
        maximizing: True at max nodes (player 1 to move), False at min nodes.
        eval_fn: heuristic used at the depth cutoff; must score states from
            player 1's perspective so values compare consistently.
        max_depth: ply limit; past it eval_fn is used when provided.

    Returns:
        (value, action): minimax value of `state` and the best action
        (action is None at terminal or cutoff nodes).
    """
    # Terminals are always scored from the maximizer's (player 1's)
    # perspective. Scoring for the side to move (the old
    # `1 if maximizing else -1`) flips the sign at alternate depths and
    # makes the maximizer rate its own winning moves at -1.
    if state.is_terminal():
        return state.utility(1), None

    if depth >= max_depth and eval_fn:
        return eval_fn(state), None

    player = state.current_player()
    actions = state.get_legal_actions(player)

    # No moves available: treat as terminal, same fixed perspective.
    if not actions:
        return state.utility(1), None

    if maximizing:
        max_val = float('-inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            val, _ = minimax(successor, depth + 1, False, eval_fn, max_depth)
            if val > max_val:
                max_val = val
                best_action = action
        return max_val, best_action
    else:
        min_val = float('inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            val, _ = minimax(successor, depth + 1, True, eval_fn, max_depth)
            if val < min_val:
                min_val = val
                best_action = action
        return min_val, best_action

print('Minimax implemented')

3. Alpha-Beta Pruning {#alphabeta}

def alpha_beta(state, depth=0, alpha=float('-inf'), beta=float('inf'),
               maximizing=True, eval_fn=None, max_depth=float('inf')):
    """Minimax with alpha-beta pruning (same contract as `minimax`).

    All values are from the MAXIMIZER's (player 1's) fixed perspective;
    call with maximizing=True when player 1 moves at the root. `eval_fn`,
    used at the depth cutoff, must also score from player 1's perspective.

    Args:
        state: GameState-like object.
        depth: current recursion depth (internal).
        alpha: best value the maximizer can guarantee so far.
        beta: best value the minimizer can guarantee so far.
        maximizing: True at max nodes, False at min nodes.
        eval_fn: heuristic for cutoff nodes (player-1 perspective).
        max_depth: ply limit before eval_fn is applied.

    Returns:
        (value, action): value of the node and the best action
        (action is None at terminal/cutoff nodes).
    """
    # Terminals are scored from the fixed maximizer's perspective
    # (player 1); using the side to move here would flip signs at
    # alternate depths (same bug as in the unpruned minimax).
    if state.is_terminal():
        return state.utility(1), None

    if depth >= max_depth and eval_fn:
        return eval_fn(state), None

    player = state.current_player()
    actions = state.get_legal_actions(player)

    # No moves available: treat as terminal, same fixed perspective.
    if not actions:
        return state.utility(1), None

    if maximizing:
        value = float('-inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            child_val, _ = alpha_beta(successor, depth + 1, alpha, beta,
                                      False, eval_fn, max_depth)
            if child_val > value:
                value = child_val
                best_action = action
            alpha = max(alpha, value)
            if beta <= alpha:
                break  # Prune: minimizer above will never allow this branch
        return value, best_action
    else:
        value = float('inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            child_val, _ = alpha_beta(successor, depth + 1, alpha, beta,
                                      True, eval_fn, max_depth)
            if child_val < value:
                value = child_val
                best_action = action
            beta = min(beta, value)
            if beta <= alpha:
                break  # Prune: maximizer above already has a better option
        return value, best_action

print('Alpha-Beta implemented')

4. MCTS {#mcts}

class MCTSNode:
    """Node in the MCTS search tree.

    `wins` accumulates rollout reward (rescaled to [0, 1] per visit) from
    the perspective of the player who made the move LEADING INTO this node,
    so that a parent's `best_child` maximizes its own player's win rate.
    """

    def __init__(self, state, parent=None, action=None):
        self.state = state
        self.parent = parent
        self.action = action      # move that led from parent to this node
        self.children = []
        self.visits = 0
        self.wins = 0.0

    def is_fully_expanded(self):
        """True when every legal action already has a child node."""
        player = self.state.current_player()
        legal_actions = self.state.get_legal_actions(player)
        return len(self.children) == len(legal_actions)

    def best_child(self, c=1.41):
        """Return the child maximizing UCT; unvisited children come first."""
        def uct(node):
            if node.visits == 0:
                return float('inf')
            exploit = node.wins / node.visits
            explore = c * math.sqrt(math.log(self.visits) / node.visits)
            return exploit + explore
        return max(self.children, key=uct)

    def expand(self):
        """Create and return a child for one randomly chosen untried action."""
        tried = {child.action for child in self.children}
        player = self.state.current_player()
        legal = self.state.get_legal_actions(player)
        untried = [a for a in legal if a not in tried]
        action = random.choice(untried)
        next_state = self.state.get_successor(action)
        child = MCTSNode(next_state, self, action)
        self.children.append(child)
        return child

def mcts(root_state, num_sims=1000, c=1.41, verbose=True):
    """Monte Carlo Tree Search with UCT selection and random rollouts.

    Assumes the two players are +1 and -1 and the game is zero-sum
    (utility(p) == -utility(-p)), as in the games in this notebook.

    Args:
        root_state: GameState-like starting position.
        num_sims: number of selection/expansion/rollout/backprop iterations.
        c: UCT exploration constant.
        verbose: print a one-line summary when done.

    Returns:
        The action at the root with the most visits.
    """
    root = MCTSNode(root_state)

    for _ in range(num_sims):
        node = root
        # Selection: descend through fully expanded, non-terminal nodes.
        while node.is_fully_expanded() and not node.state.is_terminal():
            node = node.best_child(c)
        # Expansion: add one new child if possible.
        if not node.state.is_terminal() and not node.is_fully_expanded():
            node = node.expand()
        # Simulation: uniform-random rollout to a terminal state.
        sim_state = deepcopy(node.state)
        while not sim_state.is_terminal():
            player = sim_state.current_player()
            actions = sim_state.get_legal_actions(player)
            if not actions:
                break
            action = random.choice(actions)
            sim_state = sim_state.get_successor(action)
        # Score the rollout from the perspective of the player who moved
        # INTO `node` (the opponent of node's side to move). Fixing the
        # perspective to the root player (the old code) credited the wrong
        # side at every other tree depth after the sign flips below.
        result = sim_state.utility(-node.state.current_player())
        # Backpropagation: alternate the sign each level (zero-sum game).
        while node:
            node.visits += 1
            node.wins += (result + 1) / 2   # map reward [-1, 1] -> [0, 1]
            node = node.parent
            result = -result

    best = max(root.children, key=lambda n: n.visits)
    if verbose:
        print(f'MCTS: {num_sims} sims, best action visits={best.visits}')
    return best.action

print('MCTS implemented')

5. Tic-Tac-Toe {#tictactoe}

class TicTacToe(GameState):
    """3x3 Tic-Tac-Toe. Players are +1 (X) and -1 (O); X moves first."""

    def __init__(self, board=None, player=1):
        # Copy any incoming board so distinct states never share storage.
        self.board = board.copy() if board is not None else np.zeros((3, 3), dtype=int)
        self.player = player

    def get_legal_actions(self, player):
        # Every empty cell is a legal move, scanned in row-major order.
        return [(int(r), int(c)) for r, c in np.argwhere(self.board == 0)]

    def get_successor(self, action):
        # Place the current player's mark and hand the turn to the opponent.
        child = TicTacToe(self.board, -self.player)
        child.board[action] = self.player
        return child

    def is_terminal(self):
        return self.get_winner() is not None or not self.get_legal_actions(1)

    def get_winner(self):
        # Gather all eight lines: 3 rows, 3 columns, both diagonals.
        lines = [self.board[i, :] for i in range(3)]
        lines += [self.board[:, j] for j in range(3)]
        lines.append(np.diagonal(self.board))
        lines.append(np.diagonal(np.fliplr(self.board)))
        for line in lines:
            total = int(line.sum())
            if abs(total) == 3:       # three identical non-zero marks
                return total // 3     # +1 or -1
        return None

    def utility(self, player):
        """+1 if `player` won, -1 if the opponent won, 0 otherwise."""
        winner = self.get_winner()
        if winner is None:
            return 0.0
        return 1.0 if winner == player else -1.0

    def current_player(self):
        return self.player

    def display(self):
        marks = {1: 'X', -1: 'O', 0: '.'}
        for row in self.board:
            print(' '.join(marks[int(v)] for v in row))
        print()

print('Tic-Tac-Toe implemented')

Test Tic-Tac-Toe

# Demo: run all three algorithms from the empty Tic-Tac-Toe board and
# report each one's chosen opening move (with wall-clock timings for the
# two exhaustive searches).
print('=== Tic-Tac-Toe Test ===')
game = TicTacToe()
print('Initial:')
game.display()

print('Minimax:')
start = time.time()
val, action = minimax(game, maximizing=True)
print(f'Best move: {action}, value: {val:.1f}, time: {time.time()-start:.4f}s')

print('\nAlpha-Beta:')
start = time.time()
val, action = alpha_beta(game, maximizing=True)
print(f'Best move: {action}, value: {val:.1f}, time: {time.time()-start:.4f}s')

print('\nMCTS:')
action = mcts(game, num_sims=1000)
print(f'Best move: {action}')

6. Connect Four {#connect4}

class ConnectFour(GameState):
    """6x7 Connect Four. Players are +1 (X) and -1 (O); X moves first."""

    def __init__(self, board=None, player=1):
        self.rows, self.cols = 6, 7
        if board is None:
            self.board = np.zeros((self.rows, self.cols), dtype=int)
        else:
            self.board = board.copy()   # never share storage between states
        self.player = player

    def get_legal_actions(self, player):
        # A column is playable while its top cell is still empty.
        return [c for c in range(self.cols) if not self.board[0, c]]

    def get_successor(self, action):
        # Drop the current player's piece into the lowest empty cell of the
        # chosen column, then hand the turn to the opponent.
        child = ConnectFour(self.board, -self.player)
        row = self.rows - 1
        while row >= 0:
            if child.board[row, action] == 0:
                child.board[row, action] = self.player
                break
            row -= 1
        return child

    def is_terminal(self):
        return self.check_win() or not self.get_legal_actions(1)

    def check_win(self):
        """True if any four-in-a-row (row, column, or either diagonal) exists."""
        b = self.board
        # Direction vectors: right, down, down-right, up-right.
        for dr, dc in ((0, 1), (1, 0), (1, 1), (-1, 1)):
            for r in range(self.rows):
                for c in range(self.cols):
                    end_r, end_c = r + 3 * dr, c + 3 * dc
                    if not (0 <= end_r < self.rows and 0 <= end_c < self.cols):
                        continue
                    window = [b[r + i * dr, c + i * dc] for i in range(4)]
                    # |sum| == 4 means four identical non-zero pieces.
                    if window[0] != 0 and abs(sum(window)) == 4:
                        return True
        return False

    def utility(self, player):
        # A win is always made by the player who just moved (= -self.player).
        if self.check_win():
            return 1.0 if -self.player == player else -1.0
        return 0.0

    def current_player(self):
        return self.player

    def display(self):
        marks = {1: 'X', -1: 'O', 0: '.'}
        for r in range(self.rows):
            print(' '.join(marks[int(self.board[r, c])] for c in range(self.cols)))
        print(' '.join(str(c) for c in range(self.cols)))
        print()

print('Connect Four implemented')

Evaluation Function for Connect Four

def connect4_eval(state):
    """Heuristic value of a Connect Four state for the MAXIMIZER (player 1).

    Scores center-column control: +3 per player-1 piece in column 3 and
    -3 per opponent piece there. The perspective is fixed to player 1 so
    that depth-limited minimax/alpha-beta compare cutoff values
    consistently; the previous version scored from `state.player`, whose
    sign alternates with the side to move, flipping the evaluation at
    every other ply (and it ignored opponent center pieces entirely).

    Args:
        state: ConnectFour-like state with a `board` array.

    Returns:
        int: heuristic score (positive favors player 1).
    """
    center = state.board[:, 3]
    score = 3 * int(np.count_nonzero(center == 1))
    score -= 3 * int(np.count_nonzero(center == -1))
    return score

print('Connect Four eval function defined')

Test Connect Four

# Demo: Connect Four's tree is too large for exhaustive search, so
# alpha-beta runs with a 4-ply depth limit plus the heuristic eval,
# and MCTS uses 500 random rollouts.
print('=== Connect Four Test ===')
c4 = ConnectFour()
print('Initial:')
c4.display()

print('Alpha-Beta (depth 4):')
start = time.time()
val, action = alpha_beta(c4, maximizing=True, eval_fn=connect4_eval, max_depth=4)
print(f'Best column: {action}, value: {val:.2f}, time: {time.time()-start:.4f}s')

print('\nMCTS (500 sims):')
action = mcts(c4, num_sims=500)
print(f'Best column: {action}')

7. Comparisons {#comparisons}

# Qualitative summary of the trade-offs between the three algorithms
# as exercised by the demos above (text output only, no computation).
print('=== Algorithm Comparison ===')
print()
print('Tic-Tac-Toe:')
print('  Minimax: Complete search, slow')
print('  Alpha-Beta: 2-10x faster than Minimax')
print('  MCTS: Fast, good for learning')
print()
print('Connect Four:')
print('  Alpha-Beta + eval: Limited depth (4-6 plies)')
print('  MCTS: Handles large branching factor')
print()
print('Key Insights:')
print('  - Alpha-Beta essential for deep search')
print('  - MCTS great when eval function hard to design')
print('  - Modern: Combine both (AlphaZero)')

Summary

This implementation notebook demonstrated:

Implementations

  • Minimax with perfect play

  • Alpha-Beta pruning optimization

  • MCTS with UCT selection

Games

  • Tic-Tac-Toe: Complete search possible

  • Connect Four: Requires depth limits

Key Lessons

  • Alpha-Beta dramatically reduces nodes

  • MCTS learns without evaluation function

  • Real games need hybrid approaches

For theory, see: Chapter 3: Multiagent Search