Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Chapter 3 Lab: Multiagent Search

Lab Overview

Implement game-playing agents using:

  1. Minimax with perfect play assumption

  2. Alpha-Beta pruning optimization

  3. Evaluation functions for non-terminal states

  4. Monte Carlo Tree Search

  5. Complete game implementations (Tic-Tac-Toe, Connect Four)

Exercise 1: Minimax for Tic-Tac-Toe

Implement the complete Minimax algorithm.

class TicTacToe:
    """3x3 Tic-Tac-Toe with in-place make/undo moves for game-tree search."""

    def __init__(self):
        # Flat 9-cell board, indices 0-8; 'X' always moves first.
        self.board = [' '] * 9
        self.current_player = 'X'

    def available_moves(self):
        """Return the indices of all empty cells."""
        return [pos for pos, cell in enumerate(self.board) if cell == ' ']

    def make_move(self, position):
        """Place the current player's mark at ``position`` and switch turns.

        Returns True on success, False if the cell is already occupied.
        """
        if self.board[position] != ' ':
            return False
        self.board[position] = self.current_player
        self._switch_turn()
        return True

    def undo_move(self, position):
        """Clear ``position`` and hand the turn back to the previous player."""
        self.board[position] = ' '
        self._switch_turn()

    def _switch_turn(self):
        # Toggle between the two players.
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    def check_winner(self):
        """Return 'X', 'O', 'Draw', or None"""
        # YOUR CODE HERE: Check rows, columns, diagonals
        pass

    def is_terminal(self):
        """True once check_winner() reports an outcome."""
        return self.check_winner() is not None

    def utility(self, player):
        """Score a finished game from ``player``'s perspective.

        +1 for a win, 0 for a draw, -1 for a loss, None if not over yet.
        """
        winner = self.check_winner()
        if winner == player:
            return 1
        if winner == 'Draw':
            return 0
        if winner is None:
            return None
        return -1

def minimax(game, maximizing_player):
    """
    TODO: Implement Minimax algorithm
    Return: (best_value, best_move)

    Hints: recurse over game.available_moves() using make_move()/undo_move(),
    score terminal states with game.utility(maximizing_player), maximize on
    maximizing_player's turn and minimize on the opponent's turn.
    """
    pass

# Test: Minimax ('X') vs. a random opponent ('O').
# NOTE(review): `random` was previously only imported much further down the
# file, *after* this loop already executes — import it here so the script
# runs top-to-bottom without a NameError.
import random

game = TicTacToe()
print('Minimax playing Tic-Tac-Toe')
while not game.is_terminal():
    if game.current_player == 'X':
        # minimax returns (best_value, best_move); only the move is needed.
        _, move = minimax(game, 'X')
        game.make_move(move)
    else:
        # Random opponent or minimax
        moves = game.available_moves()
        game.make_move(random.choice(moves))
    # NOTE(review): print_board() is not defined anywhere in this file —
    # it must come from the lab scaffolding; confirm before running.
    print_board(game.board)

print(f'Result: {game.check_winner()}')

Exercise 2: Alpha-Beta Pruning

Optimize Minimax with Alpha-Beta pruning.

def alpha_beta(game, alpha, beta, maximizing_player, depth=0):
    """
    TODO: Implement Alpha-Beta pruning
    Track number of nodes pruned
    Return: (best_value, best_move, nodes_evaluated, nodes_pruned)

    alpha: best value the maximizer can guarantee so far (lower bound).
    beta:  best value the minimizer can guarantee so far (upper bound).
    Prune (skip the remaining siblings) as soon as alpha >= beta.
    """
    pass

# Compare Minimax vs Alpha-Beta
import time

def compare_algorithms(game, depth_limit=9):
    """Time Minimax vs. Alpha-Beta on ``game`` from its current position.

    Returns a dict keyed by algorithm name with per-run stats; the
    alpha-beta entry additionally reports node counts.
    NOTE(review): depth_limit is currently unused — confirm whether it
    should be threaded into the searches.
    """
    results = {}

    # Plain Minimax.
    t0 = time.time()
    mm_value, mm_move = minimax(game, game.current_player)
    results['minimax'] = {
        'time': time.time() - t0,
        'value': mm_value,
        'move': mm_move
    }

    # Alpha-Beta with full (-inf, +inf) window.
    t0 = time.time()
    ab_value, ab_move, evaluated, pruned = alpha_beta(
        game, float('-inf'), float('inf'), game.current_player)
    results['alpha_beta'] = {
        'time': time.time() - t0,
        'value': ab_value,
        'move': ab_move,
        'nodes': evaluated,
        'pruned': pruned
    }

    return results

# Test: time both searches from the empty Tic-Tac-Toe position.
# NOTE(review): this only works once minimax() and alpha_beta() are
# implemented — both are TODO stubs above that currently return None.
game = TicTacToe()
results = compare_algorithms(game)
print('Algorithm Comparison:')
for algo, stats in results.items():
    print(f'{algo}: {stats}')

Exercise 3: Connect Four with Evaluation Function

Implement Connect Four with depth-limited search and evaluation.

class ConnectFour:
    """Connect Four on a rows x cols grid (default 6x7); 'X' moves first."""

    def __init__(self, rows=6, cols=7):
        self.rows = rows
        self.cols = cols
        # board[0] is the TOP row; pieces stack upward from the bottom row.
        self.board = [[' '] * cols for _ in range(rows)]
        self.current_player = 'X'

    def available_moves(self):
        """Return columns whose top cell is still empty (i.e. not full)."""
        return [col for col in range(self.cols) if self.board[0][col] == ' ']

    def drop_piece(self, col):
        """Drop the current player's piece into ``col`` and switch turns.

        Returns the row index where the piece landed, or None if the
        column is already full (in which case the turn does not change).
        """
        for row in reversed(range(self.rows)):
            if self.board[row][col] != ' ':
                continue
            self.board[row][col] = self.current_player
            self.current_player = 'O' if self.current_player == 'X' else 'X'
            return row
        return None

    def check_winner(self):
        """Return the winning mark, 'Draw', or None while the game runs."""
        # YOUR CODE: Check horizontal, vertical, diagonal
        pass

def evaluate_board(game, player):
    """
    TODO: Design evaluation function for Connect Four

    Consider:
    - Number of 2-in-a-row, 3-in-a-row patterns
    - Center control
    - Blocking opponent threats
    - Potential winning moves

    Return: Score (higher is better for player)

    Hint: scan the same four line directions as check_winner() and weight
    partial lines; the score is a heuristic for non-terminal positions
    reached at the depth limit.
    """
    pass

def alpha_beta_depth_limited(game, alpha, beta, depth, max_depth, maximizing):
    """
    TODO: Alpha-Beta with depth limit and evaluation function
    Use evaluate_board() when depth limit reached

    depth:     current recursion depth (0 at the root).
    max_depth: cutoff — return evaluate_board() instead of recursing
               once depth == max_depth.
    maximizing: the player whose utility is being maximized.
    """
    pass

# Test
game = ConnectFour()
print('Playing Connect Four with depth-limited search...')
# YOUR CODE: Implement game loop
# (alternate alpha_beta_depth_limited() moves until check_winner() reports
# an outcome — the full 6x7 game tree is far too large for exact search,
# which is why the depth limit + evaluate_board() cutoff is needed.)

Exercise 4: Monte Carlo Tree Search

Implement MCTS with UCB1 selection.

import math
import random

class MCTSNode:
    """One node of the MCTS tree: a game state plus visit/win statistics."""

    def __init__(self, state, parent=None, action=None):
        self.state = state
        self.parent = parent
        # The move that led from parent to this node (None at the root).
        self.action = action
        self.children = []
        self.wins = 0
        self.visits = 0
        # Moves from this state not yet expanded into child nodes.
        self.untried_actions = state.available_moves()

    def ucb1(self, c=1.41):
        """
        TODO: Calculate UCB1 value
        UCB1 = wins/visits + c * sqrt(ln(parent_visits) / visits)
        """
        pass

    def select_child(self):
        """Pick the child maximising UCB1 (exploitation + exploration)."""
        return max(self.children, key=lambda child: child.ucb1())

    def add_child(self, action, state):
        """Expand ``action`` into a new child node and mark it as tried."""
        node = MCTSNode(state, parent=self, action=action)
        self.untried_actions.remove(action)
        self.children.append(node)
        return node

    def update(self, result):
        """Backpropagation step: record one more visit and its result."""
        self.visits += 1
        self.wins += result

def mcts_search(root_state, num_simulations=1000):
    """
    TODO: Implement MCTS algorithm

    Four phases:
    1. Selection: Use UCB1 to select promising node
    2. Expansion: Add new child node
    3. Simulation: Random playout to terminal state
    4. Backpropagation: Update statistics

    Return: Best action from root

    Hint: run num_simulations iterations of the four phases from a root
    MCTSNode, then return the action of the most-visited child.
    """
    pass

def simulate_random_game(state):
    """Play random game from state to terminal.

    Hint: repeatedly pick random.choice(state.available_moves()) until
    is_terminal(), then report the outcome for backpropagation.
    """
    # YOUR CODE HERE
    pass

# Test MCTS on Tic-Tac-Toe
# NOTE(review): depends on mcts_search() being implemented (it is a TODO
# stub) and on a print_board() helper that is not defined in this file.
game = TicTacToe()
print('MCTS playing Tic-Tac-Toe')
while not game.is_terminal():
    action = mcts_search(game, num_simulations=1000)
    game.make_move(action)
    print_board(game.board)
print(f'Result: {game.check_winner()}')

Exercise 5: Algorithm Tournament

Pit different algorithms against each other.

def play_game(player1_strategy, player2_strategy, game_class):
    """Run one game; player 1 plays 'X' and player 2 plays 'O'.

    Each strategy is a callable game -> action. Returns the final
    value of game.check_winner().
    """
    game = game_class()
    while not game.is_terminal():
        if game.current_player == 'X':
            chooser = player1_strategy
        else:
            chooser = player2_strategy
        game.make_move(chooser(game))
    return game.check_winner()

def tournament(strategies, game_class, num_games=100):
    """
    TODO: Run round-robin tournament
    Each strategy plays against every other strategy
    Track wins, losses, draws

    strategies: dict mapping name -> callable(game) -> action.
    Hint: use play_game() for each pairing; alternate who plays 'X'
    so first-move advantage is shared fairly.
    """
    pass

# Define strategies: each maps a game state to a chosen move.
strategies = {
    'Random': lambda game: random.choice(game.available_moves()),
    # minimax returns (value, move); [1] selects the move.
    'Minimax': lambda game: minimax(game, game.current_player)[1],
    # alpha_beta returns (value, move, nodes, pruned); [1] selects the move.
    'Alpha-Beta': lambda game: alpha_beta(game, float('-inf'), float('inf'), game.current_player)[1],
    'MCTS': lambda game: mcts_search(game, num_simulations=500)
}

# Run tournament
# NOTE(review): tournament() above is a TODO stub returning None; once
# implemented it should return data that pd.DataFrame() can consume
# (e.g. a dict of per-strategy win/loss/draw counts).
results = tournament(strategies, TicTacToe, num_games=50)

# Visualize results
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(results)
df.plot(kind='bar', figsize=(10, 6))
plt.title('Strategy Performance')
plt.ylabel('Win Rate')
plt.show()

Challenge: Implement Iterative Deepening

Combine depth-limited search with iterative deepening.

def iterative_deepening_minimax(game, time_limit=5.0):
    """
    CHALLENGE: Implement iterative deepening Minimax
    Progressively increase depth until time limit reached
    Use results from shallower searches to improve move ordering

    time_limit: wall-clock budget in seconds; return the best move found
    by the deepest fully-completed search.
    """
    pass

Lab Report

Deliverables

  • All algorithms implemented

  • Tournament results and analysis

  • Evaluation function design document

  • Performance comparison

Discussion Questions

  1. How does Alpha-Beta pruning improve over Minimax?

  2. When is MCTS preferable to Minimax?

  3. What makes a good evaluation function?

  4. How do you balance exploration vs exploitation in MCTS?