Lab Overview¶
Implement game-playing agents using:
Minimax with perfect play assumption
Alpha-Beta pruning optimization
Evaluation functions for non-terminal states
Monte Carlo Tree Search
Complete game implementations (Tic-Tac-Toe, Connect Four)
Exercise 1: Minimax for Tic-Tac-Toe¶
Implement the complete Minimax algorithm.
class TicTacToe:
    """3x3 Tic-Tac-Toe on a flat 9-cell board; 'X' always moves first."""

    # Index triples forming a winning line.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )

    def __init__(self):
        self.board = [' '] * 9
        self.current_player = 'X'

    def available_moves(self):
        """Return the indices of empty cells."""
        return [i for i, x in enumerate(self.board) if x == ' ']

    def make_move(self, position):
        """Make a move and switch player"""
        if self.board[position] == ' ':
            self.board[position] = self.current_player
            self.current_player = 'O' if self.current_player == 'X' else 'X'
            return True
        return False

    def undo_move(self, position):
        """Clear a cell and hand the turn back to the previous player."""
        self.board[position] = ' '
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    def check_winner(self):
        """Return 'X', 'O', 'Draw', or None"""
        # A line wins only if its first cell is non-empty and all three match.
        for a, b, c in self.WIN_LINES:
            if self.board[a] != ' ' and self.board[a] == self.board[b] == self.board[c]:
                return self.board[a]
        if ' ' not in self.board:
            return 'Draw'
        return None  # game still in progress

    def is_terminal(self):
        """True once somebody won or the board is full."""
        return self.check_winner() is not None

    def utility(self, player):
        """Return utility from perspective of player"""
        winner = self.check_winner()
        if winner == player:
            return 1
        elif winner == 'Draw':
            return 0
        elif winner is None:
            return None  # non-terminal: utility undefined
        else:
            return -1
def minimax(game, maximizing_player):
    """
    Exhaustive Minimax over the complete game tree.

    Args:
        game: game object exposing is_terminal(), utility(player),
              available_moves(), make_move(m), undo_move(m), current_player.
        maximizing_player: symbol ('X' or 'O') whose utility is maximized.

    Returns:
        (best_value, best_move); best_move is None at a terminal state.
    """
    if game.is_terminal():
        return game.utility(maximizing_player), None

    maximizing = game.current_player == maximizing_player
    best_value = float('-inf') if maximizing else float('inf')
    best_move = None
    for move in game.available_moves():
        game.make_move(move)
        value, _ = minimax(game, maximizing_player)
        game.undo_move(move)  # restore state: search mutates the game in place
        if (maximizing and value > best_value) or (not maximizing and value < best_value):
            best_value, best_move = value, move
    return best_value, best_move
# Test: Minimax as 'X' against a random opponent.
import random  # fix: the module-level `import random` only appears much later in this file

game = TicTacToe()
print('Minimax playing Tic-Tac-Toe')
while not game.is_terminal():
    if game.current_player == 'X':
        _, move = minimax(game, 'X')
        game.make_move(move)
    else:
        # Random opponent or minimax
        moves = game.available_moves()
        game.make_move(random.choice(moves))
print_board(game.board)  # NOTE(review): print_board is not defined in this chunk — confirm the lab environment provides it
print(f'Result: {game.check_winner()}')

Exercise 2: Alpha-Beta Pruning¶
Optimize Minimax with Alpha-Beta pruning.
def alpha_beta(game, alpha, beta, maximizing_player, depth=0):
    """
    Minimax with Alpha-Beta pruning, tracking the work done.

    Args:
        game: game object exposing is_terminal(), utility(player),
              available_moves(), make_move(m), undo_move(m), current_player.
        alpha, beta: current pruning window (pass -inf/+inf at the root).
        maximizing_player: symbol whose utility is maximized.
        depth: current ply, used only for recursion bookkeeping.

    Returns:
        (best_value, best_move, nodes_evaluated, nodes_pruned)
    """
    if game.is_terminal():
        return game.utility(maximizing_player), None, 1, 0

    maximizing = game.current_player == maximizing_player
    best_value = float('-inf') if maximizing else float('inf')
    best_move = None
    nodes, pruned = 1, 0  # count this node; accumulate children below
    moves = game.available_moves()
    for i, move in enumerate(moves):
        game.make_move(move)
        value, _, child_nodes, child_pruned = alpha_beta(
            game, alpha, beta, maximizing_player, depth + 1)
        game.undo_move(move)
        nodes += child_nodes
        pruned += child_pruned
        if maximizing:
            if value > best_value:
                best_value, best_move = value, move
            alpha = max(alpha, best_value)
        else:
            if value < best_value:
                best_value, best_move = value, move
            beta = min(beta, best_value)
        if beta <= alpha:
            # Remaining siblings are cut off without being explored.
            pruned += len(moves) - i - 1
            break
    return best_value, best_move, nodes, pruned
# Compare Minimax vs Alpha-Beta
import time


def compare_algorithms(game, depth_limit=9):
    """Time plain Minimax against Alpha-Beta from the same position.

    Args:
        game: game searched from its current position.
        depth_limit: unused here; kept for interface compatibility.

    Returns:
        dict with 'minimax' and 'alpha_beta' entries holding elapsed time,
        the value found, the chosen move, and (alpha-beta only) node counts.
    """
    stats = {}

    t0 = time.time()
    mm_value, mm_move = minimax(game, game.current_player)
    stats['minimax'] = {
        'time': time.time() - t0,
        'value': mm_value,
        'move': mm_move,
    }

    t0 = time.time()
    ab_value, ab_move, evaluated, cut = alpha_beta(
        game, float('-inf'), float('inf'), game.current_player)
    stats['alpha_beta'] = {
        'time': time.time() - t0,
        'value': ab_value,
        'move': ab_move,
        'nodes': evaluated,
        'pruned': cut,
    }
    return stats
# Test
game = TicTacToe()
results = compare_algorithms(game)
print('Algorithm Comparison:')
for algo, stats in results.items():
print(f'{algo}: {stats}')Exercise 3: Connect Four with Evaluation Function¶
Implement Connect Four with depth-limited search and evaluation.
class ConnectFour:
    """Connect Four on a rows x cols grid; 'X' moves first.

    board[0] is the TOP row; pieces fall to the highest-numbered empty row.
    """

    def __init__(self, rows=6, cols=7):
        self.rows = rows
        self.cols = cols
        self.board = [[' ' for _ in range(cols)] for _ in range(rows)]
        self.current_player = 'X'

    def available_moves(self):
        """Return columns where piece can be dropped"""
        return [c for c in range(self.cols) if self.board[0][c] == ' ']

    def drop_piece(self, col):
        """Drop piece in column, return row where it landed"""
        for row in range(self.rows - 1, -1, -1):
            if self.board[row][col] == ' ':
                self.board[row][col] = self.current_player
                self.current_player = 'O' if self.current_player == 'X' else 'X'
                return row
        return None  # column is full

    def check_winner(self):
        """Return 'X', 'O', 'Draw', or None (game still in progress)."""
        # Treat every cell as the start of a length-4 window in each of the
        # four directions: right, down, down-right, down-left.
        directions = ((0, 1), (1, 0), (1, 1), (1, -1))
        for r in range(self.rows):
            for c in range(self.cols):
                piece = self.board[r][c]
                if piece == ' ':
                    continue
                for dr, dc in directions:
                    end_r, end_c = r + 3 * dr, c + 3 * dc
                    if not (0 <= end_r < self.rows and 0 <= end_c < self.cols):
                        continue  # window would run off the board
                    if all(self.board[r + k * dr][c + k * dc] == piece for k in range(1, 4)):
                        return piece
        if not self.available_moves():
            return 'Draw'
        return None
def evaluate_board(game, player):
    """
    Heuristic score of a Connect Four position from `player`'s perspective.

    Scores every length-4 window on the board: a window containing pieces of
    both players is worthless; otherwise it is weighted by how many pieces
    one side already has in it (2-in-a-row, 3-in-a-row, completed four).
    Pieces in the center column get a small bonus since they participate in
    the most windows. The score is antisymmetric:
    evaluate_board(g, 'X') == -evaluate_board(g, 'O').

    Args:
        game: ConnectFour-like object with rows, cols, and board attributes.
        player: 'X' or 'O'.

    Returns:
        int score; higher is better for `player`.
    """
    opponent = 'O' if player == 'X' else 'X'
    # Weight by pieces already in an unblocked window.
    weights = {0: 0, 1: 1, 2: 10, 3: 50, 4: 10000}

    def _window_score(cells):
        mine = cells.count(player)
        theirs = cells.count(opponent)
        if mine and theirs:
            return 0  # blocked window helps neither side
        return weights[mine] - weights[theirs]

    rows, cols, board = game.rows, game.cols, game.board
    score = 0
    for r in range(rows):
        for c in range(cols):
            for dr, dc in ((0, 1), (1, 0), (1, 1), (1, -1)):
                if 0 <= r + 3 * dr < rows and 0 <= c + 3 * dc < cols:
                    score += _window_score(
                        [board[r + k * dr][c + k * dc] for k in range(4)])

    # Center control: the middle column joins the most winning lines.
    center = cols // 2
    for r in range(rows):
        if board[r][center] == player:
            score += 3
        elif board[r][center] == opponent:
            score -= 3
    return score
def alpha_beta_depth_limited(game, alpha, beta, depth, max_depth, maximizing):
    """
    Depth-limited Alpha-Beta search using evaluate_board() at the horizon.

    Args:
        game: ConnectFour-like state (available_moves / drop_piece /
              check_winner / current_player). Searched on deep copies since
              ConnectFour provides no undo.
        alpha, beta: pruning window (pass -inf/+inf at the root).
        depth: current ply; call with 0 at the root.
        max_depth: horizon at which evaluate_board() is applied.
        maximizing: True when game.current_player is the side being scored
                    for (callers should pass True at the root).

    Returns:
        (best_value, best_move); best_move is None at terminal/horizon nodes.
    """
    from copy import deepcopy  # local import: `copy` is not imported at module level

    # The side we score for: the mover at maximizing nodes, their opponent otherwise.
    other = 'O' if game.current_player == 'X' else 'X'
    me = game.current_player if maximizing else other

    winner = game.check_winner()
    if winner is not None:
        if winner == 'Draw':
            return 0, None
        # Bias by depth so faster wins (and slower losses) are preferred.
        return (100000 - depth, None) if winner == me else (-100000 + depth, None)
    if depth >= max_depth:
        return evaluate_board(game, me), None

    best_move = None
    if maximizing:
        best = float('-inf')
        for move in game.available_moves():
            child = deepcopy(game)  # no undo available: branch on a copy
            child.drop_piece(move)
            value, _ = alpha_beta_depth_limited(
                child, alpha, beta, depth + 1, max_depth, False)
            if value > best:
                best, best_move = value, move
            alpha = max(alpha, best)
            if beta <= alpha:
                break
        return best, best_move

    best = float('inf')
    for move in game.available_moves():
        child = deepcopy(game)
        child.drop_piece(move)
        value, _ = alpha_beta_depth_limited(
            child, alpha, beta, depth + 1, max_depth, True)
        if value < best:
            best, best_move = value, move
        beta = min(beta, best)
        if beta <= alpha:
            break
    return best, best_move
# Test: set up a fresh Connect Four game for the depth-limited search demo.
game = ConnectFour()
print('Playing Connect Four with depth-limited search...')
# YOUR CODE: Implement game loop

Exercise 4: Monte Carlo Tree Search¶
Implement MCTS with UCB1 selection.
import math
import random
class MCTSNode:
    """One node of the MCTS tree: a game state plus visit statistics."""

    def __init__(self, state, parent=None, action=None):
        self.state = state                  # game state this node represents
        self.parent = parent                # None for the root
        self.action = action                # move that led here from parent
        self.children = []
        self.wins = 0                       # accumulated results (fractional for draws)
        self.visits = 0
        self.untried_actions = state.available_moves()  # moves not yet expanded

    def ucb1(self, c=1.41):
        """
        Upper Confidence Bound: wins/visits + c * sqrt(ln(parent_visits) / visits).

        Unvisited nodes return +inf so they are always selected first,
        guaranteeing every child is tried at least once.
        """
        if self.visits == 0:
            return float('inf')
        exploitation = self.wins / self.visits
        exploration = c * math.sqrt(math.log(self.parent.visits) / self.visits)
        return exploitation + exploration

    def select_child(self):
        """Select child with highest UCB1 value"""
        return max(self.children, key=lambda child: child.ucb1())

    def add_child(self, action, state):
        """Expand `action` into a child node and mark it as tried."""
        child = MCTSNode(state, parent=self, action=action)
        self.untried_actions.remove(action)
        self.children.append(child)
        return child

    def update(self, result):
        """Backpropagation step: record the outcome of one playout."""
        self.visits += 1
        self.wins += result
def mcts_search(root_state, num_simulations=1000):
    """
    Monte Carlo Tree Search from `root_state`.

    Runs `num_simulations` iterations of select / expand / simulate /
    backpropagate and returns the most-visited action at the root.

    Assumes a two-player 'X'/'O' game exposing available_moves(),
    make_move(), is_terminal(), check_winner(), and current_player
    (true for TicTacToe in this lab).

    Returns:
        Best action from the root, or None if the root is terminal.
    """
    from copy import deepcopy  # local import: `copy` is not imported at module level

    if root_state.is_terminal():
        return None

    root = MCTSNode(deepcopy(root_state))
    for _ in range(num_simulations):
        node = root

        # 1. Selection: descend while the node is fully expanded and has children.
        while not node.untried_actions and node.children:
            node = node.select_child()

        # 2. Expansion: add one child for a randomly chosen untried action.
        if node.untried_actions:
            action = random.choice(node.untried_actions)
            next_state = deepcopy(node.state)
            next_state.make_move(action)
            node = node.add_child(action, next_state)

        # 3. Simulation: uniformly random playout to a terminal state.
        rollout = deepcopy(node.state)
        while not rollout.is_terminal():
            rollout.make_move(random.choice(rollout.available_moves()))
        winner = rollout.check_winner()

        # 4. Backpropagation: each node is scored for the player who just
        #    moved INTO it (the opposite of that state's player to move).
        while node is not None:
            just_moved = 'O' if node.state.current_player == 'X' else 'X'
            if winner == just_moved:
                node.update(1)
            elif winner == 'Draw':
                node.update(0.5)
            else:
                node.update(0)
            node = node.parent

    # Most-visited child is the most robust final choice.
    return max(root.children, key=lambda child: child.visits).action
def simulate_random_game(state):
    """Play random game from state to terminal.

    Plays uniformly random moves on a deep copy of `state` — the caller's
    object is left untouched — and returns check_winner() of the finished
    game.
    """
    from copy import deepcopy  # local import: `copy` is not imported at module level

    rollout = deepcopy(state)
    while not rollout.is_terminal():
        rollout.make_move(random.choice(rollout.available_moves()))
    return rollout.check_winner()
# Test MCTS on Tic-Tac-Toe: MCTS plays every move for both sides.
print('MCTS playing Tic-Tac-Toe')
game = TicTacToe()
while not game.is_terminal():
    best_action = mcts_search(game, num_simulations=1000)
    game.make_move(best_action)
print_board(game.board)
print(f'Result: {game.check_winner()}')

Exercise 5: Algorithm Tournament¶
Pit different algorithms against each other.
def play_game(player1_strategy, player2_strategy, game_class):
    """Play single game between two strategies.

    player1_strategy controls 'X', player2_strategy controls 'O'; each is a
    callable mapping the game to the move it plays. Returns check_winner()
    of the finished game.
    """
    game = game_class()
    by_symbol = {'X': player1_strategy, 'O': player2_strategy}
    while not game.is_terminal():
        choose = by_symbol[game.current_player]
        game.make_move(choose(game))
    return game.check_winner()
def tournament(strategies, game_class, num_games=100):
    """
    Round-robin tournament: every pair of distinct strategies plays
    `num_games` games, alternating who takes 'X' so first-move advantage
    is shared evenly.

    Args:
        strategies: dict mapping name -> callable(game) -> move.
        game_class: zero-argument constructor for a fresh game.
        num_games: games per pairing.

    Returns:
        dict: name -> {'wins': int, 'losses': int, 'draws': int}
    """
    results = {name: {'wins': 0, 'losses': 0, 'draws': 0} for name in strategies}
    names = list(strategies)
    for i, name_a in enumerate(names):
        for name_b in names[i + 1:]:
            for game_index in range(num_games):
                # Alternate colours between the two strategies.
                if game_index % 2 == 0:
                    x_name, o_name = name_a, name_b
                else:
                    x_name, o_name = name_b, name_a
                winner = play_game(strategies[x_name], strategies[o_name], game_class)
                if winner == 'X':
                    results[x_name]['wins'] += 1
                    results[o_name]['losses'] += 1
                elif winner == 'O':
                    results[o_name]['wins'] += 1
                    results[x_name]['losses'] += 1
                else:
                    results[x_name]['draws'] += 1
                    results[o_name]['draws'] += 1
    return results
# Define strategies: each maps a game position to the move it would play.
def _random_strategy(game):
    return random.choice(game.available_moves())

def _minimax_strategy(game):
    return minimax(game, game.current_player)[1]

def _alpha_beta_strategy(game):
    return alpha_beta(game, float('-inf'), float('inf'), game.current_player)[1]

def _mcts_strategy(game):
    return mcts_search(game, num_simulations=500)

strategies = {
    'Random': _random_strategy,
    'Minimax': _minimax_strategy,
    'Alpha-Beta': _alpha_beta_strategy,
    'MCTS': _mcts_strategy,
}

# Run tournament
results = tournament(strategies, TicTacToe, num_games=50)

# Visualize results
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame(results)
df.plot(kind='bar', figsize=(10, 6))
plt.title('Strategy Performance')
plt.ylabel('Win Rate')
plt.show()

Challenge: Implement Iterative Deepening¶
Combine depth-limited search with iterative deepening.
def iterative_deepening_minimax(game, time_limit=5.0):
    """
    Iterative-deepening Minimax with root move ordering.

    Repeatedly runs a depth-limited Minimax from the current position,
    increasing the depth by one per iteration, until either the wall-clock
    budget is spent or an iteration completes without hitting the depth
    cutoff anywhere (the tree was searched exhaustively, so deeper passes
    cannot change the answer). Root moves are reordered by the scores of
    the previous iteration so the most promising moves are searched first
    in the next, deeper pass.

    Args:
        game: game exposing is_terminal(), utility(player), available_moves(),
              make_move(m), undo_move(m), current_player.
        time_limit: wall-clock budget in seconds.

    Returns:
        Best move found so far, or None if the position has no moves.
    """
    player = game.current_player
    deadline = time.time() + time_limit

    class _OutOfTime(Exception):
        """Raised to unwind the search when the time budget is spent."""

    hit_limit = False  # did the current iteration truncate any line at the cutoff?

    def _search(depth, maximizing):
        nonlocal hit_limit
        if time.time() > deadline:
            raise _OutOfTime
        if game.is_terminal():
            return game.utility(player)
        if depth == 0:
            # No evaluation function available: score unresolved lines as neutral.
            hit_limit = True
            return 0
        values = []
        for move in game.available_moves():
            game.make_move(move)
            values.append(_search(depth - 1, not maximizing))
            game.undo_move(move)
        return max(values) if maximizing else min(values)

    move_order = game.available_moves()
    if not move_order:
        return None
    best_move = move_order[0]  # fallback if even depth 1 cannot finish in time

    depth = 1
    try:
        while True:
            hit_limit = False
            scored = []
            for move in move_order:
                game.make_move(move)
                value = _search(depth - 1, False)
                game.undo_move(move)
                scored.append((value, move))
            # Best-first ordering for the next, deeper iteration.
            scored.sort(key=lambda vm: vm[0], reverse=True)
            move_order = [move for _, move in scored]
            best_move = move_order[0]
            if not hit_limit:
                break  # searched to terminal everywhere — going deeper changes nothing
            depth += 1
    except _OutOfTime:
        pass  # keep the best move from the last completed iteration
    return best_move
pass

Lab Report¶
Deliverables¶
All algorithms implemented
Tournament results and analysis
Evaluation function design document
Performance comparison
Discussion Questions¶
How does Alpha-Beta pruning improve over Minimax?
When is MCTS preferable to Minimax?
What makes a good evaluation function?
How do you balance exploration vs exploitation in MCTS?