Complete Python implementations of game-playing algorithms.

Contents

1. Framework
import numpy as np
import math
import random
import time
from typing import List, Optional, Tuple, Any
from abc import ABC, abstractmethod
from copy import deepcopy
print('Libraries imported')


class GameState(ABC):
    """Abstract interface for two-player, zero-sum game states.

    Players are encoded as integers; the concrete games in this file
    use +1 and -1.
    """

    @abstractmethod
    def get_legal_actions(self, player: int) -> List[Any]:
        """Return the list of actions available to *player* in this state."""

    @abstractmethod
    def get_successor(self, action: Any):
        """Return the new state reached by applying *action*."""

    @abstractmethod
    def is_terminal(self) -> bool:
        """Return True when the game is over."""

    @abstractmethod
    def utility(self, player: int) -> float:
        """Return the payoff of this state from *player*'s perspective."""

    @abstractmethod
    def current_player(self) -> int:
        """Return the player to move in this state."""

    def display(self):
        # Default rendering: rely on the subclass's __str__/__repr__.
        print(self)


print('GameState framework defined')

# 2. Minimax
def minimax(state, depth=0, maximizing=True, eval_fn=None, max_depth=float('inf')):
    """Exhaustive minimax search.

    Args:
        state: a GameState-like object.
        depth: current ply (root call uses 0).
        maximizing: True when this level optimizes for the maximizing player.
        eval_fn: optional static evaluation used at the depth cutoff.
        max_depth: depth cutoff; only applied when eval_fn is given.

    Returns:
        (value, best_action); best_action is None at terminal/cutoff nodes.
    """
    if state.is_terminal():
        # Terminal payoff is scored for the side this level optimizes
        # (assumes the maximizing root player is encoded as +1,
        # consistent with alpha_beta below and the games in this file).
        player = 1 if maximizing else -1
        return state.utility(player), None
    if depth >= max_depth and eval_fn:
        # Depth cutoff reached: fall back to the heuristic evaluation.
        return eval_fn(state), None
    player = state.current_player()
    actions = state.get_legal_actions(player)
    if not actions:
        # No legal moves: treat like a terminal position.
        return state.utility(player), None
    if maximizing:
        max_val = float('-inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            val, _ = minimax(successor, depth+1, False, eval_fn, max_depth)
            if val > max_val:
                max_val = val
                best_action = action
        return max_val, best_action
    else:
        min_val = float('inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            val, _ = minimax(successor, depth+1, True, eval_fn, max_depth)
            if val < min_val:
                min_val = val
                best_action = action
        return min_val, best_action


print('Minimax implemented')

# 3. Alpha-Beta Pruning
def alpha_beta(state, depth=0, alpha=float('-inf'), beta=float('inf'),
               maximizing=True, eval_fn=None, max_depth=float('inf')):
    """Minimax with alpha-beta pruning; same contract as minimax().

    Args:
        state: a GameState-like object.
        depth: current ply (root call uses 0).
        alpha: best value guaranteed to the maximizer so far.
        beta: best value guaranteed to the minimizer so far.
        maximizing: True when this level optimizes for the maximizing player.
        eval_fn: optional static evaluation used at the depth cutoff.
        max_depth: depth cutoff; only applied when eval_fn is given.

    Returns:
        (value, best_action); best_action is None at terminal/cutoff nodes.
    """
    if state.is_terminal():
        # Same perspective convention as minimax(): root maximizer is +1.
        player = 1 if maximizing else -1
        return state.utility(player), None
    if depth >= max_depth and eval_fn:
        return eval_fn(state), None
    player = state.current_player()
    actions = state.get_legal_actions(player)
    if not actions:
        return state.utility(player), None
    if maximizing:
        value = float('-inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            child_val, _ = alpha_beta(successor, depth+1, alpha, beta,
                                      False, eval_fn, max_depth)
            if child_val > value:
                value = child_val
                best_action = action
            alpha = max(alpha, value)
            if beta <= alpha:
                break  # Prune: minimizer above will never allow this line.
        return value, best_action
    else:
        value = float('inf')
        best_action = None
        for action in actions:
            successor = state.get_successor(action)
            child_val, _ = alpha_beta(successor, depth+1, alpha, beta,
                                      True, eval_fn, max_depth)
            if child_val < value:
                value = child_val
                best_action = action
            beta = min(beta, value)
            if beta <= alpha:
                break  # Prune: maximizer above will never allow this line.
        return value, best_action


print('Alpha-Beta implemented')

# 4. MCTS
class MCTSNode:
    """One node of the MCTS search tree."""

    def __init__(self, state, parent=None, action=None):
        self.state = state      # game state at this node
        self.parent = parent    # None for the root
        self.action = action    # action that led here from parent
        self.children = []
        self.visits = 0
        self.wins = 0.0

    def is_fully_expanded(self):
        """True when every legal action already has a child node."""
        player = self.state.current_player()
        legal_actions = self.state.get_legal_actions(player)
        return len(self.children) == len(legal_actions)

    def best_child(self, c=1.41):
        """Return the child maximizing the UCT score (exploit + c*explore)."""
        def uct(node):
            if node.visits == 0:
                return float('inf')  # force at least one visit per child
            exploit = node.wins / node.visits
            explore = c * math.sqrt(math.log(self.visits) / node.visits)
            return exploit + explore
        return max(self.children, key=uct)

    def expand(self):
        """Create and return a child for a randomly chosen untried action."""
        tried = {child.action for child in self.children}
        player = self.state.current_player()
        legal = self.state.get_legal_actions(player)
        untried = [a for a in legal if a not in tried]
        action = random.choice(untried)
        next_state = self.state.get_successor(action)
        child = MCTSNode(next_state, self, action)
        self.children.append(child)
        return child


def mcts(root_state, num_sims=1000, c=1.41, verbose=True):
    """Monte Carlo Tree Search with uniform-random rollouts.

    Args:
        root_state: a GameState-like object.
        num_sims: number of selection/expansion/rollout/backprop iterations.
        c: UCT exploration constant.
        verbose: print a one-line summary when True.

    Returns:
        The action of the most-visited root child.
    """
    root = MCTSNode(root_state)
    root_player = root_state.current_player()
    for sim in range(num_sims):
        node = root
        # Selection: descend while fully expanded and non-terminal.
        while node.is_fully_expanded() and not node.state.is_terminal():
            node = node.best_child(c)
        # Expansion: add one untried child, if any.
        if not node.state.is_terminal() and not node.is_fully_expanded():
            node = node.expand()
        # Simulation: random playout to a terminal state.
        sim_state = deepcopy(node.state)
        while not sim_state.is_terminal():
            player = sim_state.current_player()
            actions = sim_state.get_legal_actions(player)
            if not actions:
                break
            action = random.choice(actions)
            sim_state = sim_state.get_successor(action)
        result = sim_state.utility(root_player)
        # Backpropagation: flip the sign each ply so each level is credited
        # from its own side's perspective; map result [-1, 1] -> wins [0, 1].
        while node:
            node.visits += 1
            node.wins += (result + 1) / 2
            node = node.parent
            result = -result
    best = max(root.children, key=lambda n: n.visits)
    if verbose:
        print(f'MCTS: {num_sims} sims, best action visits={best.visits}')
    return best.action


print('MCTS implemented')

# 5. Tic-Tac-Toe
class TicTacToe(GameState):
    """3x3 Tic-Tac-Toe. Player +1 is 'X', -1 is 'O'; 0 marks an empty cell."""

    def __init__(self, board=None, player=1):
        # Copy any incoming board so states never share storage.
        self.board = np.zeros((3, 3), dtype=int) if board is None else board.copy()
        self.player = player

    def get_legal_actions(self, player):
        # Every empty cell is a legal move (identical for both players).
        return [(i, j) for i in range(3) for j in range(3) if self.board[i, j] == 0]

    def get_successor(self, action):
        """Place the current player's mark at (row, col) = action."""
        new_state = TicTacToe(self.board, -self.player)
        new_state.board[action] = self.player
        return new_state

    def is_terminal(self):
        return self.get_winner() is not None or len(self.get_legal_actions(1)) == 0

    def get_winner(self):
        """Return +1/-1 for a completed line, else None (also None on a draw)."""
        # Rows
        for i in range(3):
            if abs(self.board[i, :].sum()) == 3:
                return self.board[i, 0]
        # Columns
        for j in range(3):
            if abs(self.board[:, j].sum()) == 3:
                return self.board[0, j]
        # Main and anti-diagonal
        if abs(np.trace(self.board)) == 3:
            return self.board[0, 0]
        if abs(np.trace(np.fliplr(self.board))) == 3:
            return self.board[0, 2]
        return None

    def utility(self, player):
        winner = self.get_winner()
        if winner == player:
            return 1.0
        if winner == -player:
            return -1.0
        return 0.0  # draw or game still in progress

    def current_player(self):
        return self.player

    def display(self):
        symbols = {1: 'X', -1: 'O', 0: '.'}
        for i in range(3):
            print(' '.join(symbols[self.board[i, j]] for j in range(3)))
        print()


print('Tic-Tac-Toe implemented')

# Test Tic-Tac-Toe
# Demo: run all three algorithms from the empty Tic-Tac-Toe position.
print('=== Tic-Tac-Toe Test ===')
game = TicTacToe()
print('Initial:')
game.display()

print('Minimax:')
start = time.time()
val, action = minimax(game, maximizing=True)
print(f'Best move: {action}, value: {val:.1f}, time: {time.time()-start:.4f}s')

print('\nAlpha-Beta:')
start = time.time()
val, action = alpha_beta(game, maximizing=True)
print(f'Best move: {action}, value: {val:.1f}, time: {time.time()-start:.4f}s')

print('\nMCTS:')
action = mcts(game, num_sims=1000)
print(f'Best move: {action}')

# 6. Connect Four
class ConnectFour(GameState):
    """6x7 Connect Four. Player +1 is 'X', -1 is 'O'; 0 marks an empty cell.

    An action is a column index; pieces fall to the lowest empty row.
    """

    def __init__(self, board=None, player=1):
        self.rows, self.cols = 6, 7
        # Copy any incoming board so states never share storage.
        self.board = np.zeros((self.rows, self.cols), dtype=int) if board is None else board.copy()
        self.player = player

    def get_legal_actions(self, player):
        # A column is playable while its top cell is still empty.
        return [c for c in range(self.cols) if self.board[0, c] == 0]

    def get_successor(self, action):
        """Drop the current player's piece into column *action*."""
        col = action
        new_state = ConnectFour(self.board, -self.player)
        # Scan upward from the bottom row for the first empty slot.
        for row in range(self.rows-1, -1, -1):
            if new_state.board[row, col] == 0:
                new_state.board[row, col] = self.player
                break
        return new_state

    def is_terminal(self):
        return self.check_win() or len(self.get_legal_actions(1)) == 0

    def check_win(self):
        """Return True if either player has four in a row anywhere."""
        # Horizontal windows
        for r in range(self.rows):
            for c in range(self.cols-3):
                if abs(self.board[r, c:c+4].sum()) == 4 and self.board[r, c] != 0:
                    return True
        # Vertical windows
        for r in range(self.rows-3):
            for c in range(self.cols):
                if abs(self.board[r:r+4, c].sum()) == 4 and self.board[r, c] != 0:
                    return True
        # Down-right diagonals
        for r in range(self.rows-3):
            for c in range(self.cols-3):
                diag = [self.board[r+i, c+i] for i in range(4)]
                if abs(sum(diag)) == 4 and diag[0] != 0:
                    return True
        # Up-right diagonals
        for r in range(3, self.rows):
            for c in range(self.cols-3):
                diag = [self.board[r-i, c+i] for i in range(4)]
                if abs(sum(diag)) == 4 and diag[0] != 0:
                    return True
        return False

    def utility(self, player):
        if self.check_win():
            # The winner is the side that just moved, i.e. -self.player.
            return 1.0 if self.player == -player else -1.0
        return 0.0

    def current_player(self):
        return self.player

    def display(self):
        symbols = {1: 'X', -1: 'O', 0: '.'}
        for i in range(self.rows):
            print(' '.join(symbols[self.board[i, j]] for j in range(self.cols)))
        print(' '.join(str(i) for i in range(self.cols)))  # column labels
        print()


print('Connect Four implemented')

# Evaluation Function for Connect Four
def connect4_eval(state):
    """Heuristic evaluation for non-terminal Connect Four states.

    Scores only center-column (column 3) control: +3 for each piece the
    side to move (state.player) has in the center column, which touches
    the most potential four-in-a-row lines. No threat detection is
    implemented, despite the original comment claiming it.

    Returns:
        int score; higher favors state.player.
    """
    score = 0
    player = state.player
    for r in range(state.rows):
        if state.board[r, 3] == player:
            score += 3
    return score


print('Connect Four eval function defined')

# Test Connect Four
# Demo: depth-limited Alpha-Beta and MCTS from the empty Connect Four position.
print('=== Connect Four Test ===')
c4 = ConnectFour()
print('Initial:')
c4.display()

print('Alpha-Beta (depth 4):')
start = time.time()
val, action = alpha_beta(c4, maximizing=True, eval_fn=connect4_eval, max_depth=4)
print(f'Best column: {action}, value: {val:.2f}, time: {time.time()-start:.4f}s')

print('\nMCTS (500 sims):')
action = mcts(c4, num_sims=500)
print(f'Best column: {action}')

# 7. Comparisons
# Side-by-side summary of how the three algorithms behave on each game.
for _line in (
    '=== Algorithm Comparison ===',
    '',
    'Tic-Tac-Toe:',
    ' Minimax: Complete search, slow',
    ' Alpha-Beta: 2-10x faster than Minimax',
    ' MCTS: Fast, good for learning',
    '',
    'Connect Four:',
    ' Alpha-Beta + eval: Limited depth (4-6 plies)',
    ' MCTS: Handles large branching factor',
    '',
    'Key Insights:',
    ' - Alpha-Beta essential for deep search',
    ' - MCTS great when eval function hard to design',
    ' - Modern: Combine both (AlphaZero)',
):
    print(_line)
Summary

This implementation notebook demonstrated:

Implementations
- Minimax with perfect play
- Alpha-Beta pruning optimization
- MCTS with UCT selection

Games
- Tic-Tac-Toe: complete search is possible
- Connect Four: requires depth limits and an evaluation function

Key Lessons
- Alpha-Beta dramatically reduces the number of nodes searched
- MCTS learns without a hand-crafted evaluation function
- Real games need hybrid approaches

For theory, see: Chapter 3: Multiagent Search