API Reference: tictactoe¶

Quick examples¶

Solve a position and inspect optimal moves:

from tictactoe import solve_state

# Board as a 9-character string, one digit per cell: 0 empty, 1 X, 2 O
board = "100020000"  # X on cell 0, O on cell 4; piece counts are equal
res = solve_state(tuple(int(c) for c in board))
print(res["value"], res["plies_to_end"], res["optimal_moves"])  # -> value (1/0/-1), plies to end, tuple of optimal move indices

Generate per-state features for every solved state, plus a canonical-only state-action dataset, in memory:

from tictactoe import solve_all_reachable, extract_board_features, generate_state_action_dataset

# Mapping from serialized board string (nine digits) to that state's solver result
solved = solve_all_reachable()
# One feature dict per solved state; each key is turned back into a list of ints
states = [extract_board_features([int(c) for c in k], solved, normalize_to_move=True) for k in solved]
state_actions = generate_state_action_dataset(solved, canonical_only=True, epsilons=[0.0, 0.1])
# Sizes of the two in-memory datasets
print(len(states), len(state_actions))

Export datasets to disk with a manifest:

from pathlib import Path
from tictactoe import ExportArgs, run_export

# CSV export into data_raw/example, canonical-only, with a single epsilon of 0.1
out = run_export(ExportArgs(out=Path("data_raw/example"), format="csv", canonical_only=True, epsilons=[0.1]))
print(out)  # path to manifest.json

tictactoe package.

Core algorithms, symmetry handling, datasets, and a simple CLI.

Convenience imports are exposed for common workflows.

`extract_board_features(board, solved_map, lambda_temp=0.5, q_temp=1.0, epsilons=None, normalize_to_move=False)` ¶

Extract features for a board.

By default, features are computed on the board as given. If normalize_to_move=True, we remap pieces so the side-to-move becomes X=1. Note: Policies and q-values are derived from the original board's solver output. Normalization only swaps labels 1<->2; legality and q-values align by construction because moves are on indices, not on piece IDs.

Source code in src/tictactoe/orchestrator.py

def extract_board_features(
    board: List[int],
    solved_map: Dict[str, dict],
    lambda_temp: float = 0.5,
    q_temp: float = 1.0,
    epsilons: Optional[List[float]] = None,
    normalize_to_move: bool = False,
) -> Dict[str, Any]:
    """Extract a flat feature dictionary for a board.

    By default, features are computed on the board as given.
    If normalize_to_move=True, we remap pieces so the side-to-move becomes X=1.
    Note: Policies and q-values are derived from the original board's solver
    output. Normalization only swaps labels 1<->2; legality and q-values align
    by construction because moves are on indices, not on piece IDs.

    Args:
        board: Nine cell values (0 empty, 1 X, 2 O). The side to move is
            inferred from the piece counts: X moves when both counts are equal.
        solved_map: Solver results keyed by the serialized board. A board whose
            key is missing is reported as not reachable; its solver-derived
            fields are None and its child/difficulty/branching values are 0.
        lambda_temp: Forwarded to ``build_policy_targets``.
        q_temp: Forwarded to ``build_policy_targets``.
        epsilons: Epsilon values for the epsilon policies; defaults to
            ``[0.1]``. Each one produces ``epsilon_policy_<tag>_<i>`` columns,
            where ``<tag>`` is the epsilon as a rounded percentage padded to
            three digits. Epsilons that round to the same percentage share a
            tag, and the later one overwrites the earlier one.
        normalize_to_move: When true and O is to move, swap the piece labels
            so the mover is always encoded as 1.

    Returns:
        A dict of scalar features followed by per-cell features for cells 0-8.
        Key insertion order is stable across calls with the same epsilons.
    """
    if epsilons is None:
        epsilons = [0.1]

    def _entropy(policy):
        # Shannon entropy (natural log) over the strictly positive entries.
        return -sum(p * np.log(p + 1e-10) for p in policy if p > 0)

    to_move = 1 if board.count(1) == board.count(2) else 2
    swapped = normalize_to_move and to_move == 2
    if swapped:
        # swap X and O labels to make current player X
        normalized_board = [0 if v == 0 else (1 if v == 2 else 2) for v in board]
        current_player = 1
    else:
        normalized_board = board[:]
        current_player = to_move
    x_count, o_count = get_piece_counts(normalized_board)
    winner_raw = get_winner(board)
    winner_norm = get_winner(normalized_board)
    key = serialize_board(board)
    norm_key = serialize_board(normalized_board)
    reachable = key in solved_map

    if reachable:
        sol = solved_map[key]
        value_current = sol["value"]
        plies_to_end = sol["plies_to_end"]
        optimal_moves = set(sol["optimal_moves"])
        qvals = list(sol["q_values"])
        dtt_a = list(sol["dtt_action"])
        policy_targets = build_policy_targets(
            normalized_board, sol, lambda_temp=lambda_temp, q_temp=q_temp
        )
        pol_uniform = policy_targets["policy_optimal_uniform"]
        pol_soft = policy_targets["policy_soft_dtt"]
        pol_soft_q = policy_targets["policy_soft_q"]
        eps_policies = {}
        for eps in epsilons:
            tag = f"{int(round(eps*100)):03d}"
            eps_policies[tag] = epsilon_policy_distribution(normalized_board, sol, eps)
        pol_entropy = _entropy(pol_uniform)
        pol_soft_dtt_entropy = _entropy(pol_soft)
        pol_soft_q_entropy = _entropy(pol_soft_q)
        child_tiers = {
            "child_wins": sum(1 for v in qvals if v == +1),
            "child_draws": sum(1 for v in qvals if v == 0),
            "child_losses": sum(1 for v in qvals if v == -1),
        }
        difficulty = difficulty_score(sol)
        # optional: compute reply branching factor as the average number of
        # legal replies for the opponent after optimal moves. Keep lightweight
        # and deterministic. If no optimal moves, 0.0. Terminal children
        # contribute 0 by definition (no replies).
        legal_reply_counts: List[int] = []
        for mv in optimal_moves:
            child = board[:]
            # The mover is the same for every child of this board, so reuse
            # to_move instead of recounting the pieces on each iteration.
            child[mv] = to_move
            if get_winner(child) != 0 or is_draw(child):
                legal_reply_counts.append(0)
            else:
                legal_reply_counts.append(sum(1 for v in child if v == 0))
        reply_branching = (
            float(sum(legal_reply_counts) / len(legal_reply_counts)) if legal_reply_counts else 0.0
        )
    else:
        # Defaults for boards the solver never saw. The per-cell loop below
        # relies on these being None-filled lists / empty containers.
        value_current = None
        plies_to_end = None
        optimal_moves = set()
        qvals = [None] * 9
        dtt_a = [None] * 9
        pol_uniform = [None] * 9
        pol_soft = [None] * 9
        pol_soft_q = [None] * 9
        eps_policies = {}
        pol_entropy = None
        pol_soft_dtt_entropy = None
        pol_soft_q_entropy = None
        child_tiers = {"child_wins": 0, "child_draws": 0, "child_losses": 0}
        difficulty = 0.0
        reply_branching = 0.0

    sym = symmetry_info(board)
    legal = [normalized_board[i] == 0 for i in range(9)]
    # optimal_moves is empty for unreachable boards, so the mask is all False.
    best_mask = [(i in optimal_moves) for i in range(9)]
    cell_pot = calculate_cell_line_potentials(normalized_board)

    # Extended positional/strategic features (deterministic and cheap)
    # Use the normalized_board for player-relative metrics
    x_threats = calculate_line_threats(normalized_board, 1)
    o_threats = calculate_line_threats(normalized_board, 2)
    x_conn = calculate_connectivity(normalized_board, 1)
    o_conn = calculate_connectivity(normalized_board, 2)
    control = calculate_control_metrics(normalized_board)
    x_patterns = calculate_pattern_strength(normalized_board, 1)
    o_patterns = calculate_pattern_strength(normalized_board, 2)
    game_phase = calculate_game_phase(normalized_board)
    x_two_open = count_two_in_row_open(normalized_board, 1)
    o_two_open = count_two_in_row_open(normalized_board, 2)

    # Evaluate the draw predicate once; it feeds both is_terminal and is_draw.
    draw = is_draw(board)

    features: Dict[str, Any] = {
        "board_state": key,
        "normalized_board_state": norm_key,
        "swapped_color": int(swapped),
        "x_count": x_count,
        "o_count": o_count,
        "empty_count": normalized_board.count(0),
        "move_number": x_count + o_count,
        "current_player": current_player,
        "is_terminal": winner_raw != 0 or draw,
        "winner": winner_raw,
        "winner_normalized": winner_norm,
        "is_draw": draw,
        "is_valid": is_valid_state(board),
        "reachable_from_start": reachable,
        "canonical_form": sym["canonical_form"],
        "canonical_op": sym["canonical_op"],
        "orbit_size": sym["orbit_size"],
        "horizontal_symmetric": sym["horizontal_symmetric"],
        "vertical_symmetric": sym["vertical_symmetric"],
        "diagonal_symmetric": sym["diagonal_symmetric"],
        "rotational_symmetric": sym["rotational_symmetric"],
        "any_symmetric": sym["any_symmetric"],
        "orbit_index": sym["orbit_index"],
        "value_current": value_current,
        "plies_to_end": plies_to_end,
        "optimal_moves_count": len(optimal_moves),
        "optimal_policy_entropy": pol_entropy,
        "policy_soft_dtt_entropy": pol_soft_dtt_entropy,
        "policy_soft_q_entropy": pol_soft_q_entropy,
        # Scalar difficulty per state; 0.0 when not reachable
        "difficulty_score": difficulty,
        "reply_branching_factor": reply_branching,
        **child_tiers,
        # control metrics (symmetric)
        **control,
        # player-relative threats/connectivity/patterns
        "x_row_threats": x_threats["row_threats"],
        "x_col_threats": x_threats["col_threats"],
        "x_diag_threats": x_threats["diag_threats"],
        "x_total_threats": x_threats["total_threats"],
        "o_row_threats": o_threats["row_threats"],
        "o_col_threats": o_threats["col_threats"],
        "o_diag_threats": o_threats["diag_threats"],
        "o_total_threats": o_threats["total_threats"],
        "x_connected_pairs": x_conn["connected_pairs"],
        "x_total_connections": x_conn["total_connections"],
        "x_isolated_pieces": x_conn["isolated_pieces"],
        "x_cluster_count": x_conn["cluster_count"],
        "x_largest_cluster": x_conn["largest_cluster"],
        "o_connected_pairs": o_conn["connected_pairs"],
        "o_total_connections": o_conn["total_connections"],
        "o_isolated_pieces": o_conn["isolated_pieces"],
        "o_cluster_count": o_conn["cluster_count"],
        "o_largest_cluster": o_conn["largest_cluster"],
        "x_open_lines": x_patterns["open_lines"],
        "x_semi_open_lines": x_patterns["semi_open_lines"],
        "x_blocked_lines": x_patterns["blocked_lines"],
        "x_potential_lines": x_patterns["potential_lines"],
        "o_open_lines": o_patterns["open_lines"],
        "o_semi_open_lines": o_patterns["semi_open_lines"],
        "o_blocked_lines": o_patterns["blocked_lines"],
        "o_potential_lines": o_patterns["potential_lines"],
        "x_two_in_row_open": x_two_open,
        "o_two_in_row_open": o_two_open,
        "game_phase": game_phase,
    }

    # Per-cell columns. For unreachable boards the policy/q lists hold None
    # and eps_policies is empty, so no extra reachability guard is needed.
    for i in range(9):
        features[f"cell_{i}"] = normalized_board[i]
        features[f"legal_{i}"] = int(legal[i])
        features[f"best_{i}"] = int(best_mask[i])
        features[f"q_value_{i}"] = qvals[i]
        features[f"dtt_action_{i}"] = dtt_a[i]
        features[f"canonical_action_map_{i}"] = apply_action_transform(i, sym["canonical_op"])
        features[f"policy_uniform_{i}"] = pol_uniform[i]
        features[f"policy_soft_{i}"] = pol_soft[i]
        features[f"policy_soft_q_{i}"] = pol_soft_q[i]
        for tag, pol in eps_policies.items():
            features[f"epsilon_policy_{tag}_{i}"] = pol[i]
        features[f"x_cell_open_lines_{i}"] = cell_pot["x_cell_open_lines"][i]
        features[f"o_cell_open_lines_{i}"] = cell_pot["o_cell_open_lines"][i]

    return features

`solve_all_reachable()` ¶

Enumerate and solve all states reachable from the empty board.

Source code in src/tictactoe/solver.py

def solve_all_reachable() -> dict:
    """Solve every state that can be reached by play from the empty board.

    A breadth-first walk starting at the empty board collects each distinct
    state once; terminal states (a win or a draw) are recorded but not
    expanded. Every collected state is then passed to ``solve_state``.

    Returns:
        A dict mapping each state's cells joined into a string to that
        state's ``solve_state`` result, in breadth-first discovery order.
    """
    root = (0,) * 9
    discovered = {root}
    frontier = deque([root])
    visit_order = []
    while frontier:
        state = frontier.popleft()
        visit_order.append(state)
        # Only non-terminal states have children to enqueue.
        if winner_t(state) == 0 and not is_draw_t(state):
            mover = current_player_t(state)
            for mv in legal_moves(state):
                successor = apply_move_t(state, mv, mover)
                if successor in discovered:
                    continue
                discovered.add(successor)
                frontier.append(successor)
    return {"".join(map(str, state)): solve_state(state) for state in visit_order}

API Reference: tictactoe¶

Quick examples¶

extract_board_features(board, solved_map, lambda_temp=0.5, q_temp=1.0, epsilons=None, normalize_to_move=False) ¶

solve_all_reachable() ¶

`extract_board_features(board, solved_map, lambda_temp=0.5, q_temp=1.0, epsilons=None, normalize_to_move=False)` ¶

`solve_all_reachable()` ¶