Source code for nlgm.searchspace

import numpy as np
import itertools
from tqdm import tqdm
from difflib import SequenceMatcher
from math import isclose
from itertools import combinations_with_replacement, chain
from nlgm.manifolds import BasicManifold, ProductManifold


[docs] def manifold_to_curvature(manifold: str): """ Convert a dimension 2 manifold type to an arbitrary curvature that matches. Args: manifold (str): The manifold type. Returns: int: The curvature that matches the manifold type. """ if manifold == "E2": return 0 elif manifold == "S2": return 1 elif manifold == "H2": return -1 else: raise ValueError("Only E2, S2, and H2 manifolds supported.")
[docs] def manifold_type(manifold: BasicManifold): """ Identify the type of a manifold based on its curvature. Assumes dimension 2. Args: manifold (BasicManifold): The manifold object. Returns: str: The type of the manifold based on its curvature. Raises: ValueError: If the dimension of the manifold is not 2. """ if manifold.dimension != 2: raise ValueError( f"Dimension must be 2, found manifold.dimension={manifold.dimension}" ) if manifold.curvature == 0: return "E2" elif manifold.curvature > 0: return "S2" else: # manifold.curvature < 0 return "H2"
[docs] def compute_weight(manifold1: ProductManifold, manifold2: ProductManifold): """ Deprecated: Compute the weight between two ProductManifold objects based on the Gromov-Hausdorff distances between their component manifolds using the Hungarian algorithm. Args: manifold1 (ProductManifold): The first ProductManifold object. manifold2 (ProductManifold): The second ProductManifold object. Returns: float: The inverse Gromov-Haustorff distance between manifold1 and manifold2. If the manifolds have mismatching dimensions, returns 0. Note: The weight computation is formulated as an optimal matching problem because we want to find the best alignment between the component manifolds of the two ProductManifold objects that minimizes the total distance. Each component manifold from manifold1 should be matched with exactly one component manifold from manifold2 in a way that minimizes the sum of the distances between the matched pairs. The Hungarian algorithm is used to solve this optimal matching problem efficiently. It finds the minimum weight perfect matching in a bipartite graph, where the nodes represent the component manifolds and the edges represent the distances between them. """ # GH distances between pairs of geometric spaces distances = { ("E2", "S2"): 0.23, ("S2", "E2"): 0.23, ("E2", "H2"): 0.77, ("H2", "E2"): 0.77, ("S2", "H2"): 0.84, ("H2", "S2"): 0.84, } # Check if the product manifolds have the same number of component manifolds if len(manifold1.manifolds) != len(manifold2.manifolds): return 0.0 # No connection between product manifolds of different dimensions # Identifying the types of manifolds in each ProductManifold types1 = sorted([manifold_type(manifold) for manifold in manifold1.manifolds]) types2 = sorted([manifold_type(manifold) for manifold in manifold2.manifolds]) i = 0 while i < len(types1): if types1[i] != types2[i]: break i += 1 return 1 / distances.get((types1[i], types2[i]))
def __construct_graph_search_space( n_p: int, curvature_choices: list = [-1, 0, 1], connectivity: bool = False ): """ Construct the graph search space for finding the optimal latent geometry. Args: n_p (int): The number of model spaces in each product manifold. curvature_choices (list): The possible curvatures to choose from (default: [-1, 0, 1]). connectivity (bool): Whether or not to return a connectivity graph instead of a weighted graph (default: False). Returns: - adjacency_matrix: A 2D numpy array representing the adjacency matrix of the graph search space. - signatures: A list of product manifold curvatures. With model space dimension 2, this specifies the signature. The index of the tuple in the list corresponds to the index of the node in the adjacency matrix. Note: - All model spaces have dimension 2. - Nodes in the graph represent different geometries (product manifolds). - Edges between nodes represent the distances between the corresponding product manifolds. - A distance of 0.0 means there is no edge between the nodes. - All product manifolds have n_p model spaces, ensuring a constant total dimension throughout the graph. """ # Generate all possible combinations of curvatures for the given number of model spaces curvature_combinations = list(itertools.product(curvature_choices, repeat=n_p)) # Deduplicate: Sort the curvatures within each combination to avoid duplicates curvature_combinations = list( set([tuple(sorted(combination)) for combination in curvature_combinations]) ) # Create nodes (product manifolds) for each curvature combination nodes = [ProductManifold(curvatures) for curvatures in curvature_combinations] # Initialize the adjacency matrix with zeros adjacency_matrix = np.zeros((len(nodes), len(nodes))) # Compute the distances between nodes and add edges to the adjacency matrix for i in tqdm(range(len(nodes)), desc="Constructing Graph Search Space"): for j in range(i + 1, len(nodes)): weight = compute_weight(nodes[i], nodes[j]) if weight != 0.0: adjacency_matrix[i][j] = weight adjacency_matrix[j][i] = weight return adjacency_matrix, curvature_combinations
[docs] def construct_graph_search_space( n_p: int = 7, curvature_choices: list = [-1, 0, 1], connectivity: bool = False, ): """ Construct the graph search space for finding the optimal latent geometry. Args: n_p (int): The number of model spaces in each product manifold. curvature_choices (list): The possible curvatures to choose from (default: [-1, 0, 1]). connectivity (bool): Whether or not to return a connectivity graph instead of a weighted graph (default: False). Returns: - adjacency_matrix: A 2D numpy array representing the adjacency matrix of the graph search space. - signatures: A list of product manifold curvatures. With model space dimension 2, this specifies the signature. The index of the tuple in the list corresponds to the index of the node in the adjacency matrix. Note: - All model spaces have dimension 2. - Nodes in the graph represent different geometries (product manifolds). - Edges between nodes represent the distances between the corresponding product manifolds. - A distance of 0.0 means there is no edge between the nodes. - All product manifolds have up to n_p model spaces. """ if n_p <= 0 or len(curvature_choices) <= 0: return np.array([]), [] curvature_choices = sorted(curvature_choices) # evil python trickery!! # unpacking is faster than type conversion or list comprehension # choose without replacement sum from i=1 to n_r of (n_s + i - 1 choose i) product_spaces = { dim: [*combinations_with_replacement(curvature_choices, dim)] for dim in range(1, n_p + 1) } indices = list(chain.from_iterable(product_spaces.values())) inv_map = {indices[i]: i for i in range(len(indices))} nodes = len(indices) distances = np.zeros( (nodes, nodes), np.float32 ) # + np.diag(np.ones(nodes) * np.inf) # compute distance between all product spaces for dim, s in product_spaces.items(): for curnode in s: # compare against adjacent and current dimensions (make sure this is working properly) for i in range(-1, 2): if dim + i < 1 or dim + i > n_p: continue for compnode in product_spaces[dim + i]: if inv_map[compnode] <= inv_map[curnode]: continue if adj_product_spaces(curnode, compnode): distances[inv_map[compnode]][inv_map[curnode]] = 1 # flip upper triangle onto bottom triangle distances = distances + distances.T - np.diag(np.diag(distances)) # if we only want an adjacency matrix, return early if connectivity: return distances, indices # inefficiently compute the inverse d_GH between product spaces for i in range(distances.shape[0]): for j in range(distances.shape[1]): if distances[i][j] != 1: continue pm1 = ProductManifold(indices[i]) pm2 = ProductManifold(indices[j]) dgh = compute_weight(pm1, pm2) distances[i][j] = dgh if dgh != 0 else 1 # different dims return distances, indices
[docs] def adj_product_spaces(s1: list, s2: list) -> bool: """ Check whether two given manifold signatures are adjacent product spaces. Args: s1 (list): The first manifold signature. s2 (list): The second manifold signature. Returns: bool: True if the manifold signatures are adjacent, False otherwise. Note: Adjacency is defined as a difference of at most one manifold between signatures. We use an approximation of Levenshtein (edit) distance to compute this more efficiently. """ if s1 == s2: return True len_delta = abs(len(s1) - len(s2)) total_len = len(s1) + len(s2) if len_delta > 1: return False matcher = SequenceMatcher() matcher.set_seqs(s1, s2) # if equal length, check that approx. Levenshtein distance is 2 # if length differs by 1, check that approx. Levenshtein distance is 1 return isclose(1 - matcher.ratio(), (1.0 if len_delta == 1 else 2.0) / total_len)
[docs] def get_color(weight: float) -> str: """ Return a color based on the edge weight when visualizing the graph search space. Args: weight (float): The weight of the edge. Returns: str: The color corresponding to the edge weight. """ if isclose(weight, 1.0): # diff dim return "grey" elif isclose(weight, 4.34782600402832): # d_GH(E2, S2)^{-1} = 1.0 / 0.23 return "black" elif isclose(weight, 1.298701286315918): # d_GH(E2, H2)^{-1} = 1.0 / 0.77 return "red" else: # d_GH(S2, H2)^{-1} = 1.0 / 0.84 return "blue"