diff options
| author | J08nY | 2024-01-28 20:14:08 +0100 |
|---|---|---|
| committer | J08nY | 2024-01-28 20:14:08 +0100 |
| commit | 418bbee40afe117a1ddf9fc86e87ad3f926921f1 (patch) | |
| tree | 0e9e22de45f95e7d098ea6e13246eb3df9d2b79c | |
| parent | 4cd17c3897c64ae915d06641b6693831f7762529 (diff) | |
| download | pyecsca-418bbee40afe117a1ddf9fc86e87ad3f926921f1.tar.gz pyecsca-418bbee40afe117a1ddf9fc86e87ad3f926921f1.tar.zst pyecsca-418bbee40afe117a1ddf9fc86e87ad3f926921f1.zip | |
Optimize tree expansion.
| -rw-r--r-- | pyecsca/sca/re/tree.py | 57 |
1 files changed, 36 insertions, 21 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py index 8a76061..2893411 100644 --- a/pyecsca/sca/re/tree.py +++ b/pyecsca/sca/re/tree.py @@ -45,6 +45,8 @@ import pandas as pd from public import public from anytree import RenderTree, NodeMixin, AbstractStyle +from ...misc.utils import log + @public class Map: @@ -219,6 +221,7 @@ class Tree: leafs: List[int] = sum(([size] * size for size in leaf_sizes), []) return "\n".join( ( + f"Dmaps: {len(self.maps)}", f"Total cfgs: {len(self.root.cfgs)}", f"Depth: {self.height}", f"Leaf sizes: {sorted(leaf_sizes)}", @@ -229,12 +232,11 @@ class Tree: def expand(self, dmap: Map) -> "Tree": """Expand a tree with a new distinguishing map.""" - tree = deepcopy(self) - tree.maps.append(dmap) + tree = Tree(deepcopy(self.root), *self.maps, dmap) for leaf in tree.leaves: - # TODO: Make _build_tree take the maps as a mapping of int -> map and only allow the new maps here - # because this is inefficient. - expanded = _build_tree(leaf.cfgs, *tree.maps, response=leaf.response) + expanded = _build_tree( + leaf.cfgs, {len(self.maps): dmap}, response=leaf.response + ) # If we were able to split the leaf further, then replace it with the found tree. if not expanded.is_leaf: parent = leaf.parent @@ -245,7 +247,7 @@ class Tree: @classmethod def build(cls, cfgs: Set[Any], *maps: Map) -> "Tree": """Build a tree.""" - return cls(_build_tree(cfgs, *maps), *maps) + return cls(_build_tree(cfgs, {i: dmap for i, dmap in enumerate(maps)}), *maps) def _degree(split: pd.Series) -> float: @@ -260,60 +262,73 @@ def _average_size(split: pd.Series) -> float: return sum(split**2) / sum(split) -def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> Node: +def _build_tree( + cfgs: Set[Any], + maps: Mapping[int, Map], + response: Optional[Any] = None, + depth: int = 0, +) -> Node: + pad = " " * depth # If there is only one remaining cfg, we do not need to continue and just return (base case 1). # Note that n_cfgs will never be 0 here, as the base case 2 returns if the cfgs cannot # be split into two sets (one would be empty). n_cfgs = len(cfgs) + log(pad + f"Splitting {n_cfgs}.") cfgset = set(cfgs) if n_cfgs == 1: + log(pad + "Trivial.") return Node(cfgset, response=response) # Go over the maps and figure out which one splits the best. - best_distinguishing_column = None - best_distinguishing_dmap = None + best_i = None + best_dmap = None best_restricted = None + best_column = None best_score = None - for dmap in maps: + for i, dmap in maps.items(): # Now we have a map, it may be binary or have larger output domain # Note we should look at the restriction of the map to the current "cfgs" and split those restricted = dmap.mapping.loc[dmap.cfg_map.loc[list(cfgs), "vals"].unique()] - for i, column in restricted.items(): + for j, column in restricted.items(): split = column.value_counts(dropna=False) # TODO: Try the other scores. score = _size_of_largest(split) if best_score is None or score < best_score: - best_distinguishing_column = i - best_distinguishing_dmap = dmap + best_i = i + best_column = j + best_dmap = dmap best_score = score best_restricted = restricted # Early abort if optimal score is hit. if score == ceil(n_cfgs / len(dmap.codomain)): break # We found nothing distinguishing the configs, so return them all (base case 2). - if best_distinguishing_column is None or best_distinguishing_dmap is None: + if best_column is None or best_dmap is None: + log(pad + "Nothing could split.") return Node(cfgset, response=response) - best_distinguishing_element = best_distinguishing_dmap.domain[ - best_distinguishing_column - ] + best_distinguishing_element = best_dmap.domain[best_column] # Now we have a dmap as well as an element in it that splits the best. # Go over the groups of configs that share the response - groups = best_restricted.groupby(best_distinguishing_column, dropna=False) # type: ignore + groups = best_restricted.groupby(best_column, dropna=False) # type: ignore # We found nothing distinguishing the configs, so return them all (base case 2). if groups.ngroups == 1: + log(pad + "Trivial split.") return Node(cfgset, response=response) # Create our node - dmap_index = maps.index(best_distinguishing_dmap) + dmap_index = best_i result = Node(cfgset, dmap_index, best_distinguishing_element, response=response) # Go over the distinct group for output, group in groups: # Lookup the cfgs in the group - group_cfgs = set(best_distinguishing_dmap.cfg_map.index[best_distinguishing_dmap.cfg_map["vals"].isin(group.index)]) + group_cfgs = set( + best_dmap.cfg_map.index[best_dmap.cfg_map["vals"].isin(group.index)] + ) + log(pad + f"Split {len(group_cfgs)} via dmap {best_i}.") # And build the tree recursively - child = _build_tree(group_cfgs, *maps, response=output) + child = _build_tree(group_cfgs, maps, response=output, depth=depth+1) child.parent = result return result |
