diff options
| author | J08nY | 2024-01-21 17:45:16 +0100 |
|---|---|---|
| committer | J08nY | 2024-01-21 17:45:16 +0100 |
| commit | fbf754c407d831c24004285660be62c7f5717b2e (patch) | |
| tree | c2da34a36a90f240fdffd7b0deb68efbdb74a716 /pyecsca/sca/re/tree.py | |
| parent | ff464555abae1d38849c251683064ff9fbe87227 (diff) | |
| download | pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.gz pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.zst pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.zip | |
Diffstat (limited to 'pyecsca/sca/re/tree.py')
| -rw-r--r-- | pyecsca/sca/re/tree.py | 59 |
1 files changed, 38 insertions, 21 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py index 81b98da..54d0dc0 100644 --- a/pyecsca/sca/re/tree.py +++ b/pyecsca/sca/re/tree.py @@ -49,6 +49,7 @@ from anytree import RenderTree, NodeMixin, AbstractStyle @public class Map: """A distinguishing map.""" + mapping: pd.DataFrame codomain: Set[Any] @@ -66,8 +67,8 @@ class Map: @classmethod def from_io_map(cls, cfgs: Set[Any], mapping: Mapping[Any, Mapping[Any, Any]]): cfgs_l = list(cfgs) - inputs = set() - codomain = set() + inputs: Set[Any] = set() + codomain: Set[Any] = set() for io_map in mapping.values(): inputs.update(io_map.keys()) codomain.update(io_map.values()) @@ -79,12 +80,21 @@ class Map: @public class Node(NodeMixin): """A node in a distinguishing tree.""" + cfgs: Set[Any] dmap_index: Optional[int] dmap_input: Optional[Any] response: Optional[Any] - def __init__(self, cfgs: Set[Any], dmap_index: Optional[int] = None, dmap_input: Optional[Any] = None, response: Optional[Any] = None, parent=None, children=None): + def __init__( + self, + cfgs: Set[Any], + dmap_index: Optional[int] = None, + dmap_input: Optional[Any] = None, + response: Optional[Any] = None, + parent=None, + children=None, + ): self.cfgs = cfgs self.dmap_index = dmap_index self.dmap_input = dmap_input @@ -98,6 +108,7 @@ class Node(NodeMixin): @public class Tree: """A distinguishing tree.""" + maps: List[Map] root: Node @@ -117,7 +128,7 @@ class Tree: def size(self) -> int: return self.root.size - def render(self) -> None: + def render(self) -> str: style = AbstractStyle("\u2502 ", "\u251c\u2500\u2500", "\u2514\u2500\u2500") def _str(n: Node): @@ -126,16 +137,21 @@ class Tree: return pre + "\n".join(str(cfg) for cfg in n.cfgs) else: return pre + f"{n.dmap_index}({n.dmap_input})" - print(RenderTree(self.root, style=style).by_attr(_str)) - def describe(self) -> None: + return RenderTree(self.root, style=style).by_attr(_str) + + def describe(self) -> str: leaf_sizes = [len(leaf.cfgs) for leaf in self.leaves] - print(f"Total cfgs: {len(self.root.cfgs)}") - print(f"Depth: {self.height}") - print(f"Leaf sizes: {sorted(leaf_sizes)}") - print(f"Average leaf size: {np.mean(leaf_sizes):.3}") - leafs = sum(([size] * size for size in leaf_sizes), []) - print(f"Mean result size: {np.mean(leafs):.3}") + leafs: List[int] = sum(([size] * size for size in leaf_sizes), []) + return "\n".join( + ( + f"Total cfgs: {len(self.root.cfgs)}", + f"Depth: {self.height}", + f"Leaf sizes: {sorted(leaf_sizes)}", + f"Average leaf size: {np.mean(leaf_sizes):.3}", + f"Mean result size: {np.mean(leafs):.3}", + ) + ) def expand(self, dmap: Map) -> "Tree": tree = deepcopy(self) @@ -163,7 +179,7 @@ def _size_of_largest(split: pd.Series) -> float: def _average_size(split: pd.Series) -> float: - return sum(split ** 2)/sum(split) + return sum(split**2) / sum(split) def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> Node: @@ -171,8 +187,9 @@ def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> N # Note that n_cfgs will never be 0 here, as the base case 2 returns if the cfgs cannot # be split into two sets (one would be empty). n_cfgs = len(cfgs) + ncfgs = set(cfgs) if n_cfgs == 1: - return Node(set(cfgs), response=response) + return Node(ncfgs, response=response) # Go over the maps and figure out which one splits the best. best_distinguishing_element = None @@ -182,30 +199,30 @@ def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> N for dmap in maps: # Now we have a map, it may be binary or have larger output domain # Note we should look at the restriction of the map to the current "cfgs" and split those - restricted = dmap.mapping.filter(items=cfgs, axis=0) + restricted = dmap.mapping.loc[list(cfgs), :] # .filter(items=cfgs, axis=0) for elem in dmap.mapping: - split = restricted[elem].value_counts(dropna=False) + split = restricted.loc[:, elem].value_counts(dropna=False) # XXX: Try the other scores. - # TODO: Abort here when the total best score is already reached early. score = _size_of_largest(split) if best_score is None or score < best_score: best_distinguishing_element = elem best_distinguishing_dmap = dmap best_score = score best_restricted = restricted - if score == ceil(n_cfgs / len(dmap.codomain)): + # Early abort if optimal score is hit. The +1 is for "None" values which are not in the codomain. + if score == ceil(n_cfgs / (len(dmap.codomain) + 1)): break # Now we have a dmap as well as an element in it that splits the best. # Go over the groups of configs that share the response - groups = best_restricted.groupby(best_distinguishing_element, dropna=False) + groups = best_restricted.groupby(best_distinguishing_element, dropna=False) # type: ignore # We found nothing distinguishing the configs, so return them all (base case 2). if groups.ngroups == 1: - return Node(set(cfgs), response=response) + return Node(ncfgs, response=response) # Create our node dmap_index = maps.index(best_distinguishing_dmap) - result = Node(set(cfgs), dmap_index, best_distinguishing_element, response=response) + result = Node(ncfgs, dmap_index, best_distinguishing_element, response=response) for output, group in groups: child = _build_tree(set(group.index), *maps, response=output) |
