author: J08nY 2024-01-21 17:45:16 +0100
committer: J08nY 2024-01-21 17:45:16 +0100
commit: fbf754c407d831c24004285660be62c7f5717b2e (patch)
tree: c2da34a36a90f240fdffd7b0deb68efbdb74a716 /pyecsca/sca/re/tree.py
parent: ff464555abae1d38849c251683064ff9fbe87227 (diff)
download: pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.gz
pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.zst
pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.zip
1 files changed, 38 insertions, 21 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py
index 81b98da..54d0dc0 100644
--- a/pyecsca/sca/re/tree.py
+++ b/pyecsca/sca/re/tree.py
@@ -49,6 +49,7 @@ from anytree import RenderTree, NodeMixin, AbstractStyle
 @public
 class Map:
     """A distinguishing map."""
+
     mapping: pd.DataFrame
     codomain: Set[Any]
 
@@ -66,8 +67,8 @@ class Map:
     @classmethod
     def from_io_map(cls, cfgs: Set[Any], mapping: Mapping[Any, Mapping[Any, Any]]):
         cfgs_l = list(cfgs)
-        inputs = set()
-        codomain = set()
+        inputs: Set[Any] = set()
+        codomain: Set[Any] = set()
         for io_map in mapping.values():
             inputs.update(io_map.keys())
             codomain.update(io_map.values())
@@ -79,12 +80,21 @@ class Map:
 @public
 class Node(NodeMixin):
     """A node in a distinguishing tree."""
+
     cfgs: Set[Any]
     dmap_index: Optional[int]
     dmap_input: Optional[Any]
     response: Optional[Any]
 
-    def __init__(self, cfgs: Set[Any], dmap_index: Optional[int] = None, dmap_input: Optional[Any] = None, response: Optional[Any] = None, parent=None, children=None):
+    def __init__(
+        self,
+        cfgs: Set[Any],
+        dmap_index: Optional[int] = None,
+        dmap_input: Optional[Any] = None,
+        response: Optional[Any] = None,
+        parent=None,
+        children=None,
+    ):
         self.cfgs = cfgs
         self.dmap_index = dmap_index
         self.dmap_input = dmap_input
@@ -98,6 +108,7 @@ class Node(NodeMixin):
 @public
 class Tree:
     """A distinguishing tree."""
+
     maps: List[Map]
     root: Node
 
@@ -117,7 +128,7 @@ class Tree:
     def size(self) -> int:
         return self.root.size
 
-    def render(self) -> None:
+    def render(self) -> str:
         style = AbstractStyle("\u2502  ", "\u251c\u2500\u2500", "\u2514\u2500\u2500")
 
         def _str(n: Node):
@@ -126,16 +137,21 @@ class Tree:
                 return pre + "\n".join(str(cfg) for cfg in n.cfgs)
             else:
                 return pre + f"{n.dmap_index}({n.dmap_input})"
-        print(RenderTree(self.root, style=style).by_attr(_str))
 
-    def describe(self) -> None:
+        return RenderTree(self.root, style=style).by_attr(_str)
+
+    def describe(self) -> str:
         leaf_sizes = [len(leaf.cfgs) for leaf in self.leaves]
-        print(f"Total cfgs: {len(self.root.cfgs)}")
-        print(f"Depth: {self.height}")
-        print(f"Leaf sizes: {sorted(leaf_sizes)}")
-        print(f"Average leaf size: {np.mean(leaf_sizes):.3}")
-        leafs = sum(([size] * size for size in leaf_sizes), [])
-        print(f"Mean result size: {np.mean(leafs):.3}")
+        leafs: List[int] = sum(([size] * size for size in leaf_sizes), [])
+        return "\n".join(
+            (
+                f"Total cfgs: {len(self.root.cfgs)}",
+                f"Depth: {self.height}",
+                f"Leaf sizes: {sorted(leaf_sizes)}",
+                f"Average leaf size: {np.mean(leaf_sizes):.3}",
+                f"Mean result size: {np.mean(leafs):.3}",
+            )
+        )
 
     def expand(self, dmap: Map) -> "Tree":
         tree = deepcopy(self)
@@ -163,7 +179,7 @@ def _size_of_largest(split: pd.Series) -> float:
 
 
 def _average_size(split: pd.Series) -> float:
-    return sum(split ** 2)/sum(split)
+    return sum(split**2) / sum(split)
 
 
 def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> Node:
@@ -171,8 +187,9 @@ def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> N
     # Note that n_cfgs will never be 0 here, as the base case 2 returns if the cfgs cannot
     # be split into two sets (one would be empty).
     n_cfgs = len(cfgs)
+    ncfgs = set(cfgs)
     if n_cfgs == 1:
-        return Node(set(cfgs), response=response)
+        return Node(ncfgs, response=response)
 
     # Go over the maps and figure out which one splits the best.
     best_distinguishing_element = None
@@ -182,30 +199,30 @@ def _build_tree(cfgs: Set[Any], *maps: Map, response: Optional[Any] = None) -> N
     for dmap in maps:
         # Now we have a map, it may be binary or have larger output domain
         # Note we should look at the restriction of the map to the current "cfgs" and split those
-        restricted = dmap.mapping.filter(items=cfgs, axis=0)
+        restricted = dmap.mapping.loc[list(cfgs), :]  # .filter(items=cfgs, axis=0)
         for elem in dmap.mapping:
-            split = restricted[elem].value_counts(dropna=False)
+            split = restricted.loc[:, elem].value_counts(dropna=False)
             # XXX: Try the other scores.
-            # TODO: Abort here when the total best score is already reached early.
             score = _size_of_largest(split)
             if best_score is None or score < best_score:
                 best_distinguishing_element = elem
                 best_distinguishing_dmap = dmap
                 best_score = score
                 best_restricted = restricted
-            if score == ceil(n_cfgs / len(dmap.codomain)):
+            # Early abort if optimal score is hit. The +1 is for "None" values which are not in the codomain.
+            if score == ceil(n_cfgs / (len(dmap.codomain) + 1)):
                 break
 
     # Now we have a dmap as well as an element in it that splits the best.
     # Go over the groups of configs that share the response
-    groups = best_restricted.groupby(best_distinguishing_element, dropna=False)
+    groups = best_restricted.groupby(best_distinguishing_element, dropna=False)  # type: ignore
     # We found nothing distinguishing the configs, so return them all (base case 2).
     if groups.ngroups == 1:
-        return Node(set(cfgs), response=response)
+        return Node(ncfgs, response=response)
 
     # Create our node
     dmap_index = maps.index(best_distinguishing_dmap)
-    result = Node(set(cfgs), dmap_index, best_distinguishing_element, response=response)
+    result = Node(ncfgs, dmap_index, best_distinguishing_element, response=response)
 
     for output, group in groups:
         child = _build_tree(set(group.index), *maps, response=output)
author	J08nY	2024-01-21 17:45:16 +0100
committer	J08nY	2024-01-21 17:45:16 +0100
commit	fbf754c407d831c24004285660be62c7f5717b2e (patch)
tree	c2da34a36a90f240fdffd7b0deb68efbdb74a716 /pyecsca/sca/re/tree.py
parent	ff464555abae1d38849c251683064ff9fbe87227 (diff)
download	pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.gz pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.tar.zst pyecsca-fbf754c407d831c24004285660be62c7f5717b2e.zip