aboutsummaryrefslogtreecommitdiff
path: root/pyecsca/sca
diff options
context:
space:
mode:
authorJ08nY2025-03-19 13:56:43 +0100
committerJ08nY2025-03-19 13:56:43 +0100
commit57434113ec4b0400a5ac7b949536e5edfebddc00 (patch)
tree095eeba1bb205e21ff558f2fa564ada5eb73d693 /pyecsca/sca
parentdde027debb675fa20c72eb1d9d315bdf544f3e16 (diff)
downloadpyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.tar.gz
pyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.tar.zst
pyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.zip
Diffstat (limited to 'pyecsca/sca')
-rw-r--r--pyecsca/sca/re/tree.py95
1 files changed, 76 insertions, 19 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py
index e2174a3..39a7ca7 100644
--- a/pyecsca/sca/re/tree.py
+++ b/pyecsca/sca/re/tree.py
@@ -355,6 +355,61 @@ class Node(NodeMixin):
@public
+class SplitCriterion:
+ """
+ A splitting criterion to be used in tree building.
+ """
+
+ def __call__(self, split: pd.Series) -> float:
+ raise NotImplementedError
+
+ def is_better(self, score: float, current_best: float) -> bool:
+ raise NotImplementedError
+
+ def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool:
+ raise NotImplementedError
+
+
+class Degree(SplitCriterion):
+ def __call__(self, split: pd.Series) -> float:
+ # We want to maximize the number of groups.
+ # The score is optimal if it is equal to min(n_cfgs, len(dmap.codomain)).
+ return len(split)
+
+ def is_better(self, score: float, current_best: float) -> bool:
+ return score > current_best
+
+ def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool:
+ return score == min(n_cfgs, n_codomain)
+
+
+class SizeOfLargest(SplitCriterion):
+ def __call__(self, split: pd.Series) -> float:
+ # We want to minimize size of largest group.
+ # The score is optimal if it is equal to ceil(n_cfgs / len(dmap.codomain)).
+ return max(split)
+
+ def is_better(self, score: float, current_best: float) -> bool:
+ return score < current_best
+
+ def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool:
+ return score == ceil(n_cfgs / n_codomain)
+
+
+class AverageSize(SplitCriterion):
+ def __call__(self, split: pd.Series) -> float:
+ # We want to minimize the average size of the groups.
+ # The score is optimal if it is equal to ceil(n_cfgs / len(dmap.codomain)).
+ return sum(split**2) / sum(split)
+
+ def is_better(self, score: float, current_best: float) -> bool:
+ return score < current_best
+
+ def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool:
+ return score == ceil(n_cfgs / n_codomain)
+
+
+@public
class Tree:
"""A distinguishing tree."""
@@ -493,27 +548,16 @@ class Tree:
return tree
@classmethod
- def build(cls, cfgs: Set[Any], *maps: Map) -> "Tree":
+ def build(cls, cfgs: Set[Any], *maps: Map, split: str | SplitCriterion = "largest") -> "Tree":
"""
Build a tree.
:param cfgs: The set of configs to build the tree for.
:param maps: The distinguishing maps to use.
+ :param split: The split criterion to use. Can be one of "degree", "largest", "average".
:return: The tree.
"""
- return cls(_build_tree(cfgs, dict(enumerate(maps))), *maps)
-
-
-def _degree(split: pd.Series) -> float:
- return len(split)
-
-
-def _size_of_largest(split: pd.Series) -> float:
- return max(split)
-
-
-def _average_size(split: pd.Series) -> float:
- return sum(split**2) / sum(split)
+ return cls(_build_tree(cfgs, dict(enumerate(maps)), split=split), *maps)
def _build_tree(
@@ -521,6 +565,7 @@ def _build_tree(
maps: Mapping[int, Map],
response: Optional[Any] = None,
depth: int = 0,
+ split: str | SplitCriterion = "largest",
) -> Node:
pad = " " * depth
# If there is only one remaining cfg, we do not need to continue and just return (base case 1).
@@ -533,6 +578,19 @@ def _build_tree(
log(pad + "Trivial.")
return Node(cfgset, response=response)
+ # Choose the split criterion
+ criterion: SplitCriterion
+ if split == "degree":
+ criterion = Degree()
+ elif split == "largest":
+ criterion = SizeOfLargest()
+ elif split == "average":
+ criterion = AverageSize()
+ elif isinstance(split, SplitCriterion):
+ criterion = split
+ else:
+ raise ValueError(f"Unknown split criterion: {split}")
+
# Go over the maps and figure out which one splits the best.
best_i = None
best_dmap = None
@@ -544,17 +602,16 @@ def _build_tree(
# Note we should look at the restriction of the map to the current "cfgs" and split those
restricted = dmap.mapping.loc[dmap.cfg_map.loc[list(cfgs), "vals"]]
for j, column in restricted.items():
- split = column.value_counts(dropna=False)
- # TODO: Try the other scores.
- score = _size_of_largest(split)
- if best_score is None or score < best_score:
+ counts = column.value_counts(dropna=False)
+ score = criterion(counts)
+ if best_score is None or criterion.is_better(score, best_score):
best_i = i
best_column = j
best_dmap = dmap
best_score = score
best_restricted = restricted
# Early abort if optimal score is hit.
- if score == ceil(n_cfgs / len(dmap.codomain)):
+ if criterion.is_optimal(score, n_cfgs, len(dmap.codomain)):
break
# We found nothing distinguishing the configs, so return them all (base case 2).
if best_column is None or best_dmap is None: