diff options
| author | J08nY | 2025-03-19 13:56:43 +0100 |
|---|---|---|
| committer | J08nY | 2025-03-19 13:56:43 +0100 |
| commit | 57434113ec4b0400a5ac7b949536e5edfebddc00 (patch) | |
| tree | 095eeba1bb205e21ff558f2fa564ada5eb73d693 | |
| parent | dde027debb675fa20c72eb1d9d315bdf544f3e16 (diff) | |
| download | pyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.tar.gz pyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.tar.zst pyecsca-57434113ec4b0400a5ac7b949536e5edfebddc00.zip | |
| -rw-r--r-- | pyecsca/sca/re/tree.py | 95 | ||||
| -rw-r--r-- | test/sca/test_tree.py | 6 |
2 files changed, 80 insertions, 21 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py index e2174a3..39a7ca7 100644 --- a/pyecsca/sca/re/tree.py +++ b/pyecsca/sca/re/tree.py @@ -355,6 +355,61 @@ class Node(NodeMixin): @public +class SplitCriterion: + """ + A splitting criterion to be used in tree building. + """ + + def __call__(self, split: pd.Series) -> float: + raise NotImplementedError + + def is_better(self, score: float, current_best: float) -> bool: + raise NotImplementedError + + def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool: + raise NotImplementedError + + +class Degree(SplitCriterion): + def __call__(self, split: pd.Series) -> float: + # We want to maximize the number of groups. + # The score is optimal if it is equal to min(n_cfgs, len(dmap.codomain)). + return len(split) + + def is_better(self, score: float, current_best: float) -> bool: + return score > current_best + + def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool: + return score == min(n_cfgs, n_codomain) + + +class SizeOfLargest(SplitCriterion): + def __call__(self, split: pd.Series) -> float: + # We want to minimize size of largest group. + # The score is optimal if it is equal to ceil(n_cfgs / len(dmap.codomain)). + return max(split) + + def is_better(self, score: float, current_best: float) -> bool: + return score < current_best + + def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool: + return score == ceil(n_cfgs / n_codomain) + + +class AverageSize(SplitCriterion): + def __call__(self, split: pd.Series) -> float: + # We want to minimize the average size of the groups. + # The score is optimal if it is equal to ceil(n_cfgs / len(dmap.codomain)). + return sum(split**2) / sum(split) + + def is_better(self, score: float, current_best: float) -> bool: + return score < current_best + + def is_optimal(self, score: float, n_cfgs: int, n_codomain: int) -> bool: + return score == ceil(n_cfgs / n_codomain) + + +@public class Tree: """A distinguishing tree.""" @@ -493,27 +548,16 @@ class Tree: return tree @classmethod - def build(cls, cfgs: Set[Any], *maps: Map) -> "Tree": + def build(cls, cfgs: Set[Any], *maps: Map, split: str | SplitCriterion = "largest") -> "Tree": """ Build a tree. :param cfgs: The set of configs to build the tree for. :param maps: The distinguishing maps to use. + :param split: The split criterion to use. Can be one of "degree", "largest", "average". :return: The tree. """ - return cls(_build_tree(cfgs, dict(enumerate(maps))), *maps) - - -def _degree(split: pd.Series) -> float: - return len(split) - - -def _size_of_largest(split: pd.Series) -> float: - return max(split) - - -def _average_size(split: pd.Series) -> float: - return sum(split**2) / sum(split) + return cls(_build_tree(cfgs, dict(enumerate(maps)), split=split), *maps) def _build_tree( @@ -521,6 +565,7 @@ def _build_tree( maps: Mapping[int, Map], response: Optional[Any] = None, depth: int = 0, + split: str | SplitCriterion = "largest", ) -> Node: pad = " " * depth # If there is only one remaining cfg, we do not need to continue and just return (base case 1). @@ -533,6 +578,19 @@ def _build_tree( log(pad + "Trivial.") return Node(cfgset, response=response) + # Choose the split criterion + criterion: SplitCriterion + if split == "degree": + criterion = Degree() + elif split == "largest": + criterion = SizeOfLargest() + elif split == "average": + criterion = AverageSize() + elif isinstance(split, SplitCriterion): + criterion = split + else: + raise ValueError(f"Unknown split criterion: {split}") + # Go over the maps and figure out which one splits the best. best_i = None best_dmap = None @@ -544,17 +602,16 @@ def _build_tree( # Note we should look at the restriction of the map to the current "cfgs" and split those restricted = dmap.mapping.loc[dmap.cfg_map.loc[list(cfgs), "vals"]] for j, column in restricted.items(): - split = column.value_counts(dropna=False) - # TODO: Try the other scores. - score = _size_of_largest(split) - if best_score is None or score < best_score: + counts = column.value_counts(dropna=False) + score = criterion(counts) + if best_score is None or criterion.is_better(score, best_score): best_i = i best_column = j best_dmap = dmap best_score = score best_restricted = restricted # Early abort if optimal score is hit. - if score == ceil(n_cfgs / len(dmap.codomain)): + if criterion.is_optimal(score, n_cfgs, len(dmap.codomain)): break # We found nothing distinguishing the configs, so return them all (base case 2). if best_column is None or best_dmap is None: diff --git a/test/sca/test_tree.py b/test/sca/test_tree.py index 58537f5..a3afaf9 100644 --- a/test/sca/test_tree.py +++ b/test/sca/test_tree.py @@ -3,6 +3,7 @@ from collections import OrderedDict from copy import deepcopy import pandas as pd +import pytest from pyecsca.sca.re.tree import Tree, Map @@ -81,7 +82,8 @@ def test_map_with_callable(secp128r1): assert dmap[cfgs[0], 1] -def test_build_tree(): +@pytest.mark.parametrize("split", ["degree", "largest", "average"]) +def test_build_tree(split): cfgs = ["a", "b", "c"] cfg_map = pd.DataFrame([0, 1, 2], index=cfgs, columns=["vals"]) inputs1 = [1, 2, 3, 4] @@ -93,7 +95,7 @@ def test_build_tree(): codomain2 = {0, 1, 2, 3} mapping2 = pd.DataFrame([(1, 0, 0), (2, 0, 0), (3, 0, 0)]) dmap2 = Map(mapping2, cfg_map, inputs2, codomain2) - tree = Tree.build(set(cfgs), dmap1, dmap2) + tree = Tree.build(set(cfgs), dmap1, dmap2, split=split) tree.render() tree.render_basic() tree.describe() |
