| author | J08nY | 2024-01-29 14:27:10 +0100 |
|---|---|---|
| committer | J08nY | 2024-01-29 14:27:10 +0100 |
| commit | 9333147035a317a2a8b6529612435bee7e3a66ea (patch) | |
| tree | 768483d5f3672eb07dc707d8734fc2dfb7b07c1c | |
| parent | 3720d55c229d62b848f90d8fcd6db5c3b937e6d1 (diff) | |
| download | pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.tar.gz pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.tar.zst pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.zip | |
Add dmap deduplication.
| -rw-r--r-- | pyecsca/sca/re/tree.py | 21 |
| -rw-r--r-- | test/sca/test_tree.py | 33 |
2 files changed, 38 insertions, 16 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py
index 6aae1ea..2955631 100644
--- a/pyecsca/sca/re/tree.py
+++ b/pyecsca/sca/re/tree.py
@@ -152,10 +152,17 @@ class Map:
 
     def deduplicate(self):
         """Deduplicate the configs of this distinguishing map based on the rows."""
-        for row, data in self.mapping.groupby(
-            self.mapping.columns.tolist(), as_index=False
-        ):
-            pass
+        indices = []
+
+        def agg(thing):
+            indices.append(thing.index)
+            return thing.iloc[0]
+
+        self.mapping = self.mapping.groupby(self.mapping.columns.tolist(), as_index=False, dropna=False).agg(agg)
+        new_cfg_map = self.cfg_map.copy()
+        for i, index in enumerate(indices):
+            new_cfg_map.loc[self.cfg_map["vals"].isin(index), "vals"] = i
+        self.cfg_map = new_cfg_map
 
     def merge(self, other: "Map"):
         """Merge in another distinguishing map operating on different configs."""
@@ -167,7 +174,9 @@ class Map:
         last = max(self.cfg_map["vals"])
         # Offset the other cfg_map and mapping index by last + 1
         other_cfg_map = other.cfg_map + (last + 1)
-        other_mapping = other.mapping[reordering].set_index(other.mapping.index + (last + 1))
+        other_mapping = other.mapping[reordering].set_index(
+            other.mapping.index + (last + 1)
+        )
         # Now concat the cfg_map and mapping
         self.cfg_map = pd.concat([self.cfg_map, other_cfg_map], copy=False)
         self.mapping = pd.concat([self.mapping, other_mapping], copy=False)
@@ -350,7 +359,7 @@ def _build_tree(
         )
         log(pad + f"Split {len(group_cfgs)} via dmap {best_i}.")
         # And build the tree recursively
-        child = _build_tree(group_cfgs, maps, response=output, depth=depth+1)
+        child = _build_tree(group_cfgs, maps, response=output, depth=depth + 1)
         child.parent = result
     return result
 
diff --git a/test/sca/test_tree.py b/test/sca/test_tree.py
index ce16f97..89a9f61 100644
--- a/test/sca/test_tree.py
+++ b/test/sca/test_tree.py
@@ -1,5 +1,6 @@
 import random
 import time
+from copy import deepcopy
 
 from pyecsca.sca.re.tree import Tree, Map
 import pandas as pd
@@ -35,7 +36,19 @@ def test_map_merge():
     assert len(dmap1.cfg_map) == 4
     assert len(dmap1.codomain) == 2
     assert not dmap1["c", 3]
-    assert dmap1["a", 0]
+    assert dmap1["a", 1]
+
+
+def test_map_deduplicate():
+    cfgs = {"a", "b", "c", "d"}
+    binary_sets = {"a": {1, 2, 3}, "b": {2, 3, 4}, "c": {1, 2, 3}, "d": {4, 2}}
+    dmap = Map.from_sets(cfgs, binary_sets)
+    original = deepcopy(dmap)
+    dmap.deduplicate()
+    for cfg in cfgs:
+        for i in [1, 2, 3, 4]:
+            assert dmap[cfg, i] == original[cfg, i]
+    assert len(dmap.mapping) < len(original.mapping)
 
 
 def test_build_tree():
@@ -75,13 +88,13 @@ def test_expand_tree():
 def test_df():
     nrows = 12_000_000
     ncols = 5
-    index = list(range(nrows))
-    df = pd.DataFrame(
-        [random.choices((True, False), k=ncols) for _ in index], index=index
+    df = pd.DataFrame([random.choices((True, False), k=ncols) for _ in range(nrows)])
+    cfg_map = pd.DataFrame(
+        [(i,) for i in range(nrows)],
+        index=[str(i) for i in range(nrows)],
+        columns=["vals"],
     )
-    print(df.memory_usage().sum())
-    start = time.perf_counter()
-    for row, data in df.groupby(df.columns.tolist(), as_index=False):
-        pass
-    end = time.perf_counter()
-    print(end - start)
+    dmap = Map(df, cfg_map, list(range(ncols)), {True, False})
+    # start = time.perf_counter()
+    dmap.deduplicate()
+    # end = time.perf_counter()
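For context, the new `Map.deduplicate` collapses configurations whose rows in the distinguishing map are identical: one representative row per group is kept and `cfg_map["vals"]` is redirected to the surviving row, which is what `test_map_deduplicate` checks (lookups unchanged, `mapping` shrinks). Below is a minimal, self-contained sketch of that grouping-and-remapping idea using plain pandas. The toy `mapping`/`cfg_map` frames and the `remap` dictionary are illustrative assumptions, not the pyecsca data structures; the patch itself uses a side-effecting aggregation function, whereas this sketch takes the closely related `groupby(...).groups` route for readability.

```python
# Sketch only: assumed toy data, not the pyecsca Map class. Collapse identical
# rows of a distinguishing map and redirect config references to the survivors.
import pandas as pd

# Each row holds the outputs observed for one row label; configs ("a".."d")
# point at row labels through cfg_map["vals"].
mapping = pd.DataFrame(
    {"o1": [True, False, True, False], "o2": [1, 0, 1, 2]},
    index=[0, 1, 2, 3],
)
cfg_map = pd.DataFrame({"vals": [0, 1, 2, 3]}, index=["a", "b", "c", "d"])

# Group identical rows: `groups` maps each distinct row (as a tuple of its
# values) to the Index of original row labels that share it.
groups = mapping.groupby(mapping.columns.tolist(), sort=False, dropna=False).groups

representatives = []
remap = {}
for new_label, row_labels in enumerate(groups.values()):
    representatives.append(mapping.loc[row_labels[0]])  # keep one row per group
    for old_label in row_labels:
        remap[old_label] = new_label                     # redirect the others

mapping = pd.DataFrame(representatives).reset_index(drop=True)
cfg_map["vals"] = cfg_map["vals"].map(remap)

print(mapping)  # three distinct rows remain ("a" and "c" collapsed into one)
print(cfg_map)  # "a" and "c" now share row 0; "b" -> 1, "d" -> 2
```

The remapping step corresponds to the patch's `new_cfg_map.loc[self.cfg_map["vals"].isin(index), "vals"] = i` loop: every config that referenced a collapsed row ends up pointing at the group's surviving index, so lookups through the map give the same answers with fewer stored rows.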
