about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author J08nY 2024-01-29 14:27:10 +0100
committer J08nY 2024-01-29 14:27:10 +0100
commit 9333147035a317a2a8b6529612435bee7e3a66ea (patch)
tree 768483d5f3672eb07dc707d8734fc2dfb7b07c1c
parent 3720d55c229d62b848f90d8fcd6db5c3b937e6d1 (diff)
download pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.tar.gz
pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.tar.zst
pyecsca-9333147035a317a2a8b6529612435bee7e3a66ea.zip
Add dmap deduplication.
-rw-r--r-- pyecsca/sca/re/tree.py 21
-rw-r--r-- test/sca/test_tree.py 33
2 files changed, 38 insertions, 16 deletions
diff --git a/pyecsca/sca/re/tree.py b/pyecsca/sca/re/tree.py
index 6aae1ea..2955631 100644
--- a/pyecsca/sca/re/tree.py
+++ b/pyecsca/sca/re/tree.py
@@ -152,10 +152,17 @@ class Map:
def deduplicate(self):
"""Deduplicate the configs of this distinguishing map based on the rows."""
- for row, data in self.mapping.groupby(
- self.mapping.columns.tolist(), as_index=False
- ):
- pass
+ indices = []
+
+ def agg(thing):
+ indices.append(thing.index)
+ return thing.iloc[0]
+
+ self.mapping = self.mapping.groupby(self.mapping.columns.tolist(), as_index=False, dropna=False).agg(agg)
+ new_cfg_map = self.cfg_map.copy()
+ for i, index in enumerate(indices):
+ new_cfg_map.loc[self.cfg_map["vals"].isin(index), "vals"] = i
+ self.cfg_map = new_cfg_map
def merge(self, other: "Map"):
"""Merge in another distinguishing map operating on different configs."""
@@ -167,7 +174,9 @@ class Map:
last = max(self.cfg_map["vals"])
# Offset the other cfg_map and mapping index by last + 1
other_cfg_map = other.cfg_map + (last + 1)
- other_mapping = other.mapping[reordering].set_index(other.mapping.index + (last + 1))
+ other_mapping = other.mapping[reordering].set_index(
+ other.mapping.index + (last + 1)
+ )
# Now concat the cfg_map and mapping
self.cfg_map = pd.concat([self.cfg_map, other_cfg_map], copy=False)
self.mapping = pd.concat([self.mapping, other_mapping], copy=False)
@@ -350,7 +359,7 @@ def _build_tree(
)
log(pad + f"Split {len(group_cfgs)} via dmap {best_i}.")
# And build the tree recursively
- child = _build_tree(group_cfgs, maps, response=output, depth=depth+1)
+ child = _build_tree(group_cfgs, maps, response=output, depth=depth + 1)
child.parent = result
return result
diff --git a/test/sca/test_tree.py b/test/sca/test_tree.py
index ce16f97..89a9f61 100644
--- a/test/sca/test_tree.py
+++ b/test/sca/test_tree.py
@@ -1,5 +1,6 @@
import random
import time
+from copy import deepcopy
from pyecsca.sca.re.tree import Tree, Map
import pandas as pd
@@ -35,7 +36,19 @@ def test_map_merge():
assert len(dmap1.cfg_map) == 4
assert len(dmap1.codomain) == 2
assert not dmap1["c", 3]
- assert dmap1["a", 0]
+ assert dmap1["a", 1]
+
+
+def test_map_deduplicate():
+ cfgs = {"a", "b", "c", "d"}
+ binary_sets = {"a": {1, 2, 3}, "b": {2, 3, 4}, "c": {1, 2, 3}, "d": {4, 2}}
+ dmap = Map.from_sets(cfgs, binary_sets)
+ original = deepcopy(dmap)
+ dmap.deduplicate()
+ for cfg in cfgs:
+ for i in [1, 2, 3, 4]:
+ assert dmap[cfg, i] == original[cfg, i]
+ assert len(dmap.mapping) < len(original.mapping)
def test_build_tree():
@@ -75,13 +88,13 @@ def test_expand_tree():
def test_df():
nrows = 12_000_000
ncols = 5
- index = list(range(nrows))
- df = pd.DataFrame(
- [random.choices((True, False), k=ncols) for _ in index], index=index
+ df = pd.DataFrame([random.choices((True, False), k=ncols) for _ in range(nrows)])
+ cfg_map = pd.DataFrame(
+ [(i,) for i in range(nrows)],
+ index=[str(i) for i in range(nrows)],
+ columns=["vals"],
)
- print(df.memory_usage().sum())
- start = time.perf_counter()
- for row, data in df.groupby(df.columns.tolist(), as_index=False):
- pass
- end = time.perf_counter()
- print(end - start)
+ dmap = Map(df, cfg_map, list(range(ncols)), {True, False})
+ # start = time.perf_counter()
+ dmap.deduplicate()
+ # end = time.perf_counter()