author: J08nY 2025-11-06 14:28:57 +0100
committer: J08nY 2025-11-06 14:34:03 +0100
commit: 3c55021f12339053ca48abcb763c1e34d92c804b (patch)
tree: 1ec3593272d0aa291d419926339fccdab9c7261f
parent: a89cc36a0d7e976488f22bef6328be90292d40af (diff)
download: ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.tar.gz
ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.tar.zst
ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.zip
1 files changed, 82 insertions, 76 deletions
diff --git a/analysis/scalarmults/distinguish.ipynb b/analysis/scalarmults/distinguish.ipynb
index 3a77c4c..dd7e084 100644
--- a/analysis/scalarmults/distinguish.ipynb
+++ b/analysis/scalarmults/distinguish.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "bc1528b8-61cd-4219-993f-e3f1ac79e801",
    "metadata": {},
    "outputs": [],
@@ -24,22 +24,32 @@
     "import glob\n",
     "import random\n",
     "import math\n",
+    "import sys\n",
     "\n",
     "from collections import Counter\n",
     "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
+    "import xarray as xr\n",
     "from scipy.stats import binom, entropy\n",
     "from scipy.spatial import distance\n",
     "from tqdm.auto import tqdm, trange\n",
     "from anytree import PreOrderIter, Walker\n",
     "from matplotlib import pyplot as plt\n",
+    "from statsmodels.stats.proportion import proportion_confint\n",
     "\n",
     "from pyecsca.ec.mult import *\n",
     "from pyecsca.misc.utils import TaskExecutor, silent\n",
     "from pyecsca.sca.re.tree import Map, Tree\n",
     "\n",
-    "from common import *\n",
+    "from epare.divisors import divisor_map\n",
+    "from epare.config import all_configs, Config\n",
+    "from epare.error_model import all_error_models\n",
+    "\n",
+    "if sys.version_info >= (3, 14):\n",
+    "    from compression import zstd\n",
+    "else:\n",
+    "    from backports import zstd\n",
     "\n",
     "%matplotlib ipympl"
    ]
@@ -55,7 +65,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "119b1e18-eeb0-4b70-bec3-ff277641403f",
    "metadata": {},
    "outputs": [],
@@ -72,7 +82,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "ccb00342-3c48-49c9-bedf-2341e5eae3a2",
    "metadata": {},
    "outputs": [],
@@ -91,41 +101,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "3dbac9be-d098-479a-8ca2-f531f6668f7c",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1e5c7654ba8c4e559d1e100a334a3331",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/12480 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "with open(f\"merged.pickle\", \"rb\") as f:\n",
-    "    distributions_mults = pickle.load(f)\n",
-    "for probmap in tqdm(distributions_mults.values(), desc=f\"Narrowing probmaps to {divisor_name}.\"):\n",
+    "with zstd.open(\"merged.zpickle\", \"rb\") as f:\n",
+    "    config_map = pickle.load(f)\n",
+    "for probmap in tqdm(config_map.values(), desc=f\"Narrowing probmaps to {divisor_name}.\"):\n",
     "    probmap.narrow(allfeats)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "38c81e38-a37c-4e58-ac9e-927d14dad458",
    "metadata": {},
    "outputs": [],
    "source": [
-    "allmults = list(distributions_mults.keys())\n",
-    "basemults = list(all_mults_with_ctr)\n",
+    "allmults = list(config_map.keys())\n",
+    "basemults = list(all_configs)\n",
+    "\n",
     "nbase = len(basemults)\n",
     "nmults = len(allmults)\n",
     "nallfeats = len(allfeats)"
@@ -133,20 +129,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "206b385a-bb98-47e7-9312-94a864794faa",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Have 12480 configurations (scalar multiplier x countermeasure x error model).\n",
-      "Have 390 base configurations (scalar multiplier x countermeasure).\n",
-      "Have 3215 base point orders (features).\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(f\"Have {nmults} configurations (scalar multiplier x countermeasure x error model).\")\n",
     "print(f\"Have {nbase} base configurations (scalar multiplier x countermeasure).\")\n",
@@ -154,6 +140,18 @@
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e95a1551-3fec-468a-866d-1efa5721364d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "probs = xr.DataArray(np.zeros((nmults, nallfeats), dtype=np.float64), dims=(\"cfg\", \"divisor\"), coords={\"cfg\": allmults, \"divisor\": allfeats})\n",
+    "for mult, probmap in tqdm(config_map.items(), desc=\"Convert probmaps to xarray.\"):\n",
+    "    probs.loc[mult, :] = [probmap[divisor] for divisor in allfeats]"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "id": "437bcd9c-1da5-428a-a979-0835326777f3",
    "metadata": {},
@@ -192,50 +190,56 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0b85fad7-392f-4701-9329-d75d39736bbb",
+   "id": "bf81fa2e-8746-4af1-963a-8d8340ed22da",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Now go over all divisors, cluster based on overlapping CI for given n?\n",
-    "io_map = {mult:{} for mult in allmults}\n",
-    "for divisor in tqdm(allfeats):\n",
+    "def make_groups(column: np.ndarray, nbuild: int, alpha: float, method: str = \"wilson\"):\n",
     "    prev_ci_low = None\n",
     "    prev_ci_high = None\n",
-    "    groups = {}\n",
-    "    pvals = {}\n",
-    "    group = 0\n",
-    "    for mult, probmap in sorted(distributions_mults.items(), key=lambda item: -item[1][divisor]):\n",
+    "    groups = []\n",
+    "    ci_low, ci_high = proportion_confint(column * nbuild, nbuild, alpha=alpha, method=method) \n",
+    "    order = np.argsort(column)\n",
+    "    for mult_index in reversed(order.data):\n",
     "        # We are going from high to low p.\n",
-    "        pval = probmap[divisor]\n",
-    "        pvals[mult] = pval\n",
-    "        ci_low, ci_high = conf_interval(pval, nbuild, alpha)\n",
-    "        ci_low = max(ci_low, 0.0)\n",
-    "        ci_high = min(ci_high, 1.0)\n",
-    "        if (prev_ci_low is None and prev_ci_high is None) or prev_ci_low >= ci_high:\n",
-    "            g = groups.setdefault(f\"arbitrary{group}\", set())\n",
-    "            g.add(mult)\n",
-    "            group += 1\n",
+    "        low = ci_low[mult_index]\n",
+    "        high = ci_high[mult_index]\n",
+    "        if (prev_ci_low is None and prev_ci_high is None) or prev_ci_low >= high:\n",
+    "            g = set()\n",
+    "            groups.append(g)\n",
+    "            #print(low, high, len(groups), prev_ci_low, prev_ci_high)\n",
     "        else:\n",
-    "            g = groups.setdefault(f\"arbitrary{group}\", set())\n",
-    "            g.add(mult)\n",
-    "        prev_ci_low = ci_low\n",
-    "        prev_ci_high = ci_high\n",
-    "    \n",
-    "    #print(f\"Divisor: {divisor}, num groups: {group}\", end=\"\\n\\t\")\n",
-    "    #for g in groups.values():\n",
+    "            g = groups[-1]\n",
+    "        g.add(mult_index)\n",
+    "        prev_ci_low = low\n",
+    "        prev_ci_high = high\n",
+    "    return groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b85fad7-392f-4701-9329-d75d39736bbb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now go over all divisors, cluster based on overlapping CI for given n?\n",
+    "cfg_map = pd.DataFrame(list(range(len(allmults))), index=allmults, columns=[\"vals\"])\n",
+    "codomain = set()\n",
+    "mapping = np.zeros((nmults, nallfeats), dtype=int)\n",
+    "for i, divisor in tqdm(enumerate(allfeats), desc=\"Computing groups.\", total=nallfeats):\n",
+    "    groups = make_groups(probs.loc[:, divisor], nbuild, alpha)\n",
+    "    #print(f\"Divisor: {divisor}, num groups: {len(groups)}\")\n",
+    "    #for g in groups:\n",
     "    #    print(len(g), end=\", \")\n",
     "    #print()\n",
-    "    for group, mults in groups.items():\n",
-    "        mult_pvals = [pvals[mult] for mult in mults]\n",
-    "        group_pval_avg = np.mean(mult_pvals)\n",
-    "        group_pval_var = np.var(mult_pvals)\n",
-    "        group_pval_min = np.min(mult_pvals)\n",
-    "        group_pval_max = np.max(mult_pvals)\n",
-    "        for mult in mults:\n",
-    "            io_map[mult][divisor] = (group,  group_pval_avg, group_pval_var, group_pval_min, group_pval_max)\n",
+    "    for group, mult_indices in enumerate(groups):\n",
+    "        codomain.add(group)\n",
+    "        for mult_index in mult_indices:\n",
+    "            mapping[mult_index, i] = group\n",
     "\n",
     "# then build dmap\n",
-    "dmap = Map.from_io_maps(set(distributions_mults.keys()), io_map)"
+    "dmap = Map(mapping=pd.DataFrame(mapping), cfg_map=cfg_map, domain=allfeats, codomain=codomain)"
    ]
   },
   {
@@ -285,12 +289,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "5735e7d4-149c-4184-96f7-dcfd6017fbad",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# build a tree\n",
-    "with silent():\n",
-    "    tree = Tree.build(set(allmults), dmap)"
+    "#with silent():\n",
+    "tree = Tree.build(set(allmults), dmap)"
    ]
   },
   {
@@ -1610,7 +1616,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.5"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
author	J08nY	2025-11-06 14:28:57 +0100
committer	J08nY	2025-11-06 14:34:03 +0100
commit	3c55021f12339053ca48abcb763c1e34d92c804b (patch)
tree	1ec3593272d0aa291d419926339fccdab9c7261f
parent	a89cc36a0d7e976488f22bef6328be90292d40af (diff)
download	ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.tar.gz ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.tar.zst ECTester-3c55021f12339053ca48abcb763c1e34d92c804b.zip