diff options
| author | J08nY | 2025-03-22 18:08:31 +0100 |
|---|---|---|
| committer | J08nY | 2025-04-16 12:25:06 +0200 |
| commit | 2e0a31a16740cf05e1bec5b609e29a3c31ecbc83 (patch) | |
| tree | b83956936b8503e633ae78f7bc732133ac6d53c7 | |
| parent | c084f85cddde9727e61a503fd37af2551b94eeb8 (diff) | |
| download | ECTester-2e0a31a16740cf05e1bec5b609e29a3c31ecbc83.tar.gz ECTester-2e0a31a16740cf05e1bec5b609e29a3c31ecbc83.tar.zst ECTester-2e0a31a16740cf05e1bec5b609e29a3c31ecbc83.zip | |
| -rw-r--r-- | epare/distinguish.ipynb | 96 |
1 files changed, 93 insertions, 3 deletions
# Cells added to epare/distinguish.ipynb by this patch, written out as Python.
# The patch text had its whitespace collapsed, so the block indentation below
# is reconstructed from the control flow -- TODO confirm against the notebook.

# %% [markdown] id="f16a5868-e92c-4b84-9f19-664627d9848a"
# ## Simulate distinguishing using a Bayes classifier

# %% [markdown] id="ed81e076-9ccb-445d-ada9-384b73efb2c5"
# ### Feature selection using trees
#
# We can reuse the clustering + tree building approach above and just take the inputs that the greedy tree building chooses as the features. However, we can also use more conventional feature selection approaches.

# %% id="1f24b323-3604-4e34-a880-9dfd611fb245"
# Count how many internal (non-leaf) nodes of `tree` split on each input.
# `tree`, `PreOrderIter`, `Counter` and `divisor_map` come from earlier cells.
good_inputs = Counter()
for node in PreOrderIter(tree.root):
    if node.is_leaf:
        continue
    good_inputs[node.dmap_input] += 1

# Report every selected input: its value, binary form, use count, bit length
# and the names of all divisor groups (except the catch-all "all") containing it.
for good in sorted(good_inputs):
    print(good)
    print(bin(good))
    print(f"used {good_inputs[good]} times")
    print(f"nbits {good.bit_length()}")
    for div_name, div_group in divisor_map.items():
        if good in div_group and div_name != "all":
            print(div_name, end=", ")
    print("\n")

# %% id="f1052222-ad32-4e25-97ca-851cc42bf546"
# Random search over feature subsets drawn from `good_inputs`.
#
# For every candidate subset, run `simulations` trials. Each trial:
#   1. picks a random "true" key of `distributions_mults`,
#   2. samples one binomial count per feature (`nattack` draws, success
#      probability taken from the true key's probability map),
#   3. scores every key by the product of the binomial pmfs of those counts
#      under that key's probabilities, and
#   4. records the rank of the true key in the descending score order.
# The subset with the lowest mean rank is kept and printed.
#
# Assumes `binom` is scipy.stats.binom and `trange` is tqdm's trange (their
# imports are outside this patch) -- TODO confirm.
# NOTE(review): nothing here seeds `random` or the RNG used by `binom`, so
# results differ between runs.
simulations = 400
retries = 1000

# The candidate keys do not change during the search; build the list once
# instead of on every trial.
mult_names = list(distributions_mults.keys())

for nfeats in (6,): #trange(1, 7)
    for nattack in range(100, 200, 100):
        best_feats = None
        best_feats_mean_pos = None
        best_successes = None
        for _ in trange(retries):
            feats = random.sample(sorted(good_inputs), nfeats)
            # successes[k] = number of trials where the true key ranked in the top k.
            successes = {k:0 for k in range(1, 11)}
            mean_pos = 0
            for _ in range(simulations):
                true_mult = random.choice(mult_names)
                probmap = distributions_mults[true_mult]
                feat_vector = []
                # Iterate over the inputs themselves. The scoring loop below
                # indexes the probability maps by the bare input, so sampling
                # must use the same key rather than an (index, input) tuple
                # produced by enumerate().
                for divisor in feats:
                    prob = probmap[divisor]
                    sampled = binom(nattack, prob).rvs()
                    feat_vector.append(sampled)
                scoring = []
                for other_mult, other_probmap in distributions_mults.items():
                    # NOTE(review): a plain product of pmfs can underflow to 0.0
                    # when many features are used; summing log-pmfs would be
                    # more robust if nfeats grows.
                    proba = 1
                    for sampled, divisor in zip(feat_vector, feats):
                        other_p = other_probmap[divisor]
                        prob = binom(nattack, other_p).pmf(sampled)
                        proba *= prob
                    scoring.append((proba, other_mult))
                scoring.sort(key=lambda item: item[0], reverse=True)
                for rank, (_score, other) in enumerate(scoring):
                    if other == true_mult:
                        mean_pos += rank
                        # Zero-based rank r counts as a top-k hit for every k > r.
                        for k in range(rank + 1, 11):
                            successes[k] += 1
                        # Dict keys are unique, so there is nothing left to find.
                        break
            # Turn hit counts into success rates.
            for k in successes:
                successes[k] /= simulations
            #print(f"{nattack:<10}: mean position {mean_pos/simulations}")
            #print(f" top1: {successes[1]}, top5: {successes[5]}, top10: {successes[10]}")
            if best_feats is None or best_feats_mean_pos > mean_pos/simulations:
                best_feats = feats
                best_feats_mean_pos = mean_pos/simulations
                best_successes = successes
        print(flush=True)
        print(nattack)
        print(f"Features: ({nfeats}) {best_feats}")
        print(f"mean_pos: {best_feats_mean_pos}")
        print(f"top1: {best_successes[1]}, top2: {best_successes[2]}, top5: {best_successes[5]}, top10: {best_successes[10]}")

# The same patch also changes the notebook metadata "version" from "3.12.3" to "3.13.1".
