{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analysis of key agreement data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T13:35:38.954375Z", "start_time": "2019-03-19T13:35:38.578219Z" } }, "outputs": [], "source": [ "%matplotlib notebook\n", "import numpy as np\n", "from scipy.stats import describe\n", "from scipy.stats import norm as norm_dist\n", "from scipy.stats.mstats import mquantiles\n", "from math import log, sqrt\n", "import matplotlib.pyplot as plt\n", "from matplotlib import ticker, colors, gridspec\n", "from copy import deepcopy\n", "from utils import plot_hist, moving_average, hw, time_scale, hist_size_func\n", "from binascii import unhexlify\n", "from IPython.display import display, HTML\n", "from ipywidgets import interact, interactive, fixed, interact_manual\n", "import ipywidgets as widgets\n", "import tabulate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Settings\n", "Enter your input below." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:15.121139Z", "start_time": "2019-03-19T12:43:15.117537Z" } }, "outputs": [], "source": [ "# File name with output from ECTesterReader or ECTesterStandalone ECDH.\n", "fname = \"filename.csv\"\n", "\n", "# The time unit used in displaying the plots. One of \"milli\", \"micro\", \"nano\".\n", "# WARNING: Using \"nano\" might produce very large plots/histograms, cause the\n", "# notebook to freeze or run out of memory, and give poor visualization\n", "# quality due to noise and low density.\n", "time_unit = \"milli\"\n", "# A number which will be used to divide the time into sub-units, e.g. 
for 5, time will be in fifths of units\n", "scaling_factor = 1\n", "\n", "# The amount of entries skipped from the beginning of the file, as they are usually outliers.\n", "skip_first = 10\n", "\n", "# Whether to plot things in logarithmic scale or not.\n", "log_scale = False\n", "\n", "# Whether to trim the time data outside the 1 - 99 percentile range (adjust below). Quite useful.\n", "trim = True\n", "\n", "# How much to trim? Either a number in [0,1] signifying a quantile, or an absolute value signifying a threshold\n", "trim_low = 0.01\n", "trim_high = 0.99\n", "\n", "# Graphical (matplotlib) style name\n", "style = \"ggplot\"\n", "\n", "# Color map to use, and what color to assign to \"bad\" values (necessary for log_scale)\n", "color_map = plt.cm.plasma\n", "color_map_bad = \"black\"\n", "\n", "# What function to use to calculate number of histogram bins of time\n", "# one of \"sqrt\", \"sturges\", \"rice\", \"scott\" and \"fd\" or a number specifying the number of bins\n", "hist_size = \"sturges\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data processing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:16.687260Z", "start_time": "2019-03-19T12:43:16.031604Z" } }, "outputs": [], "source": [ "# Setup plot style\n", "\n", "plt.style.use(style)\n", "\n", "# Work on a copy of the color map so that set_bad() does not touch the configured color_map object.\n", "cmap = deepcopy(color_map)\n", "cmap.set_bad(color_map_bad)\n", "\n", "# Normalization, linear or log.\n", "if log_scale:\n", "    norm = colors.LogNorm()\n", "else:\n", "    norm = colors.Normalize()\n", "\n", "# Read the header line.\n", "\n", "with open(fname, \"r\") as f:\n", "    header = f.readline()\n", "header_names = header.split(\";\")\n", "if len(header_names) != 5:\n", "    # Raise instead of calling exit(1), so the problem is reported as a normal cell error.\n", "    raise ValueError(\"Bad data? Expected 5 ';'-separated header fields in {}, got {}.\".format(fname, len(header_names)))\n", "\n", "# Load the data\n", "# Column 2 (pub) is unhexlified to bytes, columns 3 and 4 (priv, secret) are parsed as hexadecimal integers.\n", "\n", "hx = lambda x: int(x, 16)\n", "data = np.genfromtxt(fname, delimiter=\";\", skip_header=1, converters={2: unhexlify, 3: hx, 4: hx},\n", "                     dtype=np.dtype([(\"index\", \"u4\"), 
(\"time\", \"u4\"), (\"pub\", \"O\"), (\"priv\", \"O\"), (\"secret\", \"O\")]))\n", "\n", "# Skip first (outliers?)\n", "\n", "data = data[skip_first:]\n", "\n", "# Setup the data\n", "\n", "# The original time unit is the text between the brackets of the second header field.\n", "orig_time_unit = header_names[1].split(\"[\")[1][:-1]\n", "# NOTE(review): time_scale presumably rescales data[\"time\"] in place and returns the unit label used for display - confirm in utils.\n", "time_disp_unit = time_scale(data[\"time\"], orig_time_unit, time_unit, scaling_factor)\n", "\n", "# Trim times\n", "# trim_low/trim_high inside [0, 1] are quantiles; any other value is an absolute threshold, in which case\n", "# the reported outer quantiles fall back to 0.01 and 0.99.\n", "quant_low_bound = trim_low if 0 <= trim_low <= 1 else 0.01\n", "quant_high_bound = trim_high if 0 <= trim_high <= 1 else 0.99\n", "quantiles = mquantiles(data[\"time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", "if trim:\n", "    low_bound = quantiles[0] if 0 <= trim_low <= 1 else trim_low\n", "    high_bound = quantiles[4] if 0 <= trim_high <= 1 else trim_high\n", "    data_trimmed = data[np.logical_and(data[\"time\"] >= low_bound,\n", "                                       data[\"time\"] <= high_bound)]\n", "    quantiles_trim = mquantiles(data_trimmed[\"time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", "else:\n", "    low_bound = None\n", "    high_bound = None\n", "    data_trimmed = data\n", "    # Nothing was trimmed, so the quantiles of the full data apply.\n", "    quantiles_trim = quantiles\n", "\n", "description = describe(data[\"time\"])\n", "description_trim = describe(data_trimmed[\"time\"])\n", "\n", "max_time = description.minmax[1]\n", "min_time = description.minmax[0]\n", "# Bit length of the largest private key, rounded up to a whole number of bytes.\n", "bit_size = len(bin(max(data[\"priv\"]))) - 2\n", "byte_size = (bit_size + 7) // 8\n", "bit_size = byte_size * 8\n", "\n", "hist_size_time = hist_size_func(hist_size)(description.nobs, min_time, max_time, description.variance, quantiles[1], quantiles[3])\n", "hist_size_time_trim = hist_size_func(hist_size)(description_trim.nobs, description_trim.minmax[0], description_trim.minmax[1], description_trim.variance, quantiles_trim[1], quantiles_trim[3])\n", "\n", "# When the chosen rule yields fewer than 30 bins, use the width of the time range as the bin count instead.\n", "if hist_size_time < 30:\n", "    hist_size_time = max_time - min_time\n", "if hist_size_time_trim < 30:\n", "    hist_size_time_trim = description_trim.minmax[1] - description_trim.minmax[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:17.706648Z", "start_time": "2019-03-19T12:43:17.695215Z" } }, "outputs": [], "source": [ "# Show the summary statistics of the raw and the trimmed timings, one HTML table each.\n", "stat_names = (\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\")\n", "for caption, stats in ((\"Raw\", description), (\"Trimmed\", description_trim)):\n", "    display(caption)\n", "    desc = [stat_names, stats]\n", "    display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Selected quantiles" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:19.507884Z", "start_time": "2019-03-19T12:43:19.502941Z" } }, "outputs": [], "source": [ "# First row: the quantile probabilities; second row: the quantile values with their display unit.\n", "quantile_labels = (quant_low_bound, \"0.25\", \"0.5\", \"0.75\", quant_high_bound)\n", "quantile_cells = [\"{} {}\".format(value, time_disp_unit) for value in quantiles]\n", "tbl = [quantile_labels, quantile_cells]\n", "display(HTML(tabulate.tabulate(tbl, tablefmt=\"html\")))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Info" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:20.963153Z", "start_time": "2019-03-19T12:43:20.956502Z" } }, "outputs": [], "source": [ "# Report the derived key size and the bin counts computed in the data processing cell.\n", "info_lines = (\n", "    (\"Bitsize: {}\", bit_size),\n", "    (\"Histogram time bins: {}\", hist_size_time),\n", "    (\"Histogram time bins(trimmed): {}\", hist_size_time_trim),\n", ")\n", "for template, value in info_lines:\n", "    display(template.format(value))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plots" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Private key MSB vs time heatmap\n", "The heatmap should show uncorrelated variables." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:43:22.063050Z", "start_time": "2019-03-19T12:43:21.967845Z" } }, "outputs": [], "source": [ "# 2D histogram of the most significant private key byte against the trimmed key agreement time.\n", "fig_private = plt.figure(figsize=(10.5, 8), dpi=90)\n", "axe_private = fig_private.add_subplot(1, 1, 1, title=\"Private key MSB vs key agreement time\")\n", "priv_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data_trimmed[\"priv\"])), dtype=np.dtype(\"u1\"))\n", "# Use plain Python ints: on uint8 scalars the `+ 1` below can wrap around to 0 when the MSBs span 0..255.\n", "max_msb = int(priv_msb.max())\n", "min_msb = int(priv_msb.min())\n", "# One bin per possible MSB value between the observed minimum and maximum.\n", "heatmap, xedges, yedges = np.histogram2d(priv_msb, data_trimmed[\"time\"],\n", "                                         bins=[max_msb - min_msb + 1, hist_size_time_trim])\n", "extent = [min_msb, max_msb, yedges[0], yedges[-1]]\n", "# origin must be \"lower\" (or \"upper\"); \"low\" is not an accepted value.\n", "im = axe_private.imshow(heatmap.T, extent=extent, aspect=\"auto\", cmap=cmap, origin=\"lower\",\n", "                        interpolation=\"nearest\", norm=norm)\n", "axe_private.set_xlabel(\"private key MSB value\")\n", "axe_private.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", "fig_private.colorbar(im, ax=axe_private)\n", "\n", "fig_private.tight_layout()\n", "del priv_msb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Private key Hamming Weight vs time heatmap\n", "The heatmap should show uncorrelated variables.\n", "\n", "Also contains a private key Hamming Weight histogram, which should be binomially distributed." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:47:12.436692Z", "start_time": "2019-03-19T12:47:11.310271Z" } }, "outputs": [], "source": [ "# Top panel: heatmap of private key Hamming weight vs trimmed time; bottom panel: Hamming weight histogram.\n", "fig_priv_hist = plt.figure(figsize=(10.5, 12), dpi=90)\n", "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", "axe_priv_hist = fig_priv_hist.add_subplot(gs[0], title=\"Private key Hamming weight vs key agreement time\")\n", "axe_priv_hist_hw = fig_priv_hist.add_subplot(gs[1], sharex=axe_priv_hist, title=\"Private key Hamming weight\")\n", "priv_hw = np.array(list(map(hw, data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", "# At least one bin, so data where every key has the same Hamming weight (e.g. a fixed key) still plots.\n", "hw_bins = max(int(priv_hw.max()) - int(priv_hw.min()), 1)\n", "h, xe, ye = np.histogram2d(priv_hw, data_trimmed[\"time\"], bins=[hw_bins, hist_size_time_trim])\n", "# origin must be \"lower\" (or \"upper\"); \"low\" is not an accepted value.\n", "im = axe_priv_hist.imshow(h.T, origin=\"lower\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", "# Mark half of the bit size as a visual reference.\n", "axe_priv_hist.axvline(x=bit_size//2, alpha=0.7, linestyle=\"dotted\", color=\"white\", label=str(bit_size//2) + \" bits\")\n", "axe_priv_hist.set_xlabel(\"private key Hamming weight\")\n", "axe_priv_hist.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", "axe_priv_hist.legend(loc=\"best\")\n", "\n", "plot_hist(axe_priv_hist_hw, priv_hw, \"private key Hamming weight\", log_scale, None)\n", "\n", "# norm.fit returns (loc, scale), i.e. the mean and the standard deviation of the fitted normal distribution.\n", "param = norm_dist.fit(priv_hw)\n", "pdf_range = np.arange(min(priv_hw), max(priv_hw))\n", "# Scale the density by the number of samples so it is comparable to the histogram counts.\n", "norm_pdf = norm_dist.pdf(pdf_range, *param[:-2], loc=param[-2], scale=param[-1]) * description_trim.nobs\n", "axe_priv_hist_hw.plot(pdf_range, norm_pdf, label=\"fitted normal distribution\")\n", "axe_priv_hist_hw.legend(loc=\"best\")\n", "\n", "fig_priv_hist.tight_layout()\n", "fig_priv_hist.colorbar(im, ax=[axe_priv_hist, axe_priv_hist_hw])\n", "\n", "display(HTML(\"Private key Hamming weight fitted with normal distribution:\"))\n", "display(HTML(tabulate.tabulate([(\"Mean\", \"Standard deviation\"), param], tablefmt=\"html\")))\n", "\n", "del priv_hw" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "### Key agreement time histogram" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:47:20.496134Z", "start_time": "2019-03-19T12:47:20.360405Z" } }, "outputs": [], "source": [ "# Histogram of all timings (top) and of the trimmed timings (bottom).\n", "fig_ka_hist = plt.figure(figsize=(10.5, 8), dpi=90)\n", "axe_hist_full = fig_ka_hist.add_subplot(2, 1, 1)\n", "axe_hist_trim = fig_ka_hist.add_subplot(2, 1, 2)\n", "plot_hist(axe_hist_full, data[\"time\"], \"key agreement time ({})\".format(time_disp_unit), log_scale, hist_size_time);\n", "plot_hist(axe_hist_trim, data_trimmed[\"time\"], \"key agreement time ({})\".format(time_disp_unit), log_scale, hist_size_time_trim);\n", "\n", "fig_ka_hist.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Moving averages of key agreement time" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:47:21.706585Z", "start_time": "2019-03-19T12:47:21.658363Z" } }, "outputs": [], "source": [ "# Moving averages over the untrimmed timings, with the trim bounds drawn as horizontal reference lines.\n", "fig_avg = plt.figure(figsize=(10.5, 7), dpi=90)\n", "axe_avg = fig_avg.add_subplot(1, 1, 1, title=\"Moving average of key agreement time\")\n", "avg_100 = moving_average(data[\"time\"], 100)\n", "avg_1000 = moving_average(data[\"time\"], 1000)\n", "axe_avg.plot(avg_100, label=\"window = 100\")\n", "axe_avg.plot(avg_1000, label=\"window = 1000\")\n", "# The bounds are None when trimming is disabled.\n", "if low_bound is not None:\n", "    axe_avg.axhline(y=low_bound, alpha=0.7, linestyle=\"dotted\", color=\"green\", label=\"Low trim bound = {}\".format(low_bound))\n", "if high_bound is not None:\n", "    axe_avg.axhline(y=high_bound, alpha=0.7, linestyle=\"dotted\", color=\"orange\", label=\"High trim bound = {}\".format(high_bound))\n", "axe_avg.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", "axe_avg.set_xlabel(\"index\")\n", "axe_avg.legend(loc=\"best\")\n", "\n", "fig_avg.tight_layout()\n", "del avg_100, avg_1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Private key MSB and 
LSB histograms\n", "Expected to be uniform over [0, 255]." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:47:32.875112Z", "start_time": "2019-03-19T12:47:32.542216Z" }, "hide_input": false }, "outputs": [], "source": [ "fig_priv_hists = plt.figure(figsize=(10.5, 8), dpi=90)\n", "\n", "# Most and least significant byte of every (untrimmed) private key.\n", "priv_msb = np.array([key >> (bit_size - 8) for key in data[\"priv\"]], dtype=np.dtype(\"u1\"))\n", "priv_lsb = np.array([key & 0xff for key in data[\"priv\"]], dtype=np.dtype(\"u1\"))\n", "\n", "# One histogram per byte, stacked vertically in the same figure.\n", "axe_msb_s_hist = fig_priv_hists.add_subplot(2, 1, 1, title=\"Private key MSB\")\n", "axe_lsb_s_hist = fig_priv_hists.add_subplot(2, 1, 2, title=\"Private key LSB\")\n", "msb_h = plot_hist(axe_msb_s_hist, priv_msb, \"private key MSB\", log_scale, False, False)\n", "lsb_h = plot_hist(axe_lsb_s_hist, priv_lsb, \"private key LSB\", log_scale, False, False)\n", "\n", "fig_priv_hists.tight_layout()\n", "\n", "# Release the temporary byte arrays.\n", "del priv_msb, priv_lsb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Private key bit length vs time heatmap\n", "Also contains private key bit length histogram, which is expected to be axis flipped geometric distribution with $p = \\frac{1}{2}$ peaking at the bit size of the order of the curve." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:47:34.638019Z", "start_time": "2019-03-19T12:47:34.479903Z" } }, "outputs": [], "source": [ "# Top panel: heatmap of private key bit length vs trimmed time; bottom panel: bit length histogram.\n", "fig_bl = plt.figure(figsize=(10.5, 12), dpi=90)\n", "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", "axe_bl_heat = fig_bl.add_subplot(gs[0], title=\"Private key bit length vs key agreement time\")\n", "axe_bl_hist = fig_bl.add_subplot(gs[1], sharex=axe_bl_heat, title=\"Private key bit length\")\n", "bl_data = np.array(list(map(lambda x: x.bit_length(), data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", "\n", "# At least one bin, so data where every key has the same bit length (e.g. a fixed key) still plots.\n", "bl_bins = max(int(bl_data.max()) - int(bl_data.min()), 1)\n", "h, xe, ye = np.histogram2d(bl_data, data_trimmed[\"time\"], bins=[bl_bins, hist_size_time_trim])\n", "# origin must be \"lower\" (or \"upper\"); \"low\" is not an accepted value.\n", "im = axe_bl_heat.imshow(h.T, origin=\"lower\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", "axe_bl_heat.set_xlabel(\"private key bit length\")\n", "axe_bl_heat.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", "\n", "plot_hist(axe_bl_hist, bl_data, \"Private key bit length\", log_scale, align=\"right\")\n", "\n", "fig_bl.tight_layout()\n", "fig_bl.colorbar(im, ax=[axe_bl_heat, axe_bl_hist])\n", "\n", "del bl_data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Private key bit length histogram given time\n", "Interactively shows the histogram of private key bit length given a selected time range centered around `center` of width `width`. Ideally, the means of these conditional distributions are equal, while the variances can vary." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig_bl_time = plt.figure(figsize=(10.5, 5), dpi=90)\n", "axe_bl_time = fig_bl_time.add_subplot(111)\n", "axe_bl_time.set_autoscalex_on(False)\n", "\n", "\n", "def f(center, width):\n", "    \"\"\"Redraw the private key bit length histogram for a window of key agreement times.\n", "\n", "    Only the trimmed entries whose time lies in [center - width/2, center + width/2]\n", "    (both ends inclusive) are used; the axes are cleared and redrawn on every call.\n", "    \"\"\"\n", "    lower_bnd = center - width/2\n", "    upper_bnd = center + width/2\n", "    values = data_trimmed[np.logical_and(data_trimmed[\"time\"] <= upper_bnd,\n", "                                         data_trimmed[\"time\"] >= lower_bnd)]\n", "    axe_bl_time.clear()\n", "    # The backslash is escaped so that the string holds a literal \\in without an invalid escape sequence.\n", "    axe_bl_time.set_title(\"Private key bit length, given key agreement time $\\\\in ({}, {})$ {}\".format(int(lower_bnd), int(upper_bnd), time_disp_unit))\n", "    bl_data = np.array(list(map(lambda x: x.bit_length(), values[\"priv\"])), dtype=np.dtype(\"u2\"))\n", "    # Only the top bit lengths (bit_size - 10 up to bit_size) are shown.\n", "    plot_hist(axe_bl_time, bl_data, \"private key bit length\", bins=11, range=(bit_size-10, bit_size+1), align=\"left\")\n", "    axe_bl_time.set_xlim((bit_size-10, bit_size))\n", "    fig_bl_time.tight_layout()\n", "\n", "\n", "# The center slider spans the trimmed time range and starts at the (truncated) mean time.\n", "center_w = widgets.IntSlider(min=min(data_trimmed[\"time\"]),\n", "                             max=max(data_trimmed[\"time\"]),\n", "                             step=1,\n", "                             value=int(description_trim.mean),\n", "                             continuous_update=False,\n", "                             description=\"center {}\".format(time_disp_unit))\n", "width_w = widgets.IntSlider(min=1, max=100, continuous_update=False,\n", "                            description=\"width {}\".format(time_disp_unit))\n", "w = interactive(f, center=center_w,\n", "                width=width_w)\n", "display(w)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Validation\n", "Perform some tests on the produced data and compare to expected results.\n", "\n", "This requires some information about the used curve, enter it below." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-18T18:36:15.492599Z", "start_time": "2019-03-18T18:36:12.008827Z" } }, "outputs": [], "source": [ "# Accepts a decimal number or a hexadecimal one with a 0x/0X prefix; surrounding whitespace is ignored.\n", "p_str = input(\"The prime specifying the finite field:\").strip()\n", "p = int(p_str, 16) if p_str.lower().startswith(\"0x\") else int(p_str)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Accepts a decimal number or a hexadecimal one with a 0x/0X prefix; surrounding whitespace is ignored.\n", "r_str = input(\"The order of the curve:\").strip()\n", "r = int(r_str, 16) if r_str.lower().startswith(\"0x\") else int(r_str)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "All of the following tests should pass (i.e. be true), given a large enough sample." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "max_priv = max(data[\"priv\"])\n", "# The checks are only run when the data contains more than one distinct private key.\n", "un = len(np.unique(data[\"priv\"])) != 1\n", "if un:\n", "    print(\"Private keys are smaller than order:\\t\\t\\t\" + str(max_priv < r))\n", "    print(\"Private keys are larger than prime(if order > prime):\\t\" + str(r <= p or max_priv > p))\n", "    print(\"Private keys reach full bit length of order:\\t\\t\" + str(max_priv.bit_length() == r.bit_length()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2019-03-19T12:53:48.777395Z", "start_time": "2019-03-19T12:53:48.766190Z" } }, "outputs": [], "source": [ "if un:\n", "    print(\"Private key bit length (min, max):\" + str(min(data[\"priv\"]).bit_length()) + \", \" + str(max(data[\"priv\"]).bit_length()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "@webio": { "lastCommId": "68bed3134fc443a3829779707f7e77b0", "lastKernelId": "168cb4cf-3a36-4e98-a687-9a9f57b50354" }, "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": 
"text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }