{ "cells": [ { "cell_type": "markdown", "id": "6212ee2f4518283e", "metadata": {}, "source": [ "# FIPS IUT and MIP queues" ] },
{ "cell_type": "code", "execution_count": null, "id": "4d7d7d8c", "metadata": {}, "outputs": [], "source": [
"from itertools import takewhile\n",
"from operator import itemgetter\n",
"\n",
"from sec_certs.dataset.fips_mip import MIPDataset\n",
"from sec_certs.dataset.fips_iut import IUTDataset\n",
"from sec_certs.sample.fips_mip import MIPStatus\n",
"from sec_certs.model.fips_matching import FIPSProcessMatcher\n",
"from sec_certs.dataset.fips import FIPSDataset\n",
"from sec_certs.configuration import config\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"import matplotlib.ticker as mtick\n",
"import warnings\n",
"\n",
"# Matplotlib 3.6 renamed its bundled seaborn style sheets to \"seaborn-v0_8-*\" and 3.8 dropped the old names,\n",
"# so use whichever spelling the installed version provides.\n",
"whitegrid_style = \"seaborn-v0_8-whitegrid\" if \"seaborn-v0_8-whitegrid\" in plt.style.available else \"seaborn-whitegrid\"\n",
"plt.style.use(whitegrid_style)\n",
"sns.set_palette(\"deep\")\n",
"sns.set_context(\"notebook\") # Set to \"paper\" for use in paper :)\n",
"\n",
"warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)"
] },
{ "cell_type": "code", "execution_count": null, "id": "a3faaf17", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "fips = FIPSDataset.from_web()\n" ] },
{ "cell_type": "markdown", "id": "bd5f0fd6", "metadata": {}, "source": [ "## IUT dataset" ] },
{ "cell_type": "code", "execution_count": null, "id": "80f04ffe", "metadata": {}, "outputs": [], "source": [ "iut_dset = IUTDataset.from_web()" ] },
{ "cell_type": "code", "execution_count": null, "id": "47a4d967", "metadata": {}, "outputs": [], "source": [ "iut_global_df = pd.DataFrame(iut_dset.snapshots, columns=[\"timestamp\", \"displayed\", \"not_displayed\", \"total\"])\n", "iut_global_df.info()" ] },
{ "cell_type": "code", "execution_count": null, "id": "776323d7", "metadata": {}, "outputs": [], "source": [ "iut_melt = iut_global_df.melt(\"timestamp\", 
var_name=\"type\", value_name=\"count\")\n", "line = sns.lineplot(data=iut_melt, x=\"timestamp\", y=\"count\", hue=\"type\")\n", "line.set(xlabel=\"Date\", ylabel=\"Number of entries\", title=\"Entries in IUT list over time\")\n", "line.legend(title=\"Type\", bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);" ] },
{ "cell_type": "code", "execution_count": null, "id": "0d1626ad", "metadata": {}, "outputs": [], "source": [ "s0 = iut_dset.snapshots[0]\n", "print(s0.__dict__.keys())\n", "print(next(iter(s0.entries)))" ] },
{ "cell_type": "code", "execution_count": null, "id": "97b726d6", "metadata": {}, "outputs": [], "source": [
"def iut_key(entry):\n",
"    \"\"\"Return the (module name, vendor name, standard) tuple of an IUT entry.\"\"\"\n",
"    return entry.module_name, entry.vendor_name, entry.standard\n",
"\n",
"\n",
"def print_iut_seen_stats(df):\n",
"    \"\"\"Print the mean `seen_for` and `seen_for_iut` of `df`, overall and per FIPS standard.\"\"\"\n",
"    for column, label in ((\"seen_for\", \"Average seen for\"), (\"seen_for_iut\", \"Average seen for[iut]\")):\n",
"        print(label, np.mean(df[column]))\n",
"        for standard in (\"FIPS 140-2\", \"FIPS 140-3\"):\n",
"            print(f\"{label} ({standard})\", np.mean(df.loc[df.standard == standard, column]))\n",
"\n",
"\n",
"iut_snapshots_sorted = sorted(iut_dset.snapshots, key=lambda x: x.timestamp)\n",
"# Date of the newest snapshot: an entry last seen on that date counts as still present.\n",
"# Computed explicitly instead of relying on the loop variable leaking out of the loop below.\n",
"iut_latest_date = iut_snapshots_sorted[-1].timestamp.date() if iut_snapshots_sorted else None\n",
"\n",
"iut_first_seen = {}\n",
"iut_last_seen = {}\n",
"for snapshot in tqdm(iut_snapshots_sorted):\n",
"    snapshot_date = snapshot.timestamp.date()\n",
"    for entry in snapshot.entries:\n",
"        # The entry object itself is the key; iut_key(entry) would instead share one key between entries\n",
"        # with equal name/vendor/standard, but the DataFrame below reads attributes off the keys.\n",
"        entry_key = entry\n",
"        # Snapshots are visited in chronological order: keep the first date, always overwrite the last.\n",
"        iut_first_seen.setdefault(entry_key, snapshot_date)\n",
"        iut_last_seen[entry_key] = snapshot_date\n",
"\n",
"iut_local_df = pd.DataFrame(\n",
"    [\n",
"        (entry.module_name, entry.vendor_name, entry.standard, entry.iut_date, first_seen, iut_last_seen[entry], iut_last_seen[entry] == iut_latest_date)\n",
"        for entry, first_seen in iut_first_seen.items()\n",
"    ],\n",
"    columns=(\"name\", \"vendor\", \"standard\", \"iut_date\", \"first_seen\", \"last_seen\", \"present\"),\n",
")\n",
"iut_local_df = iut_local_df.astype({\"standard\": \"category\", \"iut_date\": \"datetime64[ns]\", \"first_seen\": \"datetime64[ns]\", \"last_seen\": \"datetime64[ns]\", \"present\": \"bool\"}).fillna(value=np.nan)\n",
"iut_local_df[\"seen_for_iut\"] = (iut_local_df.last_seen - iut_local_df.iut_date).dt.days\n",
"iut_local_df[\"seen_for\"] = (iut_local_df.last_seen - iut_local_df.first_seen).dt.days\n",
"\n",
"print(\"All:\")\n",
"print_iut_seen_stats(iut_local_df)\n",
"\n",
"print(\"Only not present:\")\n",
"print_iut_seen_stats(iut_local_df.loc[~iut_local_df.present])"
] },
{ "cell_type": "code", "execution_count": null, "id": "5bcc6da5", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=iut_local_df, x=\"iut_date\", y=\"seen_for_iut\", 
hue=\"standard\", style=\"present\", aspect=1)" ] },
{ "cell_type": "code", "execution_count": null, "id": "10bad193", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=iut_local_df, x=\"iut_date\", y=\"last_seen\", hue=\"standard\", style=\"present\", aspect=1)" ] },
{ "cell_type": "code", "execution_count": null, "id": "fbfbf88e", "metadata": {}, "outputs": [], "source": [ "hist = sns.histplot(data=iut_local_df[~iut_local_df.present], x=\"seen_for_iut\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "98b27134", "metadata": {}, "outputs": [], "source": [ "hist = sns.histplot(data=iut_local_df[~iut_local_df.present], x=\"seen_for\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "6b8e02d3", "metadata": {}, "outputs": [], "source": [ "iut_local_df.vendor.value_counts()" ] },
{ "cell_type": "markdown", "id": "e2e5805f", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%% md\n" } }, "source": [ "### IUT - Certificate mapping" ] },
{ "cell_type": "code", "execution_count": null, "id": "48f49012", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [
"# snapshots[-1] is the last snapshot in the dataset's own order, hence \"last\" rather than \"first\".\n",
"# NOTE(review): presumably this is the most recent snapshot - confirm the dataset keeps them chronological.\n",
"iut_last_snapshot = iut_dset.snapshots[-1]\n",
"iut_matches = FIPSProcessMatcher.match_snapshot(iut_last_snapshot, fips)"
] },
{ "cell_type": "markdown", "id": "cbc02977", "metadata": {}, "source": [ "## MIP dataset" ] },
{ "cell_type": "code", "execution_count": null, "id": "46ce3c13", "metadata": {}, "outputs": [], "source": [ "mip_dset = MIPDataset.from_web()" ] },
{ "cell_type": "code", "execution_count": null, "id": "aa74b965", "metadata": {}, "outputs": [], "source": [ "mip_global_df = pd.DataFrame(mip_dset.snapshots, columns=[\"timestamp\", \"displayed\", \"not_displayed\", \"total\"])\n", "mip_global_df.info()" ] },
{ "cell_type": "code", "execution_count": null, "id": "dc3186f6", "metadata": {}, "outputs": [], "source": [ "mip_melt = mip_global_df.melt(\"timestamp\", var_name=\"type\", value_name=\"count\")\n", "line = sns.lineplot(data=mip_melt, x=\"timestamp\", y=\"count\", hue=\"type\")\n", "line.set(xlabel=\"Date\", ylabel=\"Number of entries\", title=\"Entries in MIP list over time\")\n", "line.legend(title=\"Type\", bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);" ] },
{ "cell_type": "code", "execution_count": null, "id": "f433f0c5", "metadata": {}, "outputs": [], "source": [ "m0 = mip_dset.snapshots[0]\n", "print(m0.__dict__.keys())\n", "print(next(iter(m0.entries)))" ] },
{ "cell_type": "code", "execution_count": null, "id": "205b9a97", "metadata": {}, "outputs": [], "source": [
"def mip_key(entry):\n",
"    \"\"\"Return the (module name, vendor name, standard, status) tuple of a MIP entry.\"\"\"\n",
"    return entry.module_name, entry.vendor_name, entry.standard, entry.status\n",
"\n",
"\n",
"mip_snapshots_sorted = sorted(mip_dset.snapshots, key=lambda x: x.timestamp)\n",
"# Date of the newest snapshot: an entry last seen on that date counts as still present.\n",
"# Computed explicitly instead of relying on the loop variable leaking out of the loop below.\n",
"mip_latest_date = mip_snapshots_sorted[-1].timestamp.date() if mip_snapshots_sorted else None\n",
"\n",
"mip_first_seen = {}\n",
"mip_last_seen = {}\n",
"for snapshot in mip_snapshots_sorted:\n",
"    snapshot_date = snapshot.timestamp.date()\n",
"    for entry in snapshot.entries:\n",
"        # The entry object itself is the key; mip_key(entry) would instead share one key between entries\n",
"        # with equal name/vendor/standard/status, but the DataFrame below reads attributes off the keys.\n",
"        entry_key = entry\n",
"        # Snapshots are visited in chronological order: keep the first date, always overwrite the last.\n",
"        mip_first_seen.setdefault(entry_key, snapshot_date)\n",
"        mip_last_seen[entry_key] = snapshot_date\n",
"\n",
"mip_local_df = pd.DataFrame(\n",
"    [\n",
"        (entry.module_name, entry.vendor_name, entry.standard, entry.status, entry.status_since, first_seen, mip_last_seen[entry], mip_last_seen[entry] == mip_latest_date)\n",
"        for entry, first_seen in mip_first_seen.items()\n",
"    ],\n",
"    columns=(\"name\", \"vendor\", \"standard\", \"status\", \"status_since\", \"first_seen\", \"last_seen\", \"present\"),\n",
")\n",
"mip_local_df = mip_local_df.astype({\"standard\": \"category\", \"status\": \"category\", \"status_since\": \"datetime64[ns]\", \"first_seen\": \"datetime64[ns]\", \"last_seen\": \"datetime64[ns]\", \"present\": \"bool\"}).fillna(value=np.nan)\n",
"mip_local_df[\"seen_for_status\"] = (mip_local_df.last_seen - mip_local_df.status_since).dt.days\n",
"mip_local_df[\"seen_for\"] = (mip_local_df.last_seen - mip_local_df.first_seen).dt.days\n",
"mip_local_df.head()"
] },
{ "cell_type": "code", "execution_count": null, "id": "29679c73", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=mip_local_df.loc[mip_local_df.status == MIPStatus.REVIEW_PENDING], x=\"first_seen\", y=\"seen_for\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "771662ff", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=mip_local_df.loc[mip_local_df.status == MIPStatus.IN_REVIEW], x=\"first_seen\", y=\"seen_for\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "2b2ba494", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=mip_local_df.loc[mip_local_df.status == MIPStatus.COORDINATION], x=\"first_seen\", y=\"seen_for\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "883667b4", "metadata": {}, "outputs": [], "source": [ "scatter = sns.relplot(kind=\"scatter\", data=mip_local_df.loc[mip_local_df.status == MIPStatus.FINALIZATION], x=\"first_seen\", y=\"seen_for\", hue=\"standard\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "9856e568", "metadata": {}, "outputs": [], "source": [ "for status in MIPStatus:\n", " print(status)\n", " print(\"All:\")\n", " print(\"Average seen for\", np.mean(mip_local_df.loc[mip_local_df.status == status].seen_for))\n", " print(\"Average seen for (FIPS 140-2)\", np.mean(mip_local_df.loc[(mip_local_df.status == status) & (mip_local_df.standard == \"FIPS 
140-2\")].seen_for))\n", " print(\"Average seen for (FIPS 140-3)\", np.mean(mip_local_df.loc[(mip_local_df.status == status) & (mip_local_df.standard == \"FIPS 140-3\")].seen_for))\n", "\n", " print(\"Only not present:\")\n", " print(\"Average seen for\", np.mean(mip_local_df.loc[~(mip_local_df.present) & (mip_local_df.status == status)].seen_for))\n", " print(\"Average seen for (FIPS 140-2)\", np.mean(mip_local_df.loc[~(mip_local_df.present) & (mip_local_df.status == status) & (mip_local_df.standard == \"FIPS 140-2\")].seen_for))\n", " print(\"Average seen for (FIPS 140-3)\", np.mean(mip_local_df.loc[~(mip_local_df.present) & (mip_local_df.status == status) & (mip_local_df.standard == \"FIPS 140-3\")].seen_for))\n", " print()" ] },
{ "cell_type": "code", "execution_count": null, "id": "ccf7e2fc", "metadata": {}, "outputs": [], "source": [ "with sns.plotting_context(\"notebook\", font_scale=0.75):\n", " g = sns.FacetGrid(mip_local_df.loc[~mip_local_df.present], col=\"status\", hue=\"standard\", col_wrap=2, height=2, ylim=(0,300))\n", " g.map(sns.histplot, \"seen_for\")\n", " g.set_titles(\"{col_name}\")\n", " plt.show()" ] },
{ "cell_type": "markdown", "id": "b1ce1208", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%% md\n" } }, "source": [ "### MIP - Certificate matching" ] },
{ "cell_type": "code", "execution_count": null, "id": "c692ad3f", "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [
"# snapshots[-1] is the last snapshot in the dataset's own order, hence \"last\" rather than \"first\".\n",
"# NOTE(review): presumably this is the most recent snapshot - confirm the dataset keeps them chronological.\n",
"mip_last_snapshot = mip_dset.snapshots[-1]\n",
"mip_matches = FIPSProcessMatcher.match_snapshot(mip_last_snapshot, fips)"
] } ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", 
"version": "3.11.3" }, "vscode": { "interpreter": { "hash": "a5b8c5b127d2cfe5bc3a1c933e197485eb9eba25154c3661362401503b4ef9d4" } } }, "nbformat": 4, "nbformat_minor": 5 }