{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3a0981d008383c12",
   "metadata": {},
   "source": [
    "# Vulnerability analysis\n",
    "\n",
    "Compares FIPS certificates that have CPE matches (*CPE-rich*) or related CVEs (*CVE-rich*) with all certificates by type, year and level, and tests whether the certificate level is negatively correlated with the number and the scores of the related CVEs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41674b9c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-17T09:37:51.724995Z",
     "start_time": "2024-10-17T09:37:51.033775Z"
    }
   },
   "outputs": [],
   "source": [
    "import functools\n",
    "import itertools\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "\n",
    "from sec_certs.dataset.auxiliary_dataset_handling import CPEDatasetHandler, CVEDatasetHandler\n",
    "from sec_certs.dataset.cpe import CPEDataset\n",
    "from sec_certs.dataset.cve import CVEDataset\n",
    "from sec_certs.dataset.fips import FIPSDataset\n",
    "from sec_certs.utils.pandas import expand_df_with_cve_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfg-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index values (year_from) at or above this value are dropped from the per-year comparison.\n",
    "# NOTE(review): presumably because the newest years are incomplete in the data - confirm.\n",
    "YEAR_CUTOFF = 2022\n",
    "\n",
    "# CVE-related columns of the certificate dataframe that are compared against the level.\n",
    "CVE_METRICS = (\"n_cves\", \"worst_cve_score\", \"avg_cve_score\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-load",
   "metadata": {},
   "source": [
    "## Load the dataset\n",
    "\n",
    "`FIPSDataset.from_web` is called with `path=\"dset\"` and `auxiliary_datasets=True`; the CVE and CPE datasets are then taken from the auxiliary dataset handlers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ee5dca5",
   "metadata": {},
   "outputs": [],
   "source": [
    "dset = FIPSDataset.from_web(path=\"dset\", auxiliary_datasets=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be466617-e182-4bec-bdb5-a479703faa50",
   "metadata": {},
   "outputs": [],
   "source": [
    "dset.load_auxiliary_datasets()\n",
    "# The CVE and CPE datasets are exposed through the auxiliary dataset handlers.\n",
    "cve_dset: CVEDataset = dset.aux_handlers[CVEDatasetHandler].dset\n",
    "cpe_dset: CPEDataset = dset.aux_handlers[CPEDatasetHandler].dset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-frames",
   "metadata": {},
   "source": [
    "## Build the analysis dataframes\n",
    "\n",
    "`df` holds all certificates, `df_cpe_rich` those with a non-null `cpe_matches` and `df_cve_rich` those with a non-null `related_cves`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "530354be",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = dset.to_pandas()\n",
    "\n",
    "# Limit cve_dset to the CVEs related to some certificate. dict.fromkeys drops repeated IDs\n",
    "# (the same CVE can be listed by several certificates) while keeping first-seen order,\n",
    "# so every CVE is looked up only once.\n",
    "related_cve_ids = dict.fromkeys(\n",
    "    itertools.chain.from_iterable(x.heuristics.related_cves for x in dset if x.heuristics.related_cves)\n",
    ")\n",
    "cve_dset.cves = {cve_id: cve_dset[cve_id] for cve_id in related_cve_ids}\n",
    "df = expand_df_with_cve_cols(df, cve_dset)\n",
    "\n",
    "df_cpe_rich = df.loc[df.cpe_matches.notnull()].copy()\n",
    "df_cve_rich = df.loc[df.related_cves.notnull()].copy()\n",
    "\n",
    "# Subsets compared in the distribution plots below, keyed by their legend name.\n",
    "subsets = {\"all certificates\": df, \"CPE-rich\": df_cpe_rich, \"CVE-rich\": df_cve_rich}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bf3a0a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d33b063",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df_cve_rich)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aacaf7f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df_cpe_rich)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-distributions",
   "metadata": {},
   "source": [
    "## Compare CPE-rich and CVE-rich certificates with all certificates\n",
    "\n",
    "Each plot shows, per subset, the fraction of that subset's certificates falling into each type, year or level."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fn-distributions",
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_by_subset(subsets: dict[str, pd.DataFrame], column: str, label: str) -> pd.DataFrame:\n",
    "    \"\"\"Count the values of `column` in every subset and align the counts side by side.\n",
    "\n",
    "    Args:\n",
    "        subsets: Maps a subset name (used in the legend) to its dataframe.\n",
    "        column: Column whose values are counted.\n",
    "        label: Human-readable name of the column, used as legend prefix.\n",
    "\n",
    "    Returns:\n",
    "        Dataframe indexed by the values of `column` with one count column per subset,\n",
    "        named \"<label> distribution <subset name>\". Values absent from a subset are NaN.\n",
    "    \"\"\"\n",
    "    counts = pd.concat(\n",
    "        [\n",
    "            frame[column].value_counts().sort_index().rename(f\"{label} distribution {name}\")\n",
    "            for name, frame in subsets.items()\n",
    "        ],\n",
    "        axis=1,\n",
    "    )\n",
    "    counts.index.name = column\n",
    "    return counts\n",
    "\n",
    "\n",
    "def to_fractions(counts: pd.DataFrame) -> pd.DataFrame:\n",
    "    \"\"\"Divide every column by its own sum, so that each column sums to 1.\"\"\"\n",
    "    return counts.div(counts.sum(axis=0), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "726d77a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "categories_merged = to_fractions(count_by_subset(subsets, \"type\", \"Type\"))\n",
    "\n",
    "categories_merged.plot.bar(\n",
    "    title=\"Type comparison between CPE-rich, CVE-rich and all certificates\",\n",
    "    ylabel=\"Fraction of certificates\",\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80de4629",
   "metadata": {},
   "outputs": [],
   "source": [
    "years_counts = count_by_subset(subsets, \"year_from\", \"Year\")\n",
    "# Drop the years at or above the cutoff before normalizing, so the fractions sum to 1 over the plotted years.\n",
    "years_merged = to_fractions(years_counts.loc[years_counts.index < YEAR_CUTOFF])\n",
    "\n",
    "years_merged.plot.line(\n",
    "    title=\"Years comparison between CPE-rich, CVE-rich and all certificates\",\n",
    "    ylabel=\"Fraction of certificates\",\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b12db24",
   "metadata": {},
   "outputs": [],
   "source": [
    "levels_merged = to_fractions(count_by_subset(subsets, \"level\", \"Level\"))\n",
    "\n",
    "levels_merged.plot.bar(\n",
    "    title=\"Level comparison between CPE-rich, CVE-rich and all certificates\",\n",
    "    ylabel=\"Fraction of certificates\",\n",
    ");"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-correlation",
   "metadata": {},
   "source": [
    "## Correlation between level and CVE metrics\n",
    "\n",
    "One-sided Spearman rank correlation on the CVE-rich certificates, followed by scatter plots of the same pairs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27423367",
   "metadata": {},
   "outputs": [],
   "source": [
    "# alternative=\"less\" makes the test one-sided (alternative hypothesis: negative correlation);\n",
    "# nan_policy=\"omit\" ignores NaN values instead of propagating them into the result.\n",
    "spearmanr = functools.partial(stats.spearmanr, nan_policy=\"omit\", alternative=\"less\")\n",
    "\n",
    "level_correlations = {\n",
    "    metric: spearmanr(df_cve_rich.level, df_cve_rich[metric]) for metric in CVE_METRICS\n",
    "}\n",
    "for metric, (corr, pvalue) in level_correlations.items():\n",
    "    print(f\"level vs. {metric}: correlation={corr}, p-value={pvalue}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f99702a",
   "metadata": {},
   "outputs": [],
   "source": [
    "for metric in CVE_METRICS:\n",
    "    sns.relplot(data=df_cve_rich, x=\"level\", y=metric)\n",
    "    plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "a5b8c5b127d2cfe5bc3a1c933e197485eb9eba25154c3661362401503b4ef9d4"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}