{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3a0981d008383c12",
   "metadata": {},
   "source": [
    "# Vulnerability analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41674b9c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-17T09:37:51.724995Z",
     "start_time": "2024-10-17T09:37:51.033775Z"
    }
   },
   "outputs": [],
   "source": [
    "from sec_certs.dataset.fips import FIPSDataset\n",
    "from sec_certs.dataset.cpe import CPEDataset\n",
    "from sec_certs.dataset.cve import CVEDataset\n",
    "from sec_certs.dataset.auxiliary_dataset_handling import CPEDatasetHandler, CVEDatasetHandler\n",
    "from sec_certs.utils.pandas import expand_df_with_cve_cols\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import itertools\n",
    "import functools\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ee5dca5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Obtain the FIPS dataset via FIPSDataset.from_web, passing 'dset' as the path and\n",
    "# requesting the auxiliary datasets as well.\n",
    "# NOTE(review): presumably this downloads a pre-built snapshot and may take a while - confirm.\n",
    "dset = FIPSDataset.from_web(path=\"dset\", auxiliary_datasets=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be466617-e182-4bec-bdb5-a479703faa50",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the auxiliary datasets, then take the CVE and CPE datasets out of their\n",
    "# handlers so the following cells can use them directly.\n",
    "dset.load_auxiliary_datasets()\n",
    "cve_dset: CVEDataset = dset.aux_handlers[CVEDatasetHandler].dset\n",
    "cpe_dset: CPEDataset = dset.aux_handlers[CPEDatasetHandler].dset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "530354be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabular view of the certificate dataset.\n",
    "df = dset.to_pandas()\n",
    "\n",
    "# IDs of every CVE related to some certificate (certificates without related CVEs are skipped).\n",
    "cves = [cve_id for cert in dset if cert.heuristics.related_cves for cve_id in cert.heuristics.related_cves]\n",
    "cve_dict = {cve_id: cve_dset[cve_id] for cve_id in cves}\n",
    "\n",
    "# Limit cve_dset to CVEs relevant to some certificate.\n",
    "# Note: this replaces the dataset's `cves` attribute in place.\n",
    "cve_dset.cves = cve_dict\n",
    "df = expand_df_with_cve_cols(df, cve_dset)\n",
    "\n",
    "# Independent copies of the rows that have CPE matches / related CVEs.\n",
    "df_cpe_rich = df.loc[df.cpe_matches.notnull()].copy()\n",
    "df_cve_rich = df.loc[df.related_cves.notnull()].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bf3a0a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first rows of the frame after the CVE columns were added.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d33b063",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of rows whose related_cves value is not null.\n",
    "len(df_cve_rich)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aacaf7f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of rows whose cpe_matches value is not null.\n",
    "len(df_cpe_rich)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "726d77a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-type counts for all certificates and for the CPE-rich / CVE-rich subsets.\n",
    "categories_all = df['type'].value_counts().sort_index().rename('Type distribution all')\n",
    "categories_cpe = df_cpe_rich['type'].value_counts().sort_index().rename('Type distribution CPE-rich')\n",
    "categories_cve = df_cve_rich['type'].value_counts().sort_index().rename('Type distribution CVE-rich')\n",
    "\n",
    "# Place the three series side by side and scale every column by its own total,\n",
    "# so the plot compares shares rather than raw counts.\n",
    "categories_merged = pd.concat([categories_all, categories_cpe, categories_cve], axis='columns')\n",
    "categories_merged = categories_merged / categories_merged.sum()\n",
    "\n",
    "categories_merged.plot.bar(title='Type comparison between CPE-rich, CVE-rich and all certificates');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80de4629",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-year counts (by year_from) for the CPE-rich / CVE-rich subsets and for all certificates.\n",
    "years_cpe = df_cpe_rich.year_from.value_counts().sort_index().rename('Year distribution CPE-rich')\n",
    "years_cve = df_cve_rich.year_from.value_counts().sort_index().rename('Year distribution CVE-rich')\n",
    "years_all = df.year_from.value_counts().sort_index().rename('Year distribution all certificates')\n",
    "\n",
    "years_merged = pd.concat([years_all, years_cpe, years_cve], axis=1)\n",
    "years_merged.index.name = \"year_from\"\n",
    "# Keep only years before 2022.\n",
    "# NOTE(review): presumably this drops years that were incomplete when the notebook was written - confirm the cutoff.\n",
    "years_merged = years_merged.loc[years_merged.index < 2022]\n",
    "# Scale every column by its own total so the lines show shares, not raw counts.\n",
    "years_merged = years_merged.div(years_merged.sum(axis=0), axis=1)\n",
    "years_merged.plot.line(title='Years comparison between CPE-rich, CVE-rich and all certificates');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b12db24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-level counts for the CPE-rich / CVE-rich subsets and for all certificates.\n",
    "levels_cpe = df_cpe_rich.level.value_counts().sort_index().rename('Level distribution CPE-rich')\n",
    "levels_cve = df_cve_rich.level.value_counts().sort_index().rename('Level distribution CVE-rich')\n",
    "levels_all = df.level.value_counts().sort_index().rename('Level distribution all certificates')\n",
    "\n",
    "levels_merged = pd.concat([levels_all, levels_cpe, levels_cve], axis=1)\n",
    "# Scale every column by its own total so the bars show shares, not raw counts.\n",
    "levels_merged = levels_merged.div(levels_merged.sum(axis=0), axis=1)\n",
    "# The plotted column is `level` of the FIPS dataset, so the title says 'Level' to match the series names.\n",
    "levels_merged.plot.bar(title='Level comparison between CPE-rich, CVE-rich and all certificates');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27423367",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spearman rank correlation with NaNs omitted; alternative='less' makes the test one-sided,\n",
    "# i.e. the p-value is for the hypothesis that the correlation is negative.\n",
    "spearmanr = functools.partial(stats.spearmanr, nan_policy=\"omit\", alternative=\"less\")\n",
    "\n",
    "# Level vs. number of CVEs.\n",
    "n_cves_level_corr, n_cves_level_pvalue = spearmanr(df_cve_rich[\"level\"], df_cve_rich[\"n_cves\"])\n",
    "print(n_cves_level_corr, n_cves_level_pvalue)\n",
    "\n",
    "# Level vs. worst CVE score.\n",
    "worst_cve_level_corr, worst_cve_level_pvalue = spearmanr(df_cve_rich[\"level\"], df_cve_rich[\"worst_cve_score\"])\n",
    "print(worst_cve_level_corr, worst_cve_level_pvalue)\n",
    "\n",
    "# Level vs. average CVE score.\n",
    "avg_cve_level_corr, avg_cve_level_pvalue = spearmanr(df_cve_rich[\"level\"], df_cve_rich[\"avg_cve_score\"])\n",
    "print(avg_cve_level_corr, avg_cve_level_pvalue)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f99702a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One scatter plot of level against each CVE metric, shown in this order.\n",
    "for cve_metric in (\"n_cves\", \"worst_cve_score\", \"avg_cve_score\"):\n",
    "    g = sns.relplot(data=df_cve_rich, x=\"level\", y=cve_metric)\n",
    "    plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "a5b8c5b127d2cfe5bc3a1c933e197485eb9eba25154c3661362401503b4ef9d4"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}