1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Results on 5 classes:\n",
"\t- Cohen's Kappa: 0.7101271765978729\n",
"\t- Percentage agreement: 0.8225\n",
"Results on simplified 2 classes:\n",
"\t- Cohen's Kappa: 0.8424203759140207\n",
"\t- Percentage agreement: 0.9437869822485208\n"
]
}
],
"source": [
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.metrics import cohen_kappa_score\n",
"\n",
"# Collapse the five fine-grained annotation labels into two coarse classes.\n",
"label_mapping = {\n",
"    **dict.fromkeys([\"COMPONENT_USED\", \"EVALUATION_REUSED\", \"COMPONENT_SHARED\"], \"COMPONENT_USED\"),\n",
"    **dict.fromkeys([\"RE-EVALUATION\", \"PREVIOUS_VERSION\"], \"PREVIOUS_VERSION\"),\n",
"}\n",
"\n",
"\n",
"def load_all_dataframes(base_folder: Path) -> pd.DataFrame:\n",
"    \"\"\"Load and stack the train/valid/test annotation CSVs found in *base_folder*.\n",
"\n",
"    Missing labels are replaced by \"unknown\", every label is upper-cased, and a\n",
"    ``simplified_label`` column is derived through ``label_mapping``. Labels\n",
"    without an entry in the mapping (including \"UNKNOWN\") become NaN there.\n",
"\n",
"    The stacked frame receives a fresh 0..n-1 index. Each CSV is read with its\n",
"    own 0-based index, so stacking them as-is would repeat index labels and a\n",
"    later label-based ``drop`` would remove rows from several splits at once.\n",
"    \"\"\"\n",
"    frames = [pd.read_csv(base_folder / f\"{split}.csv\") for split in (\"train\", \"valid\", \"test\")]\n",
"    stacked = pd.concat(frames, ignore_index=True)\n",
"    return stacked.assign(label=lambda df_: df_.label.fillna(\"unknown\").str.upper()).assign(\n",
"        simplified_label=lambda df_: df_.label.map(label_mapping),\n",
"    )\n",
"\n",
"\n",
"# Path() is the current working directory; presumably the notebook is run from the repository root.\n",
"REPO_ROOT = Path()\n",
"\n",
"\n",
"adam_df = load_all_dataframes(REPO_ROOT / \"src/sec_certs/data/reference_annotations/adam\")\n",
"jano_df = load_all_dataframes(REPO_ROOT / \"src/sec_certs/data/reference_annotations/jano\")\n",
"\n",
"# Row i of both frames is compared directly, so both annotators must have labelled\n",
"# the same samples in the same order (pandas raises if the indexes differ).\n",
"agreement_series = adam_df.label == jano_df.label\n",
"\n",
"print(\"Results on 5 classes:\")\n",
"print(f\"\\t- Cohen's Kappa: {cohen_kappa_score(adam_df.label, jano_df.label)}\")\n",
"# The mean of a boolean series is the fraction of True values, i.e. the agreement rate.\n",
"print(f\"\\t- Percentage agreement: {agreement_series.mean()}\")\n",
"\n",
"# Keep only rows where both annotators chose a label that has a simplified counterpart.\n",
"# The filter is a positional boolean mask rather than a label-based drop: if index labels\n",
"# repeat, dropping by label would also discard unrelated rows that share the same label.\n",
"both_mapped = (adam_df.simplified_label.notna() & jano_df.simplified_label.notna()).to_numpy()\n",
"adam_df_simplified = adam_df.loc[both_mapped]\n",
"jano_df_simplified = jano_df.loc[both_mapped]\n",
"agreement_series = adam_df_simplified.simplified_label == jano_df_simplified.simplified_label\n",
"\n",
"\n",
"print(\"Results on simplified 2 classes:\")\n",
"print(\n",
"    f\"\\t- Cohen's Kappa: {cohen_kappa_score(adam_df_simplified.simplified_label, jano_df_simplified.simplified_label)}\"\n",
")\n",
"print(f\"\\t- Percentage agreement: {agreement_series.mean()}\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
|