about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Adam Janovsky, 2023-03-16 14:22:42 +0100
committer: Adam Janovsky, 2023-03-16 14:22:42 +0100
commit: 10698b3fd07fc585027926b6d75bfa844997bff9 (patch)
tree: 7270c7b063250594474d27db67ad3cce78bc5658 /src
parent: 474291023d534e68dd0f5b2c6bc89c66bebd7387 (diff)
parent: 664de252456e13df40a2526adaf3a68009f592ff (diff)
download: sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.tar.gz
download: sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.tar.zst
download: sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.zip
Merge branch 'main' into reference-analysis
Diffstat (limited to 'src')
-rw-r--r-- src/sec_certs/dataset/cve.py | 15
-rw-r--r-- src/sec_certs/dataset/dataset.py | 26
-rw-r--r-- src/sec_certs/sample/cpe.py | 3
3 files changed, 40 insertions, 4 deletions
diff --git a/src/sec_certs/dataset/cve.py b/src/sec_certs/dataset/cve.py
index 43cd5b80..7217e1c9 100644
--- a/src/sec_certs/dataset/cve.py
+++ b/src/sec_certs/dataset/cve.py
@@ -62,7 +62,12 @@ class CVEDataset(JSONPathDataset, ComplexSerializableType):
"""
self.cves_with_vulnerable_configurations = [cve for cve in self if cve.vulnerable_cpe_configurations]
- def build_lookup_dict(self, use_nist_mapping: bool = True, nist_matching_filepath: Path | None = None):
+ def build_lookup_dict(
+ self,
+ use_nist_mapping: bool = True,
+ nist_matching_filepath: Path | None = None,
+ limit_to_cpes: set[CPE] | None = None,
+ ):
"""
Builds look-up dictionary CPE -> Set[CVE] and filter the CVEs which contain CPE configurations.
Developer's note: There are 3 CPEs that are present in the cpe matching feed, but are badly processed by CVE
@@ -82,6 +87,14 @@ class CVEDataset(JSONPathDataset, ComplexSerializableType):
cve: CVE
for cve in tqdm(self, desc="Building-up lookup dictionaries for fast CVE matching"):
+ # Filter to CVEs that contain relevant CPEs
+ if limit_to_cpes and not (
+ set(cve.vulnerable_cpes).union(
+ set(itertools.chain.from_iterable(x.get_all_cpes() for x in cve.vulnerable_cpe_configurations))
+ )
+ ).intersection(limit_to_cpes):
+ continue
+
# See note above, we use matching_dict.get(cpe, []) instead of matching_dict[cpe] as would be expected
if use_nist_mapping:
vulnerable_configurations = list(
diff --git a/src/sec_certs/dataset/dataset.py b/src/sec_certs/dataset/dataset.py
index 614f60cf..350e5126 100644
--- a/src/sec_certs/dataset/dataset.py
+++ b/src/sec_certs/dataset/dataset.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import itertools
import json
import logging
import re
@@ -252,7 +253,9 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
logger.info("Processing auxiliary datasets.")
self.auxiliary_datasets_dir.mkdir(parents=True, exist_ok=True)
self.auxiliary_datasets.cpe_dset = self._prepare_cpe_dataset(download_fresh)
- self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(download_fresh_cves=download_fresh)
+ self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(
+ download_fresh_cves=download_fresh, build_lookup_dict=False
+ )
self.state.auxiliary_datasets_processed = True
@serialize
@@ -326,7 +329,23 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
def _compute_heuristics(self) -> None:
logger.info("Computing various heuristics from the certificates.")
+
+ if not self.auxiliary_datasets.cpe_dset:
+ self.auxiliary_datasets.cpe_dset = self._prepare_cpe_dataset()
+
self.compute_cpe_heuristics()
+
+ cpe_rich = [
+ set(map(self.auxiliary_datasets.cpe_dset.cpes.get, x.heuristics.cpe_matches))
+ for x in self
+ if x.heuristics.cpe_matches is not None
+ ]
+ all_cpes = set(itertools.chain.from_iterable(cpe_rich))
+
+ if not self.auxiliary_datasets.cve_dset:
+ self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(build_lookup_dict=False)
+ self.auxiliary_datasets.cve_dset.build_lookup_dict(use_nist_mapping=True, limit_to_cpes=all_cpes) # type: ignore
+
self.compute_related_cves()
self._compute_references()
self._compute_transitive_vulnerabilities()
@@ -353,7 +372,7 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
return cpe_dataset
def _prepare_cve_dataset(
- self, download_fresh_cves: bool = False, use_nist_cpe_matching_dict: bool = True
+ self, download_fresh_cves: bool = False, use_nist_cpe_matching_dict: bool = True, build_lookup_dict: bool = True
) -> CVEDataset:
logger.info("Preparing CVE dataset.")
if not self.auxiliary_datasets_dir.exists():
@@ -365,7 +384,8 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
else:
cve_dataset = CVEDataset.from_json(self.cve_dataset_path)
- cve_dataset.build_lookup_dict(use_nist_cpe_matching_dict, self.nist_cve_cpe_matching_dset_path)
+ if build_lookup_dict:
+ cve_dataset.build_lookup_dict(use_nist_cpe_matching_dict, self.nist_cve_cpe_matching_dset_path)
return cve_dataset
@serialize
diff --git a/src/sec_certs/sample/cpe.py b/src/sec_certs/sample/cpe.py
index 1e3cbf12..d56535c2 100644
--- a/src/sec_certs/sample/cpe.py
+++ b/src/sec_certs/sample/cpe.py
@@ -35,6 +35,9 @@ class CPEConfiguration(ComplexSerializableType):
"""
return self.platform.uri in other_cpe_uris and any(x.uri in other_cpe_uris for x in self.cpes)
+ def get_all_cpes(self) -> set[CPE]:
+ return {self.platform}.union(self.cpes)
+
@dataclass
class CPE(PandasSerializableType, ComplexSerializableType):