diff options
| author | Adam Janovsky | 2023-03-16 14:22:42 +0100 |
|---|---|---|
| committer | Adam Janovsky | 2023-03-16 14:22:42 +0100 |
| commit | 10698b3fd07fc585027926b6d75bfa844997bff9 (patch) | |
| tree | 7270c7b063250594474d27db67ad3cce78bc5658 /src | |
| parent | 474291023d534e68dd0f5b2c6bc89c66bebd7387 (diff) | |
| parent | 664de252456e13df40a2526adaf3a68009f592ff (diff) | |
| download | sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.tar.gz sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.tar.zst sec-certs-10698b3fd07fc585027926b6d75bfa844997bff9.zip | |
Merge branch 'main' into reference-analysis
Diffstat (limited to 'src')
| -rw-r--r-- | src/sec_certs/dataset/cve.py | 15 | ||||
| -rw-r--r-- | src/sec_certs/dataset/dataset.py | 26 | ||||
| -rw-r--r-- | src/sec_certs/sample/cpe.py | 3 |
3 files changed, 40 insertions, 4 deletions
```diff
diff --git a/src/sec_certs/dataset/cve.py b/src/sec_certs/dataset/cve.py
index 43cd5b80..7217e1c9 100644
--- a/src/sec_certs/dataset/cve.py
+++ b/src/sec_certs/dataset/cve.py
@@ -62,7 +62,12 @@ class CVEDataset(JSONPathDataset, ComplexSerializableType):
         """
         self.cves_with_vulnerable_configurations = [cve for cve in self if cve.vulnerable_cpe_configurations]
 
-    def build_lookup_dict(self, use_nist_mapping: bool = True, nist_matching_filepath: Path | None = None):
+    def build_lookup_dict(
+        self,
+        use_nist_mapping: bool = True,
+        nist_matching_filepath: Path | None = None,
+        limit_to_cpes: set[CPE] | None = None,
+    ):
         """
         Builds look-up dictionary CPE -> Set[CVE] and filter the CVEs which contain CPE configurations.
         Developer's note: There are 3 CPEs that are present in the cpe matching feed, but are badly processed by CVE
@@ -82,6 +87,14 @@ class CVEDataset(JSONPathDataset, ComplexSerializableType):
 
         cve: CVE
         for cve in tqdm(self, desc="Building-up lookup dictionaries for fast CVE matching"):
+            # Filter to CVEs that contain relevant CPEs
+            if limit_to_cpes and not (
+                set(cve.vulnerable_cpes).union(
+                    set(itertools.chain.from_iterable(x.get_all_cpes() for x in cve.vulnerable_cpe_configurations))
+                )
+            ).intersection(limit_to_cpes):
+                continue
+
             # See note above, we use matching_dict.get(cpe, []) instead of matching_dict[cpe] as would be expected
             if use_nist_mapping:
                 vulnerable_configurations = list(
diff --git a/src/sec_certs/dataset/dataset.py b/src/sec_certs/dataset/dataset.py
index 614f60cf..350e5126 100644
--- a/src/sec_certs/dataset/dataset.py
+++ b/src/sec_certs/dataset/dataset.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import itertools
 import json
 import logging
 import re
@@ -252,7 +253,9 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
         logger.info("Processing auxiliary datasets.")
         self.auxiliary_datasets_dir.mkdir(parents=True, exist_ok=True)
         self.auxiliary_datasets.cpe_dset = self._prepare_cpe_dataset(download_fresh)
-        self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(download_fresh_cves=download_fresh)
+        self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(
+            download_fresh_cves=download_fresh, build_lookup_dict=False
+        )
         self.state.auxiliary_datasets_processed = True
 
     @serialize
@@ -326,7 +329,23 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
 
     def _compute_heuristics(self) -> None:
         logger.info("Computing various heuristics from the certificates.")
+
+        if not self.auxiliary_datasets.cpe_dset:
+            self.auxiliary_datasets.cpe_dset = self._prepare_cpe_dataset()
+
         self.compute_cpe_heuristics()
+
+        cpe_rich = [
+            set(map(self.auxiliary_datasets.cpe_dset.cpes.get, x.heuristics.cpe_matches))
+            for x in self
+            if x.heuristics.cpe_matches is not None
+        ]
+        all_cpes = set(itertools.chain.from_iterable(cpe_rich))
+
+        if not self.auxiliary_datasets.cve_dset:
+            self.auxiliary_datasets.cve_dset = self._prepare_cve_dataset(build_lookup_dict=False)
+        self.auxiliary_datasets.cve_dset.build_lookup_dict(use_nist_mapping=True, limit_to_cpes=all_cpes)  # type: ignore
+
         self.compute_related_cves()
         self._compute_references()
         self._compute_transitive_vulnerabilities()
@@ -353,7 +372,7 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
         return cpe_dataset
 
     def _prepare_cve_dataset(
-        self, download_fresh_cves: bool = False, use_nist_cpe_matching_dict: bool = True
+        self, download_fresh_cves: bool = False, use_nist_cpe_matching_dict: bool = True, build_lookup_dict: bool = True
     ) -> CVEDataset:
         logger.info("Preparing CVE dataset.")
         if not self.auxiliary_datasets_dir.exists():
@@ -365,7 +384,8 @@ class Dataset(Generic[CertSubType, AuxiliaryDatasetsSubType], ComplexSerializabl
         else:
             cve_dataset = CVEDataset.from_json(self.cve_dataset_path)
 
-        cve_dataset.build_lookup_dict(use_nist_cpe_matching_dict, self.nist_cve_cpe_matching_dset_path)
+        if build_lookup_dict:
+            cve_dataset.build_lookup_dict(use_nist_cpe_matching_dict, self.nist_cve_cpe_matching_dset_path)
         return cve_dataset
 
     @serialize
diff --git a/src/sec_certs/sample/cpe.py b/src/sec_certs/sample/cpe.py
index 1e3cbf12..d56535c2 100644
--- a/src/sec_certs/sample/cpe.py
+++ b/src/sec_certs/sample/cpe.py
@@ -35,6 +35,9 @@ class CPEConfiguration(ComplexSerializableType):
         """
         return self.platform.uri in other_cpe_uris and any(x.uri in other_cpe_uris for x in self.cpes)
 
+    def get_all_cpes(self) -> set[CPE]:
+        return {self.platform}.union(self.cpes)
+
 
 @dataclass
 class CPE(PandasSerializableType, ComplexSerializableType):
```
