about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author J08nY 2023-04-14 20:46:12 +0200
committer J08nY 2023-04-14 20:47:32 +0200
commit ae2bb3247521aaf0930fac70e1f3f1cb2ffbc153 (patch)
tree 844e2f32d0896938b07ca020edfdb15c580ba411 /src
parent 89b3d880088b5c30fa10036f280e73b1c1aee05e (diff)
download sec-certs-ae2bb3247521aaf0930fac70e1f3f1cb2ffbc153.tar.gz
sec-certs-ae2bb3247521aaf0930fac70e1f3f1cb2ffbc153.tar.zst
sec-certs-ae2bb3247521aaf0930fac70e1f3f1cb2ffbc153.zip
Share code between FIPS and CC matching.
Diffstat (limited to 'src')
-rw-r--r-- src/sec_certs/dataset/cc_scheme.py 4
-rw-r--r-- src/sec_certs/model/cc_matching.py 48
-rw-r--r-- src/sec_certs/model/fips_matching.py 41
-rw-r--r-- src/sec_certs/model/matching.py 43
4 files changed, 70 insertions, 66 deletions
diff --git a/src/sec_certs/dataset/cc_scheme.py b/src/sec_certs/dataset/cc_scheme.py
index 49c059d8..64f0eb9b 100644
--- a/src/sec_certs/dataset/cc_scheme.py
+++ b/src/sec_certs/dataset/cc_scheme.py
@@ -59,7 +59,7 @@ class CCSchemeDataset:
:return: The entries.
"""
soup = CCSchemeDataset._get_page(constants.CC_AUSTRALIA_INEVAL_URL)
- header = soup.find("h2", text="Products in evaluation")
+ header = soup.find("h2", string="Products in evaluation")
table = header.find_next_sibling("table")
results = []
for tr in tqdm(table.find_all("tr"), desc="Get AU scheme in evaluation."):
@@ -983,7 +983,7 @@ class CCSchemeDataset:
v["product"] = value
elif "Common Criteria" in title:
v["cc_version"] = value
- elif "Date of Certification" in title or "Date issued":
+ elif "Date of Certification" in title or "Date issued" in title:
v["certification_date"] = value
elif "EvaluationAssurance Level" in title:
v["assurance_level"] = value
diff --git a/src/sec_certs/model/cc_matching.py b/src/sec_certs/model/cc_matching.py
index 9c081ed2..a4c9c2d5 100644
--- a/src/sec_certs/model/cc_matching.py
+++ b/src/sec_certs/model/cc_matching.py
@@ -1,22 +1,18 @@
from __future__ import annotations
-import typing
-from heapq import heappop, heappush
from typing import Any, Mapping
from rapidfuzz import fuzz
from sec_certs.configuration import config
-
-if typing.TYPE_CHECKING:
- from sec_certs.dataset.cc import CCDataset
- from sec_certs.sample.cc import CCCertificate
-
+from sec_certs.dataset.cc import CCDataset
+from sec_certs.model.matching import AbstractMatcher
+from sec_certs.sample.cc import CCCertificate
from sec_certs.sample.cc_certificate_id import CertificateId
from sec_certs.utils.strings import fully_sanitize_string
-class CCSchemeMatcher:
+class CCSchemeMatcher(AbstractMatcher[CCCertificate, CCDataset]):
"""
A heuristic matcher between entries on CC scheme websites (see CCSchemeDataset) and
CC certificates from the Common Criteria portal (as in CCDataset).
@@ -73,6 +69,7 @@ class CCSchemeMatcher:
cert_manufacturer = fully_sanitize_string(cert.manufacturer)
if self._product == cert.name and self._vendor == cert.manufacturer:
return 99
+ # TODO: Add matching based on document hashes: cert_hash, report_hash, target_hash.
product_ratings = [
fuzz.token_set_ratio(self._product, cert_name),
@@ -85,30 +82,15 @@ class CCSchemeMatcher:
return max((0, max(product_ratings) * 0.5 + max(vendor_ratings) * 0.5 - 2))
@classmethod
- def match_all(cls, entries: list[dict[str, Any]], scheme: str, dset: CCDataset):
+ def match_all(cls, entries: list[dict[str, Any]], scheme: str, dset: CCDataset) -> dict[str, Mapping]:
+ """
+ Match all entries of a given CC scheme to certificates from the dataset.
+
+ :param entries: The entries from the scheme, obtained from CCSchemeDataset.
+ :param scheme: The scheme, e.g. "DE".
+ :param dset: The dataset to match against.
+ :return: A mapping of certificate digests to entries, without duplicates, not all entries may be present.
+ """
certs: list[CCCertificate] = list(filter(lambda cert: cert.scheme == scheme, dset))
matchers = [CCSchemeMatcher(entry, scheme) for entry in entries]
- scores: list[tuple[float, int, int]] = []
- matched_is: set[int] = set()
- matched_js: set[int] = set()
- for i, cert in enumerate(certs):
- for j, matcher in enumerate(matchers):
- score = matcher.match(cert)
- triple = (100 - score, i, j)
- heappush(scores, triple)
- results = {}
- for triple in (heappop(scores) for _ in range(len(scores))):
- inv_score, i, j = triple
- # Do not match already matched entries/certs.
- if i in matched_is or j in matched_js:
- continue
- # Compute the actual score from the inverse.
- score = 100 - inv_score
- # Do not match if we are below threshold, all the following will be as well.
- if score < config.cc_matching_threshold:
- break
- # Match cert dgst to entry
- cert = certs[i]
- entry = matchers[j].entry
- results[cert.dgst] = entry
- return results
+ return cls._match_all(matchers, certs, config.cc_matching_threshold)
diff --git a/src/sec_certs/model/fips_matching.py b/src/sec_certs/model/fips_matching.py
index f73067b3..e4f9fbc5 100644
--- a/src/sec_certs/model/fips_matching.py
+++ b/src/sec_certs/model/fips_matching.py
@@ -1,22 +1,22 @@
from __future__ import annotations
import typing
-from operator import itemgetter
-from typing import Mapping, MutableMapping
+from typing import Mapping
from rapidfuzz import fuzz
from sec_certs.configuration import config
+from sec_certs.dataset.fips import FIPSDataset
+from sec_certs.model.matching import AbstractMatcher
+from sec_certs.sample.fips import FIPSCertificate
from sec_certs.utils.strings import fully_sanitize_string
if typing.TYPE_CHECKING:
- from sec_certs.dataset.fips import FIPSDataset
- from sec_certs.sample.fips import FIPSCertificate
from sec_certs.sample.fips_iut import IUTEntry, IUTSnapshot
from sec_certs.sample.fips_mip import MIPEntry, MIPSnapshot
-class FIPSProcessMatcher:
+class FIPSProcessMatcher(AbstractMatcher[FIPSCertificate, FIPSDataset]):
"""
A heuristic matcher between entries on the FIPS IUT/MIP lists and
the FIPS certificates.
@@ -64,31 +64,10 @@ class FIPSProcessMatcher:
"""
Match a whole snapshot of IUT/MIP entries to a FIPS certificate dataset.
- Duplicates may occur.
-
:param snapshot: The snapshot to match the entries of.
- :param dset: The dataset tot match to.
- :return: The matching.
+ :param dset: The dataset to match to.
+ :return: A mapping of certificate digests to entries, without duplicates, not all entries may be present.
"""
- matches: MutableMapping[IUTEntry | MIPEntry, FIPSCertificate | None] = {}
- for entry in snapshot:
- matcher = FIPSProcessMatcher(entry)
- scores = sorted(((matcher.match(cert), cert) for cert in dset), key=itemgetter(0), reverse=True)
- found = False
- for score, cert in scores:
- if score < config.fips_matching_threshold:
- break
- validations = cert.web_data.validation_history
- if not validations:
- continue
- for validation in validations:
- if validation.date >= snapshot.timestamp.date():
- # It could be this cert, so take it
- found = True
- matches[entry] = cert
- break
- if found:
- break
- else:
- matches[entry] = None
- return matches
+ certs: list[FIPSCertificate] = list(dset)
+ matchers = [FIPSProcessMatcher(entry) for entry in snapshot]
+ return cls._match_all(matchers, certs, config.fips_matching_threshold)
diff --git a/src/sec_certs/model/matching.py b/src/sec_certs/model/matching.py
new file mode 100644
index 00000000..687a5888
--- /dev/null
+++ b/src/sec_certs/model/matching.py
@@ -0,0 +1,43 @@
+import typing
+from abc import ABC, abstractmethod
+from heapq import heappop, heappush
+from typing import Generic
+
+from sec_certs.dataset.dataset import Dataset
+from sec_certs.sample.certificate import Certificate
+
+CertSubType = typing.TypeVar("CertSubType", bound=Certificate)
+DatasetSubType = typing.TypeVar("DatasetSubType", bound=Dataset)
+
+
+class AbstractMatcher(Generic[CertSubType, DatasetSubType], ABC):
+ @abstractmethod
+ def match(self, cert: CertSubType) -> float:
+ raise NotImplementedError
+
+ @staticmethod
+ def _match_all(matchers, certs, threshold):
+ scores: list[tuple[float, int, int]] = []
+ matched_is: set[int] = set()
+ matched_js: set[int] = set()
+ for i, cert in enumerate(certs):
+ for j, matcher in enumerate(matchers):
+ score = matcher.match(cert)
+ triple = (100 - score, i, j)
+ heappush(scores, triple)
+ results = {}
+ for triple in (heappop(scores) for _ in range(len(scores))):
+ inv_score, i, j = triple
+ # Do not match already matched entries/certs.
+ if i in matched_is or j in matched_js:
+ continue
+ # Compute the actual score from the inverse.
+ score = 100 - inv_score
+ # Do not match if we are below threshold, all the following will be as well.
+ if score < threshold:
+ break
+ # Match cert dgst to entry
+ cert = certs[i]
+ entry = matchers[j].entry
+ results[cert.dgst] = entry
+ return results