diff options
| -rw-r--r-- | src/sec_certs/constants.py | 9 | ||||
| -rw-r--r-- | src/sec_certs/sample/cc_scheme.py | 246 | ||||
| -rw-r--r-- | src/sec_certs/sample/cve.py | 2 | ||||
| -rw-r--r-- | tests/cc/test_cc_schemes.py | 3 |
4 files changed, 163 insertions, 97 deletions
diff --git a/src/sec_certs/constants.py b/src/sec_certs/constants.py index 01649bb7..228a2b84 100644 --- a/src/sec_certs/constants.py +++ b/src/sec_certs/constants.py @@ -82,8 +82,8 @@ CC_CANADA_BASE_URL = "https://www.cyber.gc.ca" CC_CANADA_API_URL = CC_CANADA_BASE_URL + "/api/cccs/page/v1/get" CC_CANADA_CERTIFIED_URL = "/en/tools-services/common-criteria/certified-products" CC_CANADA_INEVAL_URL = "/en/tools-services/common-criteria/products-evaluation" -CC_ANSSI_BASE_URL = "https://www.ssi.gouv.fr" -CC_ANSSI_CERTIFIED_URL = CC_ANSSI_BASE_URL + "/en/products/certified-products/" +CC_ANSSI_BASE_URL = "https://cyber.gouv.fr" +CC_ANSSI_CERTIFIED_URL = CC_ANSSI_BASE_URL + "/produits-certifies" CC_BSI_BASE_URL = "https://www.bsi.bund.de/" CC_BSI_CERTIFIED_URL = ( CC_BSI_BASE_URL @@ -103,7 +103,10 @@ CC_JAPAN_ARCHIVED_SW_URL = CC_JAPAN_BASE_URL + "/software/certified-cert/archive CC_JAPAN_INEVAL_URL = CC_JAPAN_BASE_URL + "/prdct-in-eval/in_eval_list.html" CC_MALAYSIA_BASE_URL = "https://iscb.cybersecurity.my" CC_MALAYSIA_CERTIFIED_URL = ( - CC_MALAYSIA_BASE_URL + "/index.php/certification/product-certification/mycc/certified-products-and-systems" + CC_MALAYSIA_BASE_URL + "/index.php/certification/product-certification/mycc/certified-products-and-systems-5" +) +CC_MALAYSIA_ARCHIVED_URL = ( + CC_MALAYSIA_BASE_URL + "/index.php/certification/product-certification/mycc/archived-certified-products-and-systems" ) CC_MALAYSIA_INEVAL_URL = ( CC_MALAYSIA_BASE_URL diff --git a/src/sec_certs/sample/cc_scheme.py b/src/sec_certs/sample/cc_scheme.py index 73946cf3..465d0dce 100644 --- a/src/sec_certs/sample/cc_scheme.py +++ b/src/sec_certs/sample/cc_scheme.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib import math +import re import tempfile import warnings from collections.abc import Callable, Iterable @@ -67,7 +68,7 @@ def _get(url: str, session, **kwargs) -> Response: with warnings.catch_warnings(): warnings.simplefilter("ignore", category=InsecureRequestWarning) conn = session if session else requests - resp = conn.get(url, headers={"User-Agent": "seccerts.org"}, verify=False, **kwargs) + resp = conn.get(url, headers={"User-Agent": "seccerts.org"}, verify=False, **kwargs, timeout=10) resp.raise_for_status() return resp @@ -91,6 +92,7 @@ def get_australia_in_evaluation(enhanced: bool = True) -> list[dict[str, Any]]: :param enhanced: Whether to enhance the results by following links (slower, more data). :return: The entries. """ + # TODO: Australian scheme is blocking our User-Agent. soup = _get_page(constants.CC_AUSTRALIA_INEVAL_URL) header = soup.find("h2", string="Products in evaluation") table = header.find_next_sibling("table") @@ -207,85 +209,80 @@ def get_france_certified(enhanced: bool = True, artifacts: bool = False) -> list :return: The entries. """ base_soup = _get_page(constants.CC_ANSSI_CERTIFIED_URL) - category_nav = base_soup.find("ul", class_="nav-categories") + pager = base_soup.find("nav", class_="pager") + last_page_a = re.search("[0-9]+", pager.find("a", title="Aller à la dernière page").text) + if not last_page_a: + raise ValueError + pages = int(last_page_a.group()) results = [] - for li in tqdm(category_nav.find_all("li"), desc="Get FR scheme certified."): - a = li.find("a") - url = a["href"] - category_name = sns(a.text) - soup = _get_page(urljoin(constants.CC_ANSSI_BASE_URL, url)) - table = soup.find("table", class_="produits-liste cc") - if not table: - continue - tbody = table.find("tbody") - for tr in tqdm(tbody.find_all("tr")): - tds = tr.find_all("td") - if not tds: - continue + for page in range(pages + 1): + soup = _get_page(constants.CC_ANSSI_CERTIFIED_URL + f"?page={page}") + for row in soup.find_all("article", class_="node--type-produit-certifie-cc"): cert: dict[str, Any] = { - "product": sns(tds[0].text), - "vendor": sns(tds[1].text), - "level": sns(tds[2].text), - "id": sns(tds[3].text), - "certification_date": sns(tds[4].text), - "category": category_name, - "url": urljoin(constants.CC_ANSSI_BASE_URL, tds[0].find("a")["href"]), + "product": sns(row.find("h3").text), + "description": sns(row.find("p", class_="field-body")), + "url": urljoin(constants.CC_ANSSI_BASE_URL, row.find("a")["href"]), } + complement_info = row.find("div", class_="info-complement") + for li in complement_info.find_all("li"): + label = li.find("span").text + value = sns(li.find(text=True, recursive=False)) + if "Commanditaire" in label: + cert["sponsor"] = value + elif "Développeur" in label: + cert["developer"] = value + elif "Référence du certificat" in label: + cert["cert_id"] = value + elif "Niveau" in label: + cert["level"] = value + elif "Date de fin de validité" in label: + cert["expiration_date"] = value if enhanced: e: dict[str, Any] = {} cert_page = _get_page(cert["url"]) - ref = cert_page.find("div", class_="ref-date") - for ref_li in ref.find_all("li"): - title, value = (sns(span.text) for span in ref_li.find_all("span", recursive=False)) - if not title: - continue - if "Référence" in title: - e["id"] = value - elif "Date de certification" in title: + infos = cert_page.find("div", class_="product-infos-wrapper") + for tr in infos.find_all("tr"): + label = tr.find("th").text + value = sns(tr.find("td").text) + if "Référence du certificat" in label: + e["cert_id"] = value + elif "Date de certification" in label: e["certification_date"] = value - elif "Date de fin de validité" in title: + elif "Date de fin de validité" in label: e["expiration_date"] = value - details = cert_page.find("div", class_="details") - for detail_li in details.find_all("li"): - title, value = (sns(span.text) for span in detail_li.find_all("span", recursive=False)) - if not title: - continue - if "Catégorie" in title: + elif "Catégorie" in label: + # TODO: translate? e["category"] = value - elif "Référentiel" in title: + elif "Référentiel" in label: e["cc_version"] = value - elif "Niveau" in title: - e["level"] = value - elif "Augmentations" in title: - e["augmentations"] = value - elif "Profil de protection" in title: - e["protection_profile"] = value - elif "Développeur" in title: + elif "Développeur(s)" in label: e["developer"] = value - elif "Centre d'évaluation" in title: + elif "Commanditaire(s)" in label: + e["sponsor"] = value + elif "Centre d'évaluation" in label: e["evaluation_facility"] = value - elif "Accords de reconnaissance" in title: - e["recognition"] = value - e["description"] = sns(cert_page.find("div", class_="box-produit-descriptif").text) - links = cert_page.find("div", class_="box-produit-telechargements") - for link_li in links.find_all("li"): - a = link_li.find("a") - href = urljoin(constants.CC_ANSSI_BASE_URL, a["href"]) - title = sns(a.text) - if not title: - continue - if "Rapport de certification" in title: - e["report_link"] = href + elif "Niveau" in label: + e["level"] = value + elif "Profil de protection" in label: + e["protection_profile"] = value + elif "Accords de reconnaissance" in label: + e["mutual_recognition"] = value + elif "Augmentations" in label: + e["augmented"] = value + documents = cert_page.find("div", class_="documents") + for a in documents.find_all("a"): + if "Rapport de certification" in a.text: + e["report_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"]) if artifacts: - e["report_hash"] = _get_hash(href).hex() - elif "Security target" in title: - e["target_link"] = href + e["report_hash"] = _get_hash(e["report_link"]).hex() + elif "Cible de sécurité" in a.text: + e["target_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"]) if artifacts: - e["target_hash"] = _get_hash(href).hex() - elif "Certificat" in title: - e["cert_link"] = href + e["target_hash"] = _get_hash(e["target_link"]).hex() + elif "Certificat" in a.text: + e["cert_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"]) if artifacts: - e["cert_hash"] = _get_hash(href).hex() + e["cert_hash"] = _get_hash(e["cert_link"]).hex() cert["enhanced"] = e results.append(cert) return results @@ -731,40 +728,99 @@ def get_japan_in_evaluation() -> list[dict[str, Any]]: return results -def get_malaysia_certified() -> list[dict[str, Any]]: - """ - Get Malaysian "certified product" entries. - - :return: The entries. - """ - soup = _get_page(constants.CC_MALAYSIA_CERTIFIED_URL) - sections = soup.find("div", attrs={"itemprop": "articleBody"}).find_all("section", class_="sppb-section") +def _get_malaysia(url, enhanced, artifacts) -> list[dict[str, Any]]: # noqa: C901 + soup = _get_page(url) + pages_re = re.search("Page [0-9]+ of ([0-9]+)", soup.find("form").text) + if not pages_re: + raise ValueError + total_pages = int(pages_re.group(1)) results = [] - for section in sections: - table = section.find("table") - if table is None: - continue - heading = section.find("h5") - if heading is None: - continue - category_name = sns(heading.text) - tbody = table.find("tbody") - for tr in tbody.find_all("tr", recursive=False): - tds = tr.find_all("td", recursive=False) - if len(tds) != 6: - continue - cert = { - "category": category_name, - "level": sns(tds[0].text), - "cert_id": sns(tds[1].text), - "certification_date": sns(tds[2].text), + for i in range(total_pages): + soup = _get_page(url + f"?start={i*10}") + table = soup.find("table", class_="directoryTable") + for tr in table.find_all("tr", class_="directoryRow"): + tds = tr.find_all("td") + cert: dict[str, Any] = { + "cert_no": sns(tds[0].text), + "developer": sns(tds[1].text), + "level": sns(tds[2].text), "product": sns(tds[3].text), - "developer": sns(tds[4].text), + "certification_date": sns(tds[4].text), + "expiration_date": sns(tds[5].text), + "recognition": sns(tds[6].text), + "url": urljoin(constants.CC_MALAYSIA_BASE_URL, tds[7].find("a")["href"]), } + if enhanced: + e: dict[str, Any] = {} + cert_page = _get_page(cert["url"]) + for row in cert_page.find_all("div", class_="rsform-table-row"): + left = row.find("div", class_="rsform-left-col") + right = row.find("div", class_="rsform-right-col") + title = left.text + value = sns(right.text) + if "Project ID" in title: + e["cert_id"] = value + elif "Product Name and Version" in title: + e["product"] = sns(right.text) + elif "Product Sponsor / Developer" in title: + e["developer"] = value + elif "Category" in title: + e["category"] = value + elif "Product Type" in title: + e["type"] = value + elif "Scope" in title: + e["scope"] = value + elif "Product Sponsor / Developer Contact Details" in title: + e["developer_contact"] = value + elif "Assurance Level" in title: + e["assurance_level"] = value + elif "Certificate Date" in title: + e["certification_date"] = value + elif "Expiry Date" in title: + e["expiration_date"] = value + elif "Recognized By" in title: + e["mutual_recognition"] = value + elif "Reports" in title: + for a in right.find_all("a"): + if "ST" in a.text: + e["target_link"] = urljoin(constants.CC_MALAYSIA_BASE_URL, a["href"]) + if artifacts: + e["target_hash"] = _get_hash(e["target_link"]).hex() + elif "CR" in a.text: + e["report_link"] = urljoin(constants.CC_MALAYSIA_BASE_URL, a["href"]) + if artifacts: + e["report_hash"] = _get_hash(e["report_link"]).hex() + elif "Maintenance" in title: + pass + elif "Status" in title: + e["status"] = value + cert["enhanced"] = e results.append(cert) return results +def get_malaysia_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: + """ + Get Malaysian "certified product" entries. + + :param enhanced: Whether to enhance the results by following links (slower, more data). + :param artifacts: Whether to download and compute artifact hashes (way slower, even more data). + :return: The entries. + """ + return _get_malaysia(constants.CC_MALAYSIA_CERTIFIED_URL, enhanced, artifacts) + + +def get_malaysia_archived(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: + """ + Get Malaysian "archived product" entries. + + :param enhanced: Whether to enhance the results by following links (slower, more data). + :param artifacts: Whether to download and compute artifact hashes (way slower, even more data). + :return: The entries. + """ + return _get_malaysia(constants.CC_MALAYSIA_ARCHIVED_URL, enhanced, artifacts) + + def get_malaysia_in_evaluation() -> list[dict[str, Any]]: """ Get Malaysian "product in evaluation" entries. @@ -1531,7 +1587,11 @@ class CCScheme(ComplexSerializableType): EntryType.Certified: get_japan_certified, EntryType.Archived: get_japan_archived, }, - "MY": {EntryType.Certified: get_malaysia_certified, EntryType.InEvaluation: get_malaysia_in_evaluation}, + "MY": { + EntryType.Certified: get_malaysia_certified, + EntryType.Archived: get_malaysia_archived, + EntryType.InEvaluation: get_malaysia_in_evaluation, + }, "NL": {EntryType.Certified: get_netherlands_certified, EntryType.InEvaluation: get_netherlands_in_evaluation}, "NO": {EntryType.Certified: get_norway_certified, EntryType.Archived: get_norway_archived}, "KO": {EntryType.Certified: get_korea_certified, EntryType.Archived: get_korea_archived}, diff --git a/src/sec_certs/sample/cve.py b/src/sec_certs/sample/cve.py index 7f1a7cba..e1df1dbe 100644 --- a/src/sec_certs/sample/cve.py +++ b/src/sec_certs/sample/cve.py @@ -195,7 +195,7 @@ class CVE(PandasSerializableType, ComplexSerializableType): @staticmethod def parse_single_configuration( - configuration: dict[str, Any] + configuration: dict[str, Any], ) -> tuple[list[CPEMatchCriteria], CPEMatchCriteriaConfiguration | None]: if CVE.configuration_is_simple(configuration): return CVE.get_simple_criteria_from_cpe_matches(configuration["nodes"][0]["cpeMatch"]), None diff --git a/tests/cc/test_cc_schemes.py b/tests/cc/test_cc_schemes.py index 7880ca1c..984666ef 100644 --- a/tests/cc/test_cc_schemes.py +++ b/tests/cc/test_cc_schemes.py @@ -95,6 +95,9 @@ def test_malaysia(): certified = CCSchemes.get_malaysia_certified() assert len(certified) != 0 assert absolute_urls(certified) + archived = CCSchemes.get_malaysia_archived() + assert len(archived) != 0 + assert absolute_urls(archived) ineval = CCSchemes.get_malaysia_in_evaluation() assert len(ineval) != 0 assert absolute_urls(ineval) |
