diff options
| author | J08nY | 2023-04-14 16:31:41 +0200 |
|---|---|---|
| committer | J08nY | 2023-04-14 16:31:41 +0200 |
| commit | addafd675b42cebc8d0eefdffeb97b5d810d612d (patch) | |
| tree | 54f572ebfea85ab46fcec14a15479de0f288d63f /src | |
| parent | 0d37077da92516fe028513f235fd151f34b239f5 (diff) | |
| download | sec-certs-addafd675b42cebc8d0eefdffeb97b5d810d612d.tar.gz sec-certs-addafd675b42cebc8d0eefdffeb97b5d810d612d.tar.zst sec-certs-addafd675b42cebc8d0eefdffeb97b5d810d612d.zip | |
Add US scheme to enhanced download.
Diffstat (limited to 'src')
| -rw-r--r-- | src/sec_certs/constants.py | 2 | ||||
| -rw-r--r-- | src/sec_certs/dataset/cc_scheme.py | 168 |
2 files changed, 117 insertions, 53 deletions
diff --git a/src/sec_certs/constants.py b/src/sec_certs/constants.py index dcbf33a8..8f3ab05b 100644 --- a/src/sec_certs/constants.py +++ b/src/sec_certs/constants.py @@ -129,7 +129,7 @@ CC_SWEDEN_INEVAL_URL = CC_SWEDEN_BASE_URL + "/verksamhet/ovrig-verksamhet/csec/p CC_SWEDEN_ARCHIVED_URL = CC_SWEDEN_BASE_URL + "/verksamhet/ovrig-verksamhet/csec/arkiverade-certifikat-aldre-an-5-ar/" CC_TURKEY_ARCHIVED_URL = "https://statik.tse.org.tr/upload/tr/dosya/icerikyonetimi/3300/03112021143434-2.pdf" CC_USA_BASE_URL = "https://www.niap-ccevs.org" -CC_USA_PRODUCT_URL = CC_USA_BASE_URL + "/Product" +CC_USA_PRODUCT_URL = CC_USA_BASE_URL + "/Product/" CC_USA_CERTIFIED_URL = CC_USA_BASE_URL + "/Product/PCL.cfm" CC_USA_INEVAL_URL = CC_USA_BASE_URL + "/Product/PINE.cfm" CC_USA_ARCHIVED_URL = CC_USA_BASE_URL + "/Product/Archived.cfm" diff --git a/src/sec_certs/dataset/cc_scheme.py b/src/sec_certs/dataset/cc_scheme.py index 510aefc2..d2b68927 100644 --- a/src/sec_certs/dataset/cc_scheme.py +++ b/src/sec_certs/dataset/cc_scheme.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib import tempfile +import warnings from pathlib import Path from typing import Any from urllib.parse import urljoin @@ -12,6 +13,7 @@ import requests import tabula from bs4 import BeautifulSoup, NavigableString, Tag from requests import Response +from urllib3.connectionpool import InsecureRequestWarning from sec_certs import constants from sec_certs.utils.sanitization import sanitize_navigable_string as sns @@ -29,8 +31,10 @@ class CCSchemeDataset: @staticmethod def _get(url: str, session, **kwargs) -> Response: - conn = session if session else requests - resp = conn.get(url, headers={"User-Agent": "seccerts.org"}, verify=False, **kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=InsecureRequestWarning) + conn = session if session else requests + resp = conn.get(url, headers={"User-Agent": "seccerts.org"}, verify=False, **kwargs) resp.raise_for_status() return resp @@ -47,7 +51,7 @@ class CCSchemeDataset: return h.digest() @staticmethod - def get_australia_in_evaluation(enhanced: bool = True): # noqa: C901 + def get_australia_in_evaluation(enhanced: bool = True) -> list[dict[str, Any]]: # noqa: C901 """ Get Australia "products in evaluation" entries. @@ -111,7 +115,7 @@ class CCSchemeDataset: return results @staticmethod - def get_canada_certified(): + def get_canada_certified() -> list[dict[str, Any]]: """ Get Canada "certified product" entries. @@ -134,7 +138,7 @@ class CCSchemeDataset: return results @staticmethod - def get_canada_in_evaluation(): + def get_canada_in_evaluation() -> list[dict[str, Any]]: """ Get Canada "products in evaluation" entries. @@ -157,7 +161,7 @@ class CCSchemeDataset: return results @staticmethod - def get_france_certified(enhanced: bool = True, artifacts: bool = False): # noqa: C901 + def get_france_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: # noqa: C901 """ Get French "certified product" entries. @@ -250,7 +254,7 @@ class CCSchemeDataset: return results @staticmethod - def get_germany_certified(enhanced: bool = True, artifacts: bool = False): # noqa: C901 + def get_germany_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: # noqa: C901 """ Get German "certified product" entries. @@ -353,11 +357,11 @@ class CCSchemeDataset: return results @staticmethod - def _fix_india_link(link): + def _fix_india_link(link: str) -> str: return link.replace("/index.php", "") @staticmethod - def get_india_certified(): + def get_india_certified() -> list[dict[str, Any]]: """ Get Indian "certified product" entries. @@ -413,7 +417,7 @@ class CCSchemeDataset: return results @staticmethod - def get_india_archived(): + def get_india_archived() -> list[dict[str, Any]]: """ Get Indian "archived product" entries. @@ -471,7 +475,7 @@ class CCSchemeDataset: return results @staticmethod - def get_italy_certified(): # noqa: C901 + def get_italy_certified() -> list[dict[str, Any]]: # noqa: C901 """ Get Italian "certified product" entries. @@ -486,7 +490,7 @@ class CCSchemeDataset: cert = {"title": title} for data_p in data_div.find_all("p"): p_text = sns(data_p.text) - if ":" not in p_text: + if not p_text or ":" not in p_text: continue p_name, p_data = p_text.split(":") p_data = p_data @@ -513,7 +517,7 @@ class CCSchemeDataset: return results @staticmethod - def get_italy_in_evaluation(): + def get_italy_in_evaluation() -> list[dict[str, Any]]: """ Get Italian "product in evaluation" entries. @@ -528,7 +532,7 @@ class CCSchemeDataset: cert = {"title": title} for data_p in data_div.find_all("p"): p_text = sns(data_p.text) - if ":" not in p_text: + if not p_text or ":" not in p_text: continue p_name, p_data = p_text.split(":") p_data = p_data @@ -542,8 +546,7 @@ class CCSchemeDataset: return results @staticmethod - def _get_japan(url, enhanced, artifacts): # noqa: C901 - # TODO: Information could be expanded by following toe link. + def _get_japan(url, enhanced, artifacts) -> list[dict[str, Any]]: # noqa: C901 soup = CCSchemeDataset._get_page(url) table = soup.find("table", class_="cert-table") results = [] @@ -553,7 +556,7 @@ class CCSchemeDataset: if not tds: continue if len(tds) == 6: - cert = { + cert: dict[str, Any] = { "cert_id": sns(tds[0].text), "supplier": sns(tds[1].text), "toe_overseas_name": sns(tds[2].text), @@ -565,7 +568,7 @@ class CCSchemeDataset: toe_link = urljoin(constants.CC_JAPAN_CERT_BASE_URL, toe_a["href"]) else: toe_link = None - if "Assurance Continuity" in cert_date: + if cert_date and "Assurance Continuity" in cert_date: cert["revalidations"] = [{"date": cert_date.split("(")[0], "link": toe_link}] else: cert["certification_date"] = cert_date @@ -651,7 +654,7 @@ class CCSchemeDataset: return results @staticmethod - def get_japan_certified(enhanced: bool = True, artifacts: bool = False): + def get_japan_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Japanese "certified product" entries. @@ -664,7 +667,7 @@ class CCSchemeDataset: return japan_sw + japan_hw @staticmethod - def get_japan_archived(enhanced: bool = True, artifacts: bool = False): + def get_japan_archived(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Japanese "archived product" entries. @@ -675,7 +678,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_japan(constants.CC_JAPAN_ARCHIVED_SW_URL, enhanced, artifacts) @staticmethod - def get_japan_in_evaluation(): + def get_japan_in_evaluation() -> list[dict[str, Any]]: """ Get Japanese "product in evaluation" entries. @@ -699,7 +702,7 @@ class CCSchemeDataset: return results @staticmethod - def get_malaysia_certified(): + def get_malaysia_certified() -> list[dict[str, Any]]: """ Get Malaysian "certified product" entries. @@ -733,7 +736,7 @@ class CCSchemeDataset: return results @staticmethod - def get_malaysia_in_evaluation(): + def get_malaysia_in_evaluation() -> list[dict[str, Any]]: """ Get Malaysian "product in evaluation" entries. @@ -758,7 +761,7 @@ class CCSchemeDataset: return results @staticmethod - def get_netherlands_certified(artifacts: bool = False): # noqa: C901 + def get_netherlands_certified(artifacts: bool = False) -> list[dict[str, Any]]: # noqa: C901 """ Get Dutch "certified product" entries. @@ -806,7 +809,7 @@ class CCSchemeDataset: return results @staticmethod - def get_netherlands_in_evaluation(): + def get_netherlands_in_evaluation() -> list[dict[str, Any]]: """ Get Dutch "product in evaluation" entries. @@ -828,7 +831,7 @@ class CCSchemeDataset: return results @staticmethod - def _get_norway(url: str, enhanced: bool, artifacts: bool): # noqa: C901 + def _get_norway(url: str, enhanced: bool, artifacts: bool) -> list[dict[str, Any]]: # noqa: C901 soup = CCSchemeDataset._get_page(url) results = [] for tr in soup.find_all("tr", class_="certified-product"): @@ -903,7 +906,7 @@ class CCSchemeDataset: return results @staticmethod - def get_norway_certified(enhanced: bool = True, artifacts: bool = False): + def get_norway_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Norwegian "certified product" entries. @@ -914,7 +917,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_norway(constants.CC_NORWAY_CERTIFIED_URL, enhanced, artifacts) @staticmethod - def get_norway_archived(enhanced: bool = True, artifacts: bool = False): + def get_norway_archived(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Norwegian "archived product" entries. @@ -925,7 +928,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_norway(constants.CC_NORWAY_ARCHIVED_URL, enhanced, artifacts) @staticmethod - def _get_korea(product_class, enhanced, artifacts): # noqa: C901 + def _get_korea(product_class: int, enhanced: bool, artifacts: bool) -> list[dict[str, Any]]: # noqa: C901 session = requests.session() session.get(constants.CC_KOREA_EN_URL) # Get base page @@ -946,7 +949,7 @@ class CCSchemeDataset: continue link = tds[0].find("a") id = link["id"].split("-")[1] - cert = { + cert: dict[str, Any] = { "product": sns(tds[0].text), "cert_id": sns(tds[1].text), "product_link": constants.CC_KOREA_PRODUCT_URL.format(id), @@ -957,6 +960,8 @@ class CCSchemeDataset: } if enhanced: e: dict[str, Any] = {} + if not cert["product_link"]: + continue cert_page = CCSchemeDataset._get_page(cert["product_link"], session) main = cert_page.find("div", class_="mainContent") table = main.find("table", class_="shortenedWidth") @@ -1024,7 +1029,7 @@ class CCSchemeDataset: return results @staticmethod - def get_korea_certified(enhanced: bool = True, artifacts: bool = False): + def get_korea_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Korean "certified product" entries. @@ -1035,7 +1040,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_korea(product_class=1, enhanced=enhanced, artifacts=artifacts) @staticmethod - def get_korea_suspended(enhanced: bool = True, artifacts: bool = False): + def get_korea_suspended(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Korean "suspended product" entries. @@ -1046,7 +1051,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_korea(product_class=2, enhanced=enhanced, artifacts=artifacts) @staticmethod - def get_korea_archived(enhanced: bool = True, artifacts: bool = False): + def get_korea_archived(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Korean "product in evaluation" entries. @@ -1057,7 +1062,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_korea(product_class=4, enhanced=enhanced, artifacts=artifacts) @staticmethod - def _get_singapore(url, artifacts): + def _get_singapore(url: str, artifacts: bool) -> list[dict[str, Any]]: soup = CCSchemeDataset._get_page(url) page_id = str(soup.find("input", id="CurrentPageId").value) page = 1 @@ -1073,10 +1078,10 @@ class CCSchemeDataset: ) api_json = api_call.json() total = api_json["total"] - results = [] + results: list[dict[str, Any]] = [] while len(results) != total: for obj in api_json["objects"]: - cert = { + cert: dict[str, Any] = { "level": obj["assuranceLevel"], "product": obj["productName"], "vendor": obj["productDeveloper"], @@ -1111,7 +1116,7 @@ class CCSchemeDataset: return results @staticmethod - def get_singapore_certified(artifacts: bool = False): + def get_singapore_certified(artifacts: bool = False) -> list[dict[str, Any]]: """ Get Singaporean "certified product" entries. @@ -1121,7 +1126,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_singapore(constants.CC_SINGAPORE_CERTIFIED_URL, artifacts) @staticmethod - def get_singapore_in_evaluation(): + def get_singapore_in_evaluation() -> list[dict[str, Any]]: """ Get Singaporean "product in evaluation" entries. @@ -1147,7 +1152,7 @@ class CCSchemeDataset: return results @staticmethod - def get_singapore_archived(artifacts: bool = False): + def get_singapore_archived(artifacts: bool = False) -> list[dict[str, Any]]: """ Get Singaporean "archived product" entries. @@ -1157,7 +1162,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_singapore(constants.CC_SINGAPORE_ARCHIVED_URL, artifacts) @staticmethod - def get_spain_certified(): + def get_spain_certified() -> list[dict[str, Any]]: """ Get Spanish "certified product" entries. @@ -1179,14 +1184,19 @@ class CCSchemeDataset: return results @staticmethod - def _get_sweden(url, enhanced, artifacts): # noqa: C901 + def _get_sweden(url: str, enhanced: bool, artifacts: bool) -> list[dict[str, Any]]: # noqa: C901 soup = CCSchemeDataset._get_page(url) nav = soup.find("main").find("nav", class_="component-nav-box__list") results = [] for link in nav.find_all("a"): - cert = {"product": sns(link.text), "url": urljoin(constants.CC_SWEDEN_BASE_URL, link["href"])} + cert: dict[str, Any] = { + "product": sns(link.text), + "url": urljoin(constants.CC_SWEDEN_BASE_URL, link["href"]), + } if enhanced: - e = {} + e: dict[str, Any] = {} + if not cert["url"]: + continue cert_page = CCSchemeDataset._get_page(cert["url"]) content = cert_page.find("section", class_="container-article") head = content.find("h1") @@ -1237,7 +1247,7 @@ class CCSchemeDataset: return results @staticmethod - def get_sweden_certified(enhanced: bool = True, artifacts: bool = False): + def get_sweden_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Swedish "certified product" entries. @@ -1248,7 +1258,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_sweden(constants.CC_SWEDEN_CERTIFIED_URL, enhanced, artifacts) @staticmethod - def get_sweden_in_evaluation(enhanced: bool = True, artifacts: bool = False): + def get_sweden_in_evaluation(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Swedish "product in evaluation" entries. @@ -1259,7 +1269,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_sweden(constants.CC_SWEDEN_INEVAL_URL, enhanced, artifacts) @staticmethod - def get_sweden_archived(enhanced: bool = True, artifacts: bool = False): + def get_sweden_archived(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: """ Get Swedish "archived product" entries. @@ -1270,7 +1280,7 @@ class CCSchemeDataset: return CCSchemeDataset._get_sweden(constants.CC_SWEDEN_ARCHIVED_URL, enhanced, artifacts) @staticmethod - def get_turkey_certified(): + def get_turkey_certified() -> list[dict[str, Any]]: """ Get Turkish "certified product" entries. @@ -1304,13 +1314,14 @@ class CCSchemeDataset: return results @staticmethod - def get_usa_certified(): + def get_usa_certified(enhanced: bool = True, artifacts: bool = False) -> list[dict[str, Any]]: # noqa: C901 """ Get American "certified product" entries. + :param enhanced: Whether to enhance the results by following links (slower, more data). + :param artifacts: Whether to download and compute artifact hashes (way slower, even more data). :return: The entries. """ - # TODO: Information could be expanded by following product link. # TODO: Information could be expanded by following the cc_claims (has links to protection profiles). soup = CCSchemeDataset._get_page(constants.CC_USA_CERTIFIED_URL) tbody = soup.find("table", class_="tablesorter").find("tbody") @@ -1323,7 +1334,7 @@ class CCSchemeDataset: # Only return the US certifications. if scheme_img["title"] != "USA": continue - cert = { + cert: dict[str, Any] = { "product": sns(product_link.text), "vendor": sns(vendor_span.text), "product_link": urljoin(constants.CC_USA_PRODUCT_URL, product_link["href"]), @@ -1333,11 +1344,64 @@ class CCSchemeDataset: "certification_date": sns(tds[4].text), "assurance_maintenance_date": sns(tds[5].text), } + if enhanced: + e: dict[str, Any] = {} + if not cert["product_link"]: + continue + cert_page = CCSchemeDataset._get_page(cert["product_link"]) + details = cert_page.find("div", class_="txt2 lma") + for span in details.find_all("span"): + title = sns(span.text) + if not title: + continue + sibling = span.next_sibling + value = sns(sibling.text) + if "Certificate Date" in title: + e["certification_date"] = value + elif "Product Type" in title: + e["product_type"] = value + elif "Conformance Claim" in title: + e["cc_claim"] = value + elif "Validation Report Number" in title: + e["cert_id"] = value + elif "PP Identifier" in title: + e["protection_profile"] = sns(span.find_next_sibling("a").text) + elif "CC Testing Lab" in title: + e["evaluation_facility"] = sns(span.find_next_sibling("a").text) + links = cert_page.find_all("a", class_="pseudobtn1") + for link in links: + name = sns(link.text) + href = urljoin(constants.CC_USA_BASE_URL, sns(link["href"])) + if not name: + continue + if "CC Certificate" in name: + e["cert_link"] = href + if artifacts: + e["cert_hash"] = CCSchemeDataset._get_hash(href).hex() + elif "Security Target" in name: + e["target_link"] = href + if artifacts: + e["target_hash"] = CCSchemeDataset._get_hash(href).hex() + elif "Validation Report" in name: + e["report_link"] = href + if artifacts: + e["report_hash"] = CCSchemeDataset._get_hash(href).hex() + elif "Assurance Activity" in name: + e["assurance_activity_link"] = href + if artifacts: + e["assurance_activity_hash"] = CCSchemeDataset._get_hash(href).hex() + elif "Administrative Guide" in name: + guides = e.setdefault("administrative_guides", []) + guide = {"link": href} + guides.append(guide) + if artifacts: + guide["hash"] = CCSchemeDataset._get_hash(href).hex() + cert["enhanced"] = e results.append(cert) return results @staticmethod - def get_usa_in_evaluation(): + def get_usa_in_evaluation() -> list[dict[str, Any]]: """ Get American "product in evaluation" entries. @@ -1368,7 +1432,7 @@ class CCSchemeDataset: return results @staticmethod - def get_usa_archived(): + def get_usa_archived() -> list[dict[str, Any]]: """ Get American "archived product" entries. |
