Complexity of functions

author: mmstanone 2022-01-14 19:01:08 +0100
committer: mmstanone 2022-01-14 19:01:08 +0100
commit: 3190f5f85ce8a8a91084661d69fcafa768699d51 (patch)
tree: c7b83fb71a6d42670059fcccdbe78869df63b966
parent: 06ae452a8663e1d4600fa7747f611a780df0cdb4 (diff)
download: sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.tar.gz
sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.tar.zst
sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.zip
7 files changed, 239 insertions, 198 deletions
diff --git a/.flake8 b/.flake8
index fb532ca0..a8a34373 100644
--- a/.flake8
+++ b/.flake8
@@ -20,4 +20,3 @@ ignore =
     E501,  # line length, should be handleded by black
     W503,  # line break before binary operator, depracated
     E203,  # whitespace before :, not PEP8 compliant
-    C901,  # Temporary fix, remove once https://github.com/crocs-muni/sec-certs/issues/146 is solved.
diff --git a/sec_certs/dataset/fips.py b/sec_certs/dataset/fips.py
index 0fa9da01..f3c2f61e 100644
--- a/sec_certs/dataset/fips.py
+++ b/sec_certs/dataset/fips.py
@@ -390,6 +390,15 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                 and cert_id != current_cert.cert_id
             ]
 
+    @staticmethod
+    def _match_with_algorithm(processed_cert: FIPSCertificate, cert_candidate_id: str):
+        """Return False if cert_candidate_id equals the digits of any algorithm "Certificate" entry, else True."""
+        for cert_alg in processed_cert.heuristics.algorithms:
+            digit_ids = ("".join(filter(str.isdigit, certificate)) for certificate in cert_alg["Certificate"])
+            if cert_candidate_id in digit_ids:
+                return False
+        return True
+
     def _validate_id(self, processed_cert: FIPSCertificate, cert_candidate_id: str) -> bool:
         candidate_dgst = fips_dgst(cert_candidate_id)
         if candidate_dgst not in self.certs or not cert_candidate_id.isdecimal():
@@ -407,11 +416,8 @@ class FIPSDataset(Dataset, ComplexSerializableType):
         if cert_candidate_id not in self.algorithms.certs:
             return True
 
-        for cert_alg in processed_cert.heuristics.algorithms:
-            for certificate in cert_alg["Certificate"]:
-                curr_id = "".join(filter(str.isdigit, certificate))
-                if curr_id == cert_candidate_id:
-                    return False
+        if not FIPSDataset._match_with_algorithm(processed_cert, cert_candidate_id):
+            return False
 
         algs = self.algorithms.certs[cert_candidate_id]
         for current_alg in algs:
diff --git a/sec_certs/model/cpe_matching.py b/sec_certs/model/cpe_matching.py
index 610b409a..db63f712 100644
--- a/sec_certs/model/cpe_matching.py
+++ b/sec_certs/model/cpe_matching.py
@@ -180,6 +180,29 @@ class CPEClassifier(BaseEstimator):
             string = string.lower().replace(CPEClassifier._replace_special_chars_with_space(x.lower()), "").strip()
         return string
 
+    def _process_manufacturer(self, manufacturer: str, result: Set) -> Optional[List[str]]:
+        tokenized = manufacturer.split()
+        if tokenized[0] in self.vendors_:
+            result.add(tokenized[0])
+        if len(tokenized) > 1 and tokenized[0] + tokenized[1] in self.vendors_:
+            result.add(tokenized[0] + tokenized[1])
+
+        # Below are completely manual fixes
+        if "hewlett" in tokenized or "hewlett-packard" in tokenized or manufacturer == "hewlett packard":
+            result.add("hp")
+        if "thales" in tokenized:
+            result.add("thalesesecurity")
+            result.add("thalesgroup")
+        if "stmicroelectronics" in tokenized:
+            result.add("st")
+        if "athena" in tokenized and "smartcard" in tokenized:
+            result.add("athena-scs")
+        if tokenized[0] == "the" and not result:
+            candidate_result = self.get_candidate_list_of_vendors(" ".join(tokenized[1:]))
+            return list(candidate_result) if candidate_result else None
+
+        return list(result) if result else None
+
     def get_candidate_list_of_vendors(self, manufacturer: str) -> Optional[List[str]]:
         """
         Given manufacturer name, this method will find list of plausible vendors from CPE dataset that are likely related.
@@ -203,27 +226,7 @@ class CPEClassifier(BaseEstimator):
         if manufacturer in self.vendors_:
             result.add(manufacturer)
 
-        tokenized = manufacturer.split()
-        if tokenized[0] in self.vendors_:
-            result.add(tokenized[0])
-        if len(tokenized) > 1 and tokenized[0] + tokenized[1] in self.vendors_:
-            result.add(tokenized[0] + tokenized[1])
-
-        # Below are completely manual fixes
-        if "hewlett" in tokenized or "hewlett-packard" in tokenized or manufacturer == "hewlett packard":
-            result.add("hp")
-        if "thales" in tokenized:
-            result.add("thalesesecurity")
-            result.add("thalesgroup")
-        if "stmicroelectronics" in tokenized:
-            result.add("st")
-        if "athena" in tokenized and "smartcard" in tokenized:
-            result.add("athena-scs")
-        if tokenized[0] == "the" and not result:
-            candidate_result = self.get_candidate_list_of_vendors(" ".join(tokenized[1:]))
-            return list(candidate_result) if candidate_result else None
-
-        return list(result) if result else None
+        return self._process_manufacturer(manufacturer, result)
 
     def get_candidate_vendor_version_pairs(
         self, cert_candidate_cpe_vendors: List[str], cert_candidate_versions: List[str]
diff --git a/sec_certs/model/dependency_finder.py b/sec_certs/model/dependency_finder.py
index 07ccb199..acd2e09b 100644
--- a/sec_certs/model/dependency_finder.py
+++ b/sec_certs/model/dependency_finder.py
@@ -20,6 +20,24 @@ class DependencyFinder:
             referenced_by[cert_id].append(this_cert_id)
 
     @staticmethod
+    def _process_references(referenced_by: ReferencedByDirect, referenced_by_indirect: ReferencedByIndirect):
+        """Grow the sets in referenced_by_indirect in place until a full pass adds nothing (transitive closure)."""
+        new_change_detected = True
+        while new_change_detected:
+            new_change_detected = False
+            for cert_id in referenced_by:
+                # Iterate over a snapshot: the live set is extended inside the loop.
+                for referencing in referenced_by_indirect[cert_id].copy():
+                    if referencing not in referenced_by:
+                        continue
+                    newly_discovered_references = referenced_by_indirect[referencing] - referenced_by_indirect[cert_id]
+                    if newly_discovered_references:
+                        referenced_by_indirect[cert_id].update(newly_discovered_references)
+                        # Only ever raise the flag: resetting it to False on an empty result
+                        # would hide growth found earlier in this pass and stop too early.
+                        new_change_detected = True
+
+    @staticmethod
     def _build_cert_references(certificates: Certificates) -> Tuple[ReferencedByDirect, ReferencedByIndirect]:
         referenced_by: ReferencedByDirect = {}
 
@@ -43,22 +61,7 @@ class DependencyFinder:
             for item in referenced_by[cert_id]:
                 referenced_by_indirect[cert_id].add(item)
 
-        new_change_detected = True
-        while new_change_detected:
-            new_change_detected = False
-            certs_id_list = referenced_by.keys()
-
-            for cert_id in certs_id_list:
-                tmp_referenced_by_indirect_nums = referenced_by_indirect[cert_id].copy()
-                for referencing in tmp_referenced_by_indirect_nums:
-                    if referencing in referenced_by.keys():
-                        tmp_referencing = referenced_by_indirect[referencing].copy()
-                        newly_discovered_references = [
-                            x for x in tmp_referencing if x not in referenced_by_indirect[cert_id]
-                        ]
-                        referenced_by_indirect[cert_id].update(newly_discovered_references)
-                        new_change_detected = True if newly_discovered_references else False
-
+        DependencyFinder._process_references(referenced_by, referenced_by_indirect)
         return referenced_by, referenced_by_indirect
 
     @staticmethod
diff --git a/sec_certs/sample/common_criteria.py b/sec_certs/sample/common_criteria.py
index a353ff61..d8efc4fa 100644
--- a/sec_certs/sample/common_criteria.py
+++ b/sec_certs/sample/common_criteria.py
@@ -359,109 +359,120 @@ class CommonCriteriaCert(Certificate, PandasSerializableType, ComplexSerializabl
         new_dct["protection_profiles"] = set(dct["protection_profiles"])
         return super(cls, CommonCriteriaCert).from_dict(new_dct)
 
-    @classmethod
-    def from_html_row(cls, row: Tag, status: str, category: str) -> "CommonCriteriaCert":
-        """
-        Creates a CC sample from html row
-        """
+    @staticmethod
+    def _get_name(cell: Tag) -> str:
+        return list(cell.stripped_strings)[0]
 
-        def _get_name(cell: Tag) -> str:
-            return list(cell.stripped_strings)[0]
+    @staticmethod
+    def _get_manufacturer(cell: Tag) -> Optional[str]:
+        if lst := list(cell.stripped_strings):
+            return lst[0]
+        else:
+            return None
 
-        def _get_manufacturer(cell: Tag) -> Optional[str]:
-            if lst := list(cell.stripped_strings):
-                return lst[0]
-            else:
-                return None
+    @staticmethod
+    def _get_scheme(cell: Tag) -> str:
+        return list(cell.stripped_strings)[0]
 
-        def _get_scheme(cell: Tag) -> str:
-            return list(cell.stripped_strings)[0]
+    @staticmethod
+    def _get_security_level(cell: Tag) -> set:
+        return set(cell.stripped_strings)
 
-        def _get_security_level(cell: Tag) -> set:
-            return set(cell.stripped_strings)
+    @staticmethod
+    def _get_manufacturer_web(cell: Tag) -> Optional[str]:
+        for link in cell.find_all("a"):
+            if link is not None and link.get("title") == "Vendor's web site" and link.get("href") != "http://":
+                return link.get("href")
+        return None
 
-        def _get_manufacturer_web(cell: Tag) -> Optional[str]:
-            for link in cell.find_all("a"):
-                if link is not None and link.get("title") == "Vendor's web site" and link.get("href") != "http://":
-                    return link.get("href")
-            return None
+    @staticmethod
+    def _get_protection_profiles(cell: Tag) -> set:
+        protection_profiles = set()
+        for link in list(cell.find_all("a")):
+            if link.get("href") is not None and "/ppfiles/" in link.get("href"):
+                protection_profiles.add(
+                    ProtectionProfile(str(link.contents[0]), CommonCriteriaCert.cc_url + link.get("href"))
+                )
+        return protection_profiles
 
-        def _get_protection_profiles(cell: Tag) -> set:
-            protection_profiles = set()
-            for link in list(cell.find_all("a")):
-                if link.get("href") is not None and "/ppfiles/" in link.get("href"):
-                    protection_profiles.add(
-                        ProtectionProfile(str(link.contents[0]), CommonCriteriaCert.cc_url + link.get("href"))
-                    )
-            return protection_profiles
+    @staticmethod
+    def _get_date(cell: Tag) -> Optional[date]:
+        text = cell.get_text()
+        extracted_date = datetime.strptime(text, "%Y-%m-%d").date() if text else None
+        return extracted_date
 
-        def _get_date(cell: Tag) -> Optional[date]:
-            text = cell.get_text()
-            extracted_date = datetime.strptime(text, "%Y-%m-%d").date() if text else None
-            return extracted_date
+    @staticmethod
+    def _get_report_st_links(cell: Tag) -> Tuple[str, str]:
+        links = cell.find_all("a")
+        # TODO: Exception checks
+        assert links[1].get("title").startswith("Certification Report")
+        assert links[2].get("title").startswith("Security Target")
 
-        def _get_report_st_links(cell: Tag) -> Tuple[str, str]:
-            links = cell.find_all("a")
-            # TODO: Exception checks
-            assert links[1].get("title").startswith("Certification Report")
-            assert links[2].get("title").startswith("Security Target")
+        report_link = CommonCriteriaCert.cc_url + links[1].get("href")
+        security_target_link = CommonCriteriaCert.cc_url + links[2].get("href")
 
-            report_link = CommonCriteriaCert.cc_url + links[1].get("href")
-            security_target_link = CommonCriteriaCert.cc_url + links[2].get("href")
+        return report_link, security_target_link
 
-            return report_link, security_target_link
+    @staticmethod
+    def _get_cert_link(cell: Tag) -> Optional[str]:
+        links = cell.find_all("a")
+        return CommonCriteriaCert.cc_url + links[0].get("href") if links else None
 
-        def _get_cert_link(cell: Tag) -> Optional[str]:
-            links = cell.find_all("a")
-            return CommonCriteriaCert.cc_url + links[0].get("href") if links else None
+    @staticmethod
+    def _get_maintenance_div(cell: Tag) -> Optional[Tag]:
+        divs = cell.find_all("div")
+        for d in divs:
+            if d.find("div") and d.stripped_strings and list(d.stripped_strings)[0] == "Maintenance Report(s)":
+                return d
+        return None
 
-        def _get_maintenance_div(cell: Tag) -> Optional[Tag]:
-            divs = cell.find_all("div")
-            for d in divs:
-                if d.find("div") and d.stripped_strings and list(d.stripped_strings)[0] == "Maintenance Report(s)":
-                    return d
-            return None
+    @staticmethod
+    def _get_maintenance_updates(main_div: Tag) -> set:
+        possible_updates = list(main_div.find_all("li"))
+        maintenance_updates = set()
+        for u in possible_updates:
+            text = list(u.stripped_strings)[0]
+            main_date = datetime.strptime(text.split(" ")[0], "%Y-%m-%d").date() if text else None
+            main_title = text.split("– ")[1]
+            main_report_link = None
+            main_st_link = None
+            links = u.find_all("a")
+            for link in links:
+                if link.get("title").startswith("Maintenance Report:"):
+                    main_report_link = CommonCriteriaCert.cc_url + link.get("href")
+                elif link.get("title").startswith("Maintenance ST"):
+                    main_st_link = CommonCriteriaCert.cc_url + link.get("href")
+                else:
+                    logger.error("Unknown link in Maintenance part!")
+            maintenance_updates.add(
+                CommonCriteriaCert.MaintenanceReport(main_date, main_title, main_report_link, main_st_link)
+            )
+        return maintenance_updates
 
-        def _get_maintenance_updates(main_div: Tag) -> set:
-            possible_updates = list(main_div.find_all("li"))
-            maintenance_updates = set()
-            for u in possible_updates:
-                text = list(u.stripped_strings)[0]
-                main_date = datetime.strptime(text.split(" ")[0], "%Y-%m-%d").date() if text else None
-                main_title = text.split("– ")[1]
-                main_report_link = None
-                main_st_link = None
-                links = u.find_all("a")
-                for link in links:
-                    if link.get("title").startswith("Maintenance Report:"):
-                        main_report_link = CommonCriteriaCert.cc_url + link.get("href")
-                    elif link.get("title").startswith("Maintenance ST"):
-                        main_st_link = CommonCriteriaCert.cc_url + link.get("href")
-                    else:
-                        logger.error("Unknown link in Maintenance part!")
-                maintenance_updates.add(
-                    CommonCriteriaCert.MaintenanceReport(main_date, main_title, main_report_link, main_st_link)
-                )
-            return maintenance_updates
+    @classmethod
+    def from_html_row(cls, row: Tag, status: str, category: str) -> "CommonCriteriaCert":
+        """
+        Creates a CC sample from html row
+        """
 
         cells = list(row.find_all("td"))
         if len(cells) != 7:
             logger.error("Unexpected number of cells in CC html row.")
             raise
 
-        name = _get_name(cells[0])
-        manufacturer = _get_manufacturer(cells[1])
-        manufacturer_web = _get_manufacturer_web(cells[1])
-        scheme = _get_scheme(cells[6])
-        security_level = _get_security_level(cells[5])
-        protection_profiles = _get_protection_profiles(cells[0])
-        not_valid_before = _get_date(cells[3])
-        not_valid_after = _get_date(cells[4])
-        report_link, st_link = _get_report_st_links(cells[0])
-        cert_link = _get_cert_link(cells[2])
+        name = CommonCriteriaCert._get_name(cells[0])
+        manufacturer = CommonCriteriaCert._get_manufacturer(cells[1])
+        manufacturer_web = CommonCriteriaCert._get_manufacturer_web(cells[1])
+        scheme = CommonCriteriaCert._get_scheme(cells[6])
+        security_level = CommonCriteriaCert._get_security_level(cells[5])
+        protection_profiles = CommonCriteriaCert._get_protection_profiles(cells[0])
+        not_valid_before = CommonCriteriaCert._get_date(cells[3])
+        not_valid_after = CommonCriteriaCert._get_date(cells[4])
+        report_link, st_link = CommonCriteriaCert._get_report_st_links(cells[0])
+        cert_link = CommonCriteriaCert._get_cert_link(cells[2])
 
-        maintenance_div = _get_maintenance_div(cells[0])
-        maintenances = _get_maintenance_updates(maintenance_div) if maintenance_div else set()
+        maintenance_div = CommonCriteriaCert._get_maintenance_div(cells[0])
+        maintenances = CommonCriteriaCert._get_maintenance_updates(maintenance_div) if maintenance_div else set()
 
         return cls(
             status,
diff --git a/sec_certs/sample/cve.py b/sec_certs/sample/cve.py
index 4607ec04..99c9b2af 100644
--- a/sec_certs/sample/cve.py
+++ b/sec_certs/sample/cve.py
@@ -105,6 +105,29 @@ class CVE(PandasSerializableType, ComplexSerializableType):
             "published_date": self.published_date.isoformat(),
         }
 
+    @staticmethod
+    def _parse_nist_dict(lst: List, cpe_uris: List):
+        """Append to cpe_uris one cached CPE, with optional version bounds, per vulnerable entry of lst."""
+
+        def bound(entry: Dict, including_key: str, excluding_key: str) -> Optional[Tuple[str, str]]:
+            # the including key is checked first; a missing or empty value counts as absent
+            for kind, key in (("including", including_key), ("excluding", excluding_key)):
+                value = entry.get(key)
+                if value:
+                    return kind, value
+            return None
+
+        for x in lst:
+            if not x["vulnerable"]:
+                continue
+            cpe_uris.append(
+                cached_cpe(
+                    x["cpe23Uri"],
+                    start_version=bound(x, "versionStartIncluding", "versionStartExcluding"),
+                    end_version=bound(x, "versionEndIncluding", "versionEndExcluding"),
+                )
+            )
+
     @classmethod
     def from_nist_dict(cls, dct: Dict) -> "CVE":
         """
@@ -117,28 +140,12 @@ class CVE(PandasSerializableType, ComplexSerializableType):
                 if "children" in node:
                     for child in node["children"]:
                         cpe_uris += get_vulnerable_cpes_from_node(child)
-                if "cpe_match" in node:
-                    lst = node["cpe_match"]
-                    for x in lst:
-                        if x["vulnerable"]:
-                            cpe_uri = x["cpe23Uri"]
-                            version_start: Optional[Tuple[str, str]]
-                            version_end: Optional[Tuple[str, str]]
-                            if "versionStartIncluding" in x and x["versionStartIncluding"]:
-                                version_start = ("including", x["versionStartIncluding"])
-                            elif "versionStartExcluding" in x and x["versionStartExcluding"]:
-                                version_start = ("excluding", x["versionStartExcluding"])
-                            else:
-                                version_start = None
 
-                            if "versionEndIncluding" in x and x["versionEndIncluding"]:
-                                version_end = ("including", x["versionEndIncluding"])
-                            elif "versionEndExcluding" in x and x["versionEndExcluding"]:
-                                version_end = ("excluding", x["versionEndExcluding"])
-                            else:
-                                version_end = None
+                if "cpe_match" not in node:
+                    return cpe_uris
 
-                            cpe_uris.append(cached_cpe(cpe_uri, start_version=version_start, end_version=version_end))
+                lst = node["cpe_match"]
+                CVE._parse_nist_dict(lst, cpe_uris)
 
                 return cpe_uris
 
diff --git a/sec_certs/sample/fips.py b/sec_certs/sample/fips.py
index 3deee713..35904157 100644
--- a/sec_certs/sample/fips.py
+++ b/sec_certs/sample/fips.py
@@ -3,7 +3,7 @@ import re
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import ClassVar, Dict, List, Optional, Pattern, Set, Tuple, Union
+from typing import ClassVar, Dict, List, Match, Optional, Pattern, Set, Tuple, Union
 
 import requests
 from bs4 import BeautifulSoup, NavigableString, Tag
@@ -613,6 +613,49 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         return text_to_parse
 
     @staticmethod
+    def _highlight_matches(items_found_all: Dict, whole_text_with_newlines: str):
+        """Return the text with every found match overwritten by an equally long run of "x"."""
+        # Collect the matches of ALL rule groups first, so the length ordering below is global.
+        all_matches = [
+            match
+            for items_found in items_found_all.values()
+            for rule_matches in items_found.values()
+            for match in rule_matches
+        ]
+        # if AES string is removed before AES-128, -128 would be left in text => sort by length first
+        all_matches.sort(key=len, reverse=True)
+        for match in all_matches:
+            whole_text_with_newlines = whole_text_with_newlines.replace(match, "x" * len(match))
+
+        return whole_text_with_newlines
+
+    @staticmethod
+    def _process_match(rule: Pattern, items_found: Dict, rule_str: str, m: Match[str]):
+        """Record one regex match `m` of `rule` in `items_found`, under the key `rule_str`."""
+        # insert rule if at least one match for it was found; the bucket is keyed by rule_str,
+        # so testing `rule` here would recreate it (dropping earlier matches) when they differ
+        if rule_str not in items_found:
+            items_found[rule_str] = {}
+
+        match = normalize_match_string(m.group())
+
+        MAX_ALLOWED_MATCH_LENGTH = 300
+        match_len = len(match)
+        if match_len > MAX_ALLOWED_MATCH_LENGTH:
+            logger.warning("Excessive match with length of {} detected for rule {}".format(match_len, rule))
+
+        if match not in items_found[rule_str]:
+            items_found[rule_str][match] = {}
+            items_found[rule_str][match][constants.TAG_MATCH_COUNTER] = 0
+            if constants.APPEND_DETAILED_MATCH_MATCHES:
+                items_found[rule_str][match][constants.TAG_MATCH_MATCHES] = []
+
+        items_found[rule_str][match][constants.TAG_MATCH_COUNTER] += 1
+        if constants.APPEND_DETAILED_MATCH_MATCHES:
+            match_span = m.span()
+            items_found[rule_str][match][constants.TAG_MATCH_MATCHES].append([match_span[0], match_span[1]])
+
+    @staticmethod
     def parse_cert_file_common(
         text_to_parse: str, whole_text_with_newlines: str, search_rules: Dict
     ) -> Tuple[Dict[Pattern, Dict], str]:
@@ -634,48 +677,11 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
                     rule_and_sep = rule + REGEXEC_SEP
 
                 for m in re.finditer(rule_and_sep, text_to_parse):
-                    # insert rule if at least one match for it was found
-                    if rule not in items_found:
-                        items_found[rule_str] = {}
-
-                    match = m.group()
-                    match = normalize_match_string(match)
-
-                    MAX_ALLOWED_MATCH_LENGTH = 300
-                    match_len = len(match)
-                    if match_len > MAX_ALLOWED_MATCH_LENGTH:
-                        logger.warning("Excessive match with length of {} detected for rule {}".format(match_len, rule))
-
-                    if match not in items_found[rule_str]:
-                        items_found[rule_str][match] = {}
-                        items_found[rule_str][match][constants.TAG_MATCH_COUNTER] = 0
-                        if constants.APPEND_DETAILED_MATCH_MATCHES:
-                            items_found[rule_str][match][constants.TAG_MATCH_MATCHES] = []
-                        # else:
-                        #     items_found[rule_str][match][TAG_MATCH_MATCHES] = ['List of matches positions disabled. Set APPEND_DETAILED_MATCH_MATCHES to True']
-
-                    items_found[rule_str][match][constants.TAG_MATCH_COUNTER] += 1
-                    match_span = m.span()
-                    # estimate line in original text file
-                    # line_number = get_line_number(lines, line_length_compensation, match_span[0])
-                    # start index, end index, line number
-                    # items_found[rule_str][match][TAG_MATCH_MATCHES].append([match_span[0], match_span[1], line_number])
-                    if constants.APPEND_DETAILED_MATCH_MATCHES:
-                        items_found[rule_str][match][constants.TAG_MATCH_MATCHES].append([match_span[0], match_span[1]])
+                    FIPSCertificate._process_match(rule, items_found, rule_str, m)
 
         # highlight all found strings (by xxxxx) from the input text and store the rest
-        all_matches = []
-        for rule_group in items_found_all.keys():
-            items_found = items_found_all[rule_group]
-            for rule in items_found.keys():
-                for match in items_found[rule]:
-                    all_matches.append(match)
 
-            # if AES string is removed before AES-128, -128 would be left in text => sort by length first
-            # sort before replacement based on the length of match
-            all_matches.sort(key=len, reverse=True)
-            for match in all_matches:
-                whole_text_with_newlines = whole_text_with_newlines.replace(match, "x" * len(match))
+        whole_text_with_newlines = FIPSCertificate._highlight_matches(items_found_all, whole_text_with_newlines)
 
         return items_found_all, whole_text_with_newlines
 
@@ -764,6 +770,23 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
             result.update(cert for cert in alg["Certificate"])
         return result
 
+    def _process_to_pop(self, reg_to_match: Pattern, cert: str, to_pop: Set):
+        for alg in self.heuristics.keywords["rules_fips_algorithms"]:
+            for found in self.heuristics.keywords["rules_fips_algorithms"][alg]:
+                match_in_found = reg_to_match.search(found)
+                match_in_cert = reg_to_match.search(cert)
+                if (
+                    match_in_found is not None
+                    and match_in_cert is not None
+                    and match_in_found.group("id") == match_in_cert.group("id")
+                ):
+                    to_pop.add(cert)
+
+        for alg_cert in self.heuristics.algorithms:
+            for cert_no in alg_cert["Certificate"]:
+                if int("".join(filter(str.isdigit, cert_no))) == int("".join(filter(str.isdigit, cert))):
+                    to_pop.add(cert)
+
     def remove_algorithms(self):
         self.state.file_status = True
         if not self.pdf_scan.keywords:
@@ -784,19 +807,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
                 if cert in alg_set:
                     to_pop.add(cert)
                     continue
-                for alg in self.heuristics.keywords["rules_fips_algorithms"]:
-                    for found in self.heuristics.keywords["rules_fips_algorithms"][alg]:
-                        if (
-                            rr.search(found)
-                            and rr.search(cert)
-                            and rr.search(found).group("id") == rr.search(cert).group("id")
-                        ):
-                            to_pop.add(cert)
+                self._process_to_pop(rr, cert, to_pop)
 
-                for alg_cert in self.heuristics.algorithms:
-                    for cert_no in alg_cert["Certificate"]:
-                        if int("".join(filter(str.isdigit, cert_no))) == int("".join(filter(str.isdigit, cert))):
-                            to_pop.add(cert)
             for r in to_pop:
                 self.heuristics.keywords["rules_cert_id"][rule].pop(r, None)
author	mmstanone	2022-01-14 19:01:08 +0100
committer	mmstanone	2022-01-14 19:01:08 +0100
commit	3190f5f85ce8a8a91084661d69fcafa768699d51 (patch)
tree	c7b83fb71a6d42670059fcccdbe78869df63b966
parent	06ae452a8663e1d4600fa7747f611a780df0cdb4 (diff)
download	sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.tar.gz sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.tar.zst sec-certs-3190f5f85ce8a8a91084661d69fcafa768699d51.zip