Merge pull request #75 from crocs-muni/fips_pr

Changes in CAVP webpages parsing, fixed bugs, tests, and everything from the last PR
author: adamjanovsky 2021-05-14 16:27:35 +0200
committer: GitHub 2021-05-14 16:27:35 +0200
commit: cfab313d013b530c5ceed5b29877be71f74da7e8 (patch)
tree: 4dda7878a195428061d4e366296f52476e903134
parent: e3c002a63725e9e79ce81a09a7c7055c61ba5010 (diff)
parent: 92f49eb5a0b92be60be4ab3a662fcd6487865052 (diff)
download: sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.gz
sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.zst
sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.zip
12 files changed, 1146 insertions, 291 deletions
diff --git a/examples/fips_oop_demo.py b/examples/fips_oop_demo.py
index 7015574e..dd47f74f 100644
--- a/examples/fips_oop_demo.py
+++ b/examples/fips_oop_demo.py
@@ -2,71 +2,64 @@ from pathlib import Path
 from datetime import datetime
 import logging
 import click
+
 from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset
 from sec_certs.configuration import config
-from sec_certs.helpers import analyze_matched_algs
+
 
 @click.command()
 @click.option('--config-file', help='Path to config file')
 @click.option('--json-file', help='Path to dataset json file')
-@click.option('--no-download-algs', help='don\'t download algs', is_flag=True)
-def main(config_file, json_file, no_download_algs):
+@click.option('--no-download-algs', help='Don\'t download algs', is_flag=True)
+@click.option('--redo-web-scan', help='Redo scan of html files', is_flag=True)
+@click.option('--redo-keyword-scan', help='Redo scan of PDF files', is_flag=True)
+@click.option('--higher-precision-results',
+              help='Redo table search for certificates with high error rate. Behaviour undefined if used on a newly instantiated dataset.',
+              is_flag=True)
+def main(config_file, json_file, no_download_algs, redo_web_scan, redo_keyword_scan, higher_precision_results):
     logging.basicConfig(level=logging.INFO)
     start = datetime.now()
 
     # Load config
-    config.load(config_file if config_file else 'sec_certs/settings.yaml')
+    config.load(config_file if config_file else '../sec_certs/settings.yaml')
 
     # Create empty dataset
-    dset = FIPSDataset({}, Path('./fips_dataset'), 'sample_dataset', 'sample dataset description')
+    dset = FIPSDataset({}, Path('../fips_dataset'), 'sample_dataset', 'sample dataset description')
 
     # this is for creating test dataset, usually with small number of pdfs
     # dset = FIPSDataset({}, Path('./fips_test_dataset'), 'small dataset', 'small dataset for keyword testing')
 
     # Load metadata for certificates from CSV and HTML sources
-    dset.get_certs_from_web(json_file=json_file, redo=True)
+    dset.get_certs_from_web(json_file=json_file, redo=redo_web_scan)
 
     logging.info(f'Finished parsing. Have dataset with {len(dset)} certificates.')
-    # Dump dataset into JSON
-    dset.to_json(dset.root_dir / 'fips_full_dataset.json')
     logging.info(f'Dataset saved to {dset.root_dir}/fips_full_dataset.json')
 
     logging.info("Converting pdfs")
     dset.convert_all_pdfs()
-    dset.to_json(dset.root_dir / 'fips_full_dataset.json')
 
     logging.info("Extracting keywords now.")
-    dset.extract_keywords(redo=True)
+    dset.extract_keywords(redo=redo_keyword_scan)
 
     logging.info(f'Finished extracting certificates for {len(dset.certs)} items.')
-    logging.info("Dumping dataset again...")
-    dset.to_json(dset.root_dir / 'fips_full_dataset.json')
 
     logging.info("Searching for tables in pdfs")
 
-    not_decoded_files = dset.extract_certs_from_tables()
+    not_decoded_files = dset.extract_certs_from_tables(higher_precision_results)
 
     logging.info(f"Done. Files not decoded: {not_decoded_files}")
-    dset.to_json(dset.root_dir / 'fips_mentioned.json')
     logging.info("Parsing algorithms")
     if not no_download_algs:
-        aset = FIPSAlgorithmDataset({}, Path('fips_dataset/web/algorithms'), 'algorithms', 'sample algs')
+        aset = FIPSAlgorithmDataset({}, Path(dset.root_dir / 'web/algorithms'), 'algorithms', 'sample algs')
         aset.get_certs_from_web()
+        logging.info(f'Finished parsing. Have algorithm dataset with {len(aset)} algorithm numbers.')
 
         dset.algorithms = aset
 
     logging.info("finalizing results.")
-
     dset.finalize_results()
 
-    logging.info('dump again')
-    dset.to_json(dset.root_dir / 'fips_full_dataset.json')
-
-    dset.get_dot_graph('different_new')
-
-    data = dset.match_algs()
-    analyze_matched_algs(data)
-
+    dset.plot_graphs(show=False)
     end = datetime.now()
     logging.info(f'The computation took {(end - start)} seconds.')
 
diff --git a/sec_certs/analyze_certificates.py b/sec_certs/analyze_certificates.py
index f20a0e2e..f513664b 100644
--- a/sec_certs/analyze_certificates.py
+++ b/sec_certs/analyze_certificates.py
@@ -313,6 +313,11 @@ def build_cert_references(filter_rules_group, all_items_found):
     # build list of references
     referenced_by = {}
     for cert_long_id in all_items_found.keys():
+        # handle FIPS
+        if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+            referenced_by[cert_long_id] = copy.deepcopy(all_items_found[cert_long_id]['processed']['connections'])
+            continue
+
         # do not continue if no keywords were extracted ()
         if 'keywords_scan' not in all_items_found[cert_long_id].keys():
             continue
@@ -368,9 +373,15 @@ def build_cert_references(filter_rules_group, all_items_found):
 
 
 def analyze_references_graph(filter_rules_group, all_items_found, filter_label):
+    handling_fips_items = False
     # build cert_id to item name mapping
     certid_info = {}
     for cert_long_id in all_items_found.keys():
+        if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+            certid_info[cert_long_id] = cert_long_id
+            handling_fips_items = True
+            continue
+
         cert = all_items_found[cert_long_id]
         if is_in_dict(cert, ['processed', 'cert_id']):
             if is_in_dict(cert, ['frontpage_scan', 'cert_item']):
@@ -383,6 +394,10 @@ def analyze_references_graph(filter_rules_group, all_items_found, filter_label):
     # build cert_id to cert_long_id mapping
     cert_id_to_long_id_mapping = {}
     for cert_long_id in all_items_found.keys():
+        if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+            cert_id_to_long_id_mapping[cert_long_id] = cert_long_id
+            continue
+
         cert = all_items_found[cert_long_id]
         if is_in_dict(cert, ['processed', 'cert_id']):
             if is_in_dict(cert, ['frontpage_scan', 'cert_item']):
@@ -483,7 +498,10 @@ def plot_schemes_multi_line_graph(x_ticks, data, prominent_data, x_label, y_labe
 
         # change line type to prevent color repetitions
         num_lines_plotted += 1
-        color_index += 1
+        if color_index < len(GRAPHS_COLOR_PALETTE) - 1:
+            color_index += 1
+        else:
+            color_index = 0
 
     plt.rcParams.update({'font.size': 16})
     plt.legend(loc=2)
@@ -1125,28 +1143,30 @@ def transform_fips_to_cc_dict(all_cert_items_fips):
 
         cc_item = {}
         cc_item["csv_scan"] = {}
-        cc_item["frontpage_scan"] = {}
-        cc_item["keywords_scan"] = {}
-        cc_item["pdfmeta_scan"] = {}
-        cc_item["processed"] = {}
+        cc_item["frontpage_scan"] = {'FIPS Certificate': 1}
+        cc_item["keywords_scan"] = fips_item['pdf_scan']['keywords']
+        cc_item["pdfmeta_scan"] = fips_item['pdf_scan']
+        cc_item["processed"] = fips_item['processed']
+
+        fips_web_scan = fips_item['web_scan']
 
-        cc_item["processed"]["cc_manufacturer_list"] = fips_item["vendor"]
-        cc_item['processed']['cc_manufacturer_simple_list'] = [fips_item["vendor"]]
+        cc_item["processed"]["cc_manufacturer_list"] = fips_web_scan["vendor"]
+        cc_item['processed']['cc_manufacturer_simple_list'] = [fips_web_scan["vendor"]]
         cc_item["processed"]["cert_id"] = fips_item["cert_id"]
-        cc_item["processed"]["cert_lab"] = fips_item["lab"]
-        if fips_item["exceptions"] is None:
-            cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"]
+        cc_item["processed"]["cert_lab"] = fips_web_scan["lab"]
+        if fips_web_scan["exceptions"] is None:
+            cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"]
         else:
-            cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"] + '+'
+            cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"] + '+'
 
         cc_item['csv_scan']['cc_scheme'] = 'NIST'
-        cc_item["csv_scan"]["cc_certification_date"] = fips_item["date_validation"][0]
-        cc_item["csv_scan"]["cc_archived_date"] = fips_item["date_sunset"]
-        cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_item["type"], fips_item["embodiment"])
+        cc_item["csv_scan"]["cc_certification_date"] = fips_web_scan["date_validation"][0]
+        cc_item["csv_scan"]["cc_archived_date"] = fips_web_scan["date_sunset"]
+        cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_web_scan["module_type"], fips_web_scan["embodiment"])
 
         cc_item["processed"]["cc_security_level_augments"] = []
-        if fips_item["exceptions"]:
-            for exception in fips_item["exceptions"]:
+        if fips_web_scan["exceptions"]:
+            for exception in fips_web_scan["exceptions"]:
                 try:
                     ex_name, ex_level = exception.split(':')
                     ex_name = ex_name.strip()
diff --git a/sec_certs/cert_rules.py b/sec_certs/cert_rules.py
index 3be285c5..ba857300 100644
--- a/sec_certs/cert_rules.py
+++ b/sec_certs/cert_rules.py
@@ -321,64 +321,65 @@ rules_other = [
 rules_fips_remove_algorithm_ids = [
 
 # --- HMAC(-SHA)(-1) - (bits) (method) ((hardware/firmware cert) #id) ---
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{4})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{3})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{2})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{1})",
+# the four patterns below also accept an optional trailing "and #id" after the captured certificate number
+    r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*and[\s#]*\d+)?",
+    r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*and[\s#]*\d+)?",
+    r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*and[\s#]*\d+)?",
+    r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*and[\s#]*\d+)?",
 
 # --- same as above, without hw or fw ---
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
-    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+    r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
 
 # --- SHS/A - (bits) (method) ((cert #) numbers) ---
-    r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{4})(?:\)?\[#?\d+\])?",
-    r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{3})(?:\)?\[#?\d+\])?",
-    r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{2})(?:\)?\[#?\d+\])?",
-    r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{1})(?:\)?\[#?\d+\])?",
+    r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+    r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+    r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+    r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
 
 # --- RSA (bits) (method) ((cert #)) ---
-    r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
-    r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
-    r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
-    r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+    r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+    r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+    r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+    r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
 
 # --- RSA (SSA) (PKCS) (version) (#) ---
-    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{4})?",
-    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{3})?",
-    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{2})?",
-    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{1})?",
+    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{4})?",
+    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{3})?",
+    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{2})?",
+    r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{1})?",
 
 # --- AES (bits) (method) ((cert #)) ---
-    r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})(?:\)?\[#?\d+\])?",
-    r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})(?:\)?\[#?\d+\])?",
-    r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})(?:\)?\[#?\d+\])?",
-    r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})(?:\)?\[#?\d+\])?",
+    r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
 
 # --- Diffie Helman (CVL) ((cert #)) ---
-    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{4})",
-    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{3})",
-    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{2})",
-    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{1})",
+    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{4})",
+    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{3})",
+    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{2})",
+    r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{1})",
 
 # --- DRBG (bits) (method) (cert #) ---
-    r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
-    r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
-    r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
-    r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+    r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+    r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+    r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+    r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
 
 # --- DES (bits) (method) (cert #)
-    r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
-    r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
-    r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
-    r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+    r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*?and[\s#]*?(\d+))?",
+    r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*?and[\s#]*?(\d+))?",
 
 # --- DSA (bits) (method) (cert #)
-    r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
-    r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
-    r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
-    r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+    r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+    r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+    r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+    r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
 
 # --- platforms (#)+ - this is used in modification history ---
     r"[Pp]latforms? #\d+(?:#\d+|,| |-|and)*[^\n]*",
@@ -400,12 +401,14 @@ rules_fips_remove_algorithm_ids = [
 
 # --- PKCS (#) ---
     r"PKCS[\s]?#?\d+",
-    r"PKSC[\s]?#?\d+" # typo, #625
-]
+    r"PKSC[\s]?#?\d+", # typo, #625
 
+# --- # C and # A (just in case) ---
+    r"#\s+?[Cc]\d+",
+    r"#\s+?[Aa]\d+"
+]
 rules_fips_to_remove = [
 # --- random words found ---
-    r"Survey #192",  # why would they get an address like this /o\ cert 2079
     r"[Ss]lot #\d",  # a card slot, #2069
     r"[Ss]eals? ?\(?#\d - #\d", #  #1232
     r"\[#\d*\]", # some certs use this as references
@@ -413,16 +416,19 @@ rules_fips_to_remove = [
     r"[Pp]ower [Ss]upply #\d", # #604
     r"TEL #\d and #\d", # #3337
     r"#\d+ - #\d+", # labels, seals... #1232
+    r"#\d+‐#?\d+", # labels, seals... #3530
+    r"#\d+ to #?\d+", # labels, seals... #3058
+    r"see #\d+", # labels, seals... #3058
     r"#\d+, ?#\d+",
     r"#?\d+ and #?\d+",
     r"label \(#\d+\)",
     r"[Ll]abel #\d+",
     r"\(#\d\)",
     r"IETF[25\s]*RFC[26\s]*#\d+", # #3425
-    r"Bendix Road North #760", # #3325
-    r"5080 Spectrum Drive, #1000E",
     r"Document # 540-105000-A1",
     r"Certificate #2287-1 from EMCE Engineering", # ???
+    r"[sS]cenarios?\s?#\d+", # 3789
+    r"#\d+\s?\(\S\)", # 2159
 ]
 
 rules_fips_cert = [
@@ -430,10 +436,10 @@ rules_fips_cert = [
     #     r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{3})",
     #     r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{2})",
     #     r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{1})
-    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4}[^\d])",
-    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3}[^\d])",
-    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2}[^\d])",
-    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1}[^\d])"
+    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4})(?!\d)",
+    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3})(?!\d)",
+    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2})(?!\d)",
+    r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1})(?!\d)"
 ]
 
 #  rule still too "general"
@@ -508,8 +514,7 @@ fips_rules['rules_to_remove'] = rules_fips_to_remove
 fips_rules['rules_security_level'] = rules_fips_security_level
 fips_rules['rules_cert_id'] = rules_fips_cert
 fips_common_rules = copy.deepcopy(common_rules)  # make separate copy not to process cc rules by fips's re.compile
-#fips_rules.update(fips_common_rules)
 
 for rule in fips_rules:
     for current_rule in range(len(fips_rules[rule])):
-        fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule])
-\ No newline at end of file
+        fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule])
diff --git a/sec_certs/certificate.py b/sec_certs/certificate.py
index 7b0770de..98ddc6bc 100644
--- a/sec_certs/certificate.py
+++ b/sec_certs/certificate.py
@@ -133,7 +133,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         algorithms: Optional[List[Dict[str, str]]]
         tested_conf: Optional[List[str]]
         description: Optional[str]
-        mentioned_certs: Optional[List[str]]
+        mentioned_certs: Optional[Dict[str, Dict[str, int]]]
         vendor: Optional[str]
         vendor_www: Optional[str]
         lab: Optional[str]
@@ -147,6 +147,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         revoked_link: Optional[str]
         sw_versions: Optional[str]
         product_url: Optional[str]
+        connections: List[str]
 
         def __post_init__(self):
             self.date_validation = [parser.parse(x).date() for x in
@@ -177,6 +178,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         cert_id: int
         keywords: Dict
         algorithms: List
+        connections: List[str]
 
         @property
         def dgst(self):
@@ -199,9 +201,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
 
     @dataclass(eq=True)
     class Processed(ComplexSerializableType):
-        keywords: Optional[Dict]
-        algorithms: Dict
-        connections: List
+        keywords: Optional[Dict[str, Dict]]
+        algorithms: Dict[str, Dict]
+        connections: List[str]
+        unmatched_algs: int
 
         @property
         def dgst(self):
@@ -263,26 +266,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
              'type': None, 'embodiment': None, 'tested_conf': None, 'description': None,
              'vendor': None, 'vendor_www': None, 'lab': None, 'lab_nvlap': None,
              'historical_reason': None, 'revoked_reason': None, 'revoked_link': None, 'algorithms': [],
-             'mentioned_certs': [], 'tables_done': False, 'security_policy_www': None, 'certificate_www': None,
+             'mentioned_certs': {}, 'tables_done': False, 'security_policy_www': None, 'certificate_www': None,
              'hw_versions': None, 'fw_versions': None, 'sw_versions': None, 'product_url': None}
 
         return d
 
     @staticmethod
-    def parse_caveat(current_text: str) -> List:
+    def parse_caveat(current_text: str) -> Dict[str, Dict[str, int]]:
         """
         Parses content of "Caveat" of FIPS CMVP .html file
         :param current_text: text of "Caveat"
-        :return: list of all found algorithm IDs
+        :return: dictionary of all found algorithm IDs
         """
-        ids_found = []
-        r_key = r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+)"
+        ids_found = {}
+        r_key = r"(?P<word>\w+)?\s?(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)+(?P<id>\d+)"
         for m in re.finditer(r_key, current_text):
-            if r_key in ids_found and m.group() in ids_found[0]:
-                ids_found[0][m.group()]['count'] += 1
+            if m.group('word') and m.group('word').lower() in {'rsa', 'shs', 'dsa', 'pkcs', 'aes'}:
+                continue
+            if m.group('id') in ids_found:
+                ids_found[m.group('id')]['count'] += 1
             else:
-                ids_found.append(
-                    {r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+?})": {m.group(): {'count': 1}}})
+                ids_found[m.group('id')] = {'count': 1}
 
         return ids_found
 
@@ -302,7 +306,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         for m in re.finditer(reg, current_text):
             set_items.add(m.group())
 
-        return [{"Certificate": list(set_items)}]
+        return [{"Certificate": list(set_items)}] if len(set_items) > 0 else []
 
     @staticmethod
     def parse_table(element: Union[Tag, NavigableString]) -> List[Dict]:
@@ -315,9 +319,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         trs = element.find_all('tr')
         for tr in trs:
             tds = tr.find_all('td')
+            cert = FIPSCertificate.extract_algorithm_certificates(tds[1].text)
             found_items.append(
                 {'Name': tds[0].text,
-                 'Certificate': FIPSCertificate.extract_algorithm_certificates(tds[1].text)[0]['Certificate'],
+                 'Certificate': cert[0]['Certificate'] if cert != [] else [],
                  'Links': [str(x) for x in tds[1].find_all('a')],
                  'Raw': str(tr)})
 
@@ -335,8 +340,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
 
             elif 'caveat' in pairs[title]:
                 html_items_found[pairs[title]] = content
-                html_items_found['mentioned_certs'] += FIPSCertificate.parse_caveat(
-                    content)
+                html_items_found['mentioned_certs'].update(FIPSCertificate.parse_caveat(
+                    content))
 
             elif 'FIPS Algorithms' in title:
                 html_items_found['algorithms'] += FIPSCertificate.parse_table(
@@ -486,13 +491,16 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
                                    items_found['revoked_reason'] if 'revoked_reason' in items_found else None,
                                    items_found['revoked_link'] if 'revoked_link' in items_found else None,
                                    items_found['sw_versions'] if 'sw_versions' in items_found else None,
-                                   items_found['product_url']) if 'product_url' in items_found else None,
+                                   items_found['product_url'] if 'product_url' in items_found else None,
+                                   []  # connections
+                               ),
                                FIPSCertificate.PdfScan(
                                    items_found['cert_id'],
                                    {} if not initialized else initialized.pdf_scan.keywords,
-                                   [] if not initialized else initialized.pdf_scan.algorithms
+                                   [] if not initialized else initialized.pdf_scan.algorithms,
+                                   []  # connections
                                ),
-                               FIPSCertificate.Processed(None, {}, []),
+                               FIPSCertificate.Processed(None, {}, [], 0),
                                state
                                )
 
@@ -508,6 +516,17 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
                 cert.state.txt_state = True
         return cert
 
+
+    @staticmethod
+    def _declare_state(text: str):
+        """
+        If less than half of the text consists of alphabetic characters,
+        we declare the security policy as "non-parsable"
+        :param text: security policy content
+        :return: True if parsable, otherwise False
+        """
+        return len(text) * 0.5 <= len(''.join(filter(str.isalpha, text)))
+
     @staticmethod
     def find_keywords(cert: 'FIPSCertificate') -> Tuple[Optional[Dict], 'FIPSCertificate']:
         if not cert.state.txt_state:
@@ -518,8 +537,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
 
         text_to_parse = text_with_newlines if config.use_text_with_newlines_during_parsing['value'] else text
 
-        items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse),
-                                                                 cert.web_scan.algorithms)
+        cert.state.txt_state = FIPSCertificate._declare_state(text)
+
+        if config.ignore_first_page:
+            text_to_parse = text_to_parse[text_to_parse.index("\x0c"):]
+
+        items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse))
 
         save_modified_cert_file(cert.state.fragment_path.with_suffix('.fips.txt'), fips_text, unicode_error)
 
@@ -550,15 +573,11 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
             for web_alg in alg_list:
                 if ''.join(filter(str.isdigit, web_alg)) not in all_algorithms:
                     not_found.append(web_alg)
-        logger.error(
-            f"For cert {cert.dgst}:\n\tNOT FOUND: {len(not_found)}\n"
-            f"\tFOUND: {sum([len(a['Certificate']) for a in cert.web_scan.algorithms]) - len(not_found)}")
-        logger.error(f"Not found: {not_found}")
         return len(not_found)
 
     @staticmethod
     def remove_platforms(text_to_parse: str):
-        pat = re.compile(r"(?:modification|revision|change) history\n[\s\S]*?", re.IGNORECASE)
+        pat = re.compile(r"(?:(?:modification|revision|change) history|version control)\n[\s\S]*?", re.IGNORECASE)
         for match in pat.finditer(text_to_parse):
             text_to_parse = text_to_parse.replace(
                 match.group(), 'x' * len(match.group()))
@@ -566,7 +585,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
 
     @staticmethod
     def parse_cert_file_common(text_to_parse: str, whole_text_with_newlines: str,
-                               search_rules: Dict) -> Tuple[Optional[Dict], str]:
+                               search_rules: Dict) -> Tuple[Optional[Dict[Pattern, Dict]], str]:
         # apply all rules
         items_found_all = {}
         for rule_group in search_rules.keys():
@@ -632,10 +651,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         return items_found_all, whole_text_with_newlines
 
     @staticmethod
-    def parse_cert_file(text_to_parse: str, algorithms: List[Dict]) \
-            -> Tuple[Optional[Dict], str]:
+    def parse_cert_file(text_to_parse: str) -> Tuple[Optional[Dict[Pattern, Dict]], str]:
         # apply all rules
         items_found_all: Dict = {}
+
         for rule_group in fips_rules.keys():
             if rule_group not in items_found_all:
                 items_found_all[rule_group] = {}
@@ -667,21 +686,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
         return items_found_all, text_to_parse
 
     @staticmethod
-    def analyze_tables(cert: 'FIPSCertificate') -> Tuple[bool, 'FIPSCertificate', List]:
+    def analyze_tables(tup: Tuple['FIPSCertificate', bool]) -> Tuple[bool, 'FIPSCertificate', List]:
+        cert, precision = tup
+        if not (precision and cert.state.tables_done) \
+                or (precision and cert.processed.unmatched_algs < config.cert_threshold['value']):
+            return cert.state.tables_done, cert, []
+
         cert_file = cert.state.sp_path
         txt_file = cert_file.with_suffix('.pdf.txt')
         with open(txt_file, 'r', encoding='utf-8') as f:
             tables = helpers.find_tables(f.read(), txt_file)
+        all_pages = precision and cert.processed.unmatched_algs > config.cert_threshold['value']  # bool value
 
         lst: List = []
         if tables:
             try:
-                data = read_pdf(cert_file, pages=tables, silent=True)
+                data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True)
             except Exception as e:
                 try:
                     logger.error(e)
                     helpers.repair_pdf(cert_file)
-                    data = read_pdf(cert_file, pages=tables, silent=True)
+                    data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True)
 
                 except Exception as ex:
                     logger.error(ex)
@@ -691,11 +716,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
             for df in data:
                 for col in range(len(df.columns)):
                     if 'cert' in df.columns[col].lower() or 'algo' in df.columns[col].lower():
-                        lst += FIPSCertificate.extract_algorithm_certificates(
+                        tmp =  FIPSCertificate.extract_algorithm_certificates(
                             df.iloc[:, col].to_string(index=False), True)
-
+                        lst += tmp if tmp != [{"Certificate": []}] else []
                 # Parse again if someone picks not so descriptive column names
-                lst += FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False))
+                tmp = FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False))
+                lst += tmp if tmp != [{"Certificate": []}] else []
         return True, cert, lst
 
     def _create_alg_set(self) -> Set:
@@ -710,9 +736,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
             return
 
         self.processed.keywords = copy.deepcopy(self.pdf_scan.keywords)
+        # TODO: figure out why this block cannot be deleted
         if self.web_scan.mentioned_certs:
-            for item in self.web_scan.mentioned_certs:
-                self.processed.keywords['rules_cert_id'].update(item)
+            for item, value in self.web_scan.mentioned_certs.items():
+                self.processed.keywords['rules_cert_id'].update({'caveat_item': {item: value}})
 
         alg_set = self._create_alg_set()
 
@@ -742,8 +769,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
     @staticmethod
     def get_compare(vendor: str):
         vendor_split = vendor.replace(',', '') \
-            .replace('-', ' ').replace('+', ' ').replace('®', '').split()
-        return vendor_split[0] if len(vendor_split) > 0 else vendor
+            .replace('-', ' ').replace('+', ' ').replace('®', '').replace('(R)', '').split()
+        return vendor_split[0][:4] if len(vendor_split) > 0 else vendor
 
 
 class CommonCriteriaCert(Certificate, ComplexSerializableType):
diff --git a/sec_certs/constants.py b/sec_certs/constants.py
index f19919a6..99d81640 100644
--- a/sec_certs/constants.py
+++ b/sec_certs/constants.py
@@ -46,5 +46,5 @@ TAG_PP_EDITOR = 'pp_editor'
 TAG_PP_REVIEWER = 'pp_reviewer'
 TAG_KEYWORDS = 'keywords'
 FIPS_NOT_AVAILABLE_CERT_SIZE = 10000
-FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=validation&page='
+FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=implementation&page='
 
diff --git a/sec_certs/dataset.py b/sec_certs/dataset.py
index c1ac4f35..91ec113a 100644
--- a/sec_certs/dataset.py
+++ b/sec_certs/dataset.py
@@ -1,4 +1,5 @@
 import os
+import sys
 from datetime import datetime
 import locale
 import logging
@@ -25,6 +26,8 @@ import sec_certs.constants as constants
 import sec_certs.cert_processing as cert_processing
 import sec_certs.files as files
 
+from sec_certs.analyze_certificates import plot_bar_graph
+
 from sec_certs.certificate import CommonCriteriaCert, Certificate, FIPSCertificate
 from sec_certs.serialization import ComplexSerializableType, CustomJSONDecoder, CustomJSONEncoder
 from sec_certs.configuration import config
@@ -743,7 +746,8 @@ class CCDataset(Dataset, ComplexSerializableType):
                     try:
                         inpts = [int(x) for x in inpts]
                         if min(inpts) < 0 or max(inpts) > len(x.heuristics.cpe_matches) - 1:
-                            raise ValueError(f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}')
+                            raise ValueError(
+                                f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}')
                     except ValueError as e:
                         logger.error(f'Bad input from user, repeating instance: {e}')
                         print(f'Bad input from user, repeating instance: {e}')
@@ -772,7 +776,8 @@ class CCDataset(Dataset, ComplexSerializableType):
 
         verified_cpe_rich_certs = [x for x in self if x.heuristics.verified_cpe_matches]
         if not verified_cpe_rich_certs:
-            logger.error('No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.')
+            logger.error(
+                'No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.')
             return
         for cert in verified_cpe_rich_certs:
             cert.compute_heuristics_related_cves(cve_dset)
@@ -820,7 +825,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                 not_available.append(i)
         return missing, not_available
 
-    def extract_keywords(self, redo=False):
+    def extract_keywords(self, redo=False, update_json: bool = True):
         self.fragments_dir.mkdir(parents=True, exist_ok=True)
 
         keywords = cert_processing.process_parallel(FIPSCertificate.find_keywords,
@@ -830,21 +835,27 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                                                     use_threading=False)
         for keyword, cert in keywords:
             self.certs[cert.dgst].pdf_scan.keywords = keyword
+        
+        if update_json:
+            self.to_json(self.root_dir / 'fips_full_dataset.json')
 
-    def match_algs(self, show_graph=False) -> Dict:
+    def match_algs(self) -> Dict:
         output = {}
+        cert: FIPSCertificate
         for cert in self.certs.values():
             output[cert.dgst] = FIPSCertificate.match_web_algs_to_pdf(cert)
+            cert.processed.unmatched_algs = output[cert.dgst]
 
+        output = {k: v for k, v in output.items() if v != 0}
         return output
 
-
     def download_all_pdfs(self):
         sp_paths, sp_urls = [], []
         self.policies_dir.mkdir(exist_ok=True)
 
         for cert_id in list(self.certs.keys()):
-            if not (self.policies_dir / f'{cert_id}.pdf').exists() or not self.certs[cert_id].state.txt_state:
+            if not (self.policies_dir / f'{cert_id}.pdf').exists() or (self.certs[cert_id]
+                                                                       and not self.certs[cert_id].state.txt_state):
                 sp_urls.append(
                     f"https://csrc.nist.gov/CSRC/media/projects/cryptographic-module-validation-program/documents/security-policies/140sp{cert_id}.pdf")
                 sp_paths.append(self.policies_dir / f"{cert_id}.pdf")
@@ -866,7 +877,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
 
         logging.info(f"downloading {len(html_urls)} module html files")
         failed = cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(html_urls, html_paths)),
-                                         constants.N_THREADS)
+                                                  constants.N_THREADS)
         failed = [c for c in failed if c]
 
         self.new_files += len(html_urls)
@@ -875,7 +886,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                                          constants.N_THREADS)
         return new_files
 
-    def convert_all_pdfs(self):
+    def convert_all_pdfs(self, update_json: bool = True):
         logger.info('Converting FIPS certificate reports to .txt')
         tuples = [
             (cert, self.policies_dir / f'{cert.cert_id}.pdf', self.policies_dir / f'{cert.cert_id}.pdf.txt')
@@ -884,22 +895,45 @@ class FIPSDataset(Dataset, ComplexSerializableType):
         ]
         cert_processing.process_parallel(FIPSCertificate.convert_pdf_file, tuples, constants.N_THREADS)
 
-    def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None):
+        if update_json:
+            self.to_json(self.root_dir / 'fips_full_dataset.json')
+
+    def prepare_dataset(self, test: Optional[Path] = None):
+        if test:
+            html_files = [test]
+        else:
+            html_files = ['fips_modules_active.html',
+                          'fips_modules_historical.html', 'fips_modules_revoked.html']
+            helpers.download_file(
+                "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0",
+                self.web_dir / "fips_modules_active.html")
+            helpers.download_file(
+                "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0",
+                self.web_dir / "fips_modules_historical.html")
+            helpers.download_file(
+                "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0",
+                self.web_dir / "fips_modules_revoked.html")
+
+        # Parse those files and get list of currently processable files (always)
+        for f in html_files:
+            self._get_certificates_from_html(self.web_dir / f)
+
+    def _get_certificates_from_html(self, html_file: Path) -> None:
+        logger.info(f'Getting certificate ids from {html_file}')
+        with open(html_file, 'r', encoding='utf-8') as handle:
+            html = BeautifulSoup(handle.read(), 'html.parser')
+
+        table = [x for x in html.find(
+            id='searchResultsTable').tbody.contents if x != '\n']
+        for entry in table:
+            self.certs[entry.find('a').text] = None
+
+    def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None, test: Optional[Path] = None, update_json: bool = True):
         def download_html_pages() -> List[str]:
             new_files = self.download_all_htmls()
             self.download_all_pdfs()
             return new_files
 
-        def get_certificates_from_html(html_file: Path) -> None:
-            logger.info(f'Getting certificate ids from {html_file}')
-            with open(html_file, 'r', encoding='utf-8') as handle:
-                html = BeautifulSoup(handle.read(), 'html.parser')
-
-            table = [x for x in html.find(
-                id='searchResultsTable').tbody.contents if x != '\n']
-            for entry in table:
-                self.certs[entry.find('a').text] = {}
-
         logger.info("Downloading required html files")
 
         self.web_dir.mkdir(parents=True, exist_ok=True)
@@ -907,21 +941,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
         self.algs_dir.mkdir(exist_ok=True)
 
         # Download files containing all available module certs (always)
-        html_files = ['fips_modules_active.html',
-                      'fips_modules_historical.html', 'fips_modules_revoked.html']
-        helpers.download_file(
-            "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0",
-            self.web_dir / "fips_modules_active.html")
-        helpers.download_file(
-            "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0",
-            self.web_dir / "fips_modules_historical.html")
-        helpers.download_file(
-            "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0",
-            self.web_dir / "fips_modules_revoked.html")
-
-        # Parse those files and get list of currently processable files (always)
-        for f in html_files:
-            get_certificates_from_html(self.web_dir / f)
+        self.prepare_dataset(test)
 
         logger.info('Downloading certificate html and security policies')
 
@@ -931,7 +951,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
         if json_file.exists():
             logger.info("Certs loaded from previous scanning")
             dataset = self.from_json(json_file)
-            self.certs = dataset.certs
+            self.certs.update(dataset.certs)
             self.algorithms = dataset.algorithms
 
         new_certs = download_html_pages()
@@ -954,15 +974,19 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                                       (self.web_dir / cert_id).with_suffix('.html'),
                                       (self.fragments_dir / cert_id).with_suffix('.txt'), False, None, False),
                 cert, redo=redo)
+        
+        if update_json:
+            self.to_json(self.root_dir / 'fips_full_dataset.json')
 
-    def extract_certs_from_tables(self) -> List[Path]:
+    def extract_certs_from_tables(self, high_precision: bool, update_json: bool = True) -> List[Path]:
         """
         Function that extracts algorithm IDs from tables in security policies files.
         :return: list of files that couldn't have been decoded
         """
         result = cert_processing.process_parallel(FIPSCertificate.analyze_tables,
-                                                  [cert for cert in self.certs.values() if
-                                                   not cert.state.tables_done and cert.state.txt_state],
+                                                  [(cert, high_precision) for cert in self.certs.values() if
+                                                   (
+                                                           not cert.state.tables_done or high_precision) and cert.state.txt_state],
                                                   constants.N_THREADS // 4,  # tabula already processes by parallel, so
                                                   # it's counterproductive to use all threads
                                                   use_threading=False)
@@ -972,6 +996,9 @@ class FIPSDataset(Dataset, ComplexSerializableType):
             self.certs[cert.dgst].state.tables_done = state
             self.certs[cert.dgst].pdf_scan.algorithms += algorithms
 
+        if update_json:
+            self.to_json(self.root_dir / 'fips_full_dataset.json')
+
         return not_decoded
 
     def remove_algorithms_from_extracted_data(self):
@@ -991,90 +1018,135 @@ class FIPSDataset(Dataset, ComplexSerializableType):
                     new_algorithms.append({'Certificate': [algorithm]})
             certificate.processed.algorithms = new_algorithms
 
-    def validate_results(self):
-        """
-        Function that validates results and finds the final connection output
-        """
-
-        def validate_id(processed_cert: FIPSCertificate, cert_candidate: str) -> bool:
+    # Returns True if the candidates should _not_ be matched
+    def _compare_certs(self, current_certificate: 'FIPSCertificate', other_id: str):
+        cert_first = current_certificate.web_scan.date_validation[0].year
+        cert_last = current_certificate.web_scan.date_validation[-1].year
+        conn_first = self.certs[other_id].web_scan.date_validation[0].year
+        conn_last = self.certs[other_id].web_scan.date_validation[-1].year
 
-            # returns True if candidates should _not_ be matched
-            def compare_certs(current_certificate: 'FIPSCertificate', other_id: str):
-                cert_first = current_certificate.web_scan.date_validation[0].year
-                cert_last = current_certificate.web_scan.date_validation[-1].year
-                conn_first = self.certs[other_id].web_scan.date_validation[0].year
-                conn_last = self.certs[other_id].web_scan.date_validation[-1].year
+        return cert_first - conn_first > config.year_difference_between_validations['value'] \
+                and cert_last - conn_last > config.year_difference_between_validations['value'] \
+                or cert_first < conn_first
 
-                return cert_first - conn_first > config.year_difference_between_validations['value'] \
-                       and cert_last - conn_last > config.year_difference_between_validations['value'] \
-                       or cert_first < conn_first
+    def _remove_false_positives_for_cert(self, current_cert: FIPSCertificate):
+        for rule in current_cert.processed.keywords['rules_cert_id']:
+            matches = current_cert.processed.keywords['rules_cert_id'][rule]
+            current_cert.processed.keywords['rules_cert_id'][rule] = [cert_id for cert_id in matches if
+                                                                        self._validate_id(current_cert,
+                                                                                    cert_id.replace('Cert.', '')
+                                                                                        .replace('cert.', '')
+                                                                                        .lstrip("#CA0 "))
+                                                                            and cert_id != current_cert.cert_id]
 
-            # "< number" still needs to be used, because of some old certs being revalidated
-            if cert_candidate.isdecimal() \
-                    and int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \
-                    compare_certs(processed_cert, cert_candidate):
-                return False
-            if cert_candidate not in self.algorithms.certs:
-                return True
+    def _validate_id(self, processed_cert: FIPSCertificate, cert_candidate: str) -> bool:
+        if cert_candidate not in self.certs or not cert_candidate.isdecimal():
+            return False
 
-            for cert_alg in processed_cert.processed.algorithms:
-                for certificate in cert_alg['Certificate']:
-                    curr_id = ''.join(filter(str.isdigit, certificate))
-                    if curr_id == cert_candidate:
-                        return False
+        # "< number" still needs to be used, because of some old certs being revalidated
+        if int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \
+                self._compare_certs(processed_cert, cert_candidate):
+            return False
+        if cert_candidate not in self.algorithms.certs:
+            return True
 
-            algs = self.algorithms.certs[cert_candidate]
-            for current_alg in algs:
-                if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare(
-                        current_alg.vendor):
+        for cert_alg in processed_cert.processed.algorithms:
+            for certificate in cert_alg['Certificate']:
+                curr_id = ''.join(filter(str.isdigit, certificate))
+                if curr_id == cert_candidate:
                     return False
-            return True
 
-        broken_files = set()
+        algs = self.algorithms.certs[cert_candidate]
+        for current_alg in algs:
+            if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare(
+                    current_alg.vendor):
+                return False
+        return True
+
+    @staticmethod
+    def _find_connections(current_cert: FIPSCertificate):
+        current_cert.processed.connections = []
+        current_cert.web_scan.connections = []
+        current_cert.pdf_scan.connections = []
+        if not current_cert.state.file_status or not current_cert.processed.keywords:
+            return
+        if current_cert.processed.keywords['rules_cert_id'] == {}:
+            return
+        for rule in current_cert.processed.keywords['rules_cert_id']:
+            for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
+                cert_id = ''.join(filter(str.isdigit, cert))
+                if cert_id not in current_cert.processed.connections:
+                    current_cert.processed.connections.append(cert_id)
+                    current_cert.pdf_scan.connections.append(cert_id)
+
+        # We want connections parsed in caveat to bypass age check, because we are 100 % sure they are right
+        if current_cert.web_scan.mentioned_certs:
+            for item in current_cert.web_scan.mentioned_certs:
+                cert_id = ''.join(filter(str.isdigit, item))
+                if cert_id not in current_cert.processed.connections and cert_id != '':
+                    current_cert.processed.connections.append(cert_id)
+                    current_cert.web_scan.connections.append(cert_id)
 
+    def validate_results(self):
+        """
+        Function that validates results and finds the final connection output
+        """
         current_cert: FIPSCertificate
 
         for current_cert in self.certs.values():
             if not current_cert.state.txt_state:
                 continue
-            for rule in current_cert.processed.keywords['rules_cert_id']:
-                for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
-                    cert_id = ''.join(filter(str.isdigit, cert))
-
-                    if cert_id == '' or cert_id not in self.certs:
-                        broken_files.add(current_cert.dgst)
-                        current_cert.state.file_status = False
-                        break
-
-        if broken_files:
-            logger.warning("CERTIFICATE FILES WITH WRONG CERTIFICATES PARSED")
-            logger.warning(broken_files)
-            logger.warning("... skipping these...")
-            logger.warning(f"Total non-analyzable files:{len(broken_files)}")
+            self._remove_false_positives_for_cert(current_cert)
 
         for current_cert in self.certs.values():
-            current_cert.processed.connections = []
-            if not current_cert.state.file_status or not current_cert.processed.keywords:
-                continue
-            if current_cert.processed.keywords['rules_cert_id'] == {}:
-                continue
-            for rule in current_cert.processed.keywords['rules_cert_id']:
-                for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
-                    cert_id = ''.join(filter(str.isdigit, cert))
-                    if cert_id not in current_cert.processed.connections and validate_id(current_cert, cert_id):
-                        current_cert.processed.connections.append(cert_id)
+            FIPSDataset._find_connections(current_cert)
 
-    def finalize_results(self):
+    def finalize_results(self, update_json: bool = True):
         self.unify_algorithms()
         self.remove_algorithms_from_extracted_data()
         self.validate_results()
 
-    def get_dot_graph(self, output_file_name: str):
+        if update_json:
+            self.to_json(self.root_dir / 'fips_full_dataset.json')
+
+    def _highlight_vendor_in_dot(self, dot: Digraph, current_key: str, highlighted_vendor: str):
+        if self.certs[current_key].web_scan.vendor != highlighted_vendor:
+            return
+
+        dot.attr('node', color='red')
+        if self.certs[current_key].web_scan.status == 'Revoked':
+            dot.attr('node', color='grey32')
+        if self.certs[current_key].web_scan.status == 'Historical':
+            dot.attr('node', color='gold3')
+            
+    def _add_colored_node(self, dot: Digraph, current_key: str, highlighted_vendor: str):
+        dot.attr('node', color='lightgreen')
+        if self.certs[current_key].web_scan.status == 'Revoked':
+            dot.attr('node', color='lightgrey')
+        if self.certs[current_key].web_scan.status == 'Historical':
+            dot.attr('node', color='gold')
+        self._highlight_vendor_in_dot(dot, current_key, highlighted_vendor)
+        dot.node(current_key, label=current_key + '&#10;'
+                 + self.certs[current_key].web_scan.vendor
+                 + '&#10;'
+                 + (self.certs[current_key].web_scan.module_name if
+                    self.certs[current_key].web_scan.module_name else ''))
+
+    def _get_processed_list(self, connection_list: str, key: str):
+        attr = {'pdf': 'pdf_scan', 'web': 'web_scan', 'processed': 'processed'}[connection_list]
+        return getattr(self.certs[key], attr).connections
+
+    def get_dot_graph(self, output_file_name: str, connection_list: str = 'processed',
+                      highlighted_vendor: str = 'Red Hat®, Inc.', show: bool = True):
         """
         Function that plots .dot graph of dependencies between certificates
         Certificates with at least one dependency are displayed in "{output_file_name}connections.pdf", remaining
         certificates are displayed in {output_file_name}single.pdf
+        :param show: display graph right on screen
+        :param highlighted_vendor: vendor whose certificates should be highlighted in red color
         :param output_file_name: prefix to "connections", "connections.pdf", "single" and "single.pdf"
+        :param connection_list: 'processed', 'web', or 'pdf' - plots a graph from this source
+                                default - processed
         """
         dot = Digraph(comment='Certificate ecosystem')
         single_dot = Digraph(comment='Modules with no dependencies')
@@ -1083,57 +1155,37 @@ class FIPSDataset(Dataset, ComplexSerializableType):
         dot.attr('graph', label='Dependencies', labelloc='t', fontsize='30')
         dot.attr('node', style='filled')
 
-        def found_interesting_cert(current_key):
-            if self.certs[current_key].web_scan.vendor == highlighted_vendor:
-                dot.attr('node', color='red')
-                if self.certs[current_key].web_scan.status == 'Revoked':
-                    dot.attr('node', color='grey32')
-                if self.certs[current_key].web_scan.status == 'Historical':
-                    dot.attr('node', color='gold3')
-            if self.certs[current_key].web_scan.vendor == "SUSE, LLC":
-                dot.attr('node', color='lightblue')
-
-        def color_check(current_key):
-            dot.attr('node', color='lightgreen')
-            if self.certs[current_key].web_scan.status == 'Revoked':
-                dot.attr('node', color='lightgrey')
-            if self.certs[current_key].web_scan.status == 'Historical':
-                dot.attr('node', color='gold')
-            found_interesting_cert(current_key)
-            dot.node(current_key,
-                     label=current_key +
-                           '&#10;' +
-                           self.certs[current_key].web_scan.vendor +
-                           '&#10;' +
-                           (self.certs[current_key].web_scan.module_name
-                            if self.certs[current_key].web_scan.module_name else ''))
-
         keys = 0
         edges = 0
 
-        highlighted_vendor = 'Red Hat®, Inc.'
         for key in self.certs:
-            if key != 'Not found' and self.certs[key].state.file_status:
-                if self.certs[key].processed.connections:
-                    color_check(key)
-                    keys += 1
-                else:
-                    single_dot.attr('node', color='lightblue')
-                    found_interesting_cert(key)
-                    single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + (
-                        '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else ''))
+            if key == 'Not found' or not self.certs[key].state.file_status:
+                continue
+
+            processed = self._get_processed_list(connection_list, key)
+
+            if processed:
+                self._add_colored_node(dot, key, highlighted_vendor)
+                keys += 1
+            else:
+                single_dot.attr('node', color='lightblue')
+                self._highlight_vendor_in_dot(single_dot, key, highlighted_vendor)
+                single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + (
+                    '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else ''))
 
         for key in self.certs:
-            if key != 'Not found' and self.certs[key].state.file_status:
-                for conn in self.certs[key].processed.connections:
-                    color_check(conn)
-                    dot.edge(key, conn)
-                    edges += 1
+            if key == 'Not found' or not self.certs[key].state.file_status:
+                continue
+            processed = self._get_processed_list(connection_list, key)
+            for conn in processed:
+                self._add_colored_node(dot, conn, highlighted_vendor)
+                dot.edge(key, conn)
+                edges += 1
 
-        logging.info(f"rendering {keys} keys and {edges} edges")
+        logging.info(f"rendering for {connection_list}: {keys} keys and {edges} edges")
 
-        dot.render(str(output_file_name) + '_connections', view=True)
-        single_dot.render(str(output_file_name) + '_single', view=True)
+        dot.render(self.root_dir / (str(output_file_name) + '_connections'), view=show)
+        single_dot.render(self.root_dir / (str(output_file_name) + '_single'), view=show)
 
     def to_dict(self):
         return {'timestamp': self.timestamp, 'sha256_digest': self.sha256_digest,
@@ -1171,6 +1223,11 @@ class FIPSDataset(Dataset, ComplexSerializableType):
 
         return vendors
 
+    def plot_graphs(self, show: bool = False):
+        self.get_dot_graph('full_graph', show=show)  # default source: 'processed' connections
+        self.get_dot_graph('web_only_graph', 'web', show=show)  # connections taken from web_scan
+        self.get_dot_graph('pdf_only_graph', 'pdf', show=show)  # connections taken from pdf_scan
+
 
 class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
 
@@ -1187,18 +1244,36 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
             soup = BeautifulSoup(alg_file.read(), 'html.parser')
             num_pages = soup.select('span[data-total-pages]')[0].attrs
 
-        for i in range(1, int(num_pages['data-total-pages'])):
+        for i in range(2, int(num_pages['data-total-pages'])):
             if not (self.root_dir / f'page{i}.html').exists():
                 algs_urls.append(
                     constants.FIPS_ALG_URL + str(i))
                 algs_paths.append(self.root_dir / f"page{i}.html")
 
+        helpers.download_file(constants.FIPS_ALG_URL + num_pages['data-total-pages'],
+                              self.root_dir / f"page{int(num_pages['data-total-pages'])}.html")
         logging.info(f"downloading {len(algs_urls)} algs html files")
         cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(algs_urls, algs_paths)),
                                          constants.N_THREADS)
 
         self.parse_html()
 
+    @staticmethod
+    def _extract_algorithm_information(elements, vendor, date, product, validation):
+        for elem in elements:
+            # Cells containing a link hold the product name or validation number; cells without one hold vendor or date.
+            attachments = elem.find_all('a')
+            # No link: classify by the cell's own id; a field whose id does not match keeps the value passed in.
+            if len(attachments) == 0:
+                vendor = elem.text.strip() if 'vendor-name' in elem['id'] else vendor
+                date = elem.text.strip() if 'validation-date' in elem['id'] else date
+                continue
+            # Linked cell: classify by the link's id, but store the stripped text of the whole cell.
+            for attachment in attachments:
+                product = elem.text.strip() if 'product-name' in attachment['id'] else product
+                validation = elem.text.strip() if 'validation-number' in attachment['id'] else validation
+        return vendor, date, product, validation
+
     def parse_html(self):
         def split_alg(alg_string):
             cert_type = alg_string.rstrip('0123456789')
@@ -1210,18 +1285,19 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
                 html_soup = BeautifulSoup(handle.read(), 'html.parser')
 
             table = html_soup.find('table', class_='table table-condensed publications-table table-bordered')
-            spans = table.find_all('span')
-            for span in spans:
-                elements = span.find_all('td')
-                vendor, implementation = elements[0].text, elements[1].text
-                elements_sliced = elements[2:]
-                for i in range(0, len(elements_sliced), 2):
-                    alg_type, alg_id = split_alg(elements_sliced[i].text.strip())
-                    validation_date = elements_sliced[i + 1].text.strip()
-                    fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, implementation, alg_type, validation_date)
-                    if alg_id not in self.certs:
-                        self.certs[alg_id] = []
-                    self.certs[alg_id].append(fips_alg)
+            tbody_contents = table.find('tbody').find_all('tr')
+            vendor = product = validation = date = ""
+            for tr in tbody_contents:
+                elements = tr.find_all('td')
+                vendor, date, product, validation = FIPSAlgorithmDataset._extract_algorithm_information(
+                    elements, vendor, date, product, validation
+                )
+
+                alg_type, alg_id = split_alg(validation)
+                fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, product, alg_type, date)
+                if alg_id not in self.certs:
+                    self.certs[alg_id] = []
+                self.certs[alg_id].append(fips_alg)
 
     def convert_all_pdfs(self):
         raise NotImplementedError('Not meant to be implemented')
diff --git a/sec_certs/helpers.py b/sec_certs/helpers.py
index 87f6e25e..80a54549 100644
--- a/sec_certs/helpers.py
+++ b/sec_certs/helpers.py
@@ -134,8 +134,15 @@ def find_tables_iterative(file_text: str) -> List[int]:
             current_page += 1
         if line.startswith('Table ') or line.startswith('Exhibit'):
             pages.add(current_page)
+            pages.add(current_page + 1)
+            if current_page > 2:
+                pages.add(current_page - 1)
     if not pages:
         logger.warning('No pages found')
+    for page in pages:
+        if page > current_page - 1:
+            return list(pages - {page})
+
     return list(pages)
 
 
@@ -491,11 +498,14 @@ def extract_keywords(filepath: Path) -> Tuple[int, Optional[Dict[str, str]]]:
     return constants.RETURNCODE_OK, result
 
 
-def analyze_matched_algs(data: Dict):
+def plot_dataframe_graph(data: Dict, label: str, file_name: str, density: bool = False, cumulative: bool = False, bins: int = 50, log: bool = True, show: bool = True):
     pd_data = pd.Series(data)
-    pd_data.hist(bins=50)
-    plt.show()
+    pd_data.hist(bins=bins, label=label, density=density, cumulative=cumulative)
+    plt.savefig(file_name)
+    if show:
+        plt.show()
 
-    sorted_data = pd_data.value_counts(ascending=True)
+    if log:
+        sorted_data = pd_data.value_counts(ascending=True)
 
-    logging.info(sorted_data.where(sorted_data > 1).dropna())
-\ No newline at end of file
+        logging.info(sorted_data.where(sorted_data > 1).dropna())
+\ No newline at end of file
diff --git a/sec_certs/settings.yaml b/sec_certs/settings.yaml
index 0c8b130f..9b07a8be 100644
--- a/sec_certs/settings.yaml
+++ b/sec_certs/settings.yaml
@@ -6,7 +6,14 @@ smallest_certificate_id_to_connect:
 year_difference_between_validations:
   description: During validation we don't connect certificates with validation dates
     difference higher than _this_
-  value: 5
+  value: 7
 use_text_with_newlines_during_parsing:
   description: During keyword search, search in text with newlines
   value: true
+ignore_first_page:
+  description: During keyword search, first page usually contains addresses - ignore it.
+  value: true
+cert_threshold:
+  description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty.
+  value: 5
+
diff --git a/test/data/test_fips_oop/algorithms.json b/test/data/test_fips_oop/algorithms.json
new file mode 100644
index 00000000..7845d93d
--- /dev/null
+++ b/test/data/test_fips_oop/algorithms.json
@@ -0,0 +1,513 @@
+{
+    "_type": "FIPSAlgorithmDataset",
+    "certs": {
+        "2351": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "9/21/2018",
+                "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A11 Bionic, Assembler_VNG)",
+                "type": "DRBG",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "11/27/2015",
+                "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6)",
+                "type": "HMAC",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "1/27/2017",
+                "implementation": "OpenSSL using assembler for AES and SHA",
+                "type": "RSA",
+                "vendor": "Canonical Ltd."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "1/19/2017",
+                "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7020",
+                "type": "TDES",
+                "vendor": "Juniper Networks, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "3/8/2013",
+                "implementation": "Samsung OpenSSL Cryptographic Module",
+                "type": "AES",
+                "vendor": "Samsung Electronics Co., Ltd"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2351",
+                "date": "3/7/2014",
+                "implementation": "Symantec PGP Cryptographic Engine",
+                "type": "SHS",
+                "vendor": "Symantec Corporation"
+            }
+        ],
+        "2352": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "9/21/2018",
+                "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A10X Fusion, Assembler_VNG)",
+                "type": "DRBG",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "3/8/2013",
+                "implementation": "AES-256 Core",
+                "type": "AES",
+                "vendor": "Altera Canada"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "11/27/2015",
+                "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6X)",
+                "type": "HMAC",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "1/27/2017",
+                "implementation": "OpenSSL using support from Power ISA 2.07 for AES and SHA",
+                "type": "RSA",
+                "vendor": "Canonical Ltd."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "1/19/2017",
+                "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7130",
+                "type": "TDES",
+                "vendor": "Juniper Networks, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2352",
+                "date": "3/21/2014",
+                "implementation": "Karnak SHA in Hardware",
+                "type": "SHS",
+                "vendor": "Seagate Technology, LLC."
+            }
+        ],
+        "2600": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2600",
+                "date": "12/15/2017",
+                "implementation": "Apple iOS CoreCrypto v8 Kernel Module (Generic Software Implementation)",
+                "type": "TDES",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2600",
+                "date": "6/10/2016",
+                "implementation": "IOS Common Cryptographic Module (IC2M) Algorithm Module",
+                "type": "HMAC",
+                "vendor": "Cisco Systems, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2600",
+                "date": "8/16/2013",
+                "implementation": "Blade System Virtual Connect",
+                "type": "AES",
+                "vendor": "Hewlett-Packard Development Company, L.P."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2600",
+                "date": "12/5/2014",
+                "implementation": "Cryptographic Security Kernel",
+                "type": "SHS",
+                "vendor": "IBM Corporation"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2600",
+                "date": "9/1/2017",
+                "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 31bit",
+                "type": "RSA",
+                "vendor": "IBM Corporation"
+            }
+        ],
+        "2601": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2601",
+                "date": "12/5/2014",
+                "implementation": "SHA256 Library on Canon MFP Security Chip",
+                "type": "SHS",
+                "vendor": "Canon Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2601",
+                "date": "8/16/2013",
+                "implementation": "Dell AppAssure Crypto Library",
+                "type": "AES",
+                "vendor": "Dell, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2601",
+                "date": "6/10/2016",
+                "implementation": "EFJ Communication Cryptographic Library",
+                "type": "HMAC",
+                "vendor": "EFJohnson Technologies"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2601",
+                "date": "9/1/2017",
+                "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 64bit",
+                "type": "RSA",
+                "vendor": "IBM Corporation"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2601",
+                "date": "12/22/2017",
+                "implementation": "Oracle Linux 7 GnuTLS C Implementation",
+                "type": "TDES",
+                "vendor": "Oracle Corporation"
+            }
+        ],
+        "2602": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2602",
+                "date": "12/22/2017",
+                "implementation": "Apple tvOS CoreCrypto Kernel Module v8.0 (Generic Software Implementation)",
+                "type": "TDES",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2602",
+                "date": "6/10/2016",
+                "implementation": "FIPS-ALGORITHMS.1.5.0v",
+                "type": "HMAC",
+                "vendor": "Mercury Systems"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2602",
+                "date": "8/16/2013",
+                "implementation": "RSA BSAFE\u00ae Crypto-J Software Module",
+                "type": "AES",
+                "vendor": "RSA Security, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2602",
+                "date": "12/5/2014",
+                "implementation": "SHA Library",
+                "type": "SHS",
+                "vendor": "Sage Microelectronics Corp"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2602",
+                "date": "9/1/2017",
+                "implementation": "Bouncy Castle FIPS Java API",
+                "type": "RSA",
+                "vendor": "Legion of the Bouncy Castle Inc."
+            }
+        ],
+        "2700": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2700",
+                "date": "3/13/2015",
+                "implementation": "Apple OSX CoreCrypto Module (Generic, Xeon)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2700",
+                "date": "10/21/2016",
+                "implementation": "Axway OpenSSL",
+                "type": "HMAC",
+                "vendor": "Axway Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2700",
+                "date": "11/30/2017",
+                "implementation": "Brocade Fabric OS FIPS Cryptographic Module",
+                "type": "RSA",
+                "vendor": "Brocade Communications Systems LLC"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2700",
+                "date": "3/30/2018",
+                "implementation": "Junos OS 17.4R1-S1 - Dataplane",
+                "type": "TDES",
+                "vendor": "Juniper Networks, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2700",
+                "date": "11/29/2013",
+                "implementation": "VMware NSS Cryptographic Module",
+                "type": "AES",
+                "vendor": "VMware, Inc."
+            }
+        ],
+        "2701": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2701",
+                "date": "3/30/2018",
+                "implementation": "Security Builder GSE-J Crypto Core",
+                "type": "TDES",
+                "vendor": "BlackBerry Certicom"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2701",
+                "date": "11/30/2017",
+                "implementation": "ngfips_rsa",
+                "type": "RSA",
+                "vendor": "Cavium, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2701",
+                "date": "10/28/2016",
+                "implementation": "Cisco_SSL_Implementation-1",
+                "type": "HMAC",
+                "vendor": "Cisco Systems, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2701",
+                "date": "3/13/2015",
+                "implementation": "RSA BSAFE\u00ae Crypto-J JSAFE and JCE Software Module",
+                "type": "SHS",
+                "vendor": "RSA, The Security Division of EMC"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2701",
+                "date": "11/29/2013",
+                "implementation": "VMware Cryptographic Module",
+                "type": "AES",
+                "vendor": "VMware, Inc."
+            }
+        ],
+        "2702": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "2702",
+                "date": "3/30/2018",
+                "implementation": "Security Builder GSE-J Crypto Core",
+                "type": "TDES",
+                "vendor": "BlackBerry Certicom"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2702",
+                "date": "11/30/2017",
+                "implementation": "DELPHI RSA2048 Signature Verification Algorithm Implementation",
+                "type": "RSA",
+                "vendor": "DELPHI"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2702",
+                "date": "12/6/2013",
+                "implementation": "RSA BSAFE Crypto-J",
+                "type": "AES",
+                "vendor": "McAfee, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2702",
+                "date": "10/28/2016",
+                "implementation": "OpenSSL Crypto Library",
+                "type": "HMAC",
+                "vendor": "MikroM GmbH"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "2702",
+                "date": "3/13/2015",
+                "implementation": "OpenSSL FIPS Object Module",
+                "type": "SHS",
+                "vendor": "OpenSSL Validation Services, Inc."
+            }
+        ],
+        "3415": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3415",
+                "date": "1/26/2018",
+                "implementation": "Apple Secure Key Store CoreCrypto Module (Generic Software Implementation)",
+                "type": "HMAC",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3415",
+                "date": "6/5/2015",
+                "implementation": "Motorola Solutions Subscriber \u00b5Mace AES256",
+                "type": "AES",
+                "vendor": "Motorola Solutions Inc"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3415",
+                "date": "11/18/2016",
+                "implementation": "Secure Parser Library",
+                "type": "SHS",
+                "vendor": "Security First Corp."
+            }
+        ],
+        "3426": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3426",
+                "date": "6/11/2015",
+                "implementation": "Apple iOS CoreCrypto Module (KeyWrap A8 32 bit)",
+                "type": "AES",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3426",
+                "date": "12/2/2016",
+                "implementation": "Apple iOS CoreCrypto Module (Generic)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3426",
+                "date": "1/26/2018",
+                "implementation": "Apple Secure Key Store CoreCrypto Module (VNG)",
+                "type": "HMAC",
+                "vendor": "Apple Inc."
+            }
+        ],
+        "3427": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3427",
+                "date": "12/2/2016",
+                "implementation": "Apple iOS CoreCrypto Module (Generic)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3427",
+                "date": "1/26/2018",
+                "implementation": "Forcepoint NGFW FIPS Java API",
+                "type": "HMAC",
+                "vendor": "Forcepoint"
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3427",
+                "date": "6/11/2015",
+                "implementation": "HP ESKM OpenSSL",
+                "type": "AES",
+                "vendor": "Hewlett Packard Enterprise"
+            }
+        ],
+        "3447": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3447",
+                "date": "12/2/2016",
+                "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3447",
+                "date": "7/2/2015",
+                "implementation": "FireEye Algorithms Implementation",
+                "type": "AES",
+                "vendor": "FireEye, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3447",
+                "date": "2/9/2018",
+                "implementation": "OpenSSL (no AVX2/AVX/AESNI/SSSE3, x86_64, 64-bit library)",
+                "type": "HMAC",
+                "vendor": "Red Hat, Inc."
+            }
+        ],
+        "3451": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3451",
+                "date": "12/2/2016",
+                "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3451",
+                "date": "7/2/2015",
+                "implementation": "OpenSSL FIPS Object Module",
+                "type": "AES",
+                "vendor": "OpenSSL Software Foundation, Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3451",
+                "date": "2/9/2018",
+                "implementation": "OpenSSL (no AVX2/AVX/AESNI, x86_64, 64-bit library)",
+                "type": "HMAC",
+                "vendor": "Red Hat, Inc."
+            }
+        ],
+        "3464": [
+            {
+                "_type": "Algorithm",
+                "cert_id": "3464",
+                "date": "12/9/2016",
+                "implementation": "Apple OSX CoreCrypto Module (Generic)",
+                "type": "SHS",
+                "vendor": "Apple Inc."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3464",
+                "date": "7/10/2015",
+                "implementation": "Security Builder Linux Kernel Crypto Core",
+                "type": "AES",
+                "vendor": "Certicom Corp."
+            },
+            {
+                "_type": "Algorithm",
+                "cert_id": "3464",
+                "date": "2/9/2018",
+                "implementation": "HPE Secure Encryption Engine v1.1",
+                "type": "HMAC",
+                "vendor": "Hewlett-Packard Development Company, L.P."
+            }
+        ]
+    }
+}
+\ No newline at end of file
diff --git a/test/fips_test_utils.py b/test/fips_test_utils.py
new file mode 100644
index 00000000..94fb406a
--- /dev/null
+++ b/test/fips_test_utils.py
@@ -0,0 +1,51 @@
+from typing import List
+from pathlib import Path
+
+def generate_html(ids: List[str], path: Path):
+    """Write a minimal module-validation search-results page listing `ids` to `path`, encoded as UTF-8."""
+    def generate_entry(certificate_id: str) -> str:
+        return f'''
+            <tr id="cert-row-0">
+                <td class="text-center">
+                    <a href="/projects/cryptographic-module-validation-program/certificate/3898" id="cert-number-link-0">{certificate_id}</a>
+                </td>
+            </tr>
+        '''
+
+    html_head = '''
+    <!DOCTYPE html>
+    <html lang="en-us" xml:lang="en-us">
+    <head>
+        <meta charset="utf-8" />
+        <title>Cryptographic Module Validation Program | CSRC</title>
+        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+        <meta http-equiv="content-style-type" content="text/css" />
+        <meta http-equiv="content-script-type" content="text/javascript" />
+        <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+        <meta name="msapplication-config" content="/CSRC/Media/images/favicons/browserconfig.xml" />
+        <meta name="theme-color" content="#000000" />
+        <meta name="google-site-verification" content="xbrnrVYDgLD-Bd64xHLCt4XsPXzUhQ-4lGMj4TdUUTA" />
+    </head>
+    '''
+    # One table row per id; rows share the same element ids and href, only the link text differs.
+    rows = "".join(f"\n{generate_entry(cert_id)}\n" for cert_id in ids)
+    html_body = f'''
+    <body>
+        <table class="table table-striped table-condensed publications-table table-bordered" id="searchResultsTable">
+            <thead>
+                <tr>
+                    <th class="text-center">Certificate Number</th>
+                    <th class="text-center">Vendor Name</th>
+                    <th class="text-center">Module Name</th>
+                    <th class="text-center">Module Type</th>
+                    <th class="text-center">Validation Date</th>
+                </tr>
+            </thead>
+            <tbody>
+            {rows}
+            </tbody>
+        </table>
+    </body>
+	'''
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(f"{html_head}\n{html_body}\n")
diff --git a/test/settings_test.yaml b/test/settings_test.yaml
new file mode 100644
index 00000000..9b07a8be
--- /dev/null
+++ b/test/settings_test.yaml
@@ -0,0 +1,19 @@
+---
+smallest_certificate_id_to_connect:
+  description: During validation we don't connect certificates with number lower than
+    _this_ to connections
+  value: 40
+year_difference_between_validations:
+  description: During validation we don't connect certificates with validation dates
+    difference higher than _this_
+  value: 7
+use_text_with_newlines_during_parsing:
+  description: During keyword search, search in text with newlines
+  value: true
+ignore_first_page:
+  description: During keyword search, first page usually contains addresses - ignore it.
+  value: true
+cert_threshold:
+  description: Used with --higher-precision-results. Determines how many mismatched algorithms make a certificate count as faulty.
+  value: 5
+
diff --git a/test/test_fips_oop.py b/test/test_fips_oop.py
new file mode 100644
index 00000000..107f0c5d
--- /dev/null
+++ b/test/test_fips_oop.py
@@ -0,0 +1,134 @@
+from unittest import TestCase
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset
+from sec_certs.configuration import config
+from fips_test_utils import generate_html
+
+
+
+def _set_up_dataset(td, certs):
+    """Create a FIPSDataset for *td* and fill it from a generated search page.
+
+    :param td: directory handed to ``FIPSDataset`` and used as the location of the
+        generated ``test_search.html`` (the tests pass a ``TemporaryDirectory`` name)
+    :param certs: certificate IDs handed to ``generate_html``
+    :return: the dataset after ``get_certs_from_web`` was called on the generated page
+    """
+    # Build the page path once instead of concatenating the same literal twice;
+    # going through Path also works when td is given as a Path rather than a str.
+    search_page = str(Path(td) / 'test_search.html')
+    dataset = FIPSDataset({}, Path(td), 'test_dataset', 'fips_test_dataset')
+    generate_html(certs, search_page)
+    dataset.get_certs_from_web(test=search_page, update_json=False)
+    return dataset
+
+
+def _set_up_dataset_for_full(td, certs):
+    """Return the dataset from ``_set_up_dataset`` after running the remaining processing calls on it.
+
+    Used by the connection tests, which afterwards read ``processed.connections``
+    of the individual certificates.
+    """
+    dataset = _set_up_dataset(td, certs)
+    dataset.convert_all_pdfs()
+    dataset.extract_keywords()
+    dataset.extract_certs_from_tables(high_precision=True)
+    # Algorithms are loaded from the JSON file stored under this test module's data directory.
+    dataset.algorithms = FIPSAlgorithmDataset.from_json(Path(__file__).parent / 'data/test_fips_oop/algorithms.json')
+    dataset.finalize_results()
+    return dataset
+
+
+class TestFipsOOP(TestCase):
+    """Checks the number of parsed FIPS certificates and the connections computed between them."""
+
+    def setUp(self) -> None:
+        self.data_dir: Path = Path(__file__).parent / 'data' / 'test_fips_oop'
+        self.dataset = FIPSDataset({}, self.data_dir, 'test_dataset', 'fips_test_dataset')
+        # Each inner list is one chunk of certificate IDs that is processed together.
+        self.certs_to_parse = [
+            ['3099', '2549', '2484', '3038', '2472', '2435', '2471', '1930'],  # openSUSE chunk
+            ['23', '24', '25', '26'],
+            ['3095', '3651', '3093', '3090', '3197', '3196', '3089', '3195', '3480', '3615', '3194', '3091', '3690',
+             '3644', '3527', '3094', '3544', '3096', '3092'],  # microsoft chunk
+            ['2630', '2721', '2997', '2441', '2711', '2633', '2798', '3613', '3733', '2908', '2446', '2742', '2447'],
+            # redhat chunk
+            ['3850', '2779', '2860', '2665', '1883', '3518', '3141', '2590'],  # Document signing chunk
+            ['3493', '3495', '3711', '3176', '3488', '3126', '3269', '3524', '3220', '2398', '3543', '2676', '3313',
+             '3363', '3608', '3158'],  # Chunk referencing openSSL FIPS Object Module SE
+        ]
+        config.load(Path(__file__).parent / 'settings_test.yaml')
+
+    def _assert_connections(self, certs, expected):
+        """Process *certs* with ``_set_up_dataset_for_full`` and compare the connections with *expected*.
+
+        *expected* maps a certificate ID to the set its ``processed.connections`` must
+        equal. Every certificate is checked in its own ``subTest`` so that one mismatch
+        does not hide the remaining ones.
+        """
+        with TemporaryDirectory() as td:
+            dataset = _set_up_dataset_for_full(td, certs)
+            for cert_id, connections in expected.items():
+                with self.subTest(cert_id=cert_id):
+                    self.assertEqual(set(dataset.certs[cert_id].processed.connections), connections)
+
+    def test_size(self):
+        """Every chunk must yield exactly as many parsed certificates as IDs were requested."""
+        for certs in self.certs_to_parse:
+            with TemporaryDirectory() as td:
+                dataset = _set_up_dataset(td, certs)
+                self.assertEqual(len(dataset.certs), len(certs), "Wrong number of parsed certs")
+
+    def test_connections_microsoft(self):
+        """Expected connections inside the microsoft chunk (``certs_to_parse[2]``)."""
+        self._assert_connections(self.certs_to_parse[2], {
+            '3095': {'3093', '3096', '3094'},
+            '3651': {'3615'},
+            '3093': {'3090', '3091'},
+            '3090': {'3089'},
+            '3197': {'3195', '3096', '3196', '3644', '3651'},
+            '3196': {'3194', '3091', '3480', '3615'},
+            '3089': set(),
+            '3195': {'3194', '3091', '3480'},
+            '3480': {'3089'},
+            '3615': {'3089'},
+            '3194': {'3089'},
+            '3091': {'3089'},
+            '3690': {'3644', '3196', '3651'},
+            '3644': {'3615'},
+            '3527': {'3090', '3091'},
+            '3094': {'3090', '3091'},
+            '3544': {'3093', '3096', '3527'},
+            '3096': {'3090', '3194', '3091', '3480'},
+            '3092': {'3093', '3195', '3096', '3644', '3651'},
+        })
+
+    def test_connections_redhat(self):
+        """Expected connections inside the redhat chunk (``certs_to_parse[3]``)."""
+        self._assert_connections(self.certs_to_parse[3], {
+            '2630': {'2441'},
+            '2633': {'2441'},
+            '2441': set(),
+            '2997': {'2711'},
+            '2446': {'2441'},
+            '2447': {'2441'},
+            '3733': {'2441'},
+            '2711': set(),
+            '2908': {'2711'},
+            '3613': {'2997'},
+            '2721': {'2441', '2711'},
+            '2798': {'2721', '2711'},
+            '2742': {'2721', '2711'},
+        })
+
+    def test_docusign_chunk(self):
+        """Expected connections inside the document signing chunk (``certs_to_parse[4]``)."""
+        self._assert_connections(self.certs_to_parse[4], {
+            '3850': {'3518', '1883'},
+            '2779': {'1883'},
+            '2860': {'1883'},
+            '2665': {'1883'},
+            '1883': set(),
+            '3518': {'1883'},
+            '3141': {'1883'},
+            '2590': {'1883'},
+        })
+
+    def test_openssl_chunk(self):
+        """Expected connections inside the chunk referencing the openSSL FIPS Object Module SE (``certs_to_parse[5]``)."""
+        self._assert_connections(self.certs_to_parse[5], {
+            '3493': {'2398'},
+            '3495': {'2398'},
+            '3711': {'3220'},
+            '3176': {'2398'},
+            '3488': {'2398'},
+            '3126': {'3126', '2398'},
+            '3269': {'3269', '3220'},
+            '3524': {'3220'},
+            '3220': {'3220', '2398'},
+            '2398': set(),
+            '3543': {'2398'},
+            '2676': {'2398'},
+            '3313': {'3313', '3220'},
+            '3363': set(),
+            '3608': {'2398'},
+            '3158': {'2398'},
+        })
author	adamjanovsky	2021-05-14 16:27:35 +0200
committer	GitHub	2021-05-14 16:27:35 +0200
commit	cfab313d013b530c5ceed5b29877be71f74da7e8 (patch)
tree	4dda7878a195428061d4e366296f52476e903134
parent	e3c002a63725e9e79ce81a09a7c7055c61ba5010 (diff)
parent	92f49eb5a0b92be60be4ab3a662fcd6487865052 (diff)
download	sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.gz sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.zst sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.zip