about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author adamjanovsky 2021-05-14 16:27:35 +0200
committer GitHub 2021-05-14 16:27:35 +0200
commit cfab313d013b530c5ceed5b29877be71f74da7e8 (patch)
tree 4dda7878a195428061d4e366296f52476e903134
parent e3c002a63725e9e79ce81a09a7c7055c61ba5010 (diff)
parent 92f49eb5a0b92be60be4ab3a662fcd6487865052 (diff)
download sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.gz
sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.zst
sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.zip
Merge pull request #75 from crocs-muni/fips_pr
Changes in CAVP webpages parsing, fixed bugs, tests, and everything from the last PR
-rw-r--r-- examples/fips_oop_demo.py 41
-rw-r--r-- sec_certs/analyze_certificates.py 52
-rw-r--r-- sec_certs/cert_rules.py 107
-rw-r--r-- sec_certs/certificate.py 107
-rw-r--r-- sec_certs/constants.py 2
-rw-r--r-- sec_certs/dataset.py 382
-rw-r--r-- sec_certs/helpers.py 20
-rw-r--r-- sec_certs/settings.yaml 9
-rw-r--r-- test/data/test_fips_oop/algorithms.json 513
-rw-r--r-- test/fips_test_utils.py 51
-rw-r--r-- test/settings_test.yaml 19
-rw-r--r-- test/test_fips_oop.py 134
12 files changed, 1146 insertions, 291 deletions
diff --git a/examples/fips_oop_demo.py b/examples/fips_oop_demo.py
index 7015574e..dd47f74f 100644
--- a/examples/fips_oop_demo.py
+++ b/examples/fips_oop_demo.py
@@ -2,71 +2,64 @@ from pathlib import Path
from datetime import datetime
import logging
import click
+
from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset
from sec_certs.configuration import config
-from sec_certs.helpers import analyze_matched_algs
+
@click.command()
@click.option('--config-file', help='Path to config file')
@click.option('--json-file', help='Path to dataset json file')
-@click.option('--no-download-algs', help='don\'t download algs', is_flag=True)
-def main(config_file, json_file, no_download_algs):
+@click.option('--no-download-algs', help='Redo scan of html files', is_flag=True)
+@click.option('--redo-web-scan', help='Redo scan of PDF files', is_flag=True)
+@click.option('--redo-keyword-scan', help='Don\'t download algs', is_flag=True)
+@click.option('--higher-precision-results',
+ help='Redo table search for certificates with high error rate. Behaviour undefined if used on a newly instantiated dataset.',
+ is_flag=True)
+def main(config_file, json_file, no_download_algs, redo_web_scan, redo_keyword_scan, higher_precision_results):
logging.basicConfig(level=logging.INFO)
start = datetime.now()
# Load config
- config.load(config_file if config_file else 'sec_certs/settings.yaml')
+ config.load(config_file if config_file else '../sec_certs/settings.yaml')
# Create empty dataset
- dset = FIPSDataset({}, Path('./fips_dataset'), 'sample_dataset', 'sample dataset description')
+ dset = FIPSDataset({}, Path('../fips_dataset'), 'sample_dataset', 'sample dataset description')
# this is for creating test dataset, usually with small number of pdfs
# dset = FIPSDataset({}, Path('./fips_test_dataset'), 'small dataset', 'small dataset for keyword testing')
# Load metadata for certificates from CSV and HTML sources
- dset.get_certs_from_web(json_file=json_file, redo=True)
+ dset.get_certs_from_web(json_file=json_file, redo=redo_web_scan)
logging.info(f'Finished parsing. Have dataset with {len(dset)} certificates.')
- # Dump dataset into JSON
- dset.to_json(dset.root_dir / 'fips_full_dataset.json')
logging.info(f'Dataset saved to {dset.root_dir}/fips_full_dataset.json')
logging.info("Converting pdfs")
dset.convert_all_pdfs()
- dset.to_json(dset.root_dir / 'fips_full_dataset.json')
logging.info("Extracting keywords now.")
- dset.extract_keywords(redo=True)
+ dset.extract_keywords(redo=redo_keyword_scan)
logging.info(f'Finished extracting certificates for {len(dset.certs)} items.')
- logging.info("Dumping dataset again...")
- dset.to_json(dset.root_dir / 'fips_full_dataset.json')
logging.info("Searching for tables in pdfs")
- not_decoded_files = dset.extract_certs_from_tables()
+ not_decoded_files = dset.extract_certs_from_tables(higher_precision_results)
logging.info(f"Done. Files not decoded: {not_decoded_files}")
- dset.to_json(dset.root_dir / 'fips_mentioned.json')
logging.info("Parsing algorithms")
if not no_download_algs:
- aset = FIPSAlgorithmDataset({}, Path('fips_dataset/web/algorithms'), 'algorithms', 'sample algs')
+ aset = FIPSAlgorithmDataset({}, Path(dset.root_dir / 'web/algorithms'), 'algorithms', 'sample algs')
aset.get_certs_from_web()
+ logging.info(f'Finished parsing. Have algorithm dataset with {len(aset)} algorithm numbers.')
dset.algorithms = aset
logging.info("finalizing results.")
-
dset.finalize_results()
- logging.info('dump again')
- dset.to_json(dset.root_dir / 'fips_full_dataset.json')
-
- dset.get_dot_graph('different_new')
-
- data = dset.match_algs()
- analyze_matched_algs(data)
-
+ dset.plot_graphs(show=False)
end = datetime.now()
logging.info(f'The computation took {(end - start)} seconds.')
diff --git a/sec_certs/analyze_certificates.py b/sec_certs/analyze_certificates.py
index f20a0e2e..f513664b 100644
--- a/sec_certs/analyze_certificates.py
+++ b/sec_certs/analyze_certificates.py
@@ -313,6 +313,11 @@ def build_cert_references(filter_rules_group, all_items_found):
# build list of references
referenced_by = {}
for cert_long_id in all_items_found.keys():
+ # handle FIPS
+ if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+ referenced_by[cert_long_id] = copy.deepcopy(all_items_found[cert_long_id]['processed']['connections'])
+ continue
+
# do not continue if no keywords were extracted ()
if 'keywords_scan' not in all_items_found[cert_long_id].keys():
continue
@@ -368,9 +373,15 @@ def build_cert_references(filter_rules_group, all_items_found):
def analyze_references_graph(filter_rules_group, all_items_found, filter_label):
+ handling_fips_items = False
# build cert_id to item name mapping
certid_info = {}
for cert_long_id in all_items_found.keys():
+ if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+ certid_info[cert_long_id] = cert_long_id
+ handling_fips_items = True
+ continue
+
cert = all_items_found[cert_long_id]
if is_in_dict(cert, ['processed', 'cert_id']):
if is_in_dict(cert, ['frontpage_scan', 'cert_item']):
@@ -383,6 +394,10 @@ def analyze_references_graph(filter_rules_group, all_items_found, filter_label):
# build cert_id to cert_long_id mapping
cert_id_to_long_id_mapping = {}
for cert_long_id in all_items_found.keys():
+ if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']:
+ cert_id_to_long_id_mapping[cert_long_id] = cert_long_id
+ continue
+
cert = all_items_found[cert_long_id]
if is_in_dict(cert, ['processed', 'cert_id']):
if is_in_dict(cert, ['frontpage_scan', 'cert_item']):
@@ -483,7 +498,10 @@ def plot_schemes_multi_line_graph(x_ticks, data, prominent_data, x_label, y_labe
# change line type to prevent color repetitions
num_lines_plotted += 1
- color_index += 1
+ if color_index < len(GRAPHS_COLOR_PALETTE) - 1:
+ color_index += 1
+ else:
+ color_index = 0
plt.rcParams.update({'font.size': 16})
plt.legend(loc=2)
@@ -1125,28 +1143,30 @@ def transform_fips_to_cc_dict(all_cert_items_fips):
cc_item = {}
cc_item["csv_scan"] = {}
- cc_item["frontpage_scan"] = {}
- cc_item["keywords_scan"] = {}
- cc_item["pdfmeta_scan"] = {}
- cc_item["processed"] = {}
+ cc_item["frontpage_scan"] = {'FIPS Certificate': 1}
+ cc_item["keywords_scan"] = fips_item['pdf_scan']['keywords']
+ cc_item["pdfmeta_scan"] = fips_item['pdf_scan']
+ cc_item["processed"] = fips_item['processed']
+
+ fips_web_scan = fips_item['web_scan']
- cc_item["processed"]["cc_manufacturer_list"] = fips_item["vendor"]
- cc_item['processed']['cc_manufacturer_simple_list'] = [fips_item["vendor"]]
+ cc_item["processed"]["cc_manufacturer_list"] = fips_web_scan["vendor"]
+ cc_item['processed']['cc_manufacturer_simple_list'] = [fips_web_scan["vendor"]]
cc_item["processed"]["cert_id"] = fips_item["cert_id"]
- cc_item["processed"]["cert_lab"] = fips_item["lab"]
- if fips_item["exceptions"] is None:
- cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"]
+ cc_item["processed"]["cert_lab"] = fips_web_scan["lab"]
+ if fips_web_scan["exceptions"] is None:
+ cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"]
else:
- cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"] + '+'
+ cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"] + '+'
cc_item['csv_scan']['cc_scheme'] = 'NIST'
- cc_item["csv_scan"]["cc_certification_date"] = fips_item["date_validation"][0]
- cc_item["csv_scan"]["cc_archived_date"] = fips_item["date_sunset"]
- cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_item["type"], fips_item["embodiment"])
+ cc_item["csv_scan"]["cc_certification_date"] = fips_web_scan["date_validation"][0]
+ cc_item["csv_scan"]["cc_archived_date"] = fips_web_scan["date_sunset"]
+ cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_web_scan["module_type"], fips_web_scan["embodiment"])
cc_item["processed"]["cc_security_level_augments"] = []
- if fips_item["exceptions"]:
- for exception in fips_item["exceptions"]:
+ if fips_web_scan["exceptions"]:
+ for exception in fips_web_scan["exceptions"]:
try:
ex_name, ex_level = exception.split(':')
ex_name = ex_name.strip()
diff --git a/sec_certs/cert_rules.py b/sec_certs/cert_rules.py
index 3be285c5..ba857300 100644
--- a/sec_certs/cert_rules.py
+++ b/sec_certs/cert_rules.py
@@ -321,64 +321,65 @@ rules_other = [
rules_fips_remove_algorithm_ids = [
# --- HMAC(-SHA)(-1) - (bits) (method) ((hardware/firmware cert) #id) ---
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{4})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{3})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{2})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{1})",
+# + added (and #id) everywhere
+ r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*and[\s#]*\d+)?",
+ r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*and[\s#]*\d+)?",
+ r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*and[\s#]*\d+)?",
+ r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*and[\s#]*\d+)?",
# --- same as above, without hw or fw ---
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
- r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+ r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+ r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+ r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+ r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
# --- SHS/A - (bits) (method) ((cert #) numbers) ---
- r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{4})(?:\)?\[#?\d+\])?",
- r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{3})(?:\)?\[#?\d+\])?",
- r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{2})(?:\)?\[#?\d+\])?",
- r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{1})(?:\)?\[#?\d+\])?",
+ r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+ r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+ r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
+ r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?",
# --- RSA (bits) (method) ((cert #)) ---
- r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
- r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
- r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
- r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+ r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+ r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+ r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+ r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
# --- RSA (SSA) (PKCS) (version) (#) ---
- r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{4})?",
- r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{3})?",
- r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{2})?",
- r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{1})?",
+ r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{4})?",
+ r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{3})?",
+ r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{2})?",
+ r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{1})?",
# --- AES (bits) (method) ((cert #)) ---
- r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})(?:\)?\[#?\d+\])?",
- r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})(?:\)?\[#?\d+\])?",
- r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})(?:\)?\[#?\d+\])?",
- r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})(?:\)?\[#?\d+\])?",
+ r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?",
# --- Diffie Hellman (CVL) ((cert #)) ---
- r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{4})",
- r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{3})",
- r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{2})",
- r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{1})",
+ r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{4})",
+ r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{3})",
+ r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{2})",
+ r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{1})",
# --- DRBG (bits) (method) (cert #) ---
- r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
- r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
- r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
- r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+ r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+ r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+ r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+ r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
# --- DES (bits) (method) (cert #)
- r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
- r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
- r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
- r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+ r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*?and[\s#]*?(\d+))?",
+ r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*?and[\s#]*?(\d+))?",
# --- DSA (bits) (method) (cert #)
- r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})",
- r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})",
- r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})",
- r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})",
+ r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})",
+ r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})",
+ r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})",
+ r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})",
# --- platforms (#)+ - this is used in modification history ---
r"[Pp]latforms? #\d+(?:#\d+|,| |-|and)*[^\n]*",
@@ -400,12 +401,14 @@ rules_fips_remove_algorithm_ids = [
# --- PKCS (#) ---
r"PKCS[\s]?#?\d+",
- r"PKSC[\s]?#?\d+" # typo, #625
-]
+ r"PKSC[\s]?#?\d+", # typo, #625
+# --- # C and # A (just in case) ---
+ r"#\s+?[Cc]\d+",
+ r"#\s+?[Aa]\d+"
+]
rules_fips_to_remove = [
# --- random words found ---
- r"Survey #192", # why would they get an address like this /o\ cert 2079
r"[Ss]lot #\d", # a card slot, #2069
r"[Ss]eals? ?\(?#\d - #\d", # #1232
r"\[#\d*\]", # some certs use this as references
@@ -413,16 +416,19 @@ rules_fips_to_remove = [
r"[Pp]ower [Ss]upply #\d", # #604
r"TEL #\d and #\d", # #3337
r"#\d+ - #\d+", # labels, seals... #1232
+ r"#\d+‐#?\d+", # labels, seals... #3530
+ r"#\d+ to #?\d+", # labels, seals... #3058
+ r"see #\d+", # labels, seals... #3058
r"#\d+, ?#\d+",
r"#?\d+ and #?\d+",
r"label \(#\d+\)",
r"[Ll]abel #\d+",
r"\(#\d\)",
r"IETF[25\s]*RFC[26\s]*#\d+", # #3425
- r"Bendix Road North #760", # #3325
- r"5080 Spectrum Drive, #1000E",
r"Document # 540-105000-A1",
r"Certificate #2287-1 from EMCE Engineering", # ???
+ r"[sS]cenarios?\s?#\d+", # 3789
+ r"#\d+\s?\(\S\)", # 2159
]
rules_fips_cert = [
@@ -430,10 +436,10 @@ rules_fips_cert = [
# r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{3})",
# r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{2})",
# r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{1})
- r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4}[^\d])",
- r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3}[^\d])",
- r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2}[^\d])",
- r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1}[^\d])"
+ r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4})(?!\d)",
+ r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3})(?!\d)",
+ r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2})(?!\d)",
+ r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1})(?!\d)"
]
# rule still too "general"
@@ -508,8 +514,7 @@ fips_rules['rules_to_remove'] = rules_fips_to_remove
fips_rules['rules_security_level'] = rules_fips_security_level
fips_rules['rules_cert_id'] = rules_fips_cert
fips_common_rules = copy.deepcopy(common_rules) # make separate copy not to process cc rules by fips's re.compile
-#fips_rules.update(fips_common_rules)
for rule in fips_rules:
for current_rule in range(len(fips_rules[rule])):
- fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule]) \ No newline at end of file
+ fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule])
diff --git a/sec_certs/certificate.py b/sec_certs/certificate.py
index 7b0770de..98ddc6bc 100644
--- a/sec_certs/certificate.py
+++ b/sec_certs/certificate.py
@@ -133,7 +133,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
algorithms: Optional[List[Dict[str, str]]]
tested_conf: Optional[List[str]]
description: Optional[str]
- mentioned_certs: Optional[List[str]]
+ mentioned_certs: Optional[Dict[str, Dict[str, int]]]
vendor: Optional[str]
vendor_www: Optional[str]
lab: Optional[str]
@@ -147,6 +147,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
revoked_link: Optional[str]
sw_versions: Optional[str]
product_url: Optional[str]
+ connections: List[str]
def __post_init__(self):
self.date_validation = [parser.parse(x).date() for x in
@@ -177,6 +178,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
cert_id: int
keywords: Dict
algorithms: List
+ connections: List[str]
@property
def dgst(self):
@@ -199,9 +201,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
@dataclass(eq=True)
class Processed(ComplexSerializableType):
- keywords: Optional[Dict]
- algorithms: Dict
- connections: List
+ keywords: Optional[Dict[str, Dict]]
+ algorithms: Dict[str, Dict]
+ connections: List[str]
+ unmatched_algs: int
@property
def dgst(self):
@@ -263,26 +266,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
'type': None, 'embodiment': None, 'tested_conf': None, 'description': None,
'vendor': None, 'vendor_www': None, 'lab': None, 'lab_nvlap': None,
'historical_reason': None, 'revoked_reason': None, 'revoked_link': None, 'algorithms': [],
- 'mentioned_certs': [], 'tables_done': False, 'security_policy_www': None, 'certificate_www': None,
+ 'mentioned_certs': {}, 'tables_done': False, 'security_policy_www': None, 'certificate_www': None,
'hw_versions': None, 'fw_versions': None, 'sw_versions': None, 'product_url': None}
return d
@staticmethod
- def parse_caveat(current_text: str) -> List:
+ def parse_caveat(current_text: str) -> Dict[str, Dict[str, int]]:
"""
Parses content of "Caveat" of FIPS CMVP .html file
:param current_text: text of "Caveat"
- :return: list of all found algorithm IDs
+ :return: dictionary of all found algorithm IDs
"""
- ids_found = []
- r_key = r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+)"
+ ids_found = {}
+ r_key = r"(?P<word>\w+)?\s?(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)+(?P<id>\d+)"
for m in re.finditer(r_key, current_text):
- if r_key in ids_found and m.group() in ids_found[0]:
- ids_found[0][m.group()]['count'] += 1
+ if m.group('word') and m.group('word').lower() in {'rsa', 'shs', 'dsa', 'pkcs', 'aes'}:
+ continue
+ if m.group('id') in ids_found:
+ ids_found[m.group('id')]['count'] += 1
else:
- ids_found.append(
- {r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+?})": {m.group(): {'count': 1}}})
+ ids_found[m.group('id')] = {'count': 1}
return ids_found
@@ -302,7 +306,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
for m in re.finditer(reg, current_text):
set_items.add(m.group())
- return [{"Certificate": list(set_items)}]
+ return [{"Certificate": list(set_items)}] if len(set_items) > 0 else []
@staticmethod
def parse_table(element: Union[Tag, NavigableString]) -> List[Dict]:
@@ -315,9 +319,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
trs = element.find_all('tr')
for tr in trs:
tds = tr.find_all('td')
+ cert = FIPSCertificate.extract_algorithm_certificates(tds[1].text)
found_items.append(
{'Name': tds[0].text,
- 'Certificate': FIPSCertificate.extract_algorithm_certificates(tds[1].text)[0]['Certificate'],
+ 'Certificate': cert[0]['Certificate'] if cert != [] else [],
'Links': [str(x) for x in tds[1].find_all('a')],
'Raw': str(tr)})
@@ -335,8 +340,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
elif 'caveat' in pairs[title]:
html_items_found[pairs[title]] = content
- html_items_found['mentioned_certs'] += FIPSCertificate.parse_caveat(
- content)
+ html_items_found['mentioned_certs'].update(FIPSCertificate.parse_caveat(
+ content))
elif 'FIPS Algorithms' in title:
html_items_found['algorithms'] += FIPSCertificate.parse_table(
@@ -486,13 +491,16 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
items_found['revoked_reason'] if 'revoked_reason' in items_found else None,
items_found['revoked_link'] if 'revoked_link' in items_found else None,
items_found['sw_versions'] if 'sw_versions' in items_found else None,
- items_found['product_url']) if 'product_url' in items_found else None,
+ items_found['product_url'] if 'product_url' in items_found else None,
+ []
+ ), # connections
FIPSCertificate.PdfScan(
items_found['cert_id'],
{} if not initialized else initialized.pdf_scan.keywords,
- [] if not initialized else initialized.pdf_scan.algorithms
+ [] if not initialized else initialized.pdf_scan.algorithms,
+ [] # connections
),
- FIPSCertificate.Processed(None, {}, []),
+ FIPSCertificate.Processed(None, {}, [], 0),
state
)
@@ -508,6 +516,17 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
cert.state.txt_state = True
return cert
+
+ @staticmethod
+ def _declare_state(text: str):
+ """
+ If less than half of the text is formed of alphabet characters,
+ we declare the security policy as "non-parsable"
+ :param text: security policy content
+ :return: True if parsable, otherwise False
+ """
+ return len(text) * 0.5 <= len(''.join(filter(str.isalpha, text)))
+
@staticmethod
def find_keywords(cert: 'FIPSCertificate') -> Tuple[Optional[Dict], 'FIPSCertificate']:
if not cert.state.txt_state:
@@ -518,8 +537,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
text_to_parse = text_with_newlines if config.use_text_with_newlines_during_parsing['value'] else text
- items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse),
- cert.web_scan.algorithms)
+ cert.state.txt_state = FIPSCertificate._declare_state(text)
+
+ if config.ignore_first_page:
+ text_to_parse = text_to_parse[text_to_parse.index(" "):]
+
+ items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse))
save_modified_cert_file(cert.state.fragment_path.with_suffix('.fips.txt'), fips_text, unicode_error)
@@ -550,15 +573,11 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
for web_alg in alg_list:
if ''.join(filter(str.isdigit, web_alg)) not in all_algorithms:
not_found.append(web_alg)
- logger.error(
- f"For cert {cert.dgst}:\n\tNOT FOUND: {len(not_found)}\n"
- f"\tFOUND: {sum([len(a['Certificate']) for a in cert.web_scan.algorithms]) - len(not_found)}")
- logger.error(f"Not found: {not_found}")
return len(not_found)
@staticmethod
def remove_platforms(text_to_parse: str):
- pat = re.compile(r"(?:modification|revision|change) history\n[\s\S]*? ", re.IGNORECASE)
+ pat = re.compile(r"(?:(?:modification|revision|change) history|version control)\n[\s\S]*? ", re.IGNORECASE)
for match in pat.finditer(text_to_parse):
text_to_parse = text_to_parse.replace(
match.group(), 'x' * len(match.group()))
@@ -566,7 +585,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
@staticmethod
def parse_cert_file_common(text_to_parse: str, whole_text_with_newlines: str,
- search_rules: Dict) -> Tuple[Optional[Dict], str]:
+ search_rules: Dict) -> Tuple[Optional[Dict[Pattern, Dict]], str]:
# apply all rules
items_found_all = {}
for rule_group in search_rules.keys():
@@ -632,10 +651,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
return items_found_all, whole_text_with_newlines
@staticmethod
- def parse_cert_file(text_to_parse: str, algorithms: List[Dict]) \
- -> Tuple[Optional[Dict], str]:
+ def parse_cert_file(text_to_parse: str) -> Tuple[Optional[Dict[Pattern, Dict]], str]:
# apply all rules
items_found_all: Dict = {}
+
for rule_group in fips_rules.keys():
if rule_group not in items_found_all:
items_found_all[rule_group] = {}
@@ -667,21 +686,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
return items_found_all, text_to_parse
@staticmethod
- def analyze_tables(cert: 'FIPSCertificate') -> Tuple[bool, 'FIPSCertificate', List]:
+ def analyze_tables(tup: Tuple['FIPSCertificate', bool]) -> Tuple[bool, 'FIPSCertificate', List]:
+ cert, precision = tup
+ if not (precision and cert.state.tables_done) \
+ or (precision and cert.processed.unmatched_algs < config.cert_threshold['value']):
+ return cert.state.tables_done, cert, []
+
cert_file = cert.state.sp_path
txt_file = cert_file.with_suffix('.pdf.txt')
with open(txt_file, 'r', encoding='utf-8') as f:
tables = helpers.find_tables(f.read(), txt_file)
+ all_pages = precision and cert.processed.unmatched_algs > config.cert_threshold['value'] # bool value
lst: List = []
if tables:
try:
- data = read_pdf(cert_file, pages=tables, silent=True)
+ data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True)
except Exception as e:
try:
logger.error(e)
helpers.repair_pdf(cert_file)
- data = read_pdf(cert_file, pages=tables, silent=True)
+ data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True)
except Exception as ex:
logger.error(ex)
@@ -691,11 +716,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
for df in data:
for col in range(len(df.columns)):
if 'cert' in df.columns[col].lower() or 'algo' in df.columns[col].lower():
- lst += FIPSCertificate.extract_algorithm_certificates(
+ tmp = FIPSCertificate.extract_algorithm_certificates(
df.iloc[:, col].to_string(index=False), True)
-
+ lst += tmp if tmp != [{"Certificate": []}] else []
# Parse again if someone picks not so descriptive column names
- lst += FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False))
+ tmp = FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False))
+ lst += tmp if tmp != [{"Certificate": []}] else []
return True, cert, lst
def _create_alg_set(self) -> Set:
@@ -710,9 +736,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
return
self.processed.keywords = copy.deepcopy(self.pdf_scan.keywords)
+ # TODO figure out why can't I delete this
if self.web_scan.mentioned_certs:
- for item in self.web_scan.mentioned_certs:
- self.processed.keywords['rules_cert_id'].update(item)
+ for item, value in self.web_scan.mentioned_certs.items():
+ self.processed.keywords['rules_cert_id'].update({'caveat_item': {item: value}})
alg_set = self._create_alg_set()
@@ -742,8 +769,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType):
@staticmethod
def get_compare(vendor: str):
vendor_split = vendor.replace(',', '') \
- .replace('-', ' ').replace('+', ' ').replace('®', '').split()
- return vendor_split[0] if len(vendor_split) > 0 else vendor
+ .replace('-', ' ').replace('+', ' ').replace('®', '').replace('(R)', '').split()
+ return vendor_split[0][:4] if len(vendor_split) > 0 else vendor
class CommonCriteriaCert(Certificate, ComplexSerializableType):
diff --git a/sec_certs/constants.py b/sec_certs/constants.py
index f19919a6..99d81640 100644
--- a/sec_certs/constants.py
+++ b/sec_certs/constants.py
@@ -46,5 +46,5 @@ TAG_PP_EDITOR = 'pp_editor'
TAG_PP_REVIEWER = 'pp_reviewer'
TAG_KEYWORDS = 'keywords'
FIPS_NOT_AVAILABLE_CERT_SIZE = 10000
-FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=validation&page='
+FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=implementation&page='
diff --git a/sec_certs/dataset.py b/sec_certs/dataset.py
index c1ac4f35..91ec113a 100644
--- a/sec_certs/dataset.py
+++ b/sec_certs/dataset.py
@@ -1,4 +1,5 @@
import os
+import sys
from datetime import datetime
import locale
import logging
@@ -25,6 +26,8 @@ import sec_certs.constants as constants
import sec_certs.cert_processing as cert_processing
import sec_certs.files as files
+from sec_certs.analyze_certificates import plot_bar_graph
+
from sec_certs.certificate import CommonCriteriaCert, Certificate, FIPSCertificate
from sec_certs.serialization import ComplexSerializableType, CustomJSONDecoder, CustomJSONEncoder
from sec_certs.configuration import config
@@ -743,7 +746,8 @@ class CCDataset(Dataset, ComplexSerializableType):
try:
inpts = [int(x) for x in inpts]
if min(inpts) < 0 or max(inpts) > len(x.heuristics.cpe_matches) - 1:
- raise ValueError(f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}')
+ raise ValueError(
+ f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}')
except ValueError as e:
logger.error(f'Bad input from user, repeating instance: {e}')
print(f'Bad input from user, repeating instance: {e}')
@@ -772,7 +776,8 @@ class CCDataset(Dataset, ComplexSerializableType):
verified_cpe_rich_certs = [x for x in self if x.heuristics.verified_cpe_matches]
if not verified_cpe_rich_certs:
- logger.error('No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.')
+ logger.error(
+ 'No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.')
return
for cert in verified_cpe_rich_certs:
cert.compute_heuristics_related_cves(cve_dset)
@@ -820,7 +825,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
not_available.append(i)
return missing, not_available
- def extract_keywords(self, redo=False):
+ def extract_keywords(self, redo=False, update_json: bool = True):
self.fragments_dir.mkdir(parents=True, exist_ok=True)
keywords = cert_processing.process_parallel(FIPSCertificate.find_keywords,
@@ -830,21 +835,27 @@ class FIPSDataset(Dataset, ComplexSerializableType):
use_threading=False)
for keyword, cert in keywords:
self.certs[cert.dgst].pdf_scan.keywords = keyword
+
+ if update_json:
+ self.to_json(self.root_dir / 'fips_full_dataset.json')
- def match_algs(self, show_graph=False) -> Dict:
+ def match_algs(self) -> Dict:
output = {}
+ cert: FIPSCertificate
for cert in self.certs.values():
output[cert.dgst] = FIPSCertificate.match_web_algs_to_pdf(cert)
+ cert.processed.unmatched_algs = output[cert.dgst]
+ output = {k: v for k, v in output.items() if v != 0}
return output
-
def download_all_pdfs(self):
sp_paths, sp_urls = [], []
self.policies_dir.mkdir(exist_ok=True)
for cert_id in list(self.certs.keys()):
- if not (self.policies_dir / f'{cert_id}.pdf').exists() or not self.certs[cert_id].state.txt_state:
+ if not (self.policies_dir / f'{cert_id}.pdf').exists() or (self.certs[cert_id]
+ and not self.certs[cert_id].state.txt_state):
sp_urls.append(
f"https://csrc.nist.gov/CSRC/media/projects/cryptographic-module-validation-program/documents/security-policies/140sp{cert_id}.pdf")
sp_paths.append(self.policies_dir / f"{cert_id}.pdf")
@@ -866,7 +877,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
logging.info(f"downloading {len(html_urls)} module html files")
failed = cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(html_urls, html_paths)),
- constants.N_THREADS)
+ constants.N_THREADS)
failed = [c for c in failed if c]
self.new_files += len(html_urls)
@@ -875,7 +886,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
constants.N_THREADS)
return new_files
- def convert_all_pdfs(self):
+ def convert_all_pdfs(self, update_json: bool = True):
logger.info('Converting FIPS certificate reports to .txt')
tuples = [
(cert, self.policies_dir / f'{cert.cert_id}.pdf', self.policies_dir / f'{cert.cert_id}.pdf.txt')
@@ -884,22 +895,45 @@ class FIPSDataset(Dataset, ComplexSerializableType):
]
cert_processing.process_parallel(FIPSCertificate.convert_pdf_file, tuples, constants.N_THREADS)
- def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None):
+ if update_json:
+ self.to_json(self.root_dir / 'fips_full_dataset.json')
+
+ def prepare_dataset(self, test: Optional[Path] = None):
+ if test:
+ html_files = [test]
+ else:
+ html_files = ['fips_modules_active.html',
+ 'fips_modules_historical.html', 'fips_modules_revoked.html']
+ helpers.download_file(
+ "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0",
+ self.web_dir / "fips_modules_active.html")
+ helpers.download_file(
+ "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0",
+ self.web_dir / "fips_modules_historical.html")
+ helpers.download_file(
+ "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0",
+ self.web_dir / "fips_modules_revoked.html")
+
+ # Parse those files and get list of currently processable files (always)
+ for f in html_files:
+ self._get_certificates_from_html(self.web_dir / f)
+
+ def _get_certificates_from_html(self, html_file: Path) -> None:
+ logger.info(f'Getting certificate ids from {html_file}')
+ with open(html_file, 'r', encoding='utf-8') as handle:
+ html = BeautifulSoup(handle.read(), 'html.parser')
+
+ table = [x for x in html.find(
+ id='searchResultsTable').tbody.contents if x != '\n']
+ for entry in table:
+ self.certs[entry.find('a').text] = None
+
+ def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None, test: Optional[Path] = None, update_json: bool = True):
def download_html_pages() -> List[str]:
new_files = self.download_all_htmls()
self.download_all_pdfs()
return new_files
- def get_certificates_from_html(html_file: Path) -> None:
- logger.info(f'Getting certificate ids from {html_file}')
- with open(html_file, 'r', encoding='utf-8') as handle:
- html = BeautifulSoup(handle.read(), 'html.parser')
-
- table = [x for x in html.find(
- id='searchResultsTable').tbody.contents if x != '\n']
- for entry in table:
- self.certs[entry.find('a').text] = {}
-
logger.info("Downloading required html files")
self.web_dir.mkdir(parents=True, exist_ok=True)
@@ -907,21 +941,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
self.algs_dir.mkdir(exist_ok=True)
# Download files containing all available module certs (always)
- html_files = ['fips_modules_active.html',
- 'fips_modules_historical.html', 'fips_modules_revoked.html']
- helpers.download_file(
- "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0",
- self.web_dir / "fips_modules_active.html")
- helpers.download_file(
- "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0",
- self.web_dir / "fips_modules_historical.html")
- helpers.download_file(
- "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0",
- self.web_dir / "fips_modules_revoked.html")
-
- # Parse those files and get list of currently processable files (always)
- for f in html_files:
- get_certificates_from_html(self.web_dir / f)
+ self.prepare_dataset(test)
logger.info('Downloading certificate html and security policies')
@@ -931,7 +951,7 @@ class FIPSDataset(Dataset, ComplexSerializableType):
if json_file.exists():
logger.info("Certs loaded from previous scanning")
dataset = self.from_json(json_file)
- self.certs = dataset.certs
+ self.certs.update(dataset.certs)
self.algorithms = dataset.algorithms
new_certs = download_html_pages()
@@ -954,15 +974,19 @@ class FIPSDataset(Dataset, ComplexSerializableType):
(self.web_dir / cert_id).with_suffix('.html'),
(self.fragments_dir / cert_id).with_suffix('.txt'), False, None, False),
cert, redo=redo)
+
+ if update_json:
+ self.to_json(self.root_dir / 'fips_full_dataset.json')
- def extract_certs_from_tables(self) -> List[Path]:
+ def extract_certs_from_tables(self, high_precision: bool, update_json: bool = True) -> List[Path]:
"""
Function that extracts algorithm IDs from tables in security policies files.
:return: list of files that couldn't have been decoded
"""
result = cert_processing.process_parallel(FIPSCertificate.analyze_tables,
- [cert for cert in self.certs.values() if
- not cert.state.tables_done and cert.state.txt_state],
+ [(cert, high_precision) for cert in self.certs.values() if
+ (
+ not cert.state.tables_done or high_precision) and cert.state.txt_state],
constants.N_THREADS // 4, # tabula already processes by parallel, so
# it's counterproductive to use all threads
use_threading=False)
@@ -972,6 +996,9 @@ class FIPSDataset(Dataset, ComplexSerializableType):
self.certs[cert.dgst].state.tables_done = state
self.certs[cert.dgst].pdf_scan.algorithms += algorithms
+ if update_json:
+ self.to_json(self.root_dir / 'fips_full_dataset.json')
+
return not_decoded
def remove_algorithms_from_extracted_data(self):
@@ -991,90 +1018,135 @@ class FIPSDataset(Dataset, ComplexSerializableType):
new_algorithms.append({'Certificate': [algorithm]})
certificate.processed.algorithms = new_algorithms
- def validate_results(self):
- """
- Function that validates results and finds the final connection output
- """
-
- def validate_id(processed_cert: FIPSCertificate, cert_candidate: str) -> bool:
+ # returns True if candidates should _not_ be matched
+ def _compare_certs(self, current_certificate: 'FIPSCertificate', other_id: str):
+ cert_first = current_certificate.web_scan.date_validation[0].year
+ cert_last = current_certificate.web_scan.date_validation[-1].year
+ conn_first = self.certs[other_id].web_scan.date_validation[0].year
+ conn_last = self.certs[other_id].web_scan.date_validation[-1].year
- # returns True if candidates should _not_ be matched
- def compare_certs(current_certificate: 'FIPSCertificate', other_id: str):
- cert_first = current_certificate.web_scan.date_validation[0].year
- cert_last = current_certificate.web_scan.date_validation[-1].year
- conn_first = self.certs[other_id].web_scan.date_validation[0].year
- conn_last = self.certs[other_id].web_scan.date_validation[-1].year
+ return cert_first - conn_first > config.year_difference_between_validations['value'] \
+ and cert_last - conn_last > config.year_difference_between_validations['value'] \
+ or cert_first < conn_first
- return cert_first - conn_first > config.year_difference_between_validations['value'] \
- and cert_last - conn_last > config.year_difference_between_validations['value'] \
- or cert_first < conn_first
+ def _remove_false_positives_for_cert(self, current_cert: FIPSCertificate):
+ for rule in current_cert.processed.keywords['rules_cert_id']:
+ matches = current_cert.processed.keywords['rules_cert_id'][rule]
+ current_cert.processed.keywords['rules_cert_id'][rule] = [cert_id for cert_id in matches if
+ self._validate_id(current_cert,
+ cert_id.replace('Cert.', '')
+ .replace('cert.', '')
+ .lstrip("#CA0 "))
+ and cert_id != current_cert.cert_id]
- # "< number" still needs to be used, because of some old certs being revalidated
- if cert_candidate.isdecimal() \
- and int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \
- compare_certs(processed_cert, cert_candidate):
- return False
- if cert_candidate not in self.algorithms.certs:
- return True
+ def _validate_id(self, processed_cert: FIPSCertificate, cert_candidate: str) -> bool:
+ if cert_candidate not in self.certs or not cert_candidate.isdecimal():
+ return False
- for cert_alg in processed_cert.processed.algorithms:
- for certificate in cert_alg['Certificate']:
- curr_id = ''.join(filter(str.isdigit, certificate))
- if curr_id == cert_candidate:
- return False
+ # "< number" still needs to be used, because of some old certs being revalidated
+ if int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \
+ self._compare_certs(processed_cert, cert_candidate):
+ return False
+ if cert_candidate not in self.algorithms.certs:
+ return True
- algs = self.algorithms.certs[cert_candidate]
- for current_alg in algs:
- if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare(
- current_alg.vendor):
+ for cert_alg in processed_cert.processed.algorithms:
+ for certificate in cert_alg['Certificate']:
+ curr_id = ''.join(filter(str.isdigit, certificate))
+ if curr_id == cert_candidate:
return False
- return True
- broken_files = set()
+ algs = self.algorithms.certs[cert_candidate]
+ for current_alg in algs:
+ if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare(
+ current_alg.vendor):
+ return False
+ return True
+
+ @staticmethod
+ def _find_connections(current_cert: FIPSCertificate):
+ current_cert.processed.connections = []
+ current_cert.web_scan.connections = []
+ current_cert.pdf_scan.connections = []
+ if not current_cert.state.file_status or not current_cert.processed.keywords:
+ return
+ if current_cert.processed.keywords['rules_cert_id'] == {}:
+ return
+ for rule in current_cert.processed.keywords['rules_cert_id']:
+ for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
+ cert_id = ''.join(filter(str.isdigit, cert))
+ if cert_id not in current_cert.processed.connections:
+ current_cert.processed.connections.append(cert_id)
+ current_cert.pdf_scan.connections.append(cert_id)
+
+ # We want connections parsed in caveat to bypass age check, because we are 100 % sure they are right
+ if current_cert.web_scan.mentioned_certs:
+ for item in current_cert.web_scan.mentioned_certs:
+ cert_id = ''.join(filter(str.isdigit, item))
+ if cert_id not in current_cert.processed.connections and cert_id != '':
+ current_cert.processed.connections.append(cert_id)
+ current_cert.web_scan.connections.append(cert_id)
+ def validate_results(self):
+ """
+ Function that validates results and finds the final connection output
+ """
current_cert: FIPSCertificate
for current_cert in self.certs.values():
if not current_cert.state.txt_state:
continue
- for rule in current_cert.processed.keywords['rules_cert_id']:
- for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
- cert_id = ''.join(filter(str.isdigit, cert))
-
- if cert_id == '' or cert_id not in self.certs:
- broken_files.add(current_cert.dgst)
- current_cert.state.file_status = False
- break
-
- if broken_files:
- logger.warning("CERTIFICATE FILES WITH WRONG CERTIFICATES PARSED")
- logger.warning(broken_files)
- logger.warning("... skipping these...")
- logger.warning(f"Total non-analyzable files:{len(broken_files)}")
+ self._remove_false_positives_for_cert(current_cert)
for current_cert in self.certs.values():
- current_cert.processed.connections = []
- if not current_cert.state.file_status or not current_cert.processed.keywords:
- continue
- if current_cert.processed.keywords['rules_cert_id'] == {}:
- continue
- for rule in current_cert.processed.keywords['rules_cert_id']:
- for cert in current_cert.processed.keywords['rules_cert_id'][rule]:
- cert_id = ''.join(filter(str.isdigit, cert))
- if cert_id not in current_cert.processed.connections and validate_id(current_cert, cert_id):
- current_cert.processed.connections.append(cert_id)
+ FIPSDataset._find_connections(current_cert)
- def finalize_results(self):
+ def finalize_results(self, update_json: bool = True):
self.unify_algorithms()
self.remove_algorithms_from_extracted_data()
self.validate_results()
- def get_dot_graph(self, output_file_name: str):
+ if update_json:
+ self.to_json(self.root_dir / 'fips_full_dataset.json')
+
+ def _highlight_vendor_in_dot(self, dot: Digraph, current_key: str, highlighted_vendor: str):
+ if self.certs[current_key].web_scan.vendor != highlighted_vendor:
+ return
+
+ dot.attr('node', color='red')
+ if self.certs[current_key].web_scan.status == 'Revoked':
+ dot.attr('node', color='grey32')
+ if self.certs[current_key].web_scan.status == 'Historical':
+ dot.attr('node', color='gold3')
+
+ def _add_colored_node(self, dot: Digraph, current_key: str, highlighted_vendor: str):
+ dot.attr('node', color='lightgreen')
+ if self.certs[current_key].web_scan.status == 'Revoked':
+ dot.attr('node', color='lightgrey')
+ if self.certs[current_key].web_scan.status == 'Historical':
+ dot.attr('node', color='gold')
+ self._highlight_vendor_in_dot(dot, current_key, highlighted_vendor)
+ dot.node(current_key, label=current_key + '&#10;'
+ + self.certs[current_key].web_scan.vendor
+ + '&#10;'
+ + (self.certs[current_key].web_scan.module_name if
+ self.certs[current_key].web_scan.module_name else ''))
+
+ def _get_processed_list(self, connection_list: str, key: str):
+ attr = {'pdf': 'pdf_scan', 'web': 'web_scan', 'processed': 'processed'}[connection_list]
+ return getattr(self.certs[key], attr).connections
+
+ def get_dot_graph(self, output_file_name: str, connection_list: str = 'processed',
+ highlighted_vendor: str = 'Red Hat®, Inc.', show: bool = True):
"""
Function that plots .dot graph of dependencies between certificates
Certificates with at least one dependency are displayed in "{output_file_name}connections.pdf", remaining
certificates are displayed in {output_file_name}single.pdf
+ :param show: display graph right on screen
+ :param highlighted_vendor: vendor whose certificates should be highlighted in red color
:param output_file_name: prefix to "connections", "connections.pdf", "single" and "single.pdf"
+ :param connection_list: 'processed', 'web', or 'pdf' - plots a graph from this source
+ default - processed
"""
dot = Digraph(comment='Certificate ecosystem')
single_dot = Digraph(comment='Modules with no dependencies')
@@ -1083,57 +1155,37 @@ class FIPSDataset(Dataset, ComplexSerializableType):
dot.attr('graph', label='Dependencies', labelloc='t', fontsize='30')
dot.attr('node', style='filled')
- def found_interesting_cert(current_key):
- if self.certs[current_key].web_scan.vendor == highlighted_vendor:
- dot.attr('node', color='red')
- if self.certs[current_key].web_scan.status == 'Revoked':
- dot.attr('node', color='grey32')
- if self.certs[current_key].web_scan.status == 'Historical':
- dot.attr('node', color='gold3')
- if self.certs[current_key].web_scan.vendor == "SUSE, LLC":
- dot.attr('node', color='lightblue')
-
- def color_check(current_key):
- dot.attr('node', color='lightgreen')
- if self.certs[current_key].web_scan.status == 'Revoked':
- dot.attr('node', color='lightgrey')
- if self.certs[current_key].web_scan.status == 'Historical':
- dot.attr('node', color='gold')
- found_interesting_cert(current_key)
- dot.node(current_key,
- label=current_key +
- '&#10;' +
- self.certs[current_key].web_scan.vendor +
- '&#10;' +
- (self.certs[current_key].web_scan.module_name
- if self.certs[current_key].web_scan.module_name else ''))
-
keys = 0
edges = 0
- highlighted_vendor = 'Red Hat®, Inc.'
for key in self.certs:
- if key != 'Not found' and self.certs[key].state.file_status:
- if self.certs[key].processed.connections:
- color_check(key)
- keys += 1
- else:
- single_dot.attr('node', color='lightblue')
- found_interesting_cert(key)
- single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + (
- '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else ''))
+ if key == 'Not found' or not self.certs[key].state.file_status:
+ continue
+
+ processed = self._get_processed_list(connection_list, key)
+
+ if processed:
+ self._add_colored_node(key)
+ keys += 1
+ else:
+ single_dot.attr('node', color='lightblue')
+ self._highlight_vendor_in_dot(key)
+ single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + (
+ '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else ''))
for key in self.certs:
- if key != 'Not found' and self.certs[key].state.file_status:
- for conn in self.certs[key].processed.connections:
- color_check(conn)
- dot.edge(key, conn)
- edges += 1
+ if key == 'Not found' or not self.certs[key].state.file_status:
+ continue
+ processed = self._get_processed_list(connection_list, key)
+ for conn in processed:
+ self._add_colored_node(dot, conn, highlighted_vendor)
+ dot.edge(key, conn)
+ edges += 1
- logging.info(f"rendering {keys} keys and {edges} edges")
+ logging.info(f"rendering for {connection_list}: {keys} keys and {edges} edges")
- dot.render(str(output_file_name) + '_connections', view=True)
- single_dot.render(str(output_file_name) + '_single', view=True)
+ dot.render(self.root_dir / (str(output_file_name) + '_connections'), view=show)
+ single_dot.render(self.root_dir / (str(output_file_name) + '_single'), view=show)
def to_dict(self):
return {'timestamp': self.timestamp, 'sha256_digest': self.sha256_digest,
@@ -1171,6 +1223,11 @@ class FIPSDataset(Dataset, ComplexSerializableType):
return vendors
+ def plot_graphs(self, show: bool = False):
+ self.get_dot_graph('full_graph', show=show)
+ self.get_dot_graph('web_only_graph', 'web', show=show)
+ self.get_dot_graph('pdf_only_graph', 'pdf', show=show)
+
class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
@@ -1187,18 +1244,36 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
soup = BeautifulSoup(alg_file.read(), 'html.parser')
num_pages = soup.select('span[data-total-pages]')[0].attrs
- for i in range(1, int(num_pages['data-total-pages'])):
+ for i in range(2, int(num_pages['data-total-pages'])):
if not (self.root_dir / f'page{i}.html').exists():
algs_urls.append(
constants.FIPS_ALG_URL + str(i))
algs_paths.append(self.root_dir / f"page{i}.html")
+ helpers.download_file(constants.FIPS_ALG_URL + num_pages['data-total-pages'],
+ self.root_dir / f"page{int(num_pages['data-total-pages'])}.html")
logging.info(f"downloading {len(algs_urls)} algs html files")
cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(algs_urls, algs_paths)),
constants.N_THREADS)
self.parse_html()
+ @staticmethod
+ def _extract_algorithm_information(elements, vendor, date, product, validation):
+ for elem in elements:
+ # td > a > (vendor or date)
+ attachments = elem.find_all('a')
+
+ if len(attachments) == 0:
+ vendor = elem.text.strip() if 'vendor-name' in elem['id'] else vendor
+ date = elem.text.strip() if 'validation-date' in elem['id'] else date
+ continue
+
+ for attachment in attachments:
+ product = elem.text.strip() if 'product-name' in attachment['id'] else product
+ validation = elem.text.strip() if 'validation-number' in attachment['id'] else validation
+ return vendor, date, product, validation
+
def parse_html(self):
def split_alg(alg_string):
cert_type = alg_string.rstrip('0123456789')
@@ -1210,18 +1285,19 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType):
html_soup = BeautifulSoup(handle.read(), 'html.parser')
table = html_soup.find('table', class_='table table-condensed publications-table table-bordered')
- spans = table.find_all('span')
- for span in spans:
- elements = span.find_all('td')
- vendor, implementation = elements[0].text, elements[1].text
- elements_sliced = elements[2:]
- for i in range(0, len(elements_sliced), 2):
- alg_type, alg_id = split_alg(elements_sliced[i].text.strip())
- validation_date = elements_sliced[i + 1].text.strip()
- fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, implementation, alg_type, validation_date)
- if alg_id not in self.certs:
- self.certs[alg_id] = []
- self.certs[alg_id].append(fips_alg)
+ tbody_contents = table.find('tbody').find_all('tr')
+ vendor = product = validation = date = ""
+ for tr in tbody_contents:
+ elements = tr.find_all('td')
+ vendor, date, product, validation = FIPSAlgorithmDataset._extract_algorithm_information(
+ elements, vendor, date, product, validation
+ )
+
+ alg_type, alg_id = split_alg(validation)
+ fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, product, alg_type, date)
+ if alg_id not in self.certs:
+ self.certs[alg_id] = []
+ self.certs[alg_id].append(fips_alg)
def convert_all_pdfs(self):
raise NotImplementedError('Not meant to be implemented')
diff --git a/sec_certs/helpers.py b/sec_certs/helpers.py
index 87f6e25e..80a54549 100644
--- a/sec_certs/helpers.py
+++ b/sec_certs/helpers.py
@@ -134,8 +134,15 @@ def find_tables_iterative(file_text: str) -> List[int]:
current_page += 1
if line.startswith('Table ') or line.startswith('Exhibit'):
pages.add(current_page)
+ pages.add(current_page + 1)
+ if current_page > 2:
+ pages.add(current_page - 1)
if not pages:
logger.warning('No pages found')
+ for page in pages:
+ if page > current_page - 1:
+ return list(pages - {page})
+
return list(pages)
@@ -491,11 +498,14 @@ def extract_keywords(filepath: Path) -> Tuple[int, Optional[Dict[str, str]]]:
return constants.RETURNCODE_OK, result
-def analyze_matched_algs(data: Dict):
+def plot_dataframe_graph(data: Dict, label: str, file_name: str, density: bool = False, cumulative: bool = False, bins: int = 50, log: bool = True, show: bool = True):
pd_data = pd.Series(data)
- pd_data.hist(bins=50)
- plt.show()
+ pd_data.hist(bins=bins, label=label, density=density, cumulative=cumulative)
+ plt.savefig(file_name)
+ if show:
+ plt.show()
- sorted_data = pd_data.value_counts(ascending=True)
+ if log:
+ sorted_data = pd_data.value_counts(ascending=True)
- logging.info(sorted_data.where(sorted_data > 1).dropna()) \ No newline at end of file
+ logging.info(sorted_data.where(sorted_data > 1).dropna()) \ No newline at end of file
diff --git a/sec_certs/settings.yaml b/sec_certs/settings.yaml
index 0c8b130f..9b07a8be 100644
--- a/sec_certs/settings.yaml
+++ b/sec_certs/settings.yaml
@@ -6,7 +6,14 @@ smallest_certificate_id_to_connect:
year_difference_between_validations:
description: During validation we don't connect certificates with validation dates
difference higher than _this_
- value: 5
+ value: 7
use_text_with_newlines_during_parsing:
description: During keyword search, search in text with newlines
value: true
+ignore_first_page:
+ description: During keyword search, first page usually contains addresses - ignore it.
+ value: true
+cert_threshold:
+ description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty.
+ value: 5
+
diff --git a/test/data/test_fips_oop/algorithms.json b/test/data/test_fips_oop/algorithms.json
new file mode 100644
index 00000000..7845d93d
--- /dev/null
+++ b/test/data/test_fips_oop/algorithms.json
@@ -0,0 +1,513 @@
+{
+ "_type": "FIPSAlgorithmDataset",
+ "certs": {
+ "2351": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "9/21/2018",
+ "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A11 Bionic, Assembler_VNG)",
+ "type": "DRBG",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "11/27/2015",
+ "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6)",
+ "type": "HMAC",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "1/27/2017",
+ "implementation": "OpenSSL using assembler for AES and SHA",
+ "type": "RSA",
+ "vendor": "Canonical Ltd."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "1/19/2017",
+ "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7020",
+ "type": "TDES",
+ "vendor": "Juniper Networks, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "3/8/2013",
+ "implementation": "Samsung OpenSSL Cryptographic Module",
+ "type": "AES",
+ "vendor": "Samsung Electronics Co., Ltd"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2351",
+ "date": "3/7/2014",
+ "implementation": "Symantec PGP Cryptographic Engine",
+ "type": "SHS",
+ "vendor": "Symantec Corporation"
+ }
+ ],
+ "2352": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "9/21/2018",
+ "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A10X Fusion, Assembler_VNG)",
+ "type": "DRBG",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "3/8/2013",
+ "implementation": "AES-256 Core",
+ "type": "AES",
+ "vendor": "Altera Canada"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "11/27/2015",
+ "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6X)",
+ "type": "HMAC",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "1/27/2017",
+ "implementation": "OpenSSL using support from Power ISA 2.07 for AES and SHA",
+ "type": "RSA",
+ "vendor": "Canonical Ltd."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "1/19/2017",
+ "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7130",
+ "type": "TDES",
+ "vendor": "Juniper Networks, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2352",
+ "date": "3/21/2014",
+ "implementation": "Karnak SHA in Hardware",
+ "type": "SHS",
+ "vendor": "Seagate Technology, LLC."
+ }
+ ],
+ "2600": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2600",
+ "date": "12/15/2017",
+ "implementation": "Apple iOS CoreCrypto v8 Kernel Module (Generic Software Implementation)",
+ "type": "TDES",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2600",
+ "date": "6/10/2016",
+ "implementation": "IOS Common Cryptographic Module (IC2M) Algorithm Module",
+ "type": "HMAC",
+ "vendor": "Cisco Systems, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2600",
+ "date": "8/16/2013",
+ "implementation": "Blade System Virtual Connect",
+ "type": "AES",
+ "vendor": "Hewlett-Packard Development Company, L.P."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2600",
+ "date": "12/5/2014",
+ "implementation": "Cryptographic Security Kernel",
+ "type": "SHS",
+ "vendor": "IBM Corporation"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2600",
+ "date": "9/1/2017",
+ "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 31bit",
+ "type": "RSA",
+ "vendor": "IBM Corporation"
+ }
+ ],
+ "2601": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2601",
+ "date": "12/5/2014",
+ "implementation": "SHA256 Library on Canon MFP Security Chip",
+ "type": "SHS",
+ "vendor": "Canon Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2601",
+ "date": "8/16/2013",
+ "implementation": "Dell AppAssure Crypto Library",
+ "type": "AES",
+ "vendor": "Dell, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2601",
+ "date": "6/10/2016",
+ "implementation": "EFJ Communication Cryptographic Library",
+ "type": "HMAC",
+ "vendor": "EFJohnson Technologies"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2601",
+ "date": "9/1/2017",
+ "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 64bit",
+ "type": "RSA",
+ "vendor": "IBM Corporation"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2601",
+ "date": "12/22/2017",
+ "implementation": "Oracle Linux 7 GnuTLS C Implementation",
+ "type": "TDES",
+ "vendor": "Oracle Corporation"
+ }
+ ],
+ "2602": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2602",
+ "date": "12/22/2017",
+ "implementation": "Apple tvOS CoreCrypto Kernel Module v8.0 (Generic Software Implementation)",
+ "type": "TDES",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2602",
+ "date": "6/10/2016",
+ "implementation": "FIPS-ALGORITHMS.1.5.0v",
+ "type": "HMAC",
+ "vendor": "Mercury Systems"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2602",
+ "date": "8/16/2013",
+ "implementation": "RSA BSAFE\u00ae Crypto-J Software Module",
+ "type": "AES",
+ "vendor": "RSA Security, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2602",
+ "date": "12/5/2014",
+ "implementation": "SHA Library",
+ "type": "SHS",
+ "vendor": "Sage Microelectronics Corp"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2602",
+ "date": "9/1/2017",
+ "implementation": "Bouncy Castle FIPS Java API",
+ "type": "RSA",
+ "vendor": "Legion of the Bouncy Castle Inc."
+ }
+ ],
+ "2700": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2700",
+ "date": "3/13/2015",
+ "implementation": "Apple OSX CoreCrypto Module (Generic, Xeon)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2700",
+ "date": "10/21/2016",
+ "implementation": "Axway OpenSSL",
+ "type": "HMAC",
+ "vendor": "Axway Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2700",
+ "date": "11/30/2017",
+ "implementation": "Brocade Fabric OS FIPS Cryptographic Module",
+ "type": "RSA",
+ "vendor": "Brocade Communications Systems LLC"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2700",
+ "date": "3/30/2018",
+ "implementation": "Junos OS 17.4R1-S1 - Dataplane",
+ "type": "TDES",
+ "vendor": "Juniper Networks, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2700",
+ "date": "11/29/2013",
+ "implementation": "VMware NSS Cryptographic Module",
+ "type": "AES",
+ "vendor": "VMware, Inc."
+ }
+ ],
+ "2701": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2701",
+ "date": "3/30/2018",
+ "implementation": "Security Builder GSE-J Crypto Core",
+ "type": "TDES",
+ "vendor": "BlackBerry Certicom"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2701",
+ "date": "11/30/2017",
+ "implementation": "ngfips_rsa",
+ "type": "RSA",
+ "vendor": "Cavium, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2701",
+ "date": "10/28/2016",
+ "implementation": "Cisco_SSL_Implementation-1",
+ "type": "HMAC",
+ "vendor": "Cisco Systems, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2701",
+ "date": "3/13/2015",
+ "implementation": "RSA BSAFE\u00ae Crypto-J JSAFE and JCE Software Module",
+ "type": "SHS",
+ "vendor": "RSA, The Security Division of EMC"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2701",
+ "date": "11/29/2013",
+ "implementation": "VMware Cryptographic Module",
+ "type": "AES",
+ "vendor": "VMware, Inc."
+ }
+ ],
+ "2702": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "2702",
+ "date": "3/30/2018",
+ "implementation": "Security Builder GSE-J Crypto Core",
+ "type": "TDES",
+ "vendor": "BlackBerry Certicom"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2702",
+ "date": "11/30/2017",
+ "implementation": "DELPHI RSA2048 Signature Verification Algorithm Implementation",
+ "type": "RSA",
+ "vendor": "DELPHI"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2702",
+ "date": "12/6/2013",
+ "implementation": "RSA BSAFE Crypto-J",
+ "type": "AES",
+ "vendor": "McAfee, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2702",
+ "date": "10/28/2016",
+ "implementation": "OpenSSL Crypto Library",
+ "type": "HMAC",
+ "vendor": "MikroM GmbH"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "2702",
+ "date": "3/13/2015",
+ "implementation": "OpenSSL FIPS Object Module",
+ "type": "SHS",
+ "vendor": "OpenSSL Validation Services, Inc."
+ }
+ ],
+ "3415": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3415",
+ "date": "1/26/2018",
+ "implementation": "Apple Secure Key Store CoreCrypto Module (Generic Software Implementation)",
+ "type": "HMAC",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3415",
+ "date": "6/5/2015",
+ "implementation": "Motorola Solutions Subscriber \u00b5Mace AES256",
+ "type": "AES",
+ "vendor": "Motorola Solutions Inc"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3415",
+ "date": "11/18/2016",
+ "implementation": "Secure Parser Library",
+ "type": "SHS",
+ "vendor": "Security First Corp."
+ }
+ ],
+ "3426": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3426",
+ "date": "6/11/2015",
+ "implementation": "Apple iOS CoreCrypto Module (KeyWrap A8 32 bit)",
+ "type": "AES",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3426",
+ "date": "12/2/2016",
+ "implementation": "Apple iOS CoreCrypto Module (Generic)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3426",
+ "date": "1/26/2018",
+ "implementation": "Apple Secure Key Store CoreCrypto Module (VNG)",
+ "type": "HMAC",
+ "vendor": "Apple Inc."
+ }
+ ],
+ "3427": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3427",
+ "date": "12/2/2016",
+ "implementation": "Apple iOS CoreCrypto Module (Generic)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3427",
+ "date": "1/26/2018",
+ "implementation": "Forcepoint NGFW FIPS Java API",
+ "type": "HMAC",
+ "vendor": "Forcepoint"
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3427",
+ "date": "6/11/2015",
+ "implementation": "HP ESKM OpenSSL",
+ "type": "AES",
+ "vendor": "Hewlett Packard Enterprise"
+ }
+ ],
+ "3447": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3447",
+ "date": "12/2/2016",
+ "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3447",
+ "date": "7/2/2015",
+ "implementation": "FireEye Algorithms Implementation",
+ "type": "AES",
+ "vendor": "FireEye, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3447",
+ "date": "2/9/2018",
+ "implementation": "OpenSSL (no AVX2/AVX/AESNI/SSSE3, x86_64, 64-bit library)",
+ "type": "HMAC",
+ "vendor": "Red Hat, Inc."
+ }
+ ],
+ "3451": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3451",
+ "date": "12/2/2016",
+ "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3451",
+ "date": "7/2/2015",
+ "implementation": "OpenSSL FIPS Object Module",
+ "type": "AES",
+ "vendor": "OpenSSL Software Foundation, Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3451",
+ "date": "2/9/2018",
+ "implementation": "OpenSSL (no AVX2/AVX/AESNI, x86_64, 64-bit library)",
+ "type": "HMAC",
+ "vendor": "Red Hat, Inc."
+ }
+ ],
+ "3464": [
+ {
+ "_type": "Algorithm",
+ "cert_id": "3464",
+ "date": "12/9/2016",
+ "implementation": "Apple OSX CoreCrypto Module (Generic)",
+ "type": "SHS",
+ "vendor": "Apple Inc."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3464",
+ "date": "7/10/2015",
+ "implementation": "Security Builder Linux Kernel Crypto Core",
+ "type": "AES",
+ "vendor": "Certicom Corp."
+ },
+ {
+ "_type": "Algorithm",
+ "cert_id": "3464",
+ "date": "2/9/2018",
+ "implementation": "HPE Secure Encryption Engine v1.1",
+ "type": "HMAC",
+ "vendor": "Hewlett-Packard Development Company, L.P."
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/test/fips_test_utils.py b/test/fips_test_utils.py
new file mode 100644
index 00000000..94fb406a
--- /dev/null
+++ b/test/fips_test_utils.py
@@ -0,0 +1,51 @@
+from typing import List
+from pathlib import Path
+
+def generate_html(ids: List[str], path: Path):
+ def generate_entry(certificate_id: str) -> str:
+ return f'''
+ <tr id="cert-row-0">
+ <td class="text-center">
+ <a href="/projects/cryptographic-module-validation-program/certificate/3898" id="cert-number-link-0">{certificate_id}</a>
+ </td>
+ </tr>
+ '''
+
+ html_head = '''
+ <!DOCTYPE html>
+ <html lang="en-us" xml:lang="en-us">
+ <head>
+ <meta charset="utf-8" />
+ <title>Cryptographic Module Validation Program | CSRC</title>
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+ <meta http-equiv="content-style-type" content="text/css" />
+ <meta http-equiv="content-script-type" content="text/javascript" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+ <meta name="msapplication-config" content="/CSRC/Media/images/favicons/browserconfig.xml" />
+ <meta name="theme-color" content="#000000" />
+ <meta name="google-site-verification" content="xbrnrVYDgLD-Bd64xHLCt4XsPXzUhQ-4lGMj4TdUUTA" />
+ </head>
+ '''
+ rows = ""
+ for cert_id in ids:
+ rows += f"\n{generate_entry(cert_id)}\n"
+ html_body = f'''
+ <body>
+ <table class="table table-striped table-condensed publications-table table-bordered" id="searchResultsTable">
+ <thead>
+ <tr>
+ <th class="text-center">Certificate Number</th>
+ <th class="text-center">Vendor Name</th>
+ <th class="text-center">Module Name</th>
+ <th class="text-center">Module Type</th>
+ <th class="text-center">Validation Date</th>
+ </tr>
+ </thead>
+ <tbody>
+ {rows}
+ </tbody>
+ </table>
+ </body>
+ '''
+ with open(path, 'w') as f:
+ f.write(f"{html_head}\n{html_body}\n")
diff --git a/test/settings_test.yaml b/test/settings_test.yaml
new file mode 100644
index 00000000..9b07a8be
--- /dev/null
+++ b/test/settings_test.yaml
@@ -0,0 +1,19 @@
+---
+smallest_certificate_id_to_connect:
+ description: During validation we don't connect certificates with number lower than
+ _this_ to connections
+ value: 40
+year_difference_between_validations:
+ description: During validation we don't connect certificates with validation dates
+ difference higher than _this_
+ value: 7
+use_text_with_newlines_during_parsing:
+ description: During keyword search, search in text with newlines
+ value: true
+ignore_first_page:
+ description: During keyword search, first page usually contains addresses - ignore it.
+ value: true
+cert_threshold:
+ description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty.
+ value: 5
+
diff --git a/test/test_fips_oop.py b/test/test_fips_oop.py
new file mode 100644
index 00000000..107f0c5d
--- /dev/null
+++ b/test/test_fips_oop.py
@@ -0,0 +1,134 @@
+from unittest import TestCase
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset
+from sec_certs.configuration import config
+from fips_test_utils import generate_html
+
+
+
+def _set_up_dataset(td, certs):
+ dataset = FIPSDataset({}, Path(td), 'test_dataset', 'fips_test_dataset')
+ generate_html(certs, td + '/test_search.html')
+ dataset.get_certs_from_web(test=td + '/test_search.html', update_json=False)
+ return dataset
+
+
+def _set_up_dataset_for_full(td, certs):
+ dataset = _set_up_dataset(td, certs)
+ dataset.convert_all_pdfs()
+ dataset.extract_keywords()
+ dataset.extract_certs_from_tables(high_precision=True)
+ dataset.algorithms = FIPSAlgorithmDataset.from_json(Path(__file__).parent / 'data/test_fips_oop/algorithms.json')
+ dataset.finalize_results()
+ return dataset
+
+
+class TestFipsOOP(TestCase):
+ def setUp(self) -> None:
+ self.data_dir: Path = Path(__file__).parent / 'data' / 'test_fips_oop'
+ self.dataset = FIPSDataset({}, self.data_dir, 'test_dataset', 'fips_test_dataset')
+ self.certs_to_parse = [
+ ['3099', '2549', '2484', '3038', '2472', '2435', '2471', '1930'], # openSUSE chunk
+ ['23', '24', '25', '26'],
+ ['3095', '3651', '3093', '3090', '3197', '3196', '3089', '3195', '3480', '3615', '3194', '3091', '3690',
+ '3644', '3527', '3094', '3544', '3096', '3092'], # microsoft chunk
+ ['2630', '2721', '2997', '2441', '2711', '2633', '2798', '3613', '3733', '2908', '2446', '2742', '2447'],
+ # redhat chunk
+ ['3850', '2779', '2860', '2665', '1883', '3518', '3141', '2590'], # Document signing chunk
+ ['3493', '3495', '3711', '3176', '3488', '3126', '3269', '3524', '3220', '2398', '3543', '2676', '3313',
+ '3363', '3608', '3158'], # Chunk referencing openSSL FIPS Object Module SE
+ ]
+ config.load(Path(__file__).parent / 'settings_test.yaml')
+
+ def test_size(self):
+ for certs in self.certs_to_parse:
+ with TemporaryDirectory() as td:
+ dataset = _set_up_dataset(td, certs)
+ self.assertEqual(len(dataset.certs), len(certs), "Wrong number of parsed certs")
+
+ def test_connections_microsoft(self):
+ certs = self.certs_to_parse[2]
+ with TemporaryDirectory() as td:
+ dataset = _set_up_dataset_for_full(td, certs)
+
+ self.assertEqual(set(dataset.certs['3095'].processed.connections), {x for x in ['3093', '3096', '3094']})
+ self.assertEqual(set(dataset.certs['3651'].processed.connections), {x for x in ['3615']})
+ self.assertEqual(set(dataset.certs['3093'].processed.connections), {x for x in ['3090', '3091']})
+ self.assertEqual(set(dataset.certs['3090'].processed.connections), {x for x in ['3089']})
+ self.assertEqual(set(dataset.certs['3197'].processed.connections),
+ {x for x in ['3195', '3096', '3196', '3644', '3651']})
+ self.assertEqual(set(dataset.certs['3196'].processed.connections),
+ {x for x in ['3194', '3091', '3480', '3615']})
+ self.assertEqual(set(dataset.certs['3089'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['3195'].processed.connections), {x for x in ['3194', '3091', '3480']})
+ self.assertEqual(set(dataset.certs['3480'].processed.connections), {x for x in ['3089']})
+ self.assertEqual(set(dataset.certs['3615'].processed.connections), {x for x in ['3089']})
+ self.assertEqual(set(dataset.certs['3194'].processed.connections), {x for x in ['3089']})
+ self.assertEqual(set(dataset.certs['3091'].processed.connections), {x for x in ['3089']})
+ self.assertEqual(set(dataset.certs['3690'].processed.connections), {x for x in ['3644', '3196', '3651']})
+ self.assertEqual(set(dataset.certs['3644'].processed.connections), {x for x in ['3615']})
+ self.assertEqual(set(dataset.certs['3527'].processed.connections), {x for x in ['3090', '3091']})
+ self.assertEqual(set(dataset.certs['3094'].processed.connections), {x for x in ['3090', '3091']})
+ self.assertEqual(set(dataset.certs['3544'].processed.connections), {x for x in ['3093', '3096', '3527']})
+ self.assertEqual(set(dataset.certs['3096'].processed.connections),
+ {x for x in ['3090', '3194', '3091', '3480']})
+ self.assertEqual(set(dataset.certs['3092'].processed.connections),
+ {x for x in ['3093', '3195', '3096', '3644', '3651']})
+
+ def test_connections_redhat(self):
+ certs = self.certs_to_parse[3]
+ with TemporaryDirectory() as td:
+ dataset = _set_up_dataset_for_full(td, certs)
+ self.assertEqual(set(dataset.certs['2630'].processed.connections), {x for x in ['2441']})
+ self.assertEqual(set(dataset.certs['2633'].processed.connections), {x for x in ['2441']})
+ self.assertEqual(set(dataset.certs['2441'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['2997'].processed.connections), {x for x in ['2711']})
+ self.assertEqual(set(dataset.certs['2446'].processed.connections), {x for x in ['2441']})
+ self.assertEqual(set(dataset.certs['2447'].processed.connections), {x for x in ['2441']})
+ self.assertEqual(set(dataset.certs['3733'].processed.connections), {x for x in ['2441']})
+ self.assertEqual(set(dataset.certs['2441'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['2711'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['2908'].processed.connections), {x for x in ['2711']})
+ self.assertEqual(set(dataset.certs['3613'].processed.connections), {x for x in ['2997']})
+ self.assertEqual(set(dataset.certs['2721'].processed.connections), {x for x in ['2441', '2711']})
+ self.assertEqual(set(dataset.certs['2798'].processed.connections), {x for x in ['2721', '2711']})
+ self.assertEqual(set(dataset.certs['2711'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['2997'].processed.connections), {x for x in ['2711']})
+ self.assertEqual(set(dataset.certs['2742'].processed.connections), {x for x in ['2721', '2711']})
+ self.assertEqual(set(dataset.certs['2721'].processed.connections), {x for x in ['2441', '2711']})
+
+ def test_docusign_chunk(self):
+ certs = self.certs_to_parse[4]
+ with TemporaryDirectory() as td:
+ dataset = _set_up_dataset_for_full(td, certs)
+ self.assertEqual(set(dataset.certs['3850'].processed.connections), {x for x in ['3518', '1883']})
+ self.assertEqual(set(dataset.certs['2779'].processed.connections), {x for x in ['1883']})
+ self.assertEqual(set(dataset.certs['2860'].processed.connections), {x for x in ['1883']})
+ self.assertEqual(set(dataset.certs['2665'].processed.connections), {x for x in ['1883']})
+ self.assertEqual(set(dataset.certs['1883'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['3518'].processed.connections), {x for x in ['1883']})
+ self.assertEqual(set(dataset.certs['3141'].processed.connections), {x for x in ['1883']})
+ self.assertEqual(set(dataset.certs['2590'].processed.connections), {x for x in ['1883']})
+
+ def test_openssl_chunk(self):
+ certs = self.certs_to_parse[5]
+ with TemporaryDirectory() as td:
+ dataset = _set_up_dataset_for_full(td, certs)
+ self.assertEqual(set(dataset.certs['3493'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3495'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3711'].processed.connections), {x for x in ['3220']})
+ self.assertEqual(set(dataset.certs['3176'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3488'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3126'].processed.connections), {x for x in ['3126', '2398']})
+ self.assertEqual(set(dataset.certs['3269'].processed.connections), {x for x in ['3269', '3220']})
+ self.assertEqual(set(dataset.certs['3524'].processed.connections), {x for x in ['3220']})
+ self.assertEqual(set(dataset.certs['3220'].processed.connections), {x for x in ['3220', '2398']})
+ self.assertEqual(set(dataset.certs['2398'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['3543'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['2676'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3313'].processed.connections), {x for x in ['3313', '3220']})
+ self.assertEqual(set(dataset.certs['3363'].processed.connections), {x for x in []})
+ self.assertEqual(set(dataset.certs['3608'].processed.connections), {x for x in ['2398']})
+ self.assertEqual(set(dataset.certs['3158'].processed.connections), {x for x in ['2398']})