diff options
| author | adamjanovsky | 2021-05-14 16:27:35 +0200 |
|---|---|---|
| committer | GitHub | 2021-05-14 16:27:35 +0200 |
| commit | cfab313d013b530c5ceed5b29877be71f74da7e8 (patch) | |
| tree | 4dda7878a195428061d4e366296f52476e903134 | |
| parent | e3c002a63725e9e79ce81a09a7c7055c61ba5010 (diff) | |
| parent | 92f49eb5a0b92be60be4ab3a662fcd6487865052 (diff) | |
| download | sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.gz sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.tar.zst sec-certs-cfab313d013b530c5ceed5b29877be71f74da7e8.zip | |
Merge pull request #75 from crocs-muni/fips_pr
Changes in CAVP webpages parsing, fixed bugs, tests, and everything from the last PR
| -rw-r--r-- | examples/fips_oop_demo.py | 41 | ||||
| -rw-r--r-- | sec_certs/analyze_certificates.py | 52 | ||||
| -rw-r--r-- | sec_certs/cert_rules.py | 107 | ||||
| -rw-r--r-- | sec_certs/certificate.py | 107 | ||||
| -rw-r--r-- | sec_certs/constants.py | 2 | ||||
| -rw-r--r-- | sec_certs/dataset.py | 382 | ||||
| -rw-r--r-- | sec_certs/helpers.py | 20 | ||||
| -rw-r--r-- | sec_certs/settings.yaml | 9 | ||||
| -rw-r--r-- | test/data/test_fips_oop/algorithms.json | 513 | ||||
| -rw-r--r-- | test/fips_test_utils.py | 51 | ||||
| -rw-r--r-- | test/settings_test.yaml | 19 | ||||
| -rw-r--r-- | test/test_fips_oop.py | 134 |
12 files changed, 1146 insertions, 291 deletions
diff --git a/examples/fips_oop_demo.py b/examples/fips_oop_demo.py index 7015574e..dd47f74f 100644 --- a/examples/fips_oop_demo.py +++ b/examples/fips_oop_demo.py @@ -2,71 +2,64 @@ from pathlib import Path from datetime import datetime import logging import click + from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset from sec_certs.configuration import config -from sec_certs.helpers import analyze_matched_algs + @click.command() @click.option('--config-file', help='Path to config file') @click.option('--json-file', help='Path to dataset json file') -@click.option('--no-download-algs', help='don\'t download algs', is_flag=True) -def main(config_file, json_file, no_download_algs): +@click.option('--no-download-algs', help='Redo scan of html files', is_flag=True) +@click.option('--redo-web-scan', help='Redo scan of PDF files', is_flag=True) +@click.option('--redo-keyword-scan', help='Don\'t download algs', is_flag=True) +@click.option('--higher-precision-results', + help='Redo table search for certificates with high error rate. Behaviour undefined if used on a newly instantiated dataset.', + is_flag=True) +def main(config_file, json_file, no_download_algs, redo_web_scan, redo_keyword_scan, higher_precision_results): logging.basicConfig(level=logging.INFO) start = datetime.now() # Load config - config.load(config_file if config_file else 'sec_certs/settings.yaml') + config.load(config_file if config_file else '../sec_certs/settings.yaml') # Create empty dataset - dset = FIPSDataset({}, Path('./fips_dataset'), 'sample_dataset', 'sample dataset description') + dset = FIPSDataset({}, Path('../fips_dataset'), 'sample_dataset', 'sample dataset description') # this is for creating test dataset, usually with small number of pdfs # dset = FIPSDataset({}, Path('./fips_test_dataset'), 'small dataset', 'small dataset for keyword testing') # Load metadata for certificates from CSV and HTML sources - dset.get_certs_from_web(json_file=json_file, redo=True) + dset.get_certs_from_web(json_file=json_file, redo=redo_web_scan) logging.info(f'Finished parsing. Have dataset with {len(dset)} certificates.') - # Dump dataset into JSON - dset.to_json(dset.root_dir / 'fips_full_dataset.json') logging.info(f'Dataset saved to {dset.root_dir}/fips_full_dataset.json') logging.info("Converting pdfs") dset.convert_all_pdfs() - dset.to_json(dset.root_dir / 'fips_full_dataset.json') logging.info("Extracting keywords now.") - dset.extract_keywords(redo=True) + dset.extract_keywords(redo=redo_keyword_scan) logging.info(f'Finished extracting certificates for {len(dset.certs)} items.') - logging.info("Dumping dataset again...") - dset.to_json(dset.root_dir / 'fips_full_dataset.json') logging.info("Searching for tables in pdfs") - not_decoded_files = dset.extract_certs_from_tables() + not_decoded_files = dset.extract_certs_from_tables(higher_precision_results) logging.info(f"Done. Files not decoded: {not_decoded_files}") - dset.to_json(dset.root_dir / 'fips_mentioned.json') logging.info("Parsing algorithms") if not no_download_algs: - aset = FIPSAlgorithmDataset({}, Path('fips_dataset/web/algorithms'), 'algorithms', 'sample algs') + aset = FIPSAlgorithmDataset({}, Path(dset.root_dir / 'web/algorithms'), 'algorithms', 'sample algs') aset.get_certs_from_web() + logging.info(f'Finished parsing. Have algorithm dataset with {len(aset)} algorithm numbers.') dset.algorithms = aset logging.info("finalizing results.") - dset.finalize_results() - logging.info('dump again') - dset.to_json(dset.root_dir / 'fips_full_dataset.json') - - dset.get_dot_graph('different_new') - - data = dset.match_algs() - analyze_matched_algs(data) - + dset.plot_graphs(show=False) end = datetime.now() logging.info(f'The computation took {(end - start)} seconds.') diff --git a/sec_certs/analyze_certificates.py b/sec_certs/analyze_certificates.py index f20a0e2e..f513664b 100644 --- a/sec_certs/analyze_certificates.py +++ b/sec_certs/analyze_certificates.py @@ -313,6 +313,11 @@ def build_cert_references(filter_rules_group, all_items_found): # build list of references referenced_by = {} for cert_long_id in all_items_found.keys(): + # handle FIPS + if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']: + referenced_by[cert_long_id] = copy.deepcopy(all_items_found[cert_long_id]['processed']['connections']) + continue + # do not continue if no keywords were extracted () if 'keywords_scan' not in all_items_found[cert_long_id].keys(): continue @@ -368,9 +373,15 @@ def build_cert_references(filter_rules_group, all_items_found): def analyze_references_graph(filter_rules_group, all_items_found, filter_label): + handling_fips_items = False # build cert_id to item name mapping certid_info = {} for cert_long_id in all_items_found.keys(): + if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']: + certid_info[cert_long_id] = cert_long_id + handling_fips_items = True + continue + cert = all_items_found[cert_long_id] if is_in_dict(cert, ['processed', 'cert_id']): if is_in_dict(cert, ['frontpage_scan', 'cert_item']): @@ -383,6 +394,10 @@ def analyze_references_graph(filter_rules_group, all_items_found, filter_label): # build cert_id to cert_long_id mapping cert_id_to_long_id_mapping = {} for cert_long_id in all_items_found.keys(): + if 'FIPS Certificate' in all_items_found[cert_long_id]['frontpage_scan']: + cert_id_to_long_id_mapping[cert_long_id] = cert_long_id + continue + cert = all_items_found[cert_long_id] if is_in_dict(cert, ['processed', 'cert_id']): if is_in_dict(cert, ['frontpage_scan', 'cert_item']): @@ -483,7 +498,10 @@ def plot_schemes_multi_line_graph(x_ticks, data, prominent_data, x_label, y_labe # change line type to prevent color repetitions num_lines_plotted += 1 - color_index += 1 + if color_index < len(GRAPHS_COLOR_PALETTE) - 1: + color_index += 1 + else: + color_index = 0 plt.rcParams.update({'font.size': 16}) plt.legend(loc=2) @@ -1125,28 +1143,30 @@ def transform_fips_to_cc_dict(all_cert_items_fips): cc_item = {} cc_item["csv_scan"] = {} - cc_item["frontpage_scan"] = {} - cc_item["keywords_scan"] = {} - cc_item["pdfmeta_scan"] = {} - cc_item["processed"] = {} + cc_item["frontpage_scan"] = {'FIPS Certificate': 1} + cc_item["keywords_scan"] = fips_item['pdf_scan']['keywords'] + cc_item["pdfmeta_scan"] = fips_item['pdf_scan'] + cc_item["processed"] = fips_item['processed'] + + fips_web_scan = fips_item['web_scan'] - cc_item["processed"]["cc_manufacturer_list"] = fips_item["vendor"] - cc_item['processed']['cc_manufacturer_simple_list'] = [fips_item["vendor"]] + cc_item["processed"]["cc_manufacturer_list"] = fips_web_scan["vendor"] + cc_item['processed']['cc_manufacturer_simple_list'] = [fips_web_scan["vendor"]] cc_item["processed"]["cert_id"] = fips_item["cert_id"] - cc_item["processed"]["cert_lab"] = fips_item["lab"] - if fips_item["exceptions"] is None: - cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"] + cc_item["processed"]["cert_lab"] = fips_web_scan["lab"] + if fips_web_scan["exceptions"] is None: + cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"] else: - cc_item["processed"]["cc_security_level"] = 'Level ' + fips_item["level"] + '+' + cc_item["processed"]["cc_security_level"] = 'Level ' + fips_web_scan["level"] + '+' cc_item['csv_scan']['cc_scheme'] = 'NIST' - cc_item["csv_scan"]["cc_certification_date"] = fips_item["date_validation"][0] - cc_item["csv_scan"]["cc_archived_date"] = fips_item["date_sunset"] - cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_item["type"], fips_item["embodiment"]) + cc_item["csv_scan"]["cc_certification_date"] = fips_web_scan["date_validation"][0] + cc_item["csv_scan"]["cc_archived_date"] = fips_web_scan["date_sunset"] + cc_item["csv_scan"]["cc_category"] = '{} {}'.format(fips_web_scan["module_type"], fips_web_scan["embodiment"]) cc_item["processed"]["cc_security_level_augments"] = [] - if fips_item["exceptions"]: - for exception in fips_item["exceptions"]: + if fips_web_scan["exceptions"]: + for exception in fips_web_scan["exceptions"]: try: ex_name, ex_level = exception.split(':') ex_name = ex_name.strip() diff --git a/sec_certs/cert_rules.py b/sec_certs/cert_rules.py index 3be285c5..ba857300 100644 --- a/sec_certs/cert_rules.py +++ b/sec_certs/cert_rules.py @@ -321,64 +321,65 @@ rules_other = [ rules_fips_remove_algorithm_ids = [ # --- HMAC(-SHA)(-1) - (bits) (method) ((hardware/firmware cert) #id) --- - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{4})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{3})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{2})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?\s?(\d{1})", +# + added (and #id) everywhere + r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*and[\s#]*\d+)?", + r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*and[\s#]*\d+)?", + r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*and[\s#]*\d+)?", + r"HMAC(?:[- –]*SHA)?(?:[- –]*1)?[– -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?\(?(?: |hardware|firmware)*?[\s(\[]*?(?:#|cert\.?|Cert\.?|Certificate|certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*and[\s#]*\d+)?", # --- same as above, without hw or fw --- - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})", - r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})", + r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})", + r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})", + r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})", + r"HMAC(?:-SHA)?(?:-1)?[ -]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[, ]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})", # --- SHS/A - (bits) (method) ((cert #) numbers) --- - r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{4})(?:\)?\[#?\d+\])?", - r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{3})(?:\)?\[#?\d+\])?", - r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{2})(?:\)?\[#?\d+\])?", - r"SH[SA][-– ]*(?:160|224|256|384|512)?(?:[\s(\[]*?(?:KAT)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)(\d{1})(?:\)?\[#?\d+\])?", + r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?", + r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?", + r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?", + r"SH[SA][-– 123]*(?:;|\/|160|224|256|384|512)?(?:[\s(\[]*?(?:KAT|[Bb]yte [Oo]riented)*?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?\[#?\d+\])?(?:[\s#]*?and[\s#]*?\d+)?", # --- RSA (bits) (method) ((cert #)) --- - r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})", - r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})", - r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})", - r"RSA(?:[-– ]*(?:512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})", + r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})", + r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})", + r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})", + r"RSA(?:[-– ]*(?:;|\/|512|768|1024|1280|1536|2048|3072|4096|8192)\s\(\[]*?(?:(?:;|\/|KAT|Verify|PSS|\s)*?)?[\s,]*?[\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})", # --- RSA (SSA) (PKCS) (version) (#) --- - r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{4})?", - r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{3})?", - r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{2})?", - r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?\s?(\d{1})?", + r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{4})?", + r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{3})?", + r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{2})?", + r"(?:RSA)?[-– ]?(?:SSA)?[- ]?PKCS\s?#?\d(?:-[Vv]1_5| [Vv]1[-_]5)?[\s#]*?(\d{1})?", # --- AES (bits) (method) ((cert #)) --- - r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})(?:\)?\[#?\d+\])?", - r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})(?:\)?\[#?\d+\])?", - r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})(?:\)?\[#?\d+\])?", - r"AES[-– ]*((?:128|192|256|)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})(?:\)?\[#?\d+\])?", + r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?", + r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?", + r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?", + r"AES[-– ]*((?: |;|\/|bit|key|128|192|256|CBC)*(?: |\/|;|[Dd]ecrypt|[Ee]ncrypt|KAT|CMAC|CTR|GCM|IV|CBC)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})(?:\)?[\s#]*?\[#?\d+\])?(?:[\s#]*?and[\s#]*?(\d+))?", # --- Diffie Helman (CVL) ((cert #)) --- - r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{4})", - r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{3})", - r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{2})", - r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?\s?(\d{1})", + r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{4})", + r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{3})", + r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{2})", + r"Diffie[-– ]*Hellman[,\s(\[]*?(?:CVL|\s)*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?[\s#]*?(\d{1})", # --- DRBG (bits) (method) (cert #) --- - r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})", - r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})", - r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})", - r"DRBG[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})", + r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})", + r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})", + r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})", + r"DRBG[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})", # --- DES (bits) (method) (cert #) - r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})", - r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})", - r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})", - r"DES[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})", + r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{4})(?:[\s#]*?and[\s#]*?(\d+))?", + r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{3})(?:[\s#]*?and[\s#]*?(\d+))?", + r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{2})(?:[\s#]*?and[\s#]*?(\d+))?", + r"DES[ –-]*((?:;|\/|160|224|256|384|512)?(?:;|\/| |[Dd]ecrypt|[Ee]ncrypt|KAT|CBC|(?:\d(?: and \d)? keying options?))*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)*?[\s#]*?)?[\s#]*?(\d{1})(?:[\s#]*?and[\s#]*?(\d+))?", # --- DSA (bits) (method) (cert #) - r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{4})", - r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{3})", - r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{2})", - r"DSA[ –-]*((?:160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?\s?(\d{1})", + r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{4})", + r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{3})", + r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{2})", + r"DSA[ –-]*((?:;|\/|160|224|256|384|512)?(?: |[Dd]ecrypt|[Ee]ncrypt|KAT)*?[,\s(\[]*?(?:#|cert\.?|certificate|Cert\.?|Certificate)?[\s#]*?)?[\s#]*?(\d{1})", # --- platforms (#)+ - this is used in modification history --- r"[Pp]latforms? #\d+(?:#\d+|,| |-|and)*[^\n]*", @@ -400,12 +401,14 @@ rules_fips_remove_algorithm_ids = [ # --- PKCS (#) --- r"PKCS[\s]?#?\d+", - r"PKSC[\s]?#?\d+" # typo, #625 -] + r"PKSC[\s]?#?\d+", # typo, #625 +# --- # C and # A (just in case) --- + r"#\s+?[Cc]\d+", + r"#\s+?[Aa]\d+" +] rules_fips_to_remove = [ # --- random words found --- - r"Survey #192", # why would they get an address like this /o\ cert 2079 r"[Ss]lot #\d", # a card slot, #2069 r"[Ss]eals? ?\(?#\d - #\d", # #1232 r"\[#\d*\]", # some certs use this as references @@ -413,16 +416,19 @@ rules_fips_to_remove = [ r"[Pp]ower [Ss]upply #\d", # #604 r"TEL #\d and #\d", # #3337 r"#\d+ - #\d+", # labels, seals... #1232 + r"#\d+‐#?\d+", # labels, seals... #3530 + r"#\d+ to #?\d+", # labels, seals... #3058 + r"see #\d+", # labels, seals... #3058 r"#\d+, ?#\d+", r"#?\d+ and #?\d+", r"label \(#\d+\)", r"[Ll]abel #\d+", r"\(#\d\)", r"IETF[25\s]*RFC[26\s]*#\d+", # #3425 - r"Bendix Road North #760", # #3325 - r"5080 Spectrum Drive, #1000E", r"Document # 540-105000-A1", r"Certificate #2287-1 from EMCE Engineering", # ??? + r"[sS]cenarios?\s?#\d+", # 3789 + r"#\d+\s?\(\S\)", # 2159 ] rules_fips_cert = [ @@ -430,10 +436,10 @@ rules_fips_cert = [ # r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{3})", # r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{2})", # r"(?:#\s?|Cert\.?[^. ]*?\s?)(?P<id>\d{1}) - r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4}[^\d])", - r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3}[^\d])", - r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2}[^\d])", - r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1}[^\d])" + r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{4})(?!\d)", + r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{3})(?!\d)", + r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{2})(?!\d)", + r"(?:#[^\S\r\n]?|Cert\.?(?!.\s)[^\S\r\n]?|Certificate[^\S\r\n]?)(?P<id>\d{1})(?!\d)" ] # rule still too "general" @@ -508,8 +514,7 @@ fips_rules['rules_to_remove'] = rules_fips_to_remove fips_rules['rules_security_level'] = rules_fips_security_level fips_rules['rules_cert_id'] = rules_fips_cert fips_common_rules = copy.deepcopy(common_rules) # make separate copy not to process cc rules by fips's re.compile -#fips_rules.update(fips_common_rules) for rule in fips_rules: for current_rule in range(len(fips_rules[rule])): - fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule])
\ No newline at end of file + fips_rules[rule][current_rule] = re.compile(fips_rules[rule][current_rule]) diff --git a/sec_certs/certificate.py b/sec_certs/certificate.py index 7b0770de..98ddc6bc 100644 --- a/sec_certs/certificate.py +++ b/sec_certs/certificate.py @@ -133,7 +133,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType): algorithms: Optional[List[Dict[str, str]]] tested_conf: Optional[List[str]] description: Optional[str] - mentioned_certs: Optional[List[str]] + mentioned_certs: Optional[Dict[str, Dict[str, int]]] vendor: Optional[str] vendor_www: Optional[str] lab: Optional[str] @@ -147,6 +147,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType): revoked_link: Optional[str] sw_versions: Optional[str] product_url: Optional[str] + connections: List[str] def __post_init__(self): self.date_validation = [parser.parse(x).date() for x in @@ -177,6 +178,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType): cert_id: int keywords: Dict algorithms: List + connections: List[str] @property def dgst(self): @@ -199,9 +201,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType): @dataclass(eq=True) class Processed(ComplexSerializableType): - keywords: Optional[Dict] - algorithms: Dict - connections: List + keywords: Optional[Dict[str, Dict]] + algorithms: Dict[str, Dict] + connections: List[str] + unmatched_algs: int @property def dgst(self): @@ -263,26 +266,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType): 'type': None, 'embodiment': None, 'tested_conf': None, 'description': None, 'vendor': None, 'vendor_www': None, 'lab': None, 'lab_nvlap': None, 'historical_reason': None, 'revoked_reason': None, 'revoked_link': None, 'algorithms': [], - 'mentioned_certs': [], 'tables_done': False, 'security_policy_www': None, 'certificate_www': None, + 'mentioned_certs': {}, 'tables_done': False, 'security_policy_www': None, 'certificate_www': None, 'hw_versions': None, 'fw_versions': None, 'sw_versions': None, 'product_url': None} return d @staticmethod - def parse_caveat(current_text: str) -> List: + def parse_caveat(current_text: str) -> Dict[str, Dict[str, int]]: """ Parses content of "Caveat" of FIPS CMVP .html file :param current_text: text of "Caveat" - :return: list of all found algorithm IDs + :return: dictionary of all found algorithm IDs """ - ids_found = [] - r_key = r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+)" + ids_found = {} + r_key = r"(?P<word>\w+)?\s?(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)+(?P<id>\d+)" for m in re.finditer(r_key, current_text): - if r_key in ids_found and m.group() in ids_found[0]: - ids_found[0][m.group()]['count'] += 1 + if m.group('word') and m.group('word').lower() in {'rsa', 'shs', 'dsa', 'pkcs', 'aes'}: + continue + if m.group('id') in ids_found: + ids_found[m.group('id')]['count'] += 1 else: - ids_found.append( - {r"(?:#\s?|Cert\.?(?!.\s)\s?|Certificate\s?)(?P<id>\d+?})": {m.group(): {'count': 1}}}) + ids_found[m.group('id')] = {'count': 1} return ids_found @@ -302,7 +306,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType): for m in re.finditer(reg, current_text): set_items.add(m.group()) - return [{"Certificate": list(set_items)}] + return [{"Certificate": list(set_items)}] if len(set_items) > 0 else [] @staticmethod def parse_table(element: Union[Tag, NavigableString]) -> List[Dict]: @@ -315,9 +319,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType): trs = element.find_all('tr') for tr in trs: tds = tr.find_all('td') + cert = FIPSCertificate.extract_algorithm_certificates(tds[1].text) found_items.append( {'Name': tds[0].text, - 'Certificate': FIPSCertificate.extract_algorithm_certificates(tds[1].text)[0]['Certificate'], + 'Certificate': cert[0]['Certificate'] if cert != [] else [], 'Links': [str(x) for x in tds[1].find_all('a')], 'Raw': str(tr)}) @@ -335,8 +340,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType): elif 'caveat' in pairs[title]: html_items_found[pairs[title]] = content - html_items_found['mentioned_certs'] += FIPSCertificate.parse_caveat( - content) + html_items_found['mentioned_certs'].update(FIPSCertificate.parse_caveat( + content)) elif 'FIPS Algorithms' in title: html_items_found['algorithms'] += FIPSCertificate.parse_table( @@ -486,13 +491,16 @@ class FIPSCertificate(Certificate, ComplexSerializableType): items_found['revoked_reason'] if 'revoked_reason' in items_found else None, items_found['revoked_link'] if 'revoked_link' in items_found else None, items_found['sw_versions'] if 'sw_versions' in items_found else None, - items_found['product_url']) if 'product_url' in items_found else None, + items_found['product_url'] if 'product_url' in items_found else None, + [] + ), # connections FIPSCertificate.PdfScan( items_found['cert_id'], {} if not initialized else initialized.pdf_scan.keywords, - [] if not initialized else initialized.pdf_scan.algorithms + [] if not initialized else initialized.pdf_scan.algorithms, + [] # connections ), - FIPSCertificate.Processed(None, {}, []), + FIPSCertificate.Processed(None, {}, [], 0), state ) @@ -508,6 +516,17 @@ class FIPSCertificate(Certificate, ComplexSerializableType): cert.state.txt_state = True return cert + + @staticmethod + def _declare_state(text: str): + """ + If less then half of the text is formed of alphabet characters, + we declare the security policy as "non-parsable" + :param text: security policy content + :return: True if parsable, otherwise False + """ + return len(text) * 0.5 <= len(''.join(filter(str.isalpha, text))) + @staticmethod def find_keywords(cert: 'FIPSCertificate') -> Tuple[Optional[Dict], 'FIPSCertificate']: if not cert.state.txt_state: @@ -518,8 +537,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType): text_to_parse = text_with_newlines if config.use_text_with_newlines_during_parsing['value'] else text - items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse), - cert.web_scan.algorithms) + cert.state.txt_state = FIPSCertificate._declare_state(text) + + if config.ignore_first_page: + text_to_parse = text_to_parse[text_to_parse.index(""):] + + items_found, fips_text = FIPSCertificate.parse_cert_file(FIPSCertificate.remove_platforms(text_to_parse)) save_modified_cert_file(cert.state.fragment_path.with_suffix('.fips.txt'), fips_text, unicode_error) @@ -550,15 +573,11 @@ class FIPSCertificate(Certificate, ComplexSerializableType): for web_alg in alg_list: if ''.join(filter(str.isdigit, web_alg)) not in all_algorithms: not_found.append(web_alg) - logger.error( - f"For cert {cert.dgst}:\n\tNOT FOUND: {len(not_found)}\n" - f"\tFOUND: {sum([len(a['Certificate']) for a in cert.web_scan.algorithms]) - len(not_found)}") - logger.error(f"Not found: {not_found}") return len(not_found) @staticmethod def remove_platforms(text_to_parse: str): - pat = re.compile(r"(?:modification|revision|change) history\n[\s\S]*?", re.IGNORECASE) + pat = re.compile(r"(?:(?:modification|revision|change) history|version control)\n[\s\S]*?", re.IGNORECASE) for match in pat.finditer(text_to_parse): text_to_parse = text_to_parse.replace( match.group(), 'x' * len(match.group())) @@ -566,7 +585,7 @@ class FIPSCertificate(Certificate, ComplexSerializableType): @staticmethod def parse_cert_file_common(text_to_parse: str, whole_text_with_newlines: str, - search_rules: Dict) -> Tuple[Optional[Dict], str]: + search_rules: Dict) -> Tuple[Optional[Dict[Pattern, Dict]], str]: # apply all rules items_found_all = {} for rule_group in search_rules.keys(): @@ -632,10 +651,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType): return items_found_all, whole_text_with_newlines @staticmethod - def parse_cert_file(text_to_parse: str, algorithms: List[Dict]) \ - -> Tuple[Optional[Dict], str]: + def parse_cert_file(text_to_parse: str) -> Tuple[Optional[Dict[Pattern, Dict]], str]: # apply all rules items_found_all: Dict = {} + for rule_group in fips_rules.keys(): if rule_group not in items_found_all: items_found_all[rule_group] = {} @@ -667,21 +686,27 @@ class FIPSCertificate(Certificate, ComplexSerializableType): return items_found_all, text_to_parse @staticmethod - def analyze_tables(cert: 'FIPSCertificate') -> Tuple[bool, 'FIPSCertificate', List]: + def analyze_tables(tup: Tuple['FIPSCertificate', bool]) -> Tuple[bool, 'FIPSCertificate', List]: + cert, precision = tup + if not (precision and cert.state.tables_done) \ + or (precision and cert.processed.unmatched_algs < config.cert_threshold['value']): + return cert.state.tables_done, cert, [] + cert_file = cert.state.sp_path txt_file = cert_file.with_suffix('.pdf.txt') with open(txt_file, 'r', encoding='utf-8') as f: tables = helpers.find_tables(f.read(), txt_file) + all_pages = precision and cert.processed.unmatched_algs > config.cert_threshold['value'] # bool value lst: List = [] if tables: try: - data = read_pdf(cert_file, pages=tables, silent=True) + data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True) except Exception as e: try: logger.error(e) helpers.repair_pdf(cert_file) - data = read_pdf(cert_file, pages=tables, silent=True) + data = read_pdf(cert_file, pages='all' if all_pages else tables, silent=True) except Exception as ex: logger.error(ex) @@ -691,11 +716,12 @@ class FIPSCertificate(Certificate, ComplexSerializableType): for df in data: for col in range(len(df.columns)): if 'cert' in df.columns[col].lower() or 'algo' in df.columns[col].lower(): - lst += FIPSCertificate.extract_algorithm_certificates( + tmp = FIPSCertificate.extract_algorithm_certificates( df.iloc[:, col].to_string(index=False), True) - + lst += tmp if tmp != [{"Certificate": []}] else [] # Parse again if someone picks not so descriptive column names - lst += FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False)) + tmp = FIPSCertificate.extract_algorithm_certificates(df.to_string(index=False)) + lst += tmp if tmp != [{"Certificate": []}] else [] return True, cert, lst def _create_alg_set(self) -> Set: @@ -710,9 +736,10 @@ class FIPSCertificate(Certificate, ComplexSerializableType): return self.processed.keywords = copy.deepcopy(self.pdf_scan.keywords) + # TODO figure out why can't I delete this if self.web_scan.mentioned_certs: - for item in self.web_scan.mentioned_certs: - self.processed.keywords['rules_cert_id'].update(item) + for item, value in self.web_scan.mentioned_certs.items(): + self.processed.keywords['rules_cert_id'].update({'caveat_item': {item: value}}) alg_set = self._create_alg_set() @@ -742,8 +769,8 @@ class FIPSCertificate(Certificate, ComplexSerializableType): @staticmethod def get_compare(vendor: str): vendor_split = vendor.replace(',', '') \ - .replace('-', ' ').replace('+', ' ').replace('®', '').split() - return vendor_split[0] if len(vendor_split) > 0 else vendor + .replace('-', ' ').replace('+', ' ').replace('®', '').replace('(R)', '').split() + return vendor_split[0][:4] if len(vendor_split) > 0 else vendor class CommonCriteriaCert(Certificate, ComplexSerializableType): diff --git a/sec_certs/constants.py b/sec_certs/constants.py index f19919a6..99d81640 100644 --- a/sec_certs/constants.py +++ b/sec_certs/constants.py @@ -46,5 +46,5 @@ TAG_PP_EDITOR = 'pp_editor' TAG_PP_REVIEWER = 'pp_reviewer' TAG_KEYWORDS = 'keywords' FIPS_NOT_AVAILABLE_CERT_SIZE = 10000 -FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=validation&page=' +FIPS_ALG_URL = 'https://csrc.nist.gov/projects/cryptographic-algorithm-validation-program/validation-search?searchMode=implementation&page=' diff --git a/sec_certs/dataset.py b/sec_certs/dataset.py index c1ac4f35..91ec113a 100644 --- a/sec_certs/dataset.py +++ b/sec_certs/dataset.py @@ -1,4 +1,5 @@ import os +import sys from datetime import datetime import locale import logging @@ -25,6 +26,8 @@ import sec_certs.constants as constants import sec_certs.cert_processing as cert_processing import sec_certs.files as files +from sec_certs.analyze_certificates import plot_bar_graph + from sec_certs.certificate import CommonCriteriaCert, Certificate, FIPSCertificate from sec_certs.serialization import ComplexSerializableType, CustomJSONDecoder, CustomJSONEncoder from sec_certs.configuration import config @@ -743,7 +746,8 @@ class CCDataset(Dataset, ComplexSerializableType): try: inpts = [int(x) for x in inpts] if min(inpts) < 0 or max(inpts) > len(x.heuristics.cpe_matches) - 1: - raise ValueError(f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}') + raise ValueError( + f'Incorrect number chosen, choose in range 0-{len(x.heuristics.cpe_matches) - 1}') except ValueError as e: logger.error(f'Bad input from user, repeating instance: {e}') print(f'Bad input from user, repeating instance: {e}') @@ -772,7 +776,8 @@ class CCDataset(Dataset, ComplexSerializableType): verified_cpe_rich_certs = [x for x in self if x.heuristics.verified_cpe_matches] if not verified_cpe_rich_certs: - logger.error('No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.') + logger.error( + 'No certificates with verified CPE match detected. You must run dset.manually_verify_cpe_matches() first. Returning.') return for cert in verified_cpe_rich_certs: cert.compute_heuristics_related_cves(cve_dset) @@ -820,7 +825,7 @@ class FIPSDataset(Dataset, ComplexSerializableType): not_available.append(i) return missing, not_available - def extract_keywords(self, redo=False): + def extract_keywords(self, redo=False, update_json: bool = True): self.fragments_dir.mkdir(parents=True, exist_ok=True) keywords = cert_processing.process_parallel(FIPSCertificate.find_keywords, @@ -830,21 +835,27 @@ class FIPSDataset(Dataset, ComplexSerializableType): use_threading=False) for keyword, cert in keywords: self.certs[cert.dgst].pdf_scan.keywords = keyword + + if update_json: + self.to_json(self.root_dir / 'fips_full_dataset.json') - def match_algs(self, show_graph=False) -> Dict: + def match_algs(self) -> Dict: output = {} + cert: FIPSCertificate for cert in self.certs.values(): output[cert.dgst] = FIPSCertificate.match_web_algs_to_pdf(cert) + cert.processed.unmatched_algs = output[cert.dgst] + output = {k: v for k, v in output.items() if v != 0} return output - def download_all_pdfs(self): sp_paths, sp_urls = [], [] self.policies_dir.mkdir(exist_ok=True) for cert_id in list(self.certs.keys()): - if not (self.policies_dir / f'{cert_id}.pdf').exists() or not self.certs[cert_id].state.txt_state: + if not (self.policies_dir / f'{cert_id}.pdf').exists() or (self.certs[cert_id] + and not self.certs[cert_id].state.txt_state): sp_urls.append( f"https://csrc.nist.gov/CSRC/media/projects/cryptographic-module-validation-program/documents/security-policies/140sp{cert_id}.pdf") sp_paths.append(self.policies_dir / f"{cert_id}.pdf") @@ -866,7 +877,7 @@ class FIPSDataset(Dataset, ComplexSerializableType): logging.info(f"downloading {len(html_urls)} module html files") failed = cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(html_urls, html_paths)), - constants.N_THREADS) + constants.N_THREADS) failed = [c for c in failed if c] self.new_files += len(html_urls) @@ -875,7 +886,7 @@ class FIPSDataset(Dataset, ComplexSerializableType): constants.N_THREADS) return new_files - def convert_all_pdfs(self): + def convert_all_pdfs(self, update_json: bool = True): logger.info('Converting FIPS certificate reports to .txt') tuples = [ (cert, self.policies_dir / f'{cert.cert_id}.pdf', self.policies_dir / f'{cert.cert_id}.pdf.txt') @@ -884,22 +895,45 @@ class FIPSDataset(Dataset, ComplexSerializableType): ] cert_processing.process_parallel(FIPSCertificate.convert_pdf_file, tuples, constants.N_THREADS) - def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None): + if update_json: + self.to_json(self.root_dir / 'fips_full_dataset.json') + + def prepare_dataset(self, test: Optional[Path] = None): + if test: + html_files = [test] + else: + html_files = ['fips_modules_active.html', + 'fips_modules_historical.html', 'fips_modules_revoked.html'] + helpers.download_file( + "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0", + self.web_dir / "fips_modules_active.html") + helpers.download_file( + "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0", + self.web_dir / "fips_modules_historical.html") + helpers.download_file( + "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0", + self.web_dir / "fips_modules_revoked.html") + + # Parse those files and get list of currently processable files (always) + for f in html_files: + self._get_certificates_from_html(self.web_dir / f) + + def _get_certificates_from_html(self, html_file: Path) -> None: + logger.info(f'Getting certificate ids from {html_file}') + with open(html_file, 'r', encoding='utf-8') as handle: + html = BeautifulSoup(handle.read(), 'html.parser') + + table = [x for x in html.find( + id='searchResultsTable').tbody.contents if x != '\n'] + for entry in table: + self.certs[entry.find('a').text] = None + + def get_certs_from_web(self, redo: bool = False, json_file: Optional[Path] = None, test: Optional[Path] = None, update_json: bool = True): def download_html_pages() -> List[str]: new_files = self.download_all_htmls() self.download_all_pdfs() return new_files - def get_certificates_from_html(html_file: Path) -> None: - logger.info(f'Getting certificate ids from {html_file}') - with open(html_file, 'r', encoding='utf-8') as handle: - html = BeautifulSoup(handle.read(), 'html.parser') - - table = [x for x in html.find( - id='searchResultsTable').tbody.contents if x != '\n'] - for entry in table: - self.certs[entry.find('a').text] = {} - logger.info("Downloading required html files") self.web_dir.mkdir(parents=True, exist_ok=True) @@ -907,21 +941,7 @@ class FIPSDataset(Dataset, ComplexSerializableType): self.algs_dir.mkdir(exist_ok=True) # Download files containing all available module certs (always) - html_files = ['fips_modules_active.html', - 'fips_modules_historical.html', 'fips_modules_revoked.html'] - helpers.download_file( - "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Active&ValidationYear=0", - self.web_dir / "fips_modules_active.html") - helpers.download_file( - "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Historical&ValidationYear=0", - self.web_dir / "fips_modules_historical.html") - helpers.download_file( - "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search?SearchMode=Advanced&CertificateStatus=Revoked&ValidationYear=0", - self.web_dir / "fips_modules_revoked.html") - - # Parse those files and get list of currently processable files (always) - for f in html_files: - get_certificates_from_html(self.web_dir / f) + self.prepare_dataset(test) logger.info('Downloading certificate html and security policies') @@ -931,7 +951,7 @@ class FIPSDataset(Dataset, ComplexSerializableType): if json_file.exists(): logger.info("Certs loaded from previous scanning") dataset = self.from_json(json_file) - self.certs = dataset.certs + self.certs.update(dataset.certs) self.algorithms = dataset.algorithms new_certs = download_html_pages() @@ -954,15 +974,19 @@ class FIPSDataset(Dataset, ComplexSerializableType): (self.web_dir / cert_id).with_suffix('.html'), (self.fragments_dir / cert_id).with_suffix('.txt'), False, None, False), cert, redo=redo) + + if update_json: + self.to_json(self.root_dir / 'fips_full_dataset.json') - def extract_certs_from_tables(self) -> List[Path]: + def extract_certs_from_tables(self, high_precision: bool, update_json: bool = True) -> List[Path]: """ Function that extracts algorithm IDs from tables in security policies files. :return: list of files that couldn't have been decoded """ result = cert_processing.process_parallel(FIPSCertificate.analyze_tables, - [cert for cert in self.certs.values() if - not cert.state.tables_done and cert.state.txt_state], + [(cert, high_precision) for cert in self.certs.values() if + ( + not cert.state.tables_done or high_precision) and cert.state.txt_state], constants.N_THREADS // 4, # tabula already processes by parallel, so # it's counterproductive to use all threads use_threading=False) @@ -972,6 +996,9 @@ class FIPSDataset(Dataset, ComplexSerializableType): self.certs[cert.dgst].state.tables_done = state self.certs[cert.dgst].pdf_scan.algorithms += algorithms + if update_json: + self.to_json(self.root_dir / 'fips_full_dataset.json') + return not_decoded def remove_algorithms_from_extracted_data(self): @@ -991,90 +1018,135 @@ class FIPSDataset(Dataset, ComplexSerializableType): new_algorithms.append({'Certificate': [algorithm]}) certificate.processed.algorithms = new_algorithms - def validate_results(self): - """ - Function that validates results and finds the final connection output - """ - - def validate_id(processed_cert: FIPSCertificate, cert_candidate: str) -> bool: + # returns True if candidates should _not_ be matched + def _compare_certs(self, current_certificate: 'FIPSCertificate', other_id: str): + cert_first = current_certificate.web_scan.date_validation[0].year + cert_last = current_certificate.web_scan.date_validation[-1].year + conn_first = self.certs[other_id].web_scan.date_validation[0].year + conn_last = self.certs[other_id].web_scan.date_validation[-1].year - # returns True if candidates should _not_ be matched - def compare_certs(current_certificate: 'FIPSCertificate', other_id: str): - cert_first = current_certificate.web_scan.date_validation[0].year - cert_last = current_certificate.web_scan.date_validation[-1].year - conn_first = self.certs[other_id].web_scan.date_validation[0].year - conn_last = self.certs[other_id].web_scan.date_validation[-1].year + return cert_first - conn_first > config.year_difference_between_validations['value'] \ + and cert_last - conn_last > config.year_difference_between_validations['value'] \ + or cert_first < conn_first - return cert_first - conn_first > config.year_difference_between_validations['value'] \ - and cert_last - conn_last > config.year_difference_between_validations['value'] \ - or cert_first < conn_first + def _remove_false_positives_for_cert(self, current_cert: FIPSCertificate): + for rule in current_cert.processed.keywords['rules_cert_id']: + matches = current_cert.processed.keywords['rules_cert_id'][rule] + current_cert.processed.keywords['rules_cert_id'][rule] = [cert_id for cert_id in matches if + self._validate_id(current_cert, + cert_id.replace('Cert.', '') + .replace('cert.', '') + .lstrip("#CA0 ")) + and cert_id != current_cert.cert_id] - # "< number" still needs to be used, because of some old certs being revalidated - if cert_candidate.isdecimal() \ - and int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \ - compare_certs(processed_cert, cert_candidate): - return False - if cert_candidate not in self.algorithms.certs: - return True + def _validate_id(self, processed_cert: FIPSCertificate, cert_candidate: str) -> bool: + if cert_candidate not in self.certs or not cert_candidate.isdecimal(): + return False - for cert_alg in processed_cert.processed.algorithms: - for certificate in cert_alg['Certificate']: - curr_id = ''.join(filter(str.isdigit, certificate)) - if curr_id == cert_candidate: - return False + # "< number" still needs to be used, because of some old certs being revalidated + if int(cert_candidate) < config.smallest_certificate_id_to_connect['value'] or \ + self._compare_certs(processed_cert, cert_candidate): + return False + if cert_candidate not in self.algorithms.certs: + return True - algs = self.algorithms.certs[cert_candidate] - for current_alg in algs: - if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare( - current_alg.vendor): + for cert_alg in processed_cert.processed.algorithms: + for certificate in cert_alg['Certificate']: + curr_id = ''.join(filter(str.isdigit, certificate)) + if curr_id == cert_candidate: return False - return True - broken_files = set() + algs = self.algorithms.certs[cert_candidate] + for current_alg in algs: + if FIPSCertificate.get_compare(processed_cert.web_scan.vendor) == FIPSCertificate.get_compare( + current_alg.vendor): + return False + return True + + @staticmethod + def _find_connections(current_cert: FIPSCertificate): + current_cert.processed.connections = [] + current_cert.web_scan.connections = [] + current_cert.pdf_scan.connections = [] + if not current_cert.state.file_status or not current_cert.processed.keywords: + return + if current_cert.processed.keywords['rules_cert_id'] == {}: + return + for rule in current_cert.processed.keywords['rules_cert_id']: + for cert in current_cert.processed.keywords['rules_cert_id'][rule]: + cert_id = ''.join(filter(str.isdigit, cert)) + if cert_id not in current_cert.processed.connections: + current_cert.processed.connections.append(cert_id) + current_cert.pdf_scan.connections.append(cert_id) + + # We want connections parsed in caveat to bypass age check, because we are 100 % sure they are right + if current_cert.web_scan.mentioned_certs: + for item in current_cert.web_scan.mentioned_certs: + cert_id = ''.join(filter(str.isdigit, item)) + if cert_id not in current_cert.processed.connections and cert_id != '': + current_cert.processed.connections.append(cert_id) + current_cert.web_scan.connections.append(cert_id) + def validate_results(self): + """ + Function that validates results and finds the final connection output + """ current_cert: FIPSCertificate for current_cert in self.certs.values(): if not current_cert.state.txt_state: continue - for rule in current_cert.processed.keywords['rules_cert_id']: - for cert in current_cert.processed.keywords['rules_cert_id'][rule]: - cert_id = ''.join(filter(str.isdigit, cert)) - - if cert_id == '' or cert_id not in self.certs: - broken_files.add(current_cert.dgst) - current_cert.state.file_status = False - break - - if broken_files: - logger.warning("CERTIFICATE FILES WITH WRONG CERTIFICATES PARSED") - logger.warning(broken_files) - logger.warning("... skipping these...") - logger.warning(f"Total non-analyzable files:{len(broken_files)}") + self._remove_false_positives_for_cert(current_cert) for current_cert in self.certs.values(): - current_cert.processed.connections = [] - if not current_cert.state.file_status or not current_cert.processed.keywords: - continue - if current_cert.processed.keywords['rules_cert_id'] == {}: - continue - for rule in current_cert.processed.keywords['rules_cert_id']: - for cert in current_cert.processed.keywords['rules_cert_id'][rule]: - cert_id = ''.join(filter(str.isdigit, cert)) - if cert_id not in current_cert.processed.connections and validate_id(current_cert, cert_id): - current_cert.processed.connections.append(cert_id) + FIPSDataset._find_connections(current_cert) - def finalize_results(self): + def finalize_results(self, update_json: bool = True): self.unify_algorithms() self.remove_algorithms_from_extracted_data() self.validate_results() - def get_dot_graph(self, output_file_name: str): + if update_json: + self.to_json(self.root_dir / 'fips_full_dataset.json') + + def _highlight_vendor_in_dot(self, dot: Digraph, current_key: str, highlighted_vendor: str): + if self.certs[current_key].web_scan.vendor != highlighted_vendor: + return + + dot.attr('node', color='red') + if self.certs[current_key].web_scan.status == 'Revoked': + dot.attr('node', color='grey32') + if self.certs[current_key].web_scan.status == 'Historical': + dot.attr('node', color='gold3') + + def _add_colored_node(self, dot: Digraph, current_key: str, highlighted_vendor: str): + dot.attr('node', color='lightgreen') + if self.certs[current_key].web_scan.status == 'Revoked': + dot.attr('node', color='lightgrey') + if self.certs[current_key].web_scan.status == 'Historical': + dot.attr('node', color='gold') + self._highlight_vendor_in_dot(dot, current_key, highlighted_vendor) + dot.node(current_key, label=current_key + ' ' + + self.certs[current_key].web_scan.vendor + + ' ' + + (self.certs[current_key].web_scan.module_name if + self.certs[current_key].web_scan.module_name else '')) + + def _get_processed_list(self, connection_list: str, key: str): + attr = {'pdf': 'pdf_scan', 'web': 'web_scan', 'processed': 'processed'}[connection_list] + return getattr(self.certs[key], attr).connections + + def get_dot_graph(self, output_file_name: str, connection_list: str = 'processed', + highlighted_vendor: str = 'Red Hat®, Inc.', show: bool = True): """ Function that plots .dot graph of dependencies between certificates Certificates with at least one dependency are displayed in "{output_file_name}connections.pdf", remaining certificates are displayed in {output_file_name}single.pdf + :param show: display graph right on screen + :param highlighted_vendor: vendor whose certificates should be highlighted in red color :param output_file_name: prefix to "connections", "connections.pdf", "single" and "single.pdf" + :param connection_list: 'processed', 'web', or 'pdf' - plots a graph from this source + default - processed """ dot = Digraph(comment='Certificate ecosystem') single_dot = Digraph(comment='Modules with no dependencies') @@ -1083,57 +1155,37 @@ class FIPSDataset(Dataset, ComplexSerializableType): dot.attr('graph', label='Dependencies', labelloc='t', fontsize='30') dot.attr('node', style='filled') - def found_interesting_cert(current_key): - if self.certs[current_key].web_scan.vendor == highlighted_vendor: - dot.attr('node', color='red') - if self.certs[current_key].web_scan.status == 'Revoked': - dot.attr('node', color='grey32') - if self.certs[current_key].web_scan.status == 'Historical': - dot.attr('node', color='gold3') - if self.certs[current_key].web_scan.vendor == "SUSE, LLC": - dot.attr('node', color='lightblue') - - def color_check(current_key): - dot.attr('node', color='lightgreen') - if self.certs[current_key].web_scan.status == 'Revoked': - dot.attr('node', color='lightgrey') - if self.certs[current_key].web_scan.status == 'Historical': - dot.attr('node', color='gold') - found_interesting_cert(current_key) - dot.node(current_key, - label=current_key + - ' ' + - self.certs[current_key].web_scan.vendor + - ' ' + - (self.certs[current_key].web_scan.module_name - if self.certs[current_key].web_scan.module_name else '')) - keys = 0 edges = 0 - highlighted_vendor = 'Red Hat®, Inc.' for key in self.certs: - if key != 'Not found' and self.certs[key].state.file_status: - if self.certs[key].processed.connections: - color_check(key) - keys += 1 - else: - single_dot.attr('node', color='lightblue') - found_interesting_cert(key) - single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + ( - '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else '')) + if key == 'Not found' or not self.certs[key].state.file_status: + continue + + processed = self._get_processed_list(connection_list, key) + + if processed: + self._add_colored_node(key) + keys += 1 + else: + single_dot.attr('node', color='lightblue') + self._highlight_vendor_in_dot(key) + single_dot.node(key, label=key + '\r\n' + self.certs[key].web_scan.vendor + ( + '\r\n' + self.certs[key].web_scan.module_name if self.certs[key].web_scan.module_name else '')) for key in self.certs: - if key != 'Not found' and self.certs[key].state.file_status: - for conn in self.certs[key].processed.connections: - color_check(conn) - dot.edge(key, conn) - edges += 1 + if key == 'Not found' or not self.certs[key].state.file_status: + continue + processed = self._get_processed_list(connection_list, key) + for conn in processed: + self._add_colored_node(dot, conn, highlighted_vendor) + dot.edge(key, conn) + edges += 1 - logging.info(f"rendering {keys} keys and {edges} edges") + logging.info(f"rendering for {connection_list}: {keys} keys and {edges} edges") - dot.render(str(output_file_name) + '_connections', view=True) - single_dot.render(str(output_file_name) + '_single', view=True) + dot.render(self.root_dir / (str(output_file_name) + '_connections'), view=show) + single_dot.render(self.root_dir / (str(output_file_name) + '_single'), view=show) def to_dict(self): return {'timestamp': self.timestamp, 'sha256_digest': self.sha256_digest, @@ -1171,6 +1223,11 @@ class FIPSDataset(Dataset, ComplexSerializableType): return vendors + def plot_graphs(self, show: bool = False): + self.get_dot_graph('full_graph', show=show) + self.get_dot_graph('web_only_graph', 'web', show=show) + self.get_dot_graph('pdf_only_graph', 'pdf', show=show) + class FIPSAlgorithmDataset(Dataset, ComplexSerializableType): @@ -1187,18 +1244,36 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType): soup = BeautifulSoup(alg_file.read(), 'html.parser') num_pages = soup.select('span[data-total-pages]')[0].attrs - for i in range(1, int(num_pages['data-total-pages'])): + for i in range(2, int(num_pages['data-total-pages'])): if not (self.root_dir / f'page{i}.html').exists(): algs_urls.append( constants.FIPS_ALG_URL + str(i)) algs_paths.append(self.root_dir / f"page{i}.html") + helpers.download_file(constants.FIPS_ALG_URL + num_pages['data-total-pages'], + self.root_dir / f"page{int(num_pages['data-total-pages'])}.html") logging.info(f"downloading {len(algs_urls)} algs html files") cert_processing.process_parallel(FIPSCertificate.download_html_page, list(zip(algs_urls, algs_paths)), constants.N_THREADS) self.parse_html() + @staticmethod + def _extract_algorithm_information(elements, vendor, date, product, validation): + for elem in elements: + # td > a > (vendor or date) + attachments = elem.find_all('a') + + if len(attachments) == 0: + vendor = elem.text.strip() if 'vendor-name' in elem['id'] else vendor + date = elem.text.strip() if 'validation-date' in elem['id'] else date + continue + + for attachment in attachments: + product = elem.text.strip() if 'product-name' in attachment['id'] else product + validation = elem.text.strip() if 'validation-number' in attachment['id'] else validation + return vendor, date, product, validation + def parse_html(self): def split_alg(alg_string): cert_type = alg_string.rstrip('0123456789') @@ -1210,18 +1285,19 @@ class FIPSAlgorithmDataset(Dataset, ComplexSerializableType): html_soup = BeautifulSoup(handle.read(), 'html.parser') table = html_soup.find('table', class_='table table-condensed publications-table table-bordered') - spans = table.find_all('span') - for span in spans: - elements = span.find_all('td') - vendor, implementation = elements[0].text, elements[1].text - elements_sliced = elements[2:] - for i in range(0, len(elements_sliced), 2): - alg_type, alg_id = split_alg(elements_sliced[i].text.strip()) - validation_date = elements_sliced[i + 1].text.strip() - fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, implementation, alg_type, validation_date) - if alg_id not in self.certs: - self.certs[alg_id] = [] - self.certs[alg_id].append(fips_alg) + tbody_contents = table.find('tbody').find_all('tr') + vendor = product = validation = date = "" + for tr in tbody_contents: + elements = tr.find_all('td') + vendor, date, product, validation = FIPSAlgorithmDataset._extract_algorithm_information( + elements, vendor, date, product, validation + ) + + alg_type, alg_id = split_alg(validation) + fips_alg = FIPSCertificate.Algorithm(alg_id, vendor, product, alg_type, date) + if alg_id not in self.certs: + self.certs[alg_id] = [] + self.certs[alg_id].append(fips_alg) def convert_all_pdfs(self): raise NotImplementedError('Not meant to be implemented') diff --git a/sec_certs/helpers.py b/sec_certs/helpers.py index 87f6e25e..80a54549 100644 --- a/sec_certs/helpers.py +++ b/sec_certs/helpers.py @@ -134,8 +134,15 @@ def find_tables_iterative(file_text: str) -> List[int]: current_page += 1 if line.startswith('Table ') or line.startswith('Exhibit'): pages.add(current_page) + pages.add(current_page + 1) + if current_page > 2: + pages.add(current_page - 1) if not pages: logger.warning('No pages found') + for page in pages: + if page > current_page - 1: + return list(pages - {page}) + return list(pages) @@ -491,11 +498,14 @@ def extract_keywords(filepath: Path) -> Tuple[int, Optional[Dict[str, str]]]: return constants.RETURNCODE_OK, result -def analyze_matched_algs(data: Dict): +def plot_dataframe_graph(data: Dict, label: str, file_name: str, density: bool = False, cumulative: bool = False, bins: int = 50, log: bool = True, show: bool = True): pd_data = pd.Series(data) - pd_data.hist(bins=50) - plt.show() + pd_data.hist(bins=bins, label=label, density=density, cumulative=cumulative) + plt.savefig(file_name) + if show: + plt.show() - sorted_data = pd_data.value_counts(ascending=True) + if log: + sorted_data = pd_data.value_counts(ascending=True) - logging.info(sorted_data.where(sorted_data > 1).dropna())
\ No newline at end of file + logging.info(sorted_data.where(sorted_data > 1).dropna())
\ No newline at end of file diff --git a/sec_certs/settings.yaml b/sec_certs/settings.yaml index 0c8b130f..9b07a8be 100644 --- a/sec_certs/settings.yaml +++ b/sec_certs/settings.yaml @@ -6,7 +6,14 @@ smallest_certificate_id_to_connect: year_difference_between_validations: description: During validation we don't connect certificates with validation dates difference higher than _this_ - value: 5 + value: 7 use_text_with_newlines_during_parsing: description: During keyword search, search in text with newlines value: true +ignore_first_page: + description: During keyword search, first page usually contains addresses - ignore it. + value: true +cert_threshold: + description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty. + value: 5 + diff --git a/test/data/test_fips_oop/algorithms.json b/test/data/test_fips_oop/algorithms.json new file mode 100644 index 00000000..7845d93d --- /dev/null +++ b/test/data/test_fips_oop/algorithms.json @@ -0,0 +1,513 @@ +{ + "_type": "FIPSAlgorithmDataset", + "certs": { + "2351": [ + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "9/21/2018", + "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A11 Bionic, Assembler_VNG)", + "type": "DRBG", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "11/27/2015", + "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6)", + "type": "HMAC", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "1/27/2017", + "implementation": "OpenSSL using assembler for AES and SHA", + "type": "RSA", + "vendor": "Canonical Ltd." + }, + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "1/19/2017", + "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7020", + "type": "TDES", + "vendor": "Juniper Networks, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "3/8/2013", + "implementation": "Samsung OpenSSL Cryptographic Module", + "type": "AES", + "vendor": "Samsung Electronics Co., Ltd" + }, + { + "_type": "Algorithm", + "cert_id": "2351", + "date": "3/7/2014", + "implementation": "Symantec PGP Cryptographic Engine", + "type": "SHS", + "vendor": "Symantec Corporation" + } + ], + "2352": [ + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "9/21/2018", + "implementation": "Apple CoreCrypto Kernel Module v9.0 for ARM (iOS12, A10X Fusion, Assembler_VNG)", + "type": "DRBG", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "3/8/2013", + "implementation": "AES-256 Core", + "type": "AES", + "vendor": "Altera Canada" + }, + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "11/27/2015", + "implementation": "Apple iOS CoreCrypto Kernel Module (Optimized SHA, A6X)", + "type": "HMAC", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "1/27/2017", + "implementation": "OpenSSL using support from Power ISA 2.07 for AES and SHA", + "type": "RSA", + "vendor": "Canonical Ltd." + }, + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "1/19/2017", + "implementation": "Junos FIPS Version Junos 15.1 X49 - Dataplane_CN7130", + "type": "TDES", + "vendor": "Juniper Networks, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2352", + "date": "3/21/2014", + "implementation": "Karnak SHA in Hardware", + "type": "SHS", + "vendor": "Seagate Technology, LLC." + } + ], + "2600": [ + { + "_type": "Algorithm", + "cert_id": "2600", + "date": "12/15/2017", + "implementation": "Apple iOS CoreCrypto v8 Kernel Module (Generic Software Implementation)", + "type": "TDES", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2600", + "date": "6/10/2016", + "implementation": "IOS Common Cryptographic Module (IC2M) Algorithm Module", + "type": "HMAC", + "vendor": "Cisco Systems, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2600", + "date": "8/16/2013", + "implementation": "Blade System Virtual Connect", + "type": "AES", + "vendor": "Hewlett-Packard Development Company, L.P." + }, + { + "_type": "Algorithm", + "cert_id": "2600", + "date": "12/5/2014", + "implementation": "Cryptographic Security Kernel", + "type": "SHS", + "vendor": "IBM Corporation" + }, + { + "_type": "Algorithm", + "cert_id": "2600", + "date": "9/1/2017", + "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 31bit", + "type": "RSA", + "vendor": "IBM Corporation" + } + ], + "2601": [ + { + "_type": "Algorithm", + "cert_id": "2601", + "date": "12/5/2014", + "implementation": "SHA256 Library on Canon MFP Security Chip", + "type": "SHS", + "vendor": "Canon Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2601", + "date": "8/16/2013", + "implementation": "Dell AppAssure Crypto Library", + "type": "AES", + "vendor": "Dell, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2601", + "date": "6/10/2016", + "implementation": "EFJ Communication Cryptographic Library", + "type": "HMAC", + "vendor": "EFJohnson Technologies" + }, + { + "_type": "Algorithm", + "cert_id": "2601", + "date": "9/1/2017", + "implementation": "IBM z/OS(R) Cryptographic Services System SSL - 64bit", + "type": "RSA", + "vendor": "IBM Corporation" + }, + { + "_type": "Algorithm", + "cert_id": "2601", + "date": "12/22/2017", + "implementation": "Oracle Linux 7 GnuTLS C Implementation", + "type": "TDES", + "vendor": "Oracle Corporation" + } + ], + "2602": [ + { + "_type": "Algorithm", + "cert_id": "2602", + "date": "12/22/2017", + "implementation": "Apple tvOS CoreCrypto Kernel Module v8.0 (Generic Software Implementation)", + "type": "TDES", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2602", + "date": "6/10/2016", + "implementation": "FIPS-ALGORITHMS.1.5.0v", + "type": "HMAC", + "vendor": "Mercury Systems" + }, + { + "_type": "Algorithm", + "cert_id": "2602", + "date": "8/16/2013", + "implementation": "RSA BSAFE\u00ae Crypto-J Software Module", + "type": "AES", + "vendor": "RSA Security, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2602", + "date": "12/5/2014", + "implementation": "SHA Library", + "type": "SHS", + "vendor": "Sage Microelectronics Corp" + }, + { + "_type": "Algorithm", + "cert_id": "2602", + "date": "9/1/2017", + "implementation": "Bouncy Castle FIPS Java API", + "type": "RSA", + "vendor": "Legion of the Bouncy Castle Inc." + } + ], + "2700": [ + { + "_type": "Algorithm", + "cert_id": "2700", + "date": "3/13/2015", + "implementation": "Apple OSX CoreCrypto Module (Generic, Xeon)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2700", + "date": "10/21/2016", + "implementation": "Axway OpenSSL", + "type": "HMAC", + "vendor": "Axway Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2700", + "date": "11/30/2017", + "implementation": "Brocade Fabric OS FIPS Cryptographic Module", + "type": "RSA", + "vendor": "Brocade Communications Systems LLC" + }, + { + "_type": "Algorithm", + "cert_id": "2700", + "date": "3/30/2018", + "implementation": "Junos OS 17.4R1-S1 - Dataplane", + "type": "TDES", + "vendor": "Juniper Networks, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2700", + "date": "11/29/2013", + "implementation": "VMware NSS Cryptographic Module", + "type": "AES", + "vendor": "VMware, Inc." + } + ], + "2701": [ + { + "_type": "Algorithm", + "cert_id": "2701", + "date": "3/30/2018", + "implementation": "Security Builder GSE-J Crypto Core", + "type": "TDES", + "vendor": "BlackBerry Certicom" + }, + { + "_type": "Algorithm", + "cert_id": "2701", + "date": "11/30/2017", + "implementation": "ngfips_rsa", + "type": "RSA", + "vendor": "Cavium, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2701", + "date": "10/28/2016", + "implementation": "Cisco_SSL_Implementation-1", + "type": "HMAC", + "vendor": "Cisco Systems, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2701", + "date": "3/13/2015", + "implementation": "RSA BSAFE\u00ae Crypto-J JSAFE and JCE Software Module", + "type": "SHS", + "vendor": "RSA, The Security Division of EMC" + }, + { + "_type": "Algorithm", + "cert_id": "2701", + "date": "11/29/2013", + "implementation": "VMware Cryptographic Module", + "type": "AES", + "vendor": "VMware, Inc." + } + ], + "2702": [ + { + "_type": "Algorithm", + "cert_id": "2702", + "date": "3/30/2018", + "implementation": "Security Builder GSE-J Crypto Core", + "type": "TDES", + "vendor": "BlackBerry Certicom" + }, + { + "_type": "Algorithm", + "cert_id": "2702", + "date": "11/30/2017", + "implementation": "DELPHI RSA2048 Signature Verification Algorithm Implementation", + "type": "RSA", + "vendor": "DELPHI" + }, + { + "_type": "Algorithm", + "cert_id": "2702", + "date": "12/6/2013", + "implementation": "RSA BSAFE Crypto-J", + "type": "AES", + "vendor": "McAfee, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "2702", + "date": "10/28/2016", + "implementation": "OpenSSL Crypto Library", + "type": "HMAC", + "vendor": "MikroM GmbH" + }, + { + "_type": "Algorithm", + "cert_id": "2702", + "date": "3/13/2015", + "implementation": "OpenSSL FIPS Object Module", + "type": "SHS", + "vendor": "OpenSSL Validation Services, Inc." + } + ], + "3415": [ + { + "_type": "Algorithm", + "cert_id": "3415", + "date": "1/26/2018", + "implementation": "Apple Secure Key Store CoreCrypto Module (Generic Software Implementation)", + "type": "HMAC", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3415", + "date": "6/5/2015", + "implementation": "Motorola Solutions Subscriber \u00b5Mace AES256", + "type": "AES", + "vendor": "Motorola Solutions Inc" + }, + { + "_type": "Algorithm", + "cert_id": "3415", + "date": "11/18/2016", + "implementation": "Secure Parser Library", + "type": "SHS", + "vendor": "Security First Corp." + } + ], + "3426": [ + { + "_type": "Algorithm", + "cert_id": "3426", + "date": "6/11/2015", + "implementation": "Apple iOS CoreCrypto Module (KeyWrap A8 32 bit)", + "type": "AES", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3426", + "date": "12/2/2016", + "implementation": "Apple iOS CoreCrypto Module (Generic)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3426", + "date": "1/26/2018", + "implementation": "Apple Secure Key Store CoreCrypto Module (VNG)", + "type": "HMAC", + "vendor": "Apple Inc." + } + ], + "3427": [ + { + "_type": "Algorithm", + "cert_id": "3427", + "date": "12/2/2016", + "implementation": "Apple iOS CoreCrypto Module (Generic)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3427", + "date": "1/26/2018", + "implementation": "Forcepoint NGFW FIPS Java API", + "type": "HMAC", + "vendor": "Forcepoint" + }, + { + "_type": "Algorithm", + "cert_id": "3427", + "date": "6/11/2015", + "implementation": "HP ESKM OpenSSL", + "type": "AES", + "vendor": "Hewlett Packard Enterprise" + } + ], + "3447": [ + { + "_type": "Algorithm", + "cert_id": "3447", + "date": "12/2/2016", + "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3447", + "date": "7/2/2015", + "implementation": "FireEye Algorithms Implementation", + "type": "AES", + "vendor": "FireEye, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3447", + "date": "2/9/2018", + "implementation": "OpenSSL (no AVX2/AVX/AESNI/SSSE3, x86_64, 64-bit library)", + "type": "HMAC", + "vendor": "Red Hat, Inc." + } + ], + "3451": [ + { + "_type": "Algorithm", + "cert_id": "3451", + "date": "12/2/2016", + "implementation": "Apple OSX CoreCrypto Module (Optimized SHA nosse)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3451", + "date": "7/2/2015", + "implementation": "OpenSSL FIPS Object Module", + "type": "AES", + "vendor": "OpenSSL Software Foundation, Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3451", + "date": "2/9/2018", + "implementation": "OpenSSL (no AVX2/AVX/AESNI, x86_64, 64-bit library)", + "type": "HMAC", + "vendor": "Red Hat, Inc." + } + ], + "3464": [ + { + "_type": "Algorithm", + "cert_id": "3464", + "date": "12/9/2016", + "implementation": "Apple OSX CoreCrypto Module (Generic)", + "type": "SHS", + "vendor": "Apple Inc." + }, + { + "_type": "Algorithm", + "cert_id": "3464", + "date": "7/10/2015", + "implementation": "Security Builder Linux Kernel Crypto Core", + "type": "AES", + "vendor": "Certicom Corp." + }, + { + "_type": "Algorithm", + "cert_id": "3464", + "date": "2/9/2018", + "implementation": "HPE Secure Encryption Engine v1.1", + "type": "HMAC", + "vendor": "Hewlett-Packard Development Company, L.P." + } + ] + } +}
\ No newline at end of file diff --git a/test/fips_test_utils.py b/test/fips_test_utils.py new file mode 100644 index 00000000..94fb406a --- /dev/null +++ b/test/fips_test_utils.py @@ -0,0 +1,51 @@ +from typing import List +from pathlib import Path + +def generate_html(ids: List[str], path: Path): + def generate_entry(certificate_id: str) -> str: + return f''' + <tr id="cert-row-0"> + <td class="text-center"> + <a href="/projects/cryptographic-module-validation-program/certificate/3898" id="cert-number-link-0">{certificate_id}</a> + </td> + </tr> + ''' + + html_head = ''' + <!DOCTYPE html> + <html lang="en-us" xml:lang="en-us"> + <head> + <meta charset="utf-8" /> + <title>Cryptographic Module Validation Program | CSRC</title> + <meta http-equiv="content-type" content="text/html; charset=UTF-8" /> + <meta http-equiv="content-style-type" content="text/css" /> + <meta http-equiv="content-script-type" content="text/javascript" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="msapplication-config" content="/CSRC/Media/images/favicons/browserconfig.xml" /> + <meta name="theme-color" content="#000000" /> + <meta name="google-site-verification" content="xbrnrVYDgLD-Bd64xHLCt4XsPXzUhQ-4lGMj4TdUUTA" /> + </head> + ''' + rows = "" + for cert_id in ids: + rows += f"\n{generate_entry(cert_id)}\n" + html_body = f''' + <body> + <table class="table table-striped table-condensed publications-table table-bordered" id="searchResultsTable"> + <thead> + <tr> + <th class="text-center">Certificate Number</th> + <th class="text-center">Vendor Name</th> + <th class="text-center">Module Name</th> + <th class="text-center">Module Type</th> + <th class="text-center">Validation Date</th> + </tr> + </thead> + <tbody> + {rows} + </tbody> + </table> + </body> + ''' + with open(path, 'w') as f: + f.write(f"{html_head}\n{html_body}\n") diff --git a/test/settings_test.yaml b/test/settings_test.yaml new file mode 100644 index 00000000..9b07a8be --- /dev/null +++ b/test/settings_test.yaml @@ -0,0 +1,19 @@ +--- +smallest_certificate_id_to_connect: + description: During validation we don't connect certificates with number lower than + _this_ to connections + value: 40 +year_difference_between_validations: + description: During validation we don't connect certificates with validation dates + difference higher than _this_ + value: 7 +use_text_with_newlines_during_parsing: + description: During keyword search, search in text with newlines + value: true +ignore_first_page: + description: During keyword search, first page usually contains addresses - ignore it. + value: true +cert_threshold: + description: Used with --higher-precision-results. Determines the amount of mismatched algorithms to be considered faulty. + value: 5 + diff --git a/test/test_fips_oop.py b/test/test_fips_oop.py new file mode 100644 index 00000000..107f0c5d --- /dev/null +++ b/test/test_fips_oop.py @@ -0,0 +1,134 @@ +from unittest import TestCase +from pathlib import Path +from tempfile import TemporaryDirectory + +from sec_certs.dataset import FIPSDataset, FIPSAlgorithmDataset +from sec_certs.configuration import config +from fips_test_utils import generate_html + + + +def _set_up_dataset(td, certs): + dataset = FIPSDataset({}, Path(td), 'test_dataset', 'fips_test_dataset') + generate_html(certs, td + '/test_search.html') + dataset.get_certs_from_web(test=td + '/test_search.html', update_json=False) + return dataset + + +def _set_up_dataset_for_full(td, certs): + dataset = _set_up_dataset(td, certs) + dataset.convert_all_pdfs() + dataset.extract_keywords() + dataset.extract_certs_from_tables(high_precision=True) + dataset.algorithms = FIPSAlgorithmDataset.from_json(Path(__file__).parent / 'data/test_fips_oop/algorithms.json') + dataset.finalize_results() + return dataset + + +class TestFipsOOP(TestCase): + def setUp(self) -> None: + self.data_dir: Path = Path(__file__).parent / 'data' / 'test_fips_oop' + self.dataset = FIPSDataset({}, self.data_dir, 'test_dataset', 'fips_test_dataset') + self.certs_to_parse = [ + ['3099', '2549', '2484', '3038', '2472', '2435', '2471', '1930'], # openSUSE chunk + ['23', '24', '25', '26'], + ['3095', '3651', '3093', '3090', '3197', '3196', '3089', '3195', '3480', '3615', '3194', '3091', '3690', + '3644', '3527', '3094', '3544', '3096', '3092'], # microsoft chunk + ['2630', '2721', '2997', '2441', '2711', '2633', '2798', '3613', '3733', '2908', '2446', '2742', '2447'], + # redhat chunk + ['3850', '2779', '2860', '2665', '1883', '3518', '3141', '2590'], # Document signing chunk + ['3493', '3495', '3711', '3176', '3488', '3126', '3269', '3524', '3220', '2398', '3543', '2676', '3313', + '3363', '3608', '3158'], # Chunk referencing openSSL FIPS Object Module SE + ] + config.load(Path(__file__).parent / 'settings_test.yaml') + + def test_size(self): + for certs in self.certs_to_parse: + with TemporaryDirectory() as td: + dataset = _set_up_dataset(td, certs) + self.assertEqual(len(dataset.certs), len(certs), "Wrong number of parsed certs") + + def test_connections_microsoft(self): + certs = self.certs_to_parse[2] + with TemporaryDirectory() as td: + dataset = _set_up_dataset_for_full(td, certs) + + self.assertEqual(set(dataset.certs['3095'].processed.connections), {x for x in ['3093', '3096', '3094']}) + self.assertEqual(set(dataset.certs['3651'].processed.connections), {x for x in ['3615']}) + self.assertEqual(set(dataset.certs['3093'].processed.connections), {x for x in ['3090', '3091']}) + self.assertEqual(set(dataset.certs['3090'].processed.connections), {x for x in ['3089']}) + self.assertEqual(set(dataset.certs['3197'].processed.connections), + {x for x in ['3195', '3096', '3196', '3644', '3651']}) + self.assertEqual(set(dataset.certs['3196'].processed.connections), + {x for x in ['3194', '3091', '3480', '3615']}) + self.assertEqual(set(dataset.certs['3089'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['3195'].processed.connections), {x for x in ['3194', '3091', '3480']}) + self.assertEqual(set(dataset.certs['3480'].processed.connections), {x for x in ['3089']}) + self.assertEqual(set(dataset.certs['3615'].processed.connections), {x for x in ['3089']}) + self.assertEqual(set(dataset.certs['3194'].processed.connections), {x for x in ['3089']}) + self.assertEqual(set(dataset.certs['3091'].processed.connections), {x for x in ['3089']}) + self.assertEqual(set(dataset.certs['3690'].processed.connections), {x for x in ['3644', '3196', '3651']}) + self.assertEqual(set(dataset.certs['3644'].processed.connections), {x for x in ['3615']}) + self.assertEqual(set(dataset.certs['3527'].processed.connections), {x for x in ['3090', '3091']}) + self.assertEqual(set(dataset.certs['3094'].processed.connections), {x for x in ['3090', '3091']}) + self.assertEqual(set(dataset.certs['3544'].processed.connections), {x for x in ['3093', '3096', '3527']}) + self.assertEqual(set(dataset.certs['3096'].processed.connections), + {x for x in ['3090', '3194', '3091', '3480']}) + self.assertEqual(set(dataset.certs['3092'].processed.connections), + {x for x in ['3093', '3195', '3096', '3644', '3651']}) + + def test_connections_redhat(self): + certs = self.certs_to_parse[3] + with TemporaryDirectory() as td: + dataset = _set_up_dataset_for_full(td, certs) + self.assertEqual(set(dataset.certs['2630'].processed.connections), {x for x in ['2441']}) + self.assertEqual(set(dataset.certs['2633'].processed.connections), {x for x in ['2441']}) + self.assertEqual(set(dataset.certs['2441'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['2997'].processed.connections), {x for x in ['2711']}) + self.assertEqual(set(dataset.certs['2446'].processed.connections), {x for x in ['2441']}) + self.assertEqual(set(dataset.certs['2447'].processed.connections), {x for x in ['2441']}) + self.assertEqual(set(dataset.certs['3733'].processed.connections), {x for x in ['2441']}) + self.assertEqual(set(dataset.certs['2441'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['2711'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['2908'].processed.connections), {x for x in ['2711']}) + self.assertEqual(set(dataset.certs['3613'].processed.connections), {x for x in ['2997']}) + self.assertEqual(set(dataset.certs['2721'].processed.connections), {x for x in ['2441', '2711']}) + self.assertEqual(set(dataset.certs['2798'].processed.connections), {x for x in ['2721', '2711']}) + self.assertEqual(set(dataset.certs['2711'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['2997'].processed.connections), {x for x in ['2711']}) + self.assertEqual(set(dataset.certs['2742'].processed.connections), {x for x in ['2721', '2711']}) + self.assertEqual(set(dataset.certs['2721'].processed.connections), {x for x in ['2441', '2711']}) + + def test_docusign_chunk(self): + certs = self.certs_to_parse[4] + with TemporaryDirectory() as td: + dataset = _set_up_dataset_for_full(td, certs) + self.assertEqual(set(dataset.certs['3850'].processed.connections), {x for x in ['3518', '1883']}) + self.assertEqual(set(dataset.certs['2779'].processed.connections), {x for x in ['1883']}) + self.assertEqual(set(dataset.certs['2860'].processed.connections), {x for x in ['1883']}) + self.assertEqual(set(dataset.certs['2665'].processed.connections), {x for x in ['1883']}) + self.assertEqual(set(dataset.certs['1883'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['3518'].processed.connections), {x for x in ['1883']}) + self.assertEqual(set(dataset.certs['3141'].processed.connections), {x for x in ['1883']}) + self.assertEqual(set(dataset.certs['2590'].processed.connections), {x for x in ['1883']}) + + def test_openssl_chunk(self): + certs = self.certs_to_parse[5] + with TemporaryDirectory() as td: + dataset = _set_up_dataset_for_full(td, certs) + self.assertEqual(set(dataset.certs['3493'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3495'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3711'].processed.connections), {x for x in ['3220']}) + self.assertEqual(set(dataset.certs['3176'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3488'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3126'].processed.connections), {x for x in ['3126', '2398']}) + self.assertEqual(set(dataset.certs['3269'].processed.connections), {x for x in ['3269', '3220']}) + self.assertEqual(set(dataset.certs['3524'].processed.connections), {x for x in ['3220']}) + self.assertEqual(set(dataset.certs['3220'].processed.connections), {x for x in ['3220', '2398']}) + self.assertEqual(set(dataset.certs['2398'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['3543'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['2676'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3313'].processed.connections), {x for x in ['3313', '3220']}) + self.assertEqual(set(dataset.certs['3363'].processed.connections), {x for x in []}) + self.assertEqual(set(dataset.certs['3608'].processed.connections), {x for x in ['2398']}) + self.assertEqual(set(dataset.certs['3158'].processed.connections), {x for x in ['2398']}) |
