blob: 29def971362404bcc127976d551f7d0896850916 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
from __future__ import annotations
import logging
import re
from pathlib import Path
from sec_certs.cert_rules import FIPS_LIST_OF_TABLES
logger = logging.getLogger(__name__)
def parse_list_of_tables(txt: str) -> set[int]:
    """
    Collects page numbers from the "list of tables" section of a policy txt.

    An entry (one line of the text) is taken into account when it mentions a
    function, an algorithm or security function(s) and ends with a number. That
    trailing number is interpreted as the page the table is located on.

    :param txt: text of the list of tables
    :return: set of page numbers found at the end of the matching lines
    """
    entry_pattern = r"^.+?(?:[Ff]unction|[Aa]lgorithm|[Ss]ecurity [Ff]unctions?).+?(?P<page_num>\d+)$"
    page_numbers: set[int] = set()
    # MULTILINE anchors ^ and $ to every line, so each entry is matched separately
    for entry in re.finditer(entry_pattern, txt, flags=re.MULTILINE):
        page_numbers.add(int(entry["page_num"]))
    return page_numbers
def get_table_rich_page_numbers_from_footer(file_text: str) -> set[int]:
    """
    Parses page numbers of policy txt pages that may contain tables with algorithm data

    Pages are delimited by form feed characters and numbered from 1. A page is a
    candidate when one of its lines starts with "Table " or "Exhibit". Because a
    table may span over a page break, the following page is reported as well, and so
    is the preceding one (except for candidates found on the first two pages).

    Only page numbers that exist in the text are returned. Blank text after the last
    form feed is not counted as a page.

    :param file_text: whole text of the policy, pages separated by form feeds
    :return: set of 1-based page numbers that may contain tables
    """
    caption_prefixes = ("Table ", "Exhibit")

    # Splitting on the form feed (instead of testing whether a line contains one)
    # counts consecutive page breaks correctly and lets a caption that is the very
    # first text of a page be recognised.
    page_texts = file_text.split("\f")

    # NOTE(review): assumes the converter terminates the last page with a form feed,
    # which leaves an empty remainder that must not be counted - confirm for all inputs.
    last_page = len(page_texts) if page_texts[-1].strip() else len(page_texts) - 1

    pages: set[int] = set()
    for page_number, page_text in enumerate(page_texts, start=1):
        if not any(line.startswith(caption_prefixes) for line in page_text.split("\n")):
            continue
        pages.add(page_number)
        pages.add(page_number + 1)
        if page_number > 2:
            pages.add(page_number - 1)

    # Drop every page number past the end of the document. The decision is made per
    # page number, so the result no longer depends on set iteration order.
    return {page for page in pages if page <= last_page}
def find_pages_with_tables(txt_filepath: Path) -> set[int]:
    """
    Reads the txt file (as UTF-8) and returns numbers of pages that may contain tables.

    When the text matches FIPS_LIST_OF_TABLES, the page numbers are parsed from the
    matched list of tables. Otherwise, the pages are estimated from lines that begin
    like a table caption together with form feed page breaks.

    :param txt_filepath: path to the txt file to inspect
    :return: set of page numbers, empty set if nothing was found
    """
    text = txt_filepath.read_text(encoding="utf-8")

    toc_match = FIPS_LIST_OF_TABLES.search(text)
    if not toc_match:
        # No list of tables in the document, fall back to scanning the whole text
        page_numbers = get_table_rich_page_numbers_from_footer(text)
    else:
        page_numbers = parse_list_of_tables(toc_match.group())

    return page_numbers or set()
def get_algs_from_table(dataframe_text: str) -> set[str]:
    """
    Finds number-like algorithm references in the text of a table.

    A reference is a run of digits, optionally preceded by a "#" or a "Cert" prefix
    and by a C/A letter (in either case). Note that the whole matched text is
    returned (including the prefix and possible whitespace), not only the "id" group.

    :param dataframe_text: text of the table to search in
    :return: set of distinct matched substrings
    """
    reference_pattern = re.compile(r"(?:#?\s?|(?:Cert)\.?[^. ]*?\s?)(?:[CcAa]\s)?(?P<id>[CcAa]? ?\d+)")
    found: set[str] = set()
    for reference in reference_pattern.finditer(dataframe_text):
        found.add(reference.group())
    return found
|