about | summary | refs | log | tree | commit | diff | homepage
path: root/src/sec_certs
diff options
context:
space:
mode:
author: Ján Jančár, 2025-11-22 13:52:24 +0100
committer: GitHub, 2025-11-22 13:52:24 +0100
commit: 8436df07381295b05a0cc892cca27e475629ee80 (patch)
tree: 9b940ee0a8851baaff67567c4004cd3c2cc027fd /src/sec_certs
parent: 7cfa1c9fc498013d1a4807e5a11f55f39e6595aa (diff)
parent: 21aa562cdd1c21cc66e0b19b52b414af2782377e (diff)
download: sec-certs-main.tar.gz
sec-certs-main.tar.zst
sec-certs-main.zip
Merge pull request #530 from crocs-muni/fix/try-spacy (HEAD, main)
Move to uv
Diffstat (limited to 'src/sec_certs')
-rw-r--r-- src/sec_certs/cli.py | 2
-rw-r--r-- src/sec_certs/configuration.py | 6
-rw-r--r-- src/sec_certs/model/references_nlp/__init__.py | 2
-rw-r--r-- src/sec_certs/utils/extract.py | 17
4 files changed, 16 insertions, 11 deletions
diff --git a/src/sec_certs/cli.py b/src/sec_certs/cli.py
index 6276aa2f..fb5bf802 100644
--- a/src/sec_certs/cli.py
+++ b/src/sec_certs/cli.py
@@ -216,7 +216,7 @@ def main(
processing_step.run(dset)
end = datetime.now()
- logger.info(f"The computation took {(end-start)} seconds.")
+ logger.info(f"The computation took {(end - start)} seconds.")
except Exception as e:
click.echo(
f"Unhandled exception: {e}",
diff --git a/src/sec_certs/configuration.py b/src/sec_certs/configuration.py
index 17db9318..c06f872a 100644
--- a/src/sec_certs/configuration.py
+++ b/src/sec_certs/configuration.py
@@ -2,7 +2,7 @@ from __future__ import annotations
import json
from pathlib import Path
-from typing import Literal, Optional
+from typing import Literal
import yaml
from pydantic import AnyHttpUrl, Field
@@ -121,7 +121,7 @@ class Configuration(BaseSettings):
True,
description="During keyword search, first page usually contains addresses - ignore it.",
)
- cc_reference_annotator_dir: Optional[Path] = Field( # noqa: UP007
+ cc_reference_annotator_dir: Path | None = Field( # noqa: UP007
None,
description="Path to directory with serialized reference annotator model. If set to `null`, tool will search default directory for the given dataset.",
)
@@ -141,7 +141,7 @@ class Configuration(BaseSettings):
True,
description="If true, progress bars will be printed to stdout during computation.",
)
- nvd_api_key: Optional[str] = Field(None, description="NVD API key for access to CVEs and CPEs.") # noqa: UP007
+ nvd_api_key: str | None = Field(None, description="NVD API key for access to CVEs and CPEs.") # noqa: UP007
preferred_source_remote_datasets: Literal["sec-certs", "origin"] = Field(
"sec-certs",
description="If set to `sec-certs`, will fetch remote datasets from sec-certs.org."
diff --git a/src/sec_certs/model/references_nlp/__init__.py b/src/sec_certs/model/references_nlp/__init__.py
index d4e11e4d..8325c989 100644
--- a/src/sec_certs/model/references_nlp/__init__.py
+++ b/src/sec_certs/model/references_nlp/__init__.py
@@ -9,5 +9,5 @@ try:
except ImportError as e:
print(e)
print(
- "Requirements for ML annotation of references not met. Please run `pip install sec-certs[nlp]` or install `pip install -r requirements/nlp_requirements.txt."
+ "Requirements for ML annotation of references not met. Please install the 'nlp' extra, for example via: `pip install sec-certs[nlp]`."
)
diff --git a/src/sec_certs/utils/extract.py b/src/sec_certs/utils/extract.py
index 35361e94..f72fd837 100644
--- a/src/sec_certs/utils/extract.py
+++ b/src/sec_certs/utils/extract.py
@@ -6,7 +6,7 @@ import re
from collections import Counter
from enum import Enum
from pathlib import Path
-from typing import Any
+from typing import Any, no_type_check
import numpy as np
@@ -17,7 +17,8 @@ from sec_certs.constants import FILE_ERRORS_STRATEGY, LINE_SEPARATOR, MAX_ALLOWE
logger = logging.getLogger(__name__)
-def search_only_headers_anssi(filepath: Path): # noqa: C901
+@no_type_check
+def search_only_headers_anssi(filepath: Path): # type: ignore # noqa: C901
# TODO: Please, refactor me. I reallyyyyyyyyyyyyy need it!!!!!!
class HEADER_TYPE(Enum):
HEADER_FULL = 1
@@ -272,7 +273,8 @@ def search_only_headers_anssi(filepath: Path): # noqa: C901
return items_found
-def search_only_headers_bsi(filepath: Path): # noqa: C901
+@no_type_check
+def search_only_headers_bsi(filepath: Path): # type: ignore # noqa: C901
# TODO: Please, refactor me. I reallyyyyyyyyyyyyy need it!!!!!!
LINE_SEPARATOR_STRICT = " "
NUM_LINES_TO_INVESTIGATE = 15
@@ -371,7 +373,8 @@ def search_only_headers_bsi(filepath: Path): # noqa: C901
return items_found
-def search_only_headers_nscib(filepath: Path): # noqa: C901
+@no_type_check
+def search_only_headers_nscib(filepath: Path): # type: ignore # noqa: C901
# TODO: Please, refactor me. I reallyyyyyyyyyyyyy need it!!!!!!
LINE_SEPARATOR_STRICT = " "
NUM_LINES_TO_INVESTIGATE = 60
@@ -451,7 +454,8 @@ def search_only_headers_nscib(filepath: Path): # noqa: C901
return items_found
-def search_only_headers_niap(filepath: Path):
+@no_type_check
+def search_only_headers_niap(filepath: Path): # type: ignore # noqa: C901
# TODO: Please, refactor me. I reallyyyyyyyyyyyyy need it!!!!!!
LINE_SEPARATOR_STRICT = " "
NUM_LINES_TO_INVESTIGATE = 15
@@ -502,7 +506,8 @@ def search_only_headers_niap(filepath: Path):
return items_found
-def search_only_headers_canada(filepath: Path): # noqa: C901
+@no_type_check
+def search_only_headers_canada(filepath: Path): # type: ignore # noqa: C901
# TODO: Please, refactor me. I reallyyyyyyyyyyyyy need it!!!!!!
LINE_SEPARATOR_STRICT = " "
NUM_LINES_TO_INVESTIGATE = 20