aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests/test_nvd_dataset_builder.py
blob: 599e0a28ada41064bff3210a9ef25e4f4b480e81 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from datetime import date, datetime
from importlib.resources import files
from typing import Any

import pytest

from sec_certs.configuration import config
from sec_certs.dataset import CPEDataset, CVEDataset
from sec_certs.utils.nvd_dataset_builder import (
    CpeMatchNvdDatasetBuilder,
    CpeNvdDatasetBuilder,
    CveNvdDatasetBuilder,
    NvdDatasetBuilder,
)


@pytest.fixture(autouse=True)
def load_test_config():
    with files("tests.data.common") / "settings_tests.yml" as path:
        config.load_from_yaml(path)


@pytest.mark.remote
@pytest.mark.parametrize(
    "default_dataset, builder_class",
    [
        (CPEDataset(), CpeNvdDatasetBuilder),
        (CVEDataset(), CveNvdDatasetBuilder),
        ({"timestamp": datetime.fromtimestamp(0).isoformat(), "match_strings": {}}, CpeMatchNvdDatasetBuilder),
    ],
)
def test_build_dataset(default_dataset: Any, builder_class: type[NvdDatasetBuilder]):
    def get_update_timestamp_from_dataset(dset) -> datetime:
        if isinstance(dset, CPEDataset | CVEDataset):
            return dset.last_update_timestamp
        return datetime.fromisoformat(dset["timestamp"])

    def get_dataset_len(dset) -> int:
        if isinstance(dset, CPEDataset | CVEDataset):
            return len(dset)
        return len(dset["match_strings"])

    config.preferred_source_remote_datasets = "origin"
    with builder_class(api_key=config.nvd_api_key) as dataset_builder:
        dataset = dataset_builder._init_new_dataset()
        assert dataset == default_dataset
        last_update = dataset_builder._get_last_update_from_previous_data(dataset)
        dataset_builder._fill_in_mod_dates(False, last_update)
        dataset_builder._build_arguments()

        assert len(dataset_builder._requests_to_process) > 90
        assert not dataset_builder._ok_responses
        assert dataset_builder._attempts_left == dataset_builder.max_attempts

        dataset_builder._requests_to_process = dataset_builder._requests_to_process[:1]
        dataset_builder._request_parallel_and_handle_responses()
        assert not dataset_builder._requests_to_process
        assert len(dataset_builder._ok_responses) == 1

        dataset = dataset_builder._process_responses(dataset_builder._ok_responses, dataset)

        assert get_update_timestamp_from_dataset(dataset).date() == date.today()
        assert (
            get_dataset_len(dataset) > 200
        )  # some items may be irrelevant, it's hard to tell how many, this is Bulgarian constant.