aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sec_certs/dataset/cc_scheme.py
blob: a1ee31247393c57ef97fc5d7ccd1ea22110def67 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from __future__ import annotations

import logging
from collections.abc import Mapping
from pathlib import Path

from sec_certs.dataset.json_path_dataset import JSONPathDataset
from sec_certs.sample.cc_scheme import CCScheme
from sec_certs.serialization.json import ComplexSerializableType

# Module-scoped logger: a named logger (rather than the root logger) lets
# applications filter/configure this module's output individually, while records
# still propagate to any handlers installed on the root logger.
logger = logging.getLogger(__name__)


class CCSchemeDataset(JSONPathDataset, ComplexSerializableType):
    """
    A dataset of data from CC scheme websites.

    The data lives in ``schemes``, a dict mapping a scheme identifier to its
    ``CCScheme`` object. Item access (``dataset[key]`` and
    ``dataset[key] = value``) uppercases the key before touching ``schemes``,
    so lookups are case-insensitive as long as ``schemes`` is keyed by
    uppercase identifiers. Iterating the dataset yields the ``CCScheme``
    values, not the keys.
    """

    def __init__(self, schemes: dict[str, CCScheme], json_path: str | Path | None = None):
        """
        :param schemes: Mapping of scheme identifier to scheme data. It is stored
            as-is (neither copied nor re-keyed).
        :param json_path: Forwarded unchanged to the ``JSONPathDataset`` initializer.
        """
        super().__init__(json_path)
        self.schemes = schemes

    @property
    def serialized_attributes(self) -> list[str]:
        """Names of the attributes that make up the serialized form."""
        return ["schemes"]

    def __iter__(self):
        """Iterate over the stored ``CCScheme`` objects (values, not keys)."""
        yield from self.schemes.values()

    def __getitem__(self, scheme: str) -> CCScheme:
        """
        Return the scheme stored under ``scheme.upper()``.

        :raises KeyError: If no scheme is stored under the uppercased key.
        """
        return self.schemes[scheme.upper()]

    def __setitem__(self, key: str, value: CCScheme) -> None:
        """Store ``value`` under ``key.upper()``."""
        self.schemes[key.upper()] = value

    def __len__(self) -> int:
        """Number of schemes in the dataset."""
        return len(self.schemes)

    def to_dict(self):
        """Return the dict form of the dataset: ``{"schemes": self.schemes}``."""
        return {"schemes": self.schemes}

    @classmethod
    def from_dict(cls, dct: Mapping) -> CCSchemeDataset:
        """
        Build a dataset from the mapping produced by :meth:`to_dict`.

        Only the ``"schemes"`` entry is read; ``json_path`` keeps its default.
        """
        return cls(dct["schemes"])

    @classmethod
    def from_web(
        cls,
        json_path: str | Path | None = None,
        only_schemes: set[str] | None = None,
        enhanced: bool | None = None,
        artifacts: bool | None = None,
    ) -> CCSchemeDataset:
        """
        Build a dataset by calling ``CCScheme.from_web`` for the schemes listed
        in ``CCScheme.methods``.

        A scheme whose retrieval raises is reported with a warning and left out
        of the result, so one failing scheme does not abort the others.

        :param json_path: Passed to the constructor of the resulting dataset.
        :param only_schemes: If not ``None``, only schemes whose identifier is in
            this collection are processed. Matching is case-insensitive, to agree
            with item access on the dataset. An empty collection selects nothing.
        :param enhanced: Forwarded unchanged to ``CCScheme.from_web``.
        :param artifacts: Forwarded unchanged to ``CCScheme.from_web``.
        :return: A dataset holding every scheme that was retrieved successfully.
        """
        # Normalize the filter once, up front; keep ``None`` distinct from an
        # empty selection ("no filter" vs. "select nothing").
        wanted = None if only_schemes is None else {name.upper() for name in only_schemes}

        schemes = {}
        for scheme, sources in CCScheme.methods.items():
            if wanted is not None and scheme.upper() not in wanted:
                continue
            try:
                schemes[scheme] = CCScheme.from_web(scheme, sources.keys(), enhanced=enhanced, artifacts=artifacts)
            except Exception as e:
                # Deliberately best-effort: log and carry on with the remaining schemes.
                logger.warning(f"Could not download CC scheme: {scheme} due to error {e}.")
        return cls(schemes, json_path=json_path)