diff options
| author | Ján Jančár | 2022-01-03 20:55:39 +0100 |
|---|---|---|
| committer | GitHub | 2022-01-03 20:55:39 +0100 |
| commit | 548b6b62f1abab0e5a7dcd2d64d4ab8d623f68cd (patch) | |
| tree | 6030a0958cfaa322175632d998575c5887f59799 | |
| parent | d6e34f998785a9335592bb0304cae5446347169e (diff) | |
| parent | 3568c33bbe132ca2285913f96a361751a164fa2e (diff) | |
| download | sec-certs-548b6b62f1abab0e5a7dcd2d64d4ab8d623f68cd.tar.gz sec-certs-548b6b62f1abab0e5a7dcd2d64d4ab8d623f68cd.tar.zst sec-certs-548b6b62f1abab0e5a7dcd2d64d4ab8d623f68cd.zip | |
Merge pull request #152 from crocs-muni/feat/canonical-to-dict
Keep ordering of version matches in heuristics.
| -rw-r--r-- | sec_certs/helpers.py | 13 |
1 file changed, 9 insertions, 4 deletions
diff --git a/sec_certs/helpers.py b/sec_certs/helpers.py
index d6404d42..90e250d8 100644
--- a/sec_certs/helpers.py
+++ b/sec_certs/helpers.py
@@ -832,16 +832,21 @@ def compute_heuristics_version(cert_name: str) -> List[str]:
     full_regex_string = r"|".join([without_version, short_version, long_version])
     normalizer = r"(\d+\.*)+"
 
-    matched_strings = set([max(x, key=len) for x in re.findall(full_regex_string, cert_name, re.IGNORECASE)])
+    matched_strings = [max(x, key=len) for x in re.findall(full_regex_string, cert_name, re.IGNORECASE)]
     if not matched_strings:
-        matched_strings = set([max(x, key=len) for x in re.findall(at_least_something, cert_name, re.IGNORECASE)])
+        matched_strings = [max(x, key=len) for x in re.findall(at_least_something, cert_name, re.IGNORECASE)]
+    # Only keep the first occurrence but keep order.
+    matches = []
+    for match in matched_strings:
+        if match not in matches:
+            matches.append(match)
     # identified_versions = list(set([max(x, key=len) for x in re.findall(VERSION_PATTERN, cert_name, re.IGNORECASE | re.VERBOSE)]))
     # return identified_versions if identified_versions else ['-']
 
-    if not matched_strings:
+    if not matches:
         return ["-"]
 
-    matched = [re.search(normalizer, x) for x in matched_strings]
+    matched = [re.search(normalizer, x) for x in matches]
     return [x.group() for x in matched if x is not None]
