Last active
August 9, 2023 16:28
-
-
Save atemate/7af9dec386a1dd9d0ead88b0251b07bb to your computer and use it in GitHub Desktop.
Finds the first dict in a list of dicts that matches a specific filter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class WildcardDict(dict):
    """A ``dict`` whose lookups can treat the requested key as a glob pattern.

    With ``enable_wildcards=False`` (the default) it behaves like a plain
    ``dict``. With ``enable_wildcards=True``, ``d[pattern]`` and
    ``pattern in d`` compare every stored key against ``pattern`` with
    :func:`fnmatch.fnmatch` and use the first stored key (in insertion order)
    that matches.

    Only ``__getitem__`` and ``__contains__`` are wildcard-aware; inherited
    methods such as ``get`` keep plain ``dict`` semantics.
    """

    def __init__(self, *args, enable_wildcards: bool = False, **kwargs) -> None:
        """Build the dict like ``dict(*args, **kwargs)`` and store the mode flag."""
        self._enable_wildcards = enable_wildcards
        super().__init__(*args, **kwargs)

    def __getitem__(self, key):
        """Return the value for ``key``, interpreting it as a pattern if enabled.

        Raises:
            KeyError: if no stored key equals / matches ``key``.
        """
        # Glob matching is only defined for text keys; anything else (ints,
        # tuples, ...) would make fnmatch raise TypeError, so use exact lookup.
        if not self._enable_wildcards or not isinstance(key, (str, bytes)):
            return super().__getitem__(key)

        # Imported here so the class works even if the module never imported it.
        from fnmatch import fnmatch

        text_type = str if isinstance(key, str) else bytes
        for stored_key, value in self.items():
            # Skip stored keys of another type instead of crashing in fnmatch.
            if isinstance(stored_key, text_type) and fnmatch(stored_key, key):
                return value
        raise KeyError(f"No matching keys found for pattern: {key}")

    def __contains__(self, key):
        """Return True if ``key`` (or, with wildcards, a match for it) exists."""
        if not self._enable_wildcards:
            return super().__contains__(key)
        try:
            self[key]
        except KeyError:
            return False
        return True


def match_dict(
    filter_dict: dict,
    candidate_dicts: list[dict],
    include_filter_values: bool = False,
    enable_wildcards: bool = False,
):
    """Return the captured values of the first candidate that matches a filter.

    Filter semantics, applied recursively:

    * ``key: ...`` (Ellipsis) captures the candidate's value; the key may be
      absent from the candidate, in which case nothing is captured.
    * ``key: {...}`` matches a nested dict with the same rules.
    * ``key: [{...}]`` matches when the candidate value is a list containing
      at least one dict that matches the single filter dict; the first such
      element is reported as a one-element list.
    * any other value must be equal to the candidate's value.

    Args:
        filter_dict: the filter described above.
        candidate_dicts: dicts to test, in order.
        include_filter_values: also copy literal (equality-checked) filter
            values into the result instead of only the ``...`` captures.
        enable_wildcards: treat filter keys as ``fnmatch`` patterns that are
            matched against the candidate's keys (first matching key wins).

    Returns:
        A dict keyed by the filter's keys for the first matching candidate,
        or ``None`` if no candidate matches.

    Raises:
        NotImplementedError: if a list in the filter does not consist of
            exactly one dict.
    """

    def _match_recursive(filt, cand):
        if enable_wildcards and isinstance(cand, dict):
            # The filter keys are the patterns, so the wildcard-aware lookup
            # has to live on the candidate side.
            cand = WildcardDict(cand, enable_wildcards=True)

        matched = {}
        for k, v in filt.items():
            if k not in cand:
                if v is ...:
                    continue  # allow values with '...' not be present in actual data
                return None  # Key missing in candidate, no match

            actual = cand[k]
            if v is ...:
                matched[k] = actual
            elif isinstance(v, list):
                if len(v) > 1:
                    raise NotImplementedError(f"Bad list in filter of len > 1: {v}")
                if not v or not isinstance(v[0], dict):
                    raise NotImplementedError(
                        f"List in filter must hold exactly one dict: {v}"
                    )
                if not isinstance(actual, list):
                    return None  # Filter expects a list here, candidate has none
                found = None
                for element in actual:
                    if not isinstance(element, dict):
                        continue  # only dict elements can satisfy a dict filter
                    nested = _match_recursive(v[0], element)
                    if nested is not None:
                        found = nested
                        break
                if found is None:
                    return None
                matched[k] = [found]
            elif isinstance(v, dict) and isinstance(actual, dict):
                nested = _match_recursive(v, actual)
                if nested is None:
                    return None
                matched[k] = nested
            else:
                if actual != v:
                    return None  # Value mismatch, no match
                if include_filter_values:
                    matched[k] = v
        return matched

    for cand in candidate_dicts:
        result = _match_recursive(filter_dict, cand)
        if result is not None:
            return result
    return None  # No match found
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytest | |
from .utils import match_dict | |
# Shared filter for the tests below: literal values must be equal in a
# candidate, while ``...`` marks values to be captured.
FILTER_DICT = {
    "a": {
        "b": "B",
        "c": [{"d": "D", "e": ...}],
        "f": ...,
    },
}
# Candidate dicts checked against FILTER_DICT, in order.
DICTS = [
    {
        "a": {
            "b": "X",  # mismatches
            "c": [{"d": "D", "e": "E1"}],
            "f": "F1",
        },
    },
    {
        "a": {
            "b": "B",
            "c": [{"d": "X", "e": "E2"}],  # "d" mismatches
            "f": "F2",
        },
    },
    {  # <- matches!
        "a": {
            "b": "B",
            "c": [{"d": "D", "e": "E3"}],
            # "f": "F3",  # "f" is missing, it's ok
        },
    },
    {  # missing required "b"
        "a": {
            "c": [{"e": "E4", "d": "D"}],
            "f": "F4",
        },
    },
    {  # missing "d"
        "a": {
            "b": "B",
            "c": [{"e": "E5"}],
            "f": "F5",
        },
    },
]
def test_match_dict_match_dict_false_ok():
    """By default only the values captured via ``...`` are returned."""
    expected = {"a": {"c": [{"e": "E3"}]}}
    assert match_dict(FILTER_DICT, DICTS) == expected
def test_match_dict_match_dict_true_ok():
    """With ``include_filter_values`` the literal filter values are returned too."""
    expected = {
        "a": {
            "b": "B",
            "c": [{"e": "E3", "d": "D"}],
        },
    }
    result = match_dict(FILTER_DICT, DICTS, include_filter_values=True)
    assert result == expected
@pytest.mark.parametrize("include_filter_values", [True, False])
def test_match_dict_match_dict_empty(include_filter_values):
    """An empty filter yields an empty result for either flag value."""
    result = match_dict({}, DICTS, include_filter_values=include_filter_values)
    assert result == {}
def test_match_dict_match_dict_enable_wildcards_true():
    """With wildcards enabled, the first candidate satisfying the filter is returned."""
    nested = {"bcd": {"def": 1}}
    candidates = [
        {"abc": nested, "ghi-1": 1},
        {"abc": nested, "ghi-12": 1},  # first matched returns
        {"abc": nested, "ghi-123": 1},
    ]
    result = match_dict(
        {"abc": ..., "ghi-12": 1},
        candidates,
        include_filter_values=True,
        enable_wildcards=True,
    )
    assert result == {"abc": {"bcd": {"def": 1}}, "ghi-12": 1}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment