# atemate/match_dict.py

## match_dict.py

class WildcardDict(dict):
    """Dict whose lookups can treat the requested key as a glob pattern.

    With ``enable_wildcards=False`` (the default) an instance behaves like a
    plain ``dict``.  With ``enable_wildcards=True``, ``d[key]`` and
    ``key in d`` first try an exact lookup and then fall back to using ``key``
    as an ``fnmatch`` pattern against the stored string keys; the value of the
    first matching stored key (in insertion order) is returned.

    Only ``__getitem__`` and ``__contains__`` are pattern-aware; other mapping
    methods (``get``, ``pop``, ...) keep the plain ``dict`` behaviour.
    """

    def __init__(self, *args, enable_wildcards: bool = False, **kwargs) -> None:
        """Build the dict from ``dict``-style arguments and store the mode flag."""
        self._enable_wildcards = enable_wildcards
        super().__init__(*args, **kwargs)

    def __getitem__(self, key):
        """Return the value stored under ``key`` or under a key matching it.

        Raises:
            KeyError: no stored key equals ``key`` and (in wildcard mode) no
                stored string key matches ``key`` as a pattern.
        """
        if not self._enable_wildcards:
            return super().__getitem__(key)

        # Imported locally so the lookup does not rely on a module-level
        # import of fnmatch being present.
        from fnmatch import fnmatch

        # An exact hit always wins.  This also keeps literal keys that contain
        # glob metacharacters (e.g. "a[1]") and non-string keys reachable.
        if super().__contains__(key):
            return super().__getitem__(key)

        # fnmatch only understands strings; skip everything else instead of
        # letting it raise TypeError.
        if isinstance(key, str):
            for stored_key, value in self.items():
                if isinstance(stored_key, str) and fnmatch(stored_key, key):
                    return value
        raise KeyError(f"No matching keys found for pattern: {key}")

    def __contains__(self, key):
        """Return whether ``self[key]`` would succeed."""
        if not self._enable_wildcards:
            return super().__contains__(key)
        try:
            self[key]
        except KeyError:
            return False
        return True


def match_dict(
    filter_dict: dict,
    candidate_dicts: list[dict],
    include_filter_values: bool = False,
    enable_wildcards: bool = False,
):
    """Return the captured part of the first candidate that satisfies a filter.

    Each key of ``filter_dict`` is checked against a candidate as follows:

    * ``...`` (Ellipsis) is a capture: the candidate's value is copied into
      the result, and the key is allowed to be absent from the candidate.
    * a ``dict`` is matched recursively against a ``dict`` in the candidate.
    * a one-element ``list`` holds a sub-filter; the candidate must hold a
      list, and the first ``dict`` element satisfying the sub-filter is used.
    * any other value must compare equal to the candidate's value.

    Results of nested dict/list filters are always placed in the result, even
    when they are empty.

    Args:
        filter_dict: The filter described above.
        candidate_dicts: Dicts tried in order; the first match wins.
        include_filter_values: When true, values matched by equality are
            copied into the result as well, not only ``...`` captures.
        enable_wildcards: When true, filter keys are resolved against the
            candidate's keys through ``WildcardDict`` at every nesting level
            (exact key first, then the filter key as an ``fnmatch`` pattern).
            The result is still keyed by the filter's key.

    Returns:
        The result dict of the first matching candidate (possibly ``{}``), or
        ``None`` when no candidate matches.

    Raises:
        NotImplementedError: a list in the filter does not hold exactly one
            element.
    """

    def _lookup_view(cand):
        # Filter keys are the patterns and candidate keys are the names they
        # are matched against, so the candidate is what needs the
        # wildcard-aware lookup.
        if enable_wildcards:
            return WildcardDict(cand, enable_wildcards=True)
        return cand

    def _match_recursive(filt, cand):
        cand = _lookup_view(cand)
        matched = {}
        for k, v in filt.items():
            if k not in cand:
                if v is ...:
                    continue  # captures are optional in the actual data
                return None  # required key is missing
            actual = cand[k]

            if v is ...:
                matched[k] = actual
            elif isinstance(v, list):
                if len(v) > 1:
                    raise NotImplementedError(f"Bad list in filter of len > 1: {v}")
                if not v:
                    # Fail the same way for every candidate instead of
                    # hitting IndexError on v[0] only for some of them.
                    raise NotImplementedError(f"Bad list in filter of len 0: {v}")
                if not isinstance(actual, list):
                    return None  # a list filter cannot match a non-list value
                found = None
                for item in actual:
                    if not isinstance(item, dict):
                        continue  # only dict elements can satisfy a sub-filter
                    nested = _match_recursive(v[0], item)
                    if nested is not None:
                        found = nested
                        break
                if found is None:
                    return None
                matched[k] = [found]
            elif isinstance(v, dict) and isinstance(actual, dict):
                nested = _match_recursive(v, actual)
                if nested is None:
                    return None
                matched[k] = nested
            elif actual != v:
                return None  # value mismatch
            elif include_filter_values:
                matched[k] = v
        return matched

    for cand in candidate_dicts:
        result = _match_recursive(filter_dict, cand)
        if result is not None:
            return result

    return None  # no candidate matched


## test_match_dict.py
import pytest

from .utils import match_dict

# Filter shared by the tests below: "b" and "d" are compared by value, while
# the `...` placeholders ("e", "f") mark values to capture from a candidate.
FILTER_DICT = {
    "a": {
        "b": "B",
        "c": [{"d": "D", "e": ...}],
        "f": ...,
    },
}


# Candidates used with FILTER_DICT; only the third entry satisfies it.
DICTS = [
    # "b" mismatches the filter.
    {"a": {"b": "X", "c": [{"d": "D", "e": "E1"}], "f": "F1"}},
    # "d" inside the list element mismatches the filter.
    {"a": {"b": "B", "c": [{"d": "X", "e": "E2"}], "f": "F2"}},
    # <- matches! "f" is missing, which is ok because the filter uses `...`.
    {"a": {"b": "B", "c": [{"d": "D", "e": "E3"}]}},
    # Missing the required "b".
    {"a": {"c": [{"e": "E4", "d": "D"}], "f": "F4"}},
    # Missing "d" inside the list element.
    {"a": {"b": "B", "c": [{"e": "E5"}], "f": "F5"}},
]


def test_match_dict_match_dict_false_ok():
    """A default call returns only the value captured by the `...` placeholder."""
    expected = {"a": {"c": [{"e": "E3"}]}}
    result = match_dict(FILTER_DICT, DICTS)
    assert result == expected


def test_match_dict_match_dict_true_ok():
    """With ``include_filter_values=True`` the compared values are returned too."""
    expected = {"a": {"b": "B", "c": [{"e": "E3", "d": "D"}]}}
    result = match_dict(FILTER_DICT, DICTS, include_filter_values=True)
    assert result == expected


@pytest.mark.parametrize("include_filter_values", [True, False])
def test_match_dict_match_dict_empty(include_filter_values):
    """An empty filter yields an empty result for either flag value."""
    result = match_dict({}, DICTS, include_filter_values=include_filter_values)
    assert result == {}


def test_match_dict_match_dict_enable_wildcards_true():
    """With wildcards enabled, the candidate holding the "ghi-12" key is returned."""
    candidates = [
        {"abc": {"bcd": {"def": 1}}, "ghi-1": 1},
        {"abc": {"bcd": {"def": 1}}, "ghi-12": 1},  # the first match is returned
        {"abc": {"bcd": {"def": 1}}, "ghi-123": 1},
    ]
    expected = {"abc": {"bcd": {"def": 1}}, "ghi-12": 1}
    result = match_dict(
        {"abc": ..., "ghi-12": 1},
        candidates,
        include_filter_values=True,
        enable_wildcards=True,
    )
    assert result == expected

	class WildcardDict(dict):
	    """Dict whose lookups can treat the requested key as a glob pattern.

	    With ``enable_wildcards=False`` (the default) an instance behaves like a
	    plain ``dict``.  With ``enable_wildcards=True``, ``d[key]`` and
	    ``key in d`` first try an exact lookup and then fall back to using ``key``
	    as an ``fnmatch`` pattern against the stored string keys; the value of the
	    first matching stored key (in insertion order) is returned.

	    Only ``__getitem__`` and ``__contains__`` are pattern-aware; other mapping
	    methods (``get``, ``pop``, ...) keep the plain ``dict`` behaviour.
	    """

	    def __init__(self, *args, enable_wildcards: bool = False, **kwargs) -> None:
	        """Build the dict from ``dict``-style arguments and store the mode flag."""
	        self._enable_wildcards = enable_wildcards
	        super().__init__(*args, **kwargs)

	    def __getitem__(self, key):
	        """Return the value stored under ``key`` or under a key matching it.

	        Raises:
	            KeyError: no stored key equals ``key`` and (in wildcard mode) no
	                stored string key matches ``key`` as a pattern.
	        """
	        if not self._enable_wildcards:
	            return super().__getitem__(key)

	        # Imported locally so the lookup does not rely on a module-level
	        # import of fnmatch being present.
	        from fnmatch import fnmatch

	        # An exact hit always wins.  This also keeps literal keys that contain
	        # glob metacharacters (e.g. "a[1]") and non-string keys reachable.
	        if super().__contains__(key):
	            return super().__getitem__(key)

	        # fnmatch only understands strings; skip everything else instead of
	        # letting it raise TypeError.
	        if isinstance(key, str):
	            for stored_key, value in self.items():
	                if isinstance(stored_key, str) and fnmatch(stored_key, key):
	                    return value
	        raise KeyError(f"No matching keys found for pattern: {key}")

	    def __contains__(self, key):
	        """Return whether ``self[key]`` would succeed."""
	        if not self._enable_wildcards:
	            return super().__contains__(key)
	        try:
	            self[key]
	        except KeyError:
	            return False
	        return True


	def match_dict(
	    filter_dict: dict,
	    candidate_dicts: list[dict],
	    include_filter_values: bool = False,
	    enable_wildcards: bool = False,
	):
	    """Return the captured part of the first candidate that satisfies a filter.

	    Each key of ``filter_dict`` is checked against a candidate as follows:

	    * ``...`` (Ellipsis) is a capture: the candidate's value is copied into
	      the result, and the key is allowed to be absent from the candidate.
	    * a ``dict`` is matched recursively against a ``dict`` in the candidate.
	    * a one-element ``list`` holds a sub-filter; the candidate must hold a
	      list, and the first ``dict`` element satisfying the sub-filter is used.
	    * any other value must compare equal to the candidate's value.

	    Results of nested dict/list filters are always placed in the result, even
	    when they are empty.

	    Args:
	        filter_dict: The filter described above.
	        candidate_dicts: Dicts tried in order; the first match wins.
	        include_filter_values: When true, values matched by equality are
	            copied into the result as well, not only ``...`` captures.
	        enable_wildcards: When true, filter keys are resolved against the
	            candidate's keys through ``WildcardDict`` at every nesting level
	            (exact key first, then the filter key as an ``fnmatch`` pattern).
	            The result is still keyed by the filter's key.

	    Returns:
	        The result dict of the first matching candidate (possibly ``{}``), or
	        ``None`` when no candidate matches.

	    Raises:
	        NotImplementedError: a list in the filter does not hold exactly one
	            element.
	    """

	    def _lookup_view(cand):
	        # Filter keys are the patterns and candidate keys are the names they
	        # are matched against, so the candidate is what needs the
	        # wildcard-aware lookup.
	        if enable_wildcards:
	            return WildcardDict(cand, enable_wildcards=True)
	        return cand

	    def _match_recursive(filt, cand):
	        cand = _lookup_view(cand)
	        matched = {}
	        for k, v in filt.items():
	            if k not in cand:
	                if v is ...:
	                    continue  # captures are optional in the actual data
	                return None  # required key is missing
	            actual = cand[k]

	            if v is ...:
	                matched[k] = actual
	            elif isinstance(v, list):
	                if len(v) > 1:
	                    raise NotImplementedError(f"Bad list in filter of len > 1: {v}")
	                if not v:
	                    # Fail the same way for every candidate instead of
	                    # hitting IndexError on v[0] only for some of them.
	                    raise NotImplementedError(f"Bad list in filter of len 0: {v}")
	                if not isinstance(actual, list):
	                    return None  # a list filter cannot match a non-list value
	                found = None
	                for item in actual:
	                    if not isinstance(item, dict):
	                        continue  # only dict elements can satisfy a sub-filter
	                    nested = _match_recursive(v[0], item)
	                    if nested is not None:
	                        found = nested
	                        break
	                if found is None:
	                    return None
	                matched[k] = [found]
	            elif isinstance(v, dict) and isinstance(actual, dict):
	                nested = _match_recursive(v, actual)
	                if nested is None:
	                    return None
	                matched[k] = nested
	            elif actual != v:
	                return None  # value mismatch
	            elif include_filter_values:
	                matched[k] = v
	        return matched

	    for cand in candidate_dicts:
	        result = _match_recursive(filter_dict, cand)
	        if result is not None:
	            return result

	    return None  # no candidate matched
	import pytest

	from .utils import match_dict

	# Filter shared by the tests below: "b" and "d" are compared by value, while
	# the `...` placeholders ("e", "f") mark values to capture from a candidate.
	FILTER_DICT = {
	    "a": {
	        "b": "B",
	        "c": [{"d": "D", "e": ...}],
	        "f": ...,
	    },
	}


	# Candidates used with FILTER_DICT; only the third entry satisfies it.
	DICTS = [
	    # "b" mismatches the filter.
	    {"a": {"b": "X", "c": [{"d": "D", "e": "E1"}], "f": "F1"}},
	    # "d" inside the list element mismatches the filter.
	    {"a": {"b": "B", "c": [{"d": "X", "e": "E2"}], "f": "F2"}},
	    # <- matches! "f" is missing, which is ok because the filter uses `...`.
	    {"a": {"b": "B", "c": [{"d": "D", "e": "E3"}]}},
	    # Missing the required "b".
	    {"a": {"c": [{"e": "E4", "d": "D"}], "f": "F4"}},
	    # Missing "d" inside the list element.
	    {"a": {"b": "B", "c": [{"e": "E5"}], "f": "F5"}},
	]


	def test_match_dict_match_dict_false_ok():
	    """A default call returns only the value captured by the `...` placeholder."""
	    actual = match_dict(FILTER_DICT, DICTS)
	    assert actual == {
	        "a": {
	            "c": [
	                {
	                    "e": "E3",
	                }
	            ],
	        },
	    }


	def test_match_dict_match_dict_true_ok():
	    """With ``include_filter_values=True`` the compared values are returned too."""
	    actual = match_dict(FILTER_DICT, DICTS, include_filter_values=True)
	    assert actual == {
	        "a": {
	            "b": "B",
	            "c": [
	                {
	                    "e": "E3",
	                    "d": "D",
	                }
	            ],
	        },
	    }


	@pytest.mark.parametrize("include_filter_values", [True, False])
	def test_match_dict_match_dict_empty(include_filter_values):
	    """An empty filter yields an empty result for either flag value."""
	    actual = match_dict({}, DICTS, include_filter_values=include_filter_values)
	    assert actual == {}


	def test_match_dict_match_dict_enable_wildcards_true():
	    """With wildcards enabled, the candidate holding the "ghi-12" key is returned."""
	    dicts = [
	        {"abc": {"bcd": {"def": 1}}, "ghi-1": 1},
	        {"abc": {"bcd": {"def": 1}}, "ghi-12": 1},  # first matched returns
	        {"abc": {"bcd": {"def": 1}}, "ghi-123": 1},
	    ]
	    filter_dict = {"abc": ..., "ghi-12": 1}
	    actual = match_dict(
	        filter_dict,
	        dicts,
	        include_filter_values=True,
	        enable_wildcards=True,
	    )
	    assert actual == {"abc": {"bcd": {"def": 1}}, "ghi-12": 1}