Created
April 26, 2024 10:50
-
-
Save atemate/d25f0a1d096c2d606a3411e7e27fb6a8 to your computer and use it in GitHub Desktop.
Function to iterate over a dict of lists and yield (key, values) pairs in chunks of bounded size, up to a total item limit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Any, Dict, Iterator, List, Tuple
def iter_dict_in_chunks(
    input_dict: Dict[Any, List], chunk_size: int, total_size: int
) -> Iterator[Tuple[Any, List]]:
    """Yield ``(key, chunk)`` pairs from a dict of lists, bounded in size.

    Keys are visited in the dict's iteration order. Each list is sliced into
    consecutive chunks of at most `chunk_size` items, and iteration stops as
    soon as `total_size` items have been yielded overall. The last chunk of a
    list (or the very last chunk yielded) may be shorter than `chunk_size`.

    Args:
        input_dict: Mapping of keys to lists of values.
        chunk_size: Maximum number of items in a single yielded chunk.
        total_size: Maximum number of items yielded across all chunks.

    Yields:
        ``(key, chunk)`` tuples, where ``chunk`` is a non-empty slice of
        ``input_dict[key]``.

    A non-positive `chunk_size` or `total_size` yields nothing.

    Examples:
        Realistic cases:

        >>> d = {'a': [1, 2], 'b': [3, 4, 5, 6, 7, 8, 9]}
        >>> list(iter_dict_in_chunks(d, 3, 5))
        [('a', [1, 2]), ('b', [3, 4, 5])]
        >>> list(iter_dict_in_chunks(d, 1, 6))
        [('a', [1]), ('a', [2]), ('b', [3]), ('b', [4]), ('b', [5]), ('b', [6])]

        Corner cases:

        >>> list(iter_dict_in_chunks({'a': [1, 2, 3], 'b': [4, 5]}, 3, 0))  # empty total size
        []
        >>> list(iter_dict_in_chunks({'a': [1, 2, 3], 'b': [4, 5]}, 0, 10))  # empty chunk size
        []
        >>> list(iter_dict_in_chunks({}, 2, 3))  # empty dict
        []
        >>> list(iter_dict_in_chunks({'a': [], 'b': []}, 3, 5))  # empty lists
        []
        >>> list(iter_dict_in_chunks({'a': [1, 2, 3]}, 3, -1))  # negative total size
        []
        >>> list(iter_dict_in_chunks({'a': [1, 2, 3]}, -2, 5))  # negative chunk size
        []
    """
    # Negative sizes would otherwise become negative slice bounds, which
    # Python interprets as offsets from the end of the list and would leak
    # items; treat them like zero, which has always produced no output.
    if chunk_size <= 0 or total_size <= 0:
        return

    remaining = total_size
    for key, values in input_dict.items():
        # Never read past the end of this list or past the overall budget.
        limit = min(len(values), remaining)
        for start in range(0, limit, chunk_size):
            chunk = values[start:min(start + chunk_size, limit)]
            remaining -= len(chunk)
            yield key, chunk
        if remaining <= 0:
            # Budget exhausted: no need to visit the remaining keys.
            return
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytest | |
from my_package import iter_dict_in_chunks | |
# Two keys: one list shorter than the chunk sizes used below, one long enough
# to be split into several chunks.
DICT_REALISTIC = {"a": [1, 2], "b": [3, 4, 5, 6, 7, 8, 9]}
# Four keys whose lists have mixed lengths (3, 2, 1 and 5 items; 11 in total).
DICT_COMPLEX = {"a": [1, 2, 3], "b": [4, 5], "c": [6], "d": [7, 8, 9, 10, 11]}
@pytest.mark.parametrize(
    "input_dict, chunk_size, total_size, expected",
    [
        # DICT_REALISTIC with growing total_size and various chunk sizes.
        (DICT_REALISTIC, 1, 3, [("a", [1]), ("a", [2]), ("b", [3])]),
        (DICT_REALISTIC, 2, 3, [("a", [1, 2]), ("b", [3])]),
        (DICT_REALISTIC, 3, 3, [("a", [1, 2]), ("b", [3])]),
        (DICT_REALISTIC, 3, 4, [("a", [1, 2]), ("b", [3, 4])]),
        (DICT_REALISTIC, 3, 5, [("a", [1, 2]), ("b", [3, 4, 5])]),
        (DICT_REALISTIC, 3, 6, [("a", [1, 2]), ("b", [3, 4, 5]), ("b", [6])]),
        (DICT_REALISTIC, 3, 100, [("a", [1, 2]), ("b", [3, 4, 5]), ("b", [6, 7, 8]), ("b", [9])]),
        # DICT_COMPLEX with chunk_size 3 and total_size stepping from 1 to 12
        # (one more than the 11 items available).
        (DICT_COMPLEX, 3, 1, [("a", [1])]),
        (DICT_COMPLEX, 3, 2, [("a", [1, 2])]),
        (DICT_COMPLEX, 3, 3, [("a", [1, 2, 3])]),
        (DICT_COMPLEX, 3, 4, [("a", [1, 2, 3]), ("b", [4])]),
        (DICT_COMPLEX, 3, 5, [("a", [1, 2, 3]), ("b", [4, 5])]),
        (DICT_COMPLEX, 3, 6, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6])]),
        (DICT_COMPLEX, 3, 7, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7])]),
        (DICT_COMPLEX, 3, 8, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8])]),
        (DICT_COMPLEX, 3, 9, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9])]),
        (DICT_COMPLEX, 3, 10, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10])]),
        (DICT_COMPLEX, 3, 11, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10, 11])]),
        (DICT_COMPLEX, 3, 12, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10, 11])]),
        # Corner cases: each must produce no output at all.
        ({"a": [1, 2, 3], "b": [4, 5]}, 3, 0, []),  # empty total size
        ({"a": [1, 2, 3], "b": [4, 5]}, 0, 10, []),  # empty chunk size
        ({}, 2, 3, []),  # empty dict
        ({"a": [], "b": []}, 3, 5, []),  # empty lists
    ],
)
def test_iter_dict_in_chunks(input_dict: dict, chunk_size: int, total_size: int, expected: list):
    """Check that the fully consumed generator matches the expected pairs."""
    actual = list(iter_dict_in_chunks(input_dict, chunk_size, total_size))
    assert actual == expected
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment