Skip to content

Instantly share code, notes, and snippets.

@atemate
Created April 26, 2024 10:50
Show Gist options
  • Save atemate/d25f0a1d096c2d606a3411e7e27fb6a8 to your computer and use it in GitHub Desktop.
Save atemate/d25f0a1d096c2d606a3411e7e27fb6a8 to your computer and use it in GitHub Desktop.
Function to iterate over a dict of lists and yield (key, chunk-of-values) pairs, bounded by a chunk size and a total size
from typing import Any, Dict, List
def iter_dict_in_chunks(input_dict: Dict[Any, List], chunk_size: int, total_size: int):
    """Yield ``(key, chunk)`` pairs from a dict of lists, bounded in size.

    Each list is walked in order and split into consecutive slices of at
    most ``chunk_size`` items. Iteration stops as soon as ``total_size``
    items have been yielded in total, so the last chunk may be shorter
    than ``chunk_size``.

    Args:
        input_dict: Mapping whose values are lists; it is iterated in its
            own ``.items()`` order and is not modified.
        chunk_size: Maximum number of items per yielded chunk. A value of
            zero or less yields nothing.
        total_size: Maximum number of items yielded across all keys. A
            value of zero or less yields nothing.

    Yields:
        Tuples ``(key, chunk)`` where ``chunk`` is a non-empty slice of
        ``input_dict[key]``.

    Examples:
        Realistic cases:

        >>> d = {'a': [1, 2], 'b': [3, 4, 5, 6, 7, 8, 9]}
        >>> list(iter_dict_in_chunks(d, 3, 5))
        [('a', [1, 2]), ('b', [3, 4, 5])]
        >>> list(iter_dict_in_chunks(d, 1, 6))
        [('a', [1]), ('a', [2]), ('b', [3]), ('b', [4]), ('b', [5]), ('b', [6])]

        Corner cases:

        >>> list(iter_dict_in_chunks({'a': [1, 2, 3], 'b': [4, 5]}, 3, 0))  # empty total size
        []
        >>> list(iter_dict_in_chunks({'a': [1, 2, 3], 'b': [4, 5]}, 0, 10))  # empty chunk size
        []
        >>> list(iter_dict_in_chunks({}, 2, 3))  # empty dict
        []
        >>> list(iter_dict_in_chunks({'a': [], 'b': []}, 3, 5))  # empty lists
        []
        >>> list(iter_dict_in_chunks({'a': [1, 2, 3]}, 3, -1))  # negative total size
        []
    """
    # Without this guard a negative size would turn into a negative slice
    # bound below and silently yield items counted from the end of the list.
    if chunk_size <= 0 or total_size <= 0:
        return

    remaining = total_size
    for key, values in input_dict.items():
        # Only the first `limit` items of this list still fit in the budget.
        limit = min(len(values), remaining)
        for start in range(0, limit, chunk_size):
            chunk = values[start:min(start + chunk_size, limit)]
            remaining -= len(chunk)
            yield key, chunk
        if remaining <= 0:
            # Budget exhausted: no point in visiting the remaining keys.
            return
import pytest
from my_package import iter_dict_in_chunks
# Shared fixture: two keys, a short list followed by a longer one, so a single
# chunk size can cover the first list entirely and split the second.
DICT_REALISTIC = {"a": [1, 2], "b": [3, 4, 5, 6, 7, 8, 9]}
# Shared fixture: four keys with lists of length 3, 2, 1 and 5 (11 items in
# total), used below to step the total-size limit one item at a time.
DICT_COMPLEX = {"a": [1, 2, 3], "b": [4, 5], "c": [6], "d": [7, 8, 9, 10, 11]}
@pytest.mark.parametrize(
    "input_dict, chunk_size, total_size, expected",
    [
        # DICT_REALISTIC: vary the chunk size against a small total.
        (DICT_REALISTIC, 1, 3, [("a", [1]), ("a", [2]), ("b", [3])]),
        (DICT_REALISTIC, 2, 3, [("a", [1, 2]), ("b", [3])]),
        (DICT_REALISTIC, 3, 3, [("a", [1, 2]), ("b", [3])]),
        # DICT_REALISTIC: fixed chunk size, growing total.
        (DICT_REALISTIC, 3, 4, [("a", [1, 2]), ("b", [3, 4])]),
        (DICT_REALISTIC, 3, 5, [("a", [1, 2]), ("b", [3, 4, 5])]),
        (DICT_REALISTIC, 3, 6, [("a", [1, 2]), ("b", [3, 4, 5]), ("b", [6])]),
        (
            DICT_REALISTIC,
            3,
            100,
            [("a", [1, 2]), ("b", [3, 4, 5]), ("b", [6, 7, 8]), ("b", [9])],
        ),
        # DICT_COMPLEX: total grows one item at a time, up to one past the end.
        (DICT_COMPLEX, 3, 1, [("a", [1])]),
        (DICT_COMPLEX, 3, 2, [("a", [1, 2])]),
        (DICT_COMPLEX, 3, 3, [("a", [1, 2, 3])]),
        (DICT_COMPLEX, 3, 4, [("a", [1, 2, 3]), ("b", [4])]),
        (DICT_COMPLEX, 3, 5, [("a", [1, 2, 3]), ("b", [4, 5])]),
        (DICT_COMPLEX, 3, 6, [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6])]),
        (
            DICT_COMPLEX,
            3,
            7,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7])],
        ),
        (
            DICT_COMPLEX,
            3,
            8,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8])],
        ),
        (
            DICT_COMPLEX,
            3,
            9,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9])],
        ),
        (
            DICT_COMPLEX,
            3,
            10,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10])],
        ),
        (
            DICT_COMPLEX,
            3,
            11,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10, 11])],
        ),
        (
            DICT_COMPLEX,
            3,
            12,
            [("a", [1, 2, 3]), ("b", [4, 5]), ("c", [6]), ("d", [7, 8, 9]), ("d", [10, 11])],
        ),
        # Corner cases: every one of these must produce no output at all.
        ({"a": [1, 2, 3], "b": [4, 5]}, 3, 0, []),  # total size of zero
        ({"a": [1, 2, 3], "b": [4, 5]}, 0, 10, []),  # chunk size of zero
        ({}, 2, 3, []),  # no keys
        ({"a": [], "b": []}, 3, 5, []),  # keys present, lists empty
    ],
)
def test_iter_dict_in_chunks(input_dict: dict, chunk_size: int, total_size: int, expected: list):
    """Check the full sequence of (key, chunk) pairs for each parametrized case."""
    assert list(iter_dict_in_chunks(input_dict, chunk_size, total_size)) == expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment