Last active
March 4, 2019 19:22
-
-
Save nbren12/e781c5a8fe03ee170628194c4b3c3160 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from itertools import product\n", | |
"\n", | |
"import numpy as np\n", | |
"import xarray as xr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"shape = (10, 10, 10, 10)\n", | |
"dims = ['time' , 'z', 'y', 'x']\n", | |
"\n", | |
"np_arr = np.ones(shape)\n", | |
"arr = xr.DataArray(np_arr, dims=dims)" | |
] | |
}, | |
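{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Here are the indices used for the benchmarks below: one entry for every (`time`, `y`, `x`) combination, each taking a full slice along `z`. `indices` holds positional tuples for numpy indexing, and `named_indices` holds the same selections as dictionaries keyed by dimension name for xarray." | |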
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Length of `indices` 1000\n", | |
"Example indices:\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[(0, slice(None, None, None), 0, 0),\n", | |
" (0, slice(None, None, None), 0, 1),\n", | |
" (0, slice(None, None, None), 0, 2),\n", | |
" (0, slice(None, None, None), 0, 3),\n", | |
" (0, slice(None, None, None), 0, 4),\n", | |
" (0, slice(None, None, None), 0, 5),\n", | |
" (0, slice(None, None, None), 0, 6),\n", | |
" (0, slice(None, None, None), 0, 7),\n", | |
" (0, slice(None, None, None), 0, 8),\n", | |
" (0, slice(None, None, None), 0, 9)]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ranges = [range(n) for k, n in enumerate(shape) if k != 1]\n", | |
"indices = [(t, slice(None), y, x) for (t, y, x) in product(*ranges)]\n", | |
"named_indices = [dict(zip(dims, index)) for index in indices]\n", | |
"\n", | |
"\n", | |
"print(\"Length of `indices`\", len(indices))\n", | |
"print(\"Example indices:\")\n", | |
"indices[:10]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Example named_indices:\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 0},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 1},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 2},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 3},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 4},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 5},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 6},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 7},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 8},\n", | |
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 9}]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"print(\"Example named_indices:\")\n", | |
"named_indices[:10]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Timing info numpy indexing vs xarray isel" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's time iterating over all of the indices, both for the xarray object (using `isel`) and for the underlying numpy array." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def index_xarray(named_indices, arr):\n", | |
" for named_index in named_indices:\n", | |
" arr.isel(**named_index)\n", | |
" \n", | |
" \n", | |
"def index_numpy(indices, arr):\n", | |
" for index in indices:\n", | |
" arr[index]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"172 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit -n 10\n", | |
"\n", | |
"index_numpy(indices, np_arr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"130 ms ± 11.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit -n 10\n", | |
"index_xarray(named_indices, arr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The numpy code is about 1000 times faster. Is this also true when repeatedly grabbing data from one index?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"named_index = named_indices[0]\n", | |
"index = indices[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"153 µs ± 30.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000 arr[index]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"153 µs ± 11.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000 arr.isel(**named_index)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"173 ns ± 1.43 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit -n 1000 np_arr[index]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"In conclusion, both `isel` and bracket indexing of xarray objects are about 100 times slower than the comparable operations for numpy arrays." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Profiling" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" " | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
" 4210013 function calls (4200013 primitive calls) in 2.209 seconds\n", | |
"\n", | |
" Ordered by: cumulative time\n", | |
"\n", | |
" ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
" 1 0.000 0.000 2.209 2.209 {built-in method builtins.exec}\n", | |
" 1 0.000 0.000 2.209 2.209 <string>:2(<module>)\n", | |
" 10 0.019 0.002 2.209 0.221 <ipython-input-5-2916ae1e6703>:1(index_xarray)\n", | |
" 10000 0.033 0.000 2.189 0.000 dataarray.py:807(isel)\n", | |
" 10000 0.073 0.000 1.913 0.000 dataset.py:1461(isel)\n", | |
" 10000 0.096 0.000 0.744 0.000 dataset.py:1379(_validate_indexers)\n", | |
" 10000 0.044 0.000 0.596 0.000 variable.py:881(isel)\n", | |
" 10000 0.019 0.000 0.517 0.000 variable.py:604(__getitem__)\n", | |
" 30000 0.083 0.000 0.389 0.000 variable.py:41(as_variable)\n", | |
" 10000 0.048 0.000 0.388 0.000 variable.py:434(_broadcast_indexes)\n", | |
" 40000 0.043 0.000 0.281 0.000 variable.py:239(__init__)\n", | |
" 20000 0.028 0.000 0.245 0.000 dataset.py:659(_replace_vars_and_dims)\n", | |
" 10000 0.047 0.000 0.200 0.000 dataset.py:1424(_get_indexers_coordinates)\n", | |
" 880000 0.129 0.000 0.185 0.000 {built-in method builtins.isinstance}\n", | |
" 10000 0.005 0.000 0.177 0.000 dataarray.py:280(_to_temp_dataset)\n", | |
" 10000 0.026 0.000 0.171 0.000 dataarray.py:304(_to_dataset_whole)\n", | |
" 30000 0.094 0.000 0.166 0.000 dataset.py:92(calculate_dimensions)\n", | |
" 30000 0.070 0.000 0.153 0.000 dataset.py:636(_construct_direct)\n", | |
" 40000 0.076 0.000 0.151 0.000 variable.py:137(as_compatible_data)\n", | |
" 50000 0.036 0.000 0.135 0.000 variable.py:460(<genexpr>)\n", | |
" 10000 0.014 0.000 0.120 0.000 dataset.py:654(_from_vars_and_coord_names)\n", | |
" 10000 0.023 0.000 0.107 0.000 variable.py:492(_broadcast_indexes_basic)\n", | |
" 260000 0.094 0.000 0.094 0.000 common.py:183(__setattr__)\n", | |
" 10000 0.025 0.000 0.090 0.000 dataset.py:1388(<listcomp>)\n", | |
" 40000 0.046 0.000 0.088 0.000 variable.py:414(_parse_dimensions)\n", | |
" 90000 0.021 0.000 0.078 0.000 numeric.py:433(asarray)\n", | |
" 10000 0.024 0.000 0.075 0.000 merge.py:110(merge_variables)\n", | |
" 30000 0.019 0.000 0.069 0.000 variable.py:287(data)\n", | |
" 90000 0.043 0.000 0.069 0.000 utils.py:450(ndim)\n", | |
" 10000 0.036 0.000 0.067 0.000 indexing.py:338(__init__)\n", | |
" 10000 0.008 0.000 0.065 0.000 dataarray.py:284(_from_temp_dataset)\n", | |
" 10000 0.012 0.000 0.061 0.000 variable.py:623(_finalize_indexing_result)\n", | |
" 50000 0.043 0.000 0.060 0.000 dataset.py:442(dims)\n", | |
" 80000 0.034 0.000 0.058 0.000 pycompat.py:18(iteritems)\n", | |
" 90000 0.057 0.000 0.057 0.000 {built-in method numpy.core.multiarray.array}\n", | |
" 30000 0.032 0.000 0.056 0.000 abc.py:180(__instancecheck__)\n", | |
" 10000 0.016 0.000 0.052 0.000 dataarray.py:245(_replace)\n", | |
" 50000 0.015 0.000 0.051 0.000 <frozen importlib._bootstrap>:997(_handle_fromlist)\n", | |
" 100000 0.051 0.000 0.051 0.000 {built-in method builtins.hasattr}\n", | |
" 10000 0.013 0.000 0.050 0.000 coordinates.py:317(assert_coordinate_consistent)\n", | |
" 30000 0.010 0.000 0.045 0.000 variable.py:381(values)\n", | |
" 50000 0.025 0.000 0.037 0.000 <frozen importlib._bootstrap>:416(parent)\n", | |
" 30000 0.020 0.000 0.035 0.000 variable.py:194(_as_array_or_item)\n", | |
" 10000 0.016 0.000 0.034 0.000 dataarray.py:166(__init__)\n", | |
" 10000 0.026 0.000 0.034 0.000 indexing.py:17(expanded_indexer)\n", | |
"100000/90000 0.020 0.000 0.033 0.000 {built-in method builtins.iter}\n", | |
" 10000 0.014 0.000 0.033 0.000 indexing.py:1172(__getitem__)\n", | |
" 10000 0.016 0.000 0.030 0.000 {built-in method builtins.min}\n", | |
" 30000 0.011 0.000 0.026 0.000 utils.py:187(is_dict_like)\n", | |
" 30000 0.010 0.000 0.026 0.000 utils.py:195(either_dict_or_kwargs)\n", | |
" 60000 0.025 0.000 0.025 0.000 _weakrefset.py:70(__contains__)\n", | |
" 10000 0.008 0.000 0.024 0.000 {built-in method builtins.all}\n", | |
" 120000 0.023 0.000 0.023 0.000 variable.py:272(shape)\n", | |
" 50000 0.017 0.000 0.023 0.000 utils.py:325(__contains__)\n", | |
" 10000 0.003 0.000 0.022 0.000 utils.py:319(__iter__)\n", | |
" 10000 0.018 0.000 0.022 0.000 dataset.py:1508(<dictcomp>)\n", | |
" 70000 0.022 0.000 0.022 0.000 {built-in method builtins.getattr}\n", | |
" 30000 0.016 0.000 0.020 0.000 dataset.py:100(<genexpr>)\n", | |
" 160000 0.019 0.000 0.019 0.000 variable.py:408(dims)\n", | |
" 50000 0.012 0.000 0.017 0.000 variable.py:464(<genexpr>)\n", | |
" 40000 0.012 0.000 0.017 0.000 variable.py:117(_maybe_wrap_data)\n", | |
" 10000 0.008 0.000 0.016 0.000 indexing.py:637(as_indexable)\n", | |
" 50000 0.011 0.000 0.016 0.000 variable.py:467(<genexpr>)\n", | |
" 10000 0.011 0.000 0.016 0.000 variable.py:900(<listcomp>)\n", | |
" 10000 0.005 0.000 0.015 0.000 variable.py:428(_item_key_to_tuple)\n", | |
" 10000 0.009 0.000 0.015 0.000 indexing.py:1154(_indexing_array_and_key)\n", | |
" 150000 0.015 0.000 0.015 0.000 {built-in method builtins.len}\n", | |
" 20000 0.009 0.000 0.015 0.000 variable.py:493(<genexpr>)\n", | |
" 10000 0.005 0.000 0.015 0.000 utils.py:355(__iter__)\n", | |
" 10000 0.010 0.000 0.014 0.000 indexing.py:323(as_integer_slice)\n", | |
" 30000 0.009 0.000 0.014 0.000 utils.py:208(is_scalar)\n", | |
" 20000 0.010 0.000 0.014 0.000 _collections_abc.py:657(get)\n", | |
" 120000 0.012 0.000 0.012 0.000 {method 'append' of 'list' objects}\n", | |
" 50000 0.012 0.000 0.012 0.000 {method 'rpartition' of 'str' objects}\n", | |
" 90000 0.011 0.000 0.011 0.000 {method 'items' of 'collections.OrderedDict' objects}\n", | |
" 10000 0.008 0.000 0.011 0.000 dataarray.py:540(coords)\n", | |
" 10000 0.005 0.000 0.010 0.000 coordinates.py:47(__contains__)\n", | |
" 10000 0.010 0.000 0.010 0.000 merge.py:101(__init__)\n", | |
" 50000 0.010 0.000 0.010 0.000 utils.py:343(__init__)\n", | |
" 10000 0.006 0.000 0.009 0.000 merge.py:92(_assert_compat_valid)\n", | |
" 10000 0.008 0.000 0.008 0.000 {built-in method builtins.sorted}\n", | |
" 30000 0.007 0.000 0.007 0.000 {built-in method __new__ of type object at 0x1053aa750}\n", | |
" 50000 0.007 0.000 0.007 0.000 utils.py:313(__init__)\n", | |
" 10000 0.007 0.000 0.007 0.000 indexing.py:306(__init__)\n", | |
" 40000 0.006 0.000 0.006 0.000 utils.py:361(__contains__)\n", | |
" 10000 0.006 0.000 0.006 0.000 coordinates.py:222(_names)\n", | |
" 10000 0.004 0.000 0.005 0.000 indexing.py:1138(__init__)\n", | |
" 10000 0.005 0.000 0.005 0.000 {method 'update' of 'collections.OrderedDict' objects}\n", | |
" 10000 0.005 0.000 0.005 0.000 {method 'pop' of 'collections.OrderedDict' objects}\n", | |
" 20000 0.005 0.000 0.005 0.000 {method 'intersection' of 'set' objects}\n", | |
" 30000 0.004 0.000 0.004 0.000 {method 'item' of 'numpy.ndarray' objects}\n", | |
" 20000 0.004 0.000 0.004 0.000 utils.py:316(__getitem__)\n", | |
" 30000 0.004 0.000 0.004 0.000 indexing.py:319(as_integer_or_none)\n", | |
" 20000 0.004 0.000 0.004 0.000 dataset.py:415(_attrs_copy)\n", | |
" 10000 0.003 0.000 0.004 0.000 indexing.py:1145(_ensure_ndarray)\n", | |
" 20000 0.003 0.000 0.003 0.000 {method 'items' of 'dict' objects}\n", | |
" 10000 0.003 0.000 0.003 0.000 {method 'copy' of 'collections.OrderedDict' objects}\n", | |
" 10000 0.003 0.000 0.003 0.000 {method 'union' of 'set' objects}\n", | |
" 10000 0.002 0.000 0.002 0.000 coordinates.py:219(__init__)\n", | |
" 10000 0.002 0.000 0.002 0.000 dataarray.py:356(name)\n", | |
" 10000 0.002 0.000 0.002 0.000 indexing.py:311(tuple)\n", | |
" 10000 0.002 0.000 0.002 0.000 dataarray.py:366(variable)\n", | |
" 10000 0.001 0.000 0.001 0.000 {method 'extend' of 'list' objects}\n", | |
" 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"%%prun -s cumulative \n", | |
"for _ in range(10): index_xarray(named_indices, arr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" " | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
" 13 function calls in 0.002 seconds\n", | |
"\n", | |
" Ordered by: cumulative time\n", | |
"\n", | |
" ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
" 1 0.000 0.000 0.002 0.002 {built-in method builtins.exec}\n", | |
" 1 0.000 0.000 0.002 0.002 <string>:3(<module>)\n", | |
" 10 0.002 0.000 0.002 0.000 <ipython-input-5-2916ae1e6703>:6(index_numpy)\n", | |
" 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"%%prun -s cumulative\n", | |
"\n", | |
"for _ in range(10): index_numpy(indices, np_arr)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment