Skip to content

Instantly share code, notes, and snippets.

@nbren12
Last active March 4, 2019 19:22
Show Gist options
  • Save nbren12/e781c5a8fe03ee170628194c4b3c3160 to your computer and use it in GitHub Desktop.
Save nbren12/e781c5a8fe03ee170628194c4b3c3160 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from itertools import product"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"shape = (10, 10, 10, 10)\n",
"dims = ['time' , 'z', 'y', 'x']\n",
"\n",
"np_arr = np.ones(shape)\n",
"arr = xr.DataArray(np_arr, dims=dims)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are some indices"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Length of `indices` 1000\n",
"Example indices:\n"
]
},
{
"data": {
"text/plain": [
"[(0, slice(None, None, None), 0, 0),\n",
" (0, slice(None, None, None), 0, 1),\n",
" (0, slice(None, None, None), 0, 2),\n",
" (0, slice(None, None, None), 0, 3),\n",
" (0, slice(None, None, None), 0, 4),\n",
" (0, slice(None, None, None), 0, 5),\n",
" (0, slice(None, None, None), 0, 6),\n",
" (0, slice(None, None, None), 0, 7),\n",
" (0, slice(None, None, None), 0, 8),\n",
" (0, slice(None, None, None), 0, 9)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ranges = [range(n) for k, n in enumerate(shape) if k != 1]\n",
"indices = [(t, slice(None), y, x) for (t, y, x) in product(*ranges)]\n",
"named_indices = [dict(zip(dims, index)) for index in indices]\n",
"\n",
"\n",
"print(\"Length of `indices`\", len(indices))\n",
"print(\"Example indices:\")\n",
"indices[:10]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example named_indices:\n"
]
},
{
"data": {
"text/plain": [
"[{'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 0},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 1},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 2},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 3},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 4},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 5},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 6},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 7},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 8},\n",
" {'time': 0, 'z': slice(None, None, None), 'y': 0, 'x': 9}]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"Example named_indices:\")\n",
"named_indices[:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Timing info numpy indexing vs xarray isel"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's time the iteration for xarray objects"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def index_xarray(named_indices, arr):\n",
" for named_index in named_indices:\n",
" arr.isel(**named_index)\n",
" \n",
" \n",
"def index_numpy(indices, arr):\n",
" for index in indices:\n",
" arr[index]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"172 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit -n 10\n",
"\n",
"index_numpy(indices, np_arr)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"130 ms ± 11.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit -n 10\n",
"index_xarray(named_indices, arr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The numpy code is about 1000 times faster. Is this also true when repeatedly grabbing data from one index?"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"named_index = named_indices[0]\n",
"index = indices[0]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"153 µs ± 30.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit -n 1000 arr[index]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"153 µs ± 11.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit -n 1000 arr.isel(**named_index)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"173 ns ± 1.43 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit -n 1000 np_arr[index]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In conclusion, both `isel` and bracket indexing of xarray objects are about 100 times slower than the comparable operations for numpy arrays."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Profiling"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" "
]
},
{
"data": {
"text/plain": [
" 4210013 function calls (4200013 primitive calls) in 2.209 seconds\n",
"\n",
" Ordered by: cumulative time\n",
"\n",
" ncalls tottime percall cumtime percall filename:lineno(function)\n",
" 1 0.000 0.000 2.209 2.209 {built-in method builtins.exec}\n",
" 1 0.000 0.000 2.209 2.209 <string>:2(<module>)\n",
" 10 0.019 0.002 2.209 0.221 <ipython-input-5-2916ae1e6703>:1(index_xarray)\n",
" 10000 0.033 0.000 2.189 0.000 dataarray.py:807(isel)\n",
" 10000 0.073 0.000 1.913 0.000 dataset.py:1461(isel)\n",
" 10000 0.096 0.000 0.744 0.000 dataset.py:1379(_validate_indexers)\n",
" 10000 0.044 0.000 0.596 0.000 variable.py:881(isel)\n",
" 10000 0.019 0.000 0.517 0.000 variable.py:604(__getitem__)\n",
" 30000 0.083 0.000 0.389 0.000 variable.py:41(as_variable)\n",
" 10000 0.048 0.000 0.388 0.000 variable.py:434(_broadcast_indexes)\n",
" 40000 0.043 0.000 0.281 0.000 variable.py:239(__init__)\n",
" 20000 0.028 0.000 0.245 0.000 dataset.py:659(_replace_vars_and_dims)\n",
" 10000 0.047 0.000 0.200 0.000 dataset.py:1424(_get_indexers_coordinates)\n",
" 880000 0.129 0.000 0.185 0.000 {built-in method builtins.isinstance}\n",
" 10000 0.005 0.000 0.177 0.000 dataarray.py:280(_to_temp_dataset)\n",
" 10000 0.026 0.000 0.171 0.000 dataarray.py:304(_to_dataset_whole)\n",
" 30000 0.094 0.000 0.166 0.000 dataset.py:92(calculate_dimensions)\n",
" 30000 0.070 0.000 0.153 0.000 dataset.py:636(_construct_direct)\n",
" 40000 0.076 0.000 0.151 0.000 variable.py:137(as_compatible_data)\n",
" 50000 0.036 0.000 0.135 0.000 variable.py:460(<genexpr>)\n",
" 10000 0.014 0.000 0.120 0.000 dataset.py:654(_from_vars_and_coord_names)\n",
" 10000 0.023 0.000 0.107 0.000 variable.py:492(_broadcast_indexes_basic)\n",
" 260000 0.094 0.000 0.094 0.000 common.py:183(__setattr__)\n",
" 10000 0.025 0.000 0.090 0.000 dataset.py:1388(<listcomp>)\n",
" 40000 0.046 0.000 0.088 0.000 variable.py:414(_parse_dimensions)\n",
" 90000 0.021 0.000 0.078 0.000 numeric.py:433(asarray)\n",
" 10000 0.024 0.000 0.075 0.000 merge.py:110(merge_variables)\n",
" 30000 0.019 0.000 0.069 0.000 variable.py:287(data)\n",
" 90000 0.043 0.000 0.069 0.000 utils.py:450(ndim)\n",
" 10000 0.036 0.000 0.067 0.000 indexing.py:338(__init__)\n",
" 10000 0.008 0.000 0.065 0.000 dataarray.py:284(_from_temp_dataset)\n",
" 10000 0.012 0.000 0.061 0.000 variable.py:623(_finalize_indexing_result)\n",
" 50000 0.043 0.000 0.060 0.000 dataset.py:442(dims)\n",
" 80000 0.034 0.000 0.058 0.000 pycompat.py:18(iteritems)\n",
" 90000 0.057 0.000 0.057 0.000 {built-in method numpy.core.multiarray.array}\n",
" 30000 0.032 0.000 0.056 0.000 abc.py:180(__instancecheck__)\n",
" 10000 0.016 0.000 0.052 0.000 dataarray.py:245(_replace)\n",
" 50000 0.015 0.000 0.051 0.000 <frozen importlib._bootstrap>:997(_handle_fromlist)\n",
" 100000 0.051 0.000 0.051 0.000 {built-in method builtins.hasattr}\n",
" 10000 0.013 0.000 0.050 0.000 coordinates.py:317(assert_coordinate_consistent)\n",
" 30000 0.010 0.000 0.045 0.000 variable.py:381(values)\n",
" 50000 0.025 0.000 0.037 0.000 <frozen importlib._bootstrap>:416(parent)\n",
" 30000 0.020 0.000 0.035 0.000 variable.py:194(_as_array_or_item)\n",
" 10000 0.016 0.000 0.034 0.000 dataarray.py:166(__init__)\n",
" 10000 0.026 0.000 0.034 0.000 indexing.py:17(expanded_indexer)\n",
"100000/90000 0.020 0.000 0.033 0.000 {built-in method builtins.iter}\n",
" 10000 0.014 0.000 0.033 0.000 indexing.py:1172(__getitem__)\n",
" 10000 0.016 0.000 0.030 0.000 {built-in method builtins.min}\n",
" 30000 0.011 0.000 0.026 0.000 utils.py:187(is_dict_like)\n",
" 30000 0.010 0.000 0.026 0.000 utils.py:195(either_dict_or_kwargs)\n",
" 60000 0.025 0.000 0.025 0.000 _weakrefset.py:70(__contains__)\n",
" 10000 0.008 0.000 0.024 0.000 {built-in method builtins.all}\n",
" 120000 0.023 0.000 0.023 0.000 variable.py:272(shape)\n",
" 50000 0.017 0.000 0.023 0.000 utils.py:325(__contains__)\n",
" 10000 0.003 0.000 0.022 0.000 utils.py:319(__iter__)\n",
" 10000 0.018 0.000 0.022 0.000 dataset.py:1508(<dictcomp>)\n",
" 70000 0.022 0.000 0.022 0.000 {built-in method builtins.getattr}\n",
" 30000 0.016 0.000 0.020 0.000 dataset.py:100(<genexpr>)\n",
" 160000 0.019 0.000 0.019 0.000 variable.py:408(dims)\n",
" 50000 0.012 0.000 0.017 0.000 variable.py:464(<genexpr>)\n",
" 40000 0.012 0.000 0.017 0.000 variable.py:117(_maybe_wrap_data)\n",
" 10000 0.008 0.000 0.016 0.000 indexing.py:637(as_indexable)\n",
" 50000 0.011 0.000 0.016 0.000 variable.py:467(<genexpr>)\n",
" 10000 0.011 0.000 0.016 0.000 variable.py:900(<listcomp>)\n",
" 10000 0.005 0.000 0.015 0.000 variable.py:428(_item_key_to_tuple)\n",
" 10000 0.009 0.000 0.015 0.000 indexing.py:1154(_indexing_array_and_key)\n",
" 150000 0.015 0.000 0.015 0.000 {built-in method builtins.len}\n",
" 20000 0.009 0.000 0.015 0.000 variable.py:493(<genexpr>)\n",
" 10000 0.005 0.000 0.015 0.000 utils.py:355(__iter__)\n",
" 10000 0.010 0.000 0.014 0.000 indexing.py:323(as_integer_slice)\n",
" 30000 0.009 0.000 0.014 0.000 utils.py:208(is_scalar)\n",
" 20000 0.010 0.000 0.014 0.000 _collections_abc.py:657(get)\n",
" 120000 0.012 0.000 0.012 0.000 {method 'append' of 'list' objects}\n",
" 50000 0.012 0.000 0.012 0.000 {method 'rpartition' of 'str' objects}\n",
" 90000 0.011 0.000 0.011 0.000 {method 'items' of 'collections.OrderedDict' objects}\n",
" 10000 0.008 0.000 0.011 0.000 dataarray.py:540(coords)\n",
" 10000 0.005 0.000 0.010 0.000 coordinates.py:47(__contains__)\n",
" 10000 0.010 0.000 0.010 0.000 merge.py:101(__init__)\n",
" 50000 0.010 0.000 0.010 0.000 utils.py:343(__init__)\n",
" 10000 0.006 0.000 0.009 0.000 merge.py:92(_assert_compat_valid)\n",
" 10000 0.008 0.000 0.008 0.000 {built-in method builtins.sorted}\n",
" 30000 0.007 0.000 0.007 0.000 {built-in method __new__ of type object at 0x1053aa750}\n",
" 50000 0.007 0.000 0.007 0.000 utils.py:313(__init__)\n",
" 10000 0.007 0.000 0.007 0.000 indexing.py:306(__init__)\n",
" 40000 0.006 0.000 0.006 0.000 utils.py:361(__contains__)\n",
" 10000 0.006 0.000 0.006 0.000 coordinates.py:222(_names)\n",
" 10000 0.004 0.000 0.005 0.000 indexing.py:1138(__init__)\n",
" 10000 0.005 0.000 0.005 0.000 {method 'update' of 'collections.OrderedDict' objects}\n",
" 10000 0.005 0.000 0.005 0.000 {method 'pop' of 'collections.OrderedDict' objects}\n",
" 20000 0.005 0.000 0.005 0.000 {method 'intersection' of 'set' objects}\n",
" 30000 0.004 0.000 0.004 0.000 {method 'item' of 'numpy.ndarray' objects}\n",
" 20000 0.004 0.000 0.004 0.000 utils.py:316(__getitem__)\n",
" 30000 0.004 0.000 0.004 0.000 indexing.py:319(as_integer_or_none)\n",
" 20000 0.004 0.000 0.004 0.000 dataset.py:415(_attrs_copy)\n",
" 10000 0.003 0.000 0.004 0.000 indexing.py:1145(_ensure_ndarray)\n",
" 20000 0.003 0.000 0.003 0.000 {method 'items' of 'dict' objects}\n",
" 10000 0.003 0.000 0.003 0.000 {method 'copy' of 'collections.OrderedDict' objects}\n",
" 10000 0.003 0.000 0.003 0.000 {method 'union' of 'set' objects}\n",
" 10000 0.002 0.000 0.002 0.000 coordinates.py:219(__init__)\n",
" 10000 0.002 0.000 0.002 0.000 dataarray.py:356(name)\n",
" 10000 0.002 0.000 0.002 0.000 indexing.py:311(tuple)\n",
" 10000 0.002 0.000 0.002 0.000 dataarray.py:366(variable)\n",
" 10000 0.001 0.000 0.001 0.000 {method 'extend' of 'list' objects}\n",
" 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%prun -s cumulative \n",
"for _ in range(10): index_xarray(named_indices, arr)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" "
]
},
{
"data": {
"text/plain": [
" 13 function calls in 0.002 seconds\n",
"\n",
" Ordered by: cumulative time\n",
"\n",
" ncalls tottime percall cumtime percall filename:lineno(function)\n",
" 1 0.000 0.000 0.002 0.002 {built-in method builtins.exec}\n",
" 1 0.000 0.000 0.002 0.002 <string>:3(<module>)\n",
" 10 0.002 0.000 0.002 0.000 <ipython-input-5-2916ae1e6703>:6(index_numpy)\n",
" 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%prun -s cumulative\n",
"\n",
"for _ in range(10): index_numpy(indices, np_arr)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment