""" Tests for :py:mod:`stems.io.chunk`
"""
from collections import OrderedDict
import string
import dask.array as da
import numpy as np
import pytest
import xarray as xr
from stems.io import chunk
from stems.tests import build_data
# Per-dimension chunk sizes used to parametrize the tests in this module.
# Each entry maps a dimension name ('y', 'x', 'time') to a chunk size.
TEST_PARAMS_CHUNKS = [
    {'y': 5, 'x': 3, 'time': 25},
]
# ----------------------------------------------------------------------------
# read_chunks
@pytest.mark.parametrize('chunks', TEST_PARAMS_CHUNKS)
def test_read_chunks(tmpdir, chunks):
    """Check that ``chunk.read_chunks`` reports the chunks of a NetCDF4 file.

    This test was previously named ``test_read_chunks_netcdf4``, the same
    name as the test for ``chunk.read_chunks_netcdf4`` defined later in this
    module. The later definition rebound the name, so this test was never
    collected by pytest. It is named after the function it exercises so that
    both tests run.

    Parameters
    ----------
    tmpdir
        pytest's built-in temporary directory fixture.
    chunks : dict
        Chunk size for each of the 'x', 'y', and 'time' dimensions.
    """
    data_vars = list('bgrn')
    dst = str(tmpdir.join('test.nc'))
    dst_ = build_data.create_test_netcdf4(dst=dst,
                                          data_vars=data_vars,
                                          chunk_x=chunks['x'],
                                          chunk_y=chunks['y'],
                                          chunk_time=chunks['time'])

    test = chunk.read_chunks(dst_)

    # Every data variable is expected to report the chunks it was built with
    for dv in data_vars:
        assert test[dv] == chunks
# ----------------------------------------------------------------------------
# read_chunks_netcdf4
@pytest.mark.parametrize('chunks', TEST_PARAMS_CHUNKS)
def test_read_chunks_netcdf4(tmpdir, chunks):
    """Check ``chunk.read_chunks_netcdf4`` against a freshly built NetCDF4.

    The file is created with the parametrized chunk sizes, and each data
    variable is expected to report exactly those chunk sizes.
    """
    band_names = list('bgrn')
    target = str(tmpdir.join('test.nc'))

    created = build_data.create_test_netcdf4(
        dst=target,
        data_vars=band_names,
        chunk_x=chunks['x'],
        chunk_y=chunks['y'],
        chunk_time=chunks['time'],
    )

    result = chunk.read_chunks_netcdf4(created)
    for band in band_names:
        assert result[band] == chunks
# ----------------------------------------------------------------------------
# read_chunks_rasterio
@pytest.mark.parametrize('chunks', TEST_PARAMS_CHUNKS)
def test_read_chunks_rasterio(tmpdir, chunks):
    """Check ``chunk.read_chunks_rasterio`` for both supported input forms.

    The test raster is 99 rows by 49 columns, with block height taken from
    the parametrized ``chunks['y']`` and block width fixed at 49. The
    function is called once with the value returned by
    ``create_test_raster`` and once with an already opened rasterio dataset.
    """
    import rasterio

    target = str(tmpdir.join('test.tif'))
    raster, _meta, _info = build_data.create_test_raster(
        target,
        height=99, width=49,
        blockysize=chunks['y'],
        blockxsize=49
    )
    expected = {'y': chunks['y'], 'x': 49}

    # Let the function open the file itself
    from_file = chunk.read_chunks_rasterio(raster)
    assert from_file == expected

    # Hand the function an open file handle
    with rasterio.open(raster) as handle:
        from_handle = chunk.read_chunks_rasterio(handle)
        assert from_handle == expected
# ----------------------------------------------------------------------------
# best_chunksizes
def test_best_chunksizes_1():
    """All variables share the same chunk sizes, so those sizes are best."""
    per_band = {
        band: {'x': 5, 'y': 5, 'time': 1}
        for band in ('blu', 'grn', 'red')
    }

    result = chunk.best_chunksizes(per_band)

    assert result == per_band['blu']
def test_best_chunksizes_2():
    """Each dimension has one chunk size that occurs most often."""
    per_band = dict(
        blu={'x': 10, 'y': 5, 'time': 5},
        grn={'x': 10, 'y': 10, 'time': 1},
        red={'x': 5, 'y': 10, 'time': 5},
    )
    expected = {'x': 10, 'y': 10, 'time': 5}

    result = chunk.best_chunksizes(per_band)

    assert result == expected
def test_best_chunksizes_3():
    """Equally common chunk sizes: the tie is broken using the max."""
    per_band = dict(
        blu={'x': 10, 'y': 5, 'time': 5},
        grn={'x': 10, 'y': 10, 'time': 10},
        red={'x': 5, 'y': 10, 'time': 5},
        nir={'x': 5, 'y': 5, 'time': 10},
    )
    expected = {'x': 10, 'y': 10, 'time': 10}

    result = chunk.best_chunksizes(per_band)

    assert result == expected
# ----------------------------------------------------------------------------
# get_chunksizes
@pytest.mark.parametrize('chunks', (
    (1, 5, 10, ),
    (5, 5, ),
))
def test_get_chunksizes_dataarray_1(chunks):
    """A dask-backed DataArray reports one chunk size per dimension.

    The array has 15 elements along each of ``len(chunks)`` dimensions, and
    the expected result pairs each dimension name with its requested chunk
    size.
    """
    shape = (15, ) * len(chunks)
    array = xr.DataArray(da.ones(shape, chunks=chunks))
    expected = dict(zip(array.dims, chunks))

    result = chunk.get_chunksizes(array)

    assert result == expected
@pytest.mark.parametrize('chunks', TEST_PARAMS_CHUNKS)
def test_get_chunksizes_dataarray_2(chunks):
    """Each variable of the test dataset reports the parametrized chunks."""
    band_names = ['blue', 'green', 'red', 'nir']
    dataset = build_data.create_test_dataset(
        data_vars=band_names,
        chunk_x=chunks['x'],
        chunk_y=chunks['y'],
        chunk_time=chunks['time'],
    )

    for band in band_names:
        result = chunk.get_chunksizes(dataset[band])
        assert result == chunks
def test_get_chunksizes_dataarray_3():
    """A NumPy-backed DataArray yields an empty chunk size mapping."""
    shape = (15, ) * 3
    array = xr.DataArray(np.ones(shape))

    result = chunk.get_chunksizes(array)

    assert result == {}
def test_get_chunksizes_dataset_1():
    """A Dataset of NumPy-backed data yields an empty chunk size mapping."""
    shape = (15, ) * 3
    array = xr.DataArray(np.ones(shape))
    dataset = xr.Dataset({'a': array})

    result = chunk.get_chunksizes(dataset)

    assert result == {}
@pytest.mark.parametrize('test', ({'ten': 10}, (10, ), 10, '10', ))
def test_get_chunksizes_TypeError(test):
    """Inputs that are not xarray objects must raise ``TypeError``."""
    expected_message = r'Input.*must be an xarray Dataset or DataArray.*'

    with pytest.raises(TypeError, match=expected_message):
        chunk.get_chunksizes(test)
# ----------------------------------------------------------------------------
# chunks_to_chunksizes
@pytest.mark.parametrize('chunksizes', (
    (10, 5, 1, ),
    (5, 1, )
))
def test_chunks_to_chunksizes_dict(chunksizes):
    """An ordered mapping of dimension -> chunks becomes a chunksize tuple.

    Each dimension, named by successive ASCII letters, is given three chunks
    of the same size. The result is expected to equal the sizes in order.
    """
    dim_names = string.ascii_letters[:len(chunksizes)]

    chunks_by_dim = OrderedDict()
    for name, size in zip(dim_names, chunksizes):
        chunks_by_dim[name] = (size, ) * 3

    result = chunk.chunks_to_chunksizes(chunks_by_dim)

    assert result == chunksizes
def test_chunks_to_chunksizes_dataset():
    """Chunk sizes of a Dataset come back in the order given by ``dims``."""
    dataset = build_data.create_test_dataset(chunk_x=2,
                                             chunk_y=4,
                                             chunk_time=30)
    dim_order = ('y', 'time', 'x', )

    result = chunk.chunks_to_chunksizes(dataset, dims=dim_order)

    assert result == (4, 30, 2)
@pytest.mark.parametrize('chunksizes', (
    (10, 5, 1, ),
    (5, 1, )
))
def test_chunks_to_chunksizes_dataarray(chunksizes):
    """A dask-backed DataArray reports the chunk sizes it was built with."""
    shape = (100, ) * len(chunksizes)
    array = xr.DataArray(da.ones(shape, chunks=chunksizes))

    result = chunk.chunks_to_chunksizes(array)

    assert result == chunksizes
def test_chunks_to_chunksizes_none():
    """NumPy-backed xarray objects produce an empty chunksize tuple."""
    array = xr.DataArray(np.ones(10))
    dataset = xr.Dataset({'x': array})

    # DataArray
    from_array = chunk.chunks_to_chunksizes(array)
    assert from_array == ()

    # Dataset
    from_dataset = chunk.chunks_to_chunksizes(dataset)
    assert from_dataset == ()
@pytest.mark.parametrize('test', ((10, ), 10, '10', ))
def test_chunks_to_chunksizes_TypeError(test):
    """Unsupported input types must raise ``TypeError``."""
    expected_message = r'Unknown type.*'

    with pytest.raises(TypeError, match=expected_message):
        chunk.chunks_to_chunksizes(test)