Source code for stems.io.tests.test_io_encoding

""" Tests for :py:mod:`stems.io.encoding`
"""
import dask.array as da
import numpy as np
import pytest
import xarray as xr

from stems.io import encoding


# ----------------------------------------------------------------------------
# default_encoding
[docs]def test_default_encoding_dataarray(tmpdir, dataset_11w7h100t4v):
    data_vars = list(dataset_11w7h100t4v.data_vars.keys())
    for dv in data_vars:
        xarr = dataset_11w7h100t4v[dv]
        enc = encoding.netcdf_encoding(xarr)
        # ensure variable in encoding
        assert dv in enc
        # ensure dtype is correct
        assert enc[dv]['dtype'] == xarr.dtype
        # ensure chunking is correct
        assert enc[dv]['chunksizes'] == (3, 5, 25)
        # no nodata so no _FillValue
        assert '_FillValue' not in enc[dv]
        # compression by default
        assert 'complevel' in enc[dv]
        assert enc[dv]['zlib'] is True

    # Test that integrates with xarray / actually works
    dataset_11w7h100t4v.to_netcdf(str(tmpdir.join('tmp.nc')), encoding=enc)


def test_default_encoding_dataset(tmpdir, dataset_11w7h100t4v):
    enc = encoding.netcdf_encoding(dataset_11w7h100t4v)

    # Check
    for dv in dataset_11w7h100t4v.data_vars:
        enc_ = enc[dv]
        # ensure dtype is correct
        assert enc_['dtype'] == dataset_11w7h100t4v[dv].dtype
        # ensure chunking is correct
        assert enc[dv]['chunksizes'] == (3, 5, 25)
        # no nodata so no _FillValue
        assert '_FillValue' not in enc[dv]
        # compression by default
        assert 'complevel' in enc[dv]
        assert enc[dv]['zlib'] is True

    # Test that integrates with xarray / actually works
    dataset_11w7h100t4v.to_netcdf(str(tmpdir.join('tmp.nc')), encoding=enc)


[docs]def test_default_encoding_dataset(tmpdir, dataset_11w7h100t4v):
    data_vars = list(dataset_11w7h100t4v.data_vars.keys())

    chunks = {'y': 5, 'x': 2, 'time': 20}
    nodata = dict(zip(data_vars, range(len(data_vars))))

    enc = encoding.netcdf_encoding(
        dataset_11w7h100t4v,
        chunks=chunks,
        zlib=False,
        nodata=nodata
    )

    # Check
    for dv in data_vars:
        enc_ = enc[dv]
        # ensure dtype is correct
        assert enc_['dtype'] == dataset_11w7h100t4v[dv].dtype
        # ensure chunking is correct
        assert enc[dv]['chunksizes'] == (5, 2, 20)
        # _FillValue different in each variable
        assert enc[dv]['_FillValue'] == nodata[dv]
        # compression by default
        assert enc[dv]['zlib'] is False

    # Test that integrates with xarray / actually works
    dataset_11w7h100t4v.to_netcdf(str(tmpdir.join('tmp.nc')), encoding=enc)


# -----------------------------------------------------------------------------
# encoding_name
[docs]def test_encoding_name():
    xarr = xr.DataArray(np.ones(5))
    assert encoding.encoding_name(xarr) == xr.backends.api.DATAARRAY_VARIABLE
    xarr.name = 'test'
    assert encoding.encoding_name(xarr) == 'test'


# -----------------------------------------------------------------------------
# encoding_dtype
[docs]@pytest.mark.parametrize('dtype', (np.int16, np.float32, np.byte,
                                   np.datetime64))
def test_encoding_dtype(dtype):
    xarr = xr.DataArray(np.ones(5, ).astype(dtype))
    ans = encoding.encoding_dtype(xarr)
    assert ans == {'dtype': xarr.dtype}


# -----------------------------------------------------------------------------
# encoding_chunksizes
[docs]def test_encoding_chunksizes_None():
    chunks = (10, 10, )
    xarr = xr.DataArray(da.ones((100, 100), chunks=chunks))
    ans = encoding.encoding_chunksizes(xarr, chunks=None)
    assert ans == chunks


[docs]def test_encoding_chunksizes_dict():
    chunks = (10, 10, )
    xarr = xr.DataArray(da.ones((100, 100), chunks=chunks))
    chunks_ = {dim: 50 for dim in xarr.dims}
    ans = encoding.encoding_chunksizes(xarr, chunks=chunks_)
    assert ans == (50, ) * xarr.ndim


# -----------------------------------------------------------------------------
# guard_chunksizes
[docs]def test_guard_chunksizes():
    chunks = (10, 10, )
    xarr = xr.DataArray(da.ones((100, 100), chunks=chunks))
    # Test too big
    ans = encoding.guard_chunksizes(xarr, (200, 200, ))
    assert ans == xarr.shape
    # Test passing sizes (no change)
    ans = encoding.guard_chunksizes(xarr, chunks)
    assert ans == chunks


# -----------------------------------------------------------------------------
# guard_chunksizes_str
[docs]def test_guard_chunksizes_str():
    # 1D (but really 2D in NetCDF world) array of character
    xarr = xr.DataArray(np.repeat(['asdf'], 5))
    # Should get 2D of chunks out for 1D in
    # (since stored as multiple 1D arrays)
    chunks = (1, )
    test = encoding.guard_chunksizes_str(xarr, chunks)
    assert len(test) == 2
    assert test[0] == 1 and test[1] == xarr.dtype.itemsize

    # If object type, just punt
    chunks = (1, )
    test = encoding.guard_chunksizes_str(xarr.astype(object), chunks)
    assert len(test) == 0


# -----------------------------------------------------------------------------
# guard_dtype
[docs]def test_guard_dtype():
    # some float
    dtype = np.float32
    xarr = xr.DataArray(np.ones(5, ).astype(dtype))
    ans = encoding.guard_dtype(xarr, {'dtype': xarr.dtype})
    assert ans == {'dtype': dtype}

    # datetime should be ignored
    dtype = np.datetime64
    xarr = xr.DataArray(np.ones(5, ).astype(dtype))
    ans = encoding.guard_dtype(xarr, {'dtype': xarr.dtype})
    assert ans == {}