Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

""" xarray IO standards and helpers 

""" 

import logging 

 

import numpy as np 

import xarray as xr 

 

from ..gis import convert 

from ..gis.conventions import is_georeferenced, georeference 

from ..gis.projections import cf_xy_coord_names 

from .chunk import auto_determine_chunks 

from .utils import parse_paths 

 

logger = logging.getLogger(__name__) 

 

 

def open_dataset(paths, chunks='auto', concat_dim=None, crs=None, **kwds): 

""" Open an xarray dataset (somewhat) intelligently 

 

Parameters 

---------- 

paths : str or List[str] 

Either a string glob in the form "path/to/my/files/*.nc" or an explicit 

list of files to open. 

chunks : 'auto', int or dict 

Number of chunks or the chunk size for each dimension. If 'auto', 

reads chunks from first file in ``paths`` and uses 

:py:func:`plants.io.chunking_.best_chunksizes` to guess an appropriate 

chunksize based on the most frequently used chunksize per dimension. 

Otherwise, pass `chunks` onto :py:func:`xr.open_mfdataset`. 

concat_dim : None, str, DataArray or Index, optional 

Dimension to concatenate files along. This argument is passed on to 

:py:func:`xarray.auto_combine` along with the dataset objects. You only 

need to provide this argument if the dimension along which you want to 

concatenate is not a dimension in the original datasets, e.g., if you 

want to stack a collection of 2D arrays along a third dimension. 

By default, xarray attempts to infer this argument by examining 

component files. Set ``concat_dim=None`` explicitly to disable 

concatenation. 

crs : rasterio.crs.CRS, int, or str, optional 

Coordinate reference system to assign, if not already georeferenced. 

If type is not ``CRS``, will be converted using 

:py:func:`plants.gis.convert.to_crs`. 

kwds : optional 

Options passed to :py:func:`xarray.open_mfdataset` 

 

Returns 

------- 

xarray.Dataset 

Multi-file dataset 

 

Raises 

------ 

IOError 

Raise if ``paths`` does not parse into any filenames (e.g., a bad glob) 

""" 

# Parse path from input 

paths_ = parse_paths(paths) 

if not paths: 

raise IOError('Could not find a file to read using input "{paths}"' 

.format(paths=paths)) 

 

# Guess at chunks 

if chunks == 'auto': 

chunks = auto_determine_chunks(paths_[0]) 

 

# The xarray call we're wrapping 

ds = xr.open_mfdataset(paths_, 

chunks=chunks, 

concat_dim=concat_dim, 

**kwds) 

 

# TODO: what to do if no geocoding (?) 

if not is_georeferenced(ds): 

if crs is not None: 

logger.debug('Adding CRS information provided') 

crs_ = convert.to_crs(crs) 

transform_ = convert.to_transform(ds) 

ds = georeference(ds, crs_, transform_, inplace=True) 

else: 

logger.debug('No georeference information found on file.') 

 

# TODO: in GIS, figure out if we can go CF -> proj, not just other way 

# around 

return ds 

 

 

def xarray_map(y, x, count, dtype, crs=None, nodata=-9999, dim_band='band'): 

""" A raster map with ``count`` bands 

Parameters 

---------- 

y : array-like 

Y coordinates 

x : array-like 

X coordinates 

count : int 

Number of bands 

dtype : np.dtype 

Data type 

crs : CRS, optional 

Optionally, give the raster a CRS 

nodata : float or int 

No Data Value to initialize data with 

 

Returns 

------- 

xr.DataArray 

3D (band, y, x,) "map" to write data into 

""" 

if crs is not None: 

crs_ = convert.to_crs(crs) 

dim_yx = cf_xy_coord_names(crs_)[::-1] 

else: 

dim_yx = ('y', 'x', ) 

 

shape = (count, len(y), len(x), ) 

dims = (dim_band, ) + dim_yx 

 

coords = { 

dim_yx[0]: y, 

dim_yx[1]: x, 

dim_band: [f'{dim_band}_{i:0{len(str(count))}d}' 

for i in range(count)] 

} 

 

arr = np.full(shape, nodata, dtype=dtype) 

xarr = xr.DataArray(arr, dims=dims, coords=coords) 

 

if crs is not None: 

xarr = georeference(xarr, crs, inplace=True) 

 

return xarr