""" Convert "pre-ARD" to ARD 

""" 

from collections import defaultdict 

import datetime as dt 

import json 

import logging 

import os 

from pathlib import Path 

 

import numpy as np 

import pandas as pd 

import xarray as xr 

 

from stems.gis import convert, georeference, grids 

from stems.io.encoding import netcdf_encoding 

 

from . import defaults, __version__ 

 

logger = logging.getLogger(__name__) 

 

 

def process_preard(metadata, images, chunks=None):
    """ Open and process pre-ARD data to ARD

    Parameters
    ----------
    metadata : dict
        Image metadata
    images : Sequence[str or Path]
        Path(s) to pre-ARD imagery
    chunks : dict, optional
        Chunks to use when opening pre-ARD GeoTIFF files. If ``None``,
        defaults to ``{'x': 256, 'y': 256, 'band': -1}``

    Returns
    -------
    xr.Dataset
        pre-ARD processed to (in memory) ARD format that can be written
        to disk
    """
    image_metadata = metadata['image']

    # Read metadata and determine key attributes
    times = pd.to_datetime([_ard_image_timestamp(images)
                            for images in image_metadata['images']]).values
    bands = image_metadata['bands']

    # Create pre-ARD DataArray
    preard_da = read_preard(images, chunks=chunks)

    # Remove any attributes
    preard_da.attrs = {}

    # Convert to Dataset
    ard_ds = preard_to_ard(preard_da, times, bands)

    # Attach attribute metadata
    version = metadata['program']['version']
    order_metadata = metadata['order']
    collection = order_metadata['collection']
    submitted = order_metadata.get('submitted', 'UNKNOWN TIME')
    date_start = order_metadata['date_start']
    date_end = order_metadata['date_end']
    dt_now = dt.datetime.today().strftime("%Y%m%dT%H%M%S")

    attrs = {
        'title': f'Collection "{collection}" Analysis Ready Data',
        'history': '\n'.join([
            (f'{submitted} - Ordered pre-ARD from GEE for collection '
             f'"{collection}" between {date_start}-{date_end} using '
             f'`cedar={version}`'),
            f'{dt_now} - Converted to ARD using `cedar={__version__}`'
        ]),
        'source': f'Google Earth Engine Collection "{collection}"',
        'images': json.dumps(image_metadata['images'])
    }
    ard_ds.attrs = attrs

    # Georeference
    tile_ = grids.Tile.from_dict(metadata['tile'])
    ard_ds = georeference(ard_ds, tile_.crs, tile_.transform)

    return ard_ds


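# A minimal usage sketch for a single pre-ARD order (the filenames below are
# hypothetical; `find_preard`, `read_metadata`, and `ard_netcdf_encoding` are
# defined later in this module):
#
#     meta_file = Path('preard/LT5_h001v001_1985.json')
#     images = find_preard(meta_file)[meta_file]
#     meta = read_metadata(meta_file)
#     ard = process_preard(meta, images)
#     encoding = ard_netcdf_encoding(ard, meta)
#     ard.to_netcdf('LT5_h001v001_1985.nc', encoding=encoding)

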
def find_preard(path, metadata_pattern='*.json'):
    """ Match pre-ARD metadata with imagery in some location

    Parameters
    ----------
    path : str or Path
        Path to a metadata file or directory of files (returning matches
        inside the directory)
    metadata_pattern : str, optional
        If ``path`` is a directory, this value is used as a glob inside
        ``path`` to locate metadata files

    Returns
    -------
    dict[Path, list[Path]]
        Pairs of metadata filename to image filename(s)
    """
    path = Path(path)
    if path.is_dir():
        metadata = list(path.glob(metadata_pattern))
    else:
        metadata = [path]

    preard = {}
    for meta in metadata:
        images = sorted(meta.parent.glob(meta.stem + '*.tif'))
        if images:
            preard[meta] = images
        else:
            logger.debug(f'Could not find images for metadata file {meta}')
            preard[meta] = []

    return preard


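# Pairing convention, illustrated with hypothetical filenames: each metadata
# file is matched with every GeoTIFF whose name starts with the same stem,
# e.g.
#
#     preard/
#         LT5_h001v001_1985.json
#         LT5_h001v001_1985-0000000000-0000000000.tif
#         LT5_h001v001_1985-0000000000-0000002560.tif
#
#     find_preard('preard/')
#     # -> {Path('preard/LT5_h001v001_1985.json'): [<the two GeoTIFFs>]}

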
def preard_to_ard(xarr, time, bands):
    """ Convert a "pre-ARD" DataArray to an ARD xr.Dataset

    Parameters
    ----------
    xarr : xarray.DataArray
        DataArray containing observations from all bands and time
    time : np.ndarray
        Time information for each observation
    bands : Sequence[str]
        Band names

    Returns
    -------
    xr.Dataset
        Dataset containing all observations split into subdatasets
        according to band

    Raises
    ------
    ValueError
        Raised if the number of bands and times specified do not match
        the number of "bands" in the input DataArray
    """
    n_band = len(bands)
    n_time = len(time)
    n_band_time = n_band * n_time

    if n_band_time != xarr.band.size:
        raise ValueError(f'Number of bands x time specified ({n_band_time}) '
                         f'does not match input data ({xarr.band.size})')

    ds_bands = {}
    for i_band, band_name in enumerate(bands):
        # Select out this band from list of band x time
        da_band = xarr[np.arange(i_band, n_band_time, n_band), ...]
        # Replace "band" for "time" in two steps
        da_band.coords['time'] = ('band', time)
        ds_bands[band_name] = da_band.swap_dims({'band': 'time'}).drop('band')

    ard_ds = xr.Dataset(ds_bands)
    return ard_ds


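# A tiny synthetic illustration of the stacking `preard_to_ard` expects along
# the "band" dimension (time-major, bands varying fastest); the array
# contents and band names below are made up:
#
#     data = np.arange(2 * 3 * 4 * 5).reshape(6, 4, 5)
#     da = xr.DataArray(data, dims=('band', 'y', 'x'),
#                       coords={'band': np.arange(6)})
#     times = pd.date_range('2000-01-01', periods=3).values
#     ds = preard_to_ard(da, times, ['red', 'nir'])
#     # index 0 is ('2000-01-01', 'red'), index 1 is ('2000-01-01', 'nir'),
#     # index 2 is ('2000-01-02', 'red'), and so on
#     assert list(ds.data_vars) == ['red', 'nir']
#     assert ds['red'].sizes == {'time': 3, 'y': 4, 'x': 5}

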
def ard_netcdf_encoding(ard_ds, metadata, **encoding_kwds):
    """ Return encoding for ARD NetCDF4 files

    Parameters
    ----------
    ard_ds : xr.Dataset
        ARD as an xarray Dataset
    metadata : dict
        Metadata about the ARD

    Returns
    -------
    dict
        NetCDF encoding to use with :py:meth:`xarray.Dataset.to_netcdf`
    """
    assert 'nodata' not in encoding_kwds
    nodata = metadata['image'].get('nodata', None)
    encoding = netcdf_encoding(ard_ds, nodata=nodata, **encoding_kwds)
    return encoding


def read_metadata(filename):
    """ Read pre-ARD image metadata from a JSON file and return it as a dict
    """
    with open(filename, 'r') as f:
        meta = json.load(f)
    return meta


def read_preard(filenames, chunks=None):
    """ Read pre-ARD file(s) into a single DataArray

    Parameters
    ----------
    filenames : Sequence[str or Path]
        Pre-ARD file name(s)
    chunks : dict, optional
        Chunks to use when opening pre-ARD GeoTIFF files. If ``None``,
        defaults to ``{'x': 256, 'y': 256, 'band': -1}``

    Returns
    -------
    xr.DataArray
        Pre-ARD joined together as a single DataArray
    """
    if isinstance(filenames, (str, Path)):
        filenames = (filenames, )
    filenames = [Path(f) for f in filenames]

    if chunks is None:
        chunks = defaults.PREARD_CHUNKS.copy()

    common = os.path.commonprefix([f.stem for f in filenames])

    by_row = defaultdict(list)
    for fname in filenames:
        shard = fname.stem[len(common):]
        if not shard:  # OK if just 1 file
            assert len(filenames) == 1
            ymin, xmin = 0, 0
        else:
            ymin, xmin = map(int, shard.split('-'))
        da = xr.open_rasterio(fname, chunks=chunks)
        by_row[ymin].append(da)

    # Concat across columns
    rows = {row: xr.concat(by_row[row], dim='x') for row in by_row}

    # Concat across rows
    preard = xr.concat(rows.values(), dim='y')

    return preard


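# Sketch of mosaicking sharded pre-ARD (the shard filenames are hypothetical;
# the parsing above assumes each stem ends in a "<row offset>-<column offset>"
# suffix after the shared prefix, with shards concatenated along "x" within a
# row and then along "y" across rows):
#
#     shards = sorted(Path('preard').glob('LT5_h001v001_1985*.tif'))
#     da = read_preard(shards, chunks={'x': 256, 'y': 256, 'band': -1})
#     da.sizes   # e.g. Frozen({'band': <bands x times>, 'y': ..., 'x': ...})

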
def _ard_image_timestamp(images):
    # Earliest acquisition time among the images in a pre-ARD stack
    return _unix2dt(min(i['system:time_start'] for i in images))


def _unix2dt(timestamp):
    # GEE "system:time_start" is in milliseconds since the Unix epoch
    return dt.datetime.fromtimestamp(timestamp / 1e3)
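

# A minimal end-to-end sketch and an assumption about how these pieces fit
# together, not an official entry point: convert every pre-ARD metadata/image
# pair found in a directory into one NetCDF file per metadata file. The
# directory argument is hypothetical; run as a module (``python -m ...``) so
# the relative imports above resolve.
if __name__ == '__main__':
    import sys

    logging.basicConfig(level=logging.INFO)
    preard_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path('.')

    for meta_file, image_files in find_preard(preard_dir).items():
        if not image_files:
            logger.warning(f'No imagery found for {meta_file}, skipping')
            continue
        meta = read_metadata(meta_file)
        ard = process_preard(meta, image_files)
        encoding = ard_netcdf_encoding(ard, meta)
        dest = meta_file.with_suffix('.nc')
        ard.to_netcdf(dest, encoding=encoding)
        logger.info(f'Wrote ARD to {dest}')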