""" IO utilities
"""
import datetime as dt
import glob
import os.path
from pathlib import Path
import numpy as np
from ..compat import toolz
def parse_paths(paths):
    """ Return a list of path(s)

    Parameters
    ----------
    paths : str, Path, or Sequence
        Either a single filename, a string glob in the form
        "path/to/my/files/*.nc", or an explicit list/tuple of files
        (each entry may itself be a filename, a glob, or a nested
        list/tuple). Environment variables in a filename or glob are
        expanded with :py:func:`os.path.expandvars`.

    Returns
    -------
    list[Path]
        Paths determined from ``paths``. Matches of a glob pattern are
        returned in sorted order; a glob that matches nothing contributes
        no entries.

    Raises
    ------
    TypeError
        If ``paths`` is not a str, Path, list, or tuple
    """
    if isinstance(paths, (Path, str)):
        expanded = os.path.expandvars(str(paths))
        # Filename or glob, either way turn into list
        if '*' in expanded:
            # ``glob.glob`` yields matches in arbitrary order; sort them so
            # the result is reproducible across runs and filesystems
            matches = sorted(glob.glob(expanded))
        else:
            matches = [expanded]
        return [Path(match) for match in matches]

    if isinstance(paths, (list, tuple)):
        # Resolve every entry recursively and flatten into a single list
        return [path for entry in paths for path in parse_paths(entry)]

    raise TypeError('`paths` must be a str, Path, or list/tuple')
def parse_filename_attrs(paths, index=None, sep=None):
    """ Parse filenames into a list of attributes taken from each stem

    Parameters
    ----------
    paths : str or Sequence
        Glob style search pattern, or a list of filenames
    index : slice or int
        Either a ``slice`` used to index on the stem of the filenames
        in `paths`, or together with `sep` an `int` used to index on
        the filename stem split by `sep` (e.g., ``filename.split(sep)[index]``).
        If `sep` is given and `index` is a ``slice``, the selected
        components are joined back together without a separator.
    sep : str, optional
        String field separator

    Returns
    -------
    list[str]
        Attribute extracted from each filename path

    Raises
    ------
    TypeError
        If `index` is not provided
    IOError
        If `paths` does not resolve to any files
    """
    # ``index`` is needed with or without ``sep``; fail early with a clear
    # message instead of an opaque indexing error further down
    if index is None:
        raise TypeError("Must provide either `index`, or `index` and `sep`")

    paths = parse_paths(paths)
    if not paths:
        raise IOError('No files to open')

    attrs = []
    for path in paths:
        # Extract filename attributes
        if sep:
            attr = path.stem.split(sep)[index]
            if isinstance(index, slice):
                # Asked for multiple components - rejoin into single str
                attr = ''.join(attr)
        else:
            attr = path.stem[index]
        attrs.append(attr)
    return attrs
def parse_filename_dates(paths, index=None, sep=None, date_format='%Y%m%d'):
    """ Parse filenames into an array of dates

    The date string is pulled out of each filename stem using `index`
    (and optionally `sep`), then parsed according to `date_format`.

    Parameters
    ----------
    paths : str or Sequence
        Glob style search pattern, or a list of filenames
    index : slice or int
        Either a ``slice`` used to index on the stem of the filenames
        in `paths`, or together with `sep` an `int` used to index on
        the filename stem split by `sep` (e.g., ``filename.split(sep)[index]``)
    sep : str, optional
        String field separator
    date_format : str
        Date format used by :py:func:`datetime.datetime.strptime`

    Returns
    -------
    np.ndarray
        Array of datetime64, one entry per filename

    See Also
    --------
    parse_filename_attrs : Extracts the raw string parsed into each date
    """
    date_strings = parse_filename_attrs(paths, index=index, sep=sep)

    stamps = []
    for text in date_strings:
        parsed = dt.datetime.strptime(text, date_format)
        stamps.append(np.datetime64(parsed))
    return np.array(stamps)