""" Tools for generating simulated datasets
The intent of this module is to provide tools for generating simulated data
that is useful for testing, debugging, and learning. This module is inspired
by the :py:func:`sklearn.datasets.make_classification` and
:py:func:`sklearn.datasets.make_regression`, among others.
"""
import numpy as np
import pandas as pd
# Defaults
# NOTE(review): none of these constants is referenced by the functions in the
# part of the module reviewed here; remarks on intended use are assumptions.
# presumably the default number of samples to simulate -- TODO confirm
_N_SAMPLES = 1000
# same value as the literal ``n_series`` default of ``make_segments``
_N_SERIES = 3
# same value as the literal ``n_segments`` default of ``make_segments``
_N_SEGMENT = 2
# presumably the default first/last dates for ``make_dates`` -- TODO confirm
_DATE_START = '2000-01-01'
_DATE_END = '2010-01-01'
# frequency string in pandas notation (16 days); presumably the default
# ``date_freq`` -- TODO confirm
_FREQ = '16D'
# =============================================================================
# Segments
def make_segments(date_start=None, date_end=None, date_freq=None,
                  n_series=3, n_segments=2, seg_sep=None,
                  means=None, stds=None, trends=None,
                  amplitudes=None, phases=None):
    """ Simulate data from multiple temporal segments

    .. note::

        This function is not implemented yet. It currently only checks
        ``n_series`` and ``n_segments`` and then returns ``None``; the
        "Returns" section below describes the intended result.

    Parameters
    ----------
    date_start : str, datetime, and more, optional
        Starting date (in a format known to Pandas)
    date_end : str, datetime, and more, optional
        Ending date (in a format known to Pandas)
    date_freq : str, optional
        Date frequency
    n_series : int
        Number of series/spectral bands to simulate
    n_segments : int
        Number of segments to simulate (must be at least 1)
    seg_sep : Sequence[float]
        Separability of segments (i.e., the size of the disturbance)
    means : Sequence[float]
        The mean value for each series/spectral band (passed to
        :py:func:`make_time_series_mean`)
    stds : Sequence[float]
        The standard deviation for each series/spectral band (passed to
        :py:func:`make_time_series_mean`)
    trends : Sequence[float]
        The time trend for each series/spectral band (passed to
        :py:func:`make_time_series_trend`)
    amplitudes : Sequence[float]
        The harmonic amplitude value for each series/spectral band (passed to
        :py:func:`make_time_series_harmonic`)
    phases : Sequence[float]
        The harmonic phase value for each series/spectral band (passed to
        :py:func:`make_time_series_harmonic`)

    Returns
    -------
    xr.DataArray
        Simulated data for ``n_segments`` across ``n_series`` series/spectral
        bands (intended; not produced yet)
    np.ndarray
        Array of ``datetime64`` indicating the dates of change
        (size=``n_segments - 1``) (intended; not produced yet)

    Raises
    ------
    AssertionError
        If ``n_series`` is not an ``int``, or if ``n_segments`` is not an
        ``int`` greater than or equal to 1
    """
    # NOTE: these checks are ``assert`` statements, so they are skipped when
    # Python runs with ``-O``.
    assert isinstance(n_series, int)
    assert isinstance(n_segments, int) and n_segments >= 1

    # TODO: generate the segments; until then the function returns ``None``
    pass
# =============================================================================
# Time series
def make_dates(date_start=None, date_end=None, date_freq=None):
    """ Return ``datetime64`` dates

    Thin wrapper around :py:func:`pandas.date_range` that returns the
    underlying NumPy array rather than the ``DatetimeIndex``. All three
    arguments are forwarded to Pandas unchanged, ``None`` included, so the
    rules of :py:func:`pandas.date_range` about which arguments must be
    given apply here as well.

    Parameters
    ----------
    date_start : str, datetime, and more, optional
        Starting date (in a format known to Pandas)
    date_end : str, datetime, and more, optional
        Ending date (in a format known to Pandas)
    date_freq : str, optional
        Date frequency

    Returns
    -------
    np.ndarray
        Dates as ``np.datetime64``

    See Also
    --------
    pandas.date_range
    """
    date_index = pd.date_range(start=date_start, end=date_end, freq=date_freq)
    # ``.values`` unwraps the index into a plain NumPy array
    return date_index.values
def make_time_series_mean(dates, mean=None, std=None):
    """ Placeholder for the mean component of a time series (not implemented)

    The body is empty, so every call returns ``None`` and all arguments are
    ignored.

    Parameters
    ----------
    dates : np.ndarray
        Dates to simulate for -- presumably as returned by
        :py:func:`make_dates` (TODO: confirm)
    mean : float, optional
        Mean value of the series (currently unused)
    std : float, optional
        Standard deviation of the series (currently unused)

    Returns
    -------
    None
        Nothing is generated yet
    """
    # TODO: implement
    pass
def make_time_series_trend(dates, trend=None):
    """ Placeholder for the trend component of a time series (not implemented)

    The body is empty, so every call returns ``None`` and all arguments are
    ignored.

    Parameters
    ----------
    dates : np.ndarray
        Dates to simulate for -- presumably as returned by
        :py:func:`make_dates` (TODO: confirm)
    trend : float, optional
        Time trend of the series (currently unused)

    Returns
    -------
    None
        Nothing is generated yet
    """
    # TODO: implement
    pass
def make_time_series_harmonic(dates, amplitude=None, phase=None):
    """ Placeholder for the harmonic component of a time series (not implemented)

    The body is empty, so every call returns ``None`` and all arguments are
    ignored.

    Parameters
    ----------
    dates : np.ndarray
        Dates to simulate for -- presumably as returned by
        :py:func:`make_dates` (TODO: confirm)
    amplitude : float, optional
        Harmonic amplitude of the series (currently unused)
    phase : float, optional
        Harmonic phase of the series (currently unused)

    Returns
    -------
    None
        Nothing is generated yet
    """
    # TODO: implement
    pass
def make_time_series_noise(dates, mean=0., std=1.):
    """ Generate a time series of noise

    Not implemented yet: the body is empty, so every call returns ``None``
    and all arguments are ignored.

    Parameters
    ----------
    dates : np.ndarray
        Dates to simulate for -- presumably as returned by
        :py:func:`make_dates` (TODO: confirm)
    mean : float, optional
        Mean of the planned Gaussian noise (currently unused)
    std : float, optional
        Standard deviation of the planned Gaussian noise (currently unused)

    Returns
    -------
    None
        Nothing is generated yet
    """
    # TODO: implement Gaussian noise with mean/std
    # TODO: add kwarg to parametrize noise from clouds/shadows
    pass