Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

# -*- coding: utf-8 -*- 

""" Methods for estimating structural breaks in time series regressions 

 

TODO: extract and move Chow test from "commission test" over to here 

""" 

from __future__ import division 

 

import logging 

 

import numpy as np 

import pandas as pd 

from scipy.optimize import brentq 

from scipy import stats 

from scipy.stats import norm 

import xarray as xr 

 

from .core import PANDAS_LIKE, StructuralBreakResult 

from .compat import jit 

from .stats import std 

from .recresid_ import _recresid 

 

logger = logging.getLogger(__name__) 

 

pnorm = norm.cdf 

 

 

# OLS-CUSUM 

# dict: CUSUM OLS critical values 

CUSUM_OLS_CRIT = { 

0.01: 1.63, 

0.05: 1.36, 

0.10: 1.22 

} 

 

 

@jit(nopython=True, nogil=True) 

def _cusum(resid, ddof): 

n = resid.size 

df = n - ddof 

 

sigma = ((resid ** 2).sum() / df * n) ** 0.5 

process = resid.cumsum() / sigma 

return process 

 

 

@jit(nopython=True, nogil=True) 

def _cusum_OLS(X, y): 

n, p = X.shape 

beta = np.linalg.lstsq(X, y)[0] 

resid = np.dot(X, beta) - y 

 

process = _cusum(resid, p) 

_process = np.abs(process) 

idx = _process.argmax() 

score = _process[idx] 

 

return process, score, idx 

 

 

def cusum_OLS(X, y, alpha=0.05): 

r""" OLS-CUSUM test for structural breaks 

 

Tested against R's ``strucchange`` package and is faster than 

the equivalent function in the ``statsmodels`` Python package when 

Numba is installed. 

 

The OLS-CUSUM test statistic, based on a single OLS regression, is defined 

as: 

 

.. math:: 

 

W_n^0(t) = \frac{1}{\hat{\sigma}\sqrt{n}} 

\sum_{i=1}^{n}{\hat{\mu_i}} 

 

Parameters 

---------- 

X : array like 

2D (n_obs x n_features) design matrix 

y : array like 

1D (n_obs) indepdent variable 

alpha : float 

Test threshold (either 0.01, 0.05, or 0.10) from Ploberger and 

Krämer (1992) 

 

Returns 

------- 

StructuralBreakResult 

A named tuple include the the test name,change point (index of ``y``), 

the test ``score`` and ``pvalue``, and a boolean testing if the 

CUSUM score is significant at the given ``alpha`` 

 

""" 

_X = X.values if isinstance(X, PANDAS_LIKE) else X 

_y = y.values.ravel() if isinstance(y, PANDAS_LIKE) else y.ravel() 

 

process, score, idx = _cusum_OLS(_X, _y) 

if isinstance(y, PANDAS_LIKE): 

if isinstance(y, (pd.Series, pd.DataFrame)): 

index = y.index 

idx = index[idx] 

elif isinstance(y, xr.DataArray): 

index = y.to_series().index 

idx = index[idx] 

process = pd.Series(data=process, index=index, name='OLS-CUSUM') 

 

# crit = stats.kstwobign.isf(alpha) ~70usec 

crit = CUSUM_OLS_CRIT[alpha] 

pval = stats.kstwobign.sf(score) 

 

return StructuralBreakResult(method='OLS-CUSUM', 

index=idx, 

score=score, 

process=process, 

boundary=crit, 

pvalue=pval, 

signif=score > crit) 

 

 

# REC-CUSUM 

def _brownian_motion_pvalue(x, k): 

""" Return pvalue for some given test statistic """ 

# TODO: Make generic, add "type='Brownian Motion'"? 

if x < 0.3: 

p = 1 - 0.1464 * x 

else: 

p = 2 * (1 - 

pnorm(3 * x) + 

np.exp(-4 * x ** 2) * (pnorm(x) + pnorm(5 * x) - 1) - 

np.exp(-16 * x ** 2) * (1 - pnorm(x))) 

return 1 - (1 - p) ** k 

 

 

def _cusum_rec_test_crit(alpha): 

""" Return critical test statistic value for some alpha """ 

return brentq(lambda _x: _brownian_motion_pvalue(_x, 1) - alpha, 0, 20) 

 

 

@jit(nopython=True, nogil=True) 

def _cusum_rec_boundary(x, alpha=0.05): 

""" Equivalent to ``strucchange::boundary.efp``` for Rec-CUSUM """ 

n = x.ravel().size 

bound = _cusum_rec_test_crit(alpha) 

boundary = (bound + (2 * bound * np.arange(0, n) / (n - 1))) 

 

return boundary 

 

 

@jit(nopython=True, nogil=True) 

def _cusum_rec_efp(X, y): 

""" Equivalent to ``strucchange::efp`` for Rec-CUSUM """ 

# Run "efp" 

n, k = X.shape 

w = _recresid(X, y, k)[k:] 

sigma = std(w, ddof=1) ** 0.5 

w = np.concatenate((np.array([0]), w)) 

return np.cumsum(w) / (sigma * (n - k) ** 0.5) 

 

 

@jit(nopython=True, nogil=True) 

def _cusum_rec_sctest(x): 

""" Equivalent to ``strucchange::sctest`` for Rec-CUSUM """ 

x = x[1:] 

j = np.linspace(0, 1, x.size + 1)[1:] 

x = x * 1 / (1 + 2 * j) 

stat = np.abs(x).max() 

 

return stat 

 

 

def cusum_recursive(X, y, alpha=0.05): 

r""" Rec-CUSUM test for structural breaks 

 

Tested against R's ``strucchange`` package. 

 

The REC-CUSUM test, based on the recursive residuals, is defined as: 

 

.. math:: 

 

W_n(t) = \frac{1}{\tilde{\sigma}\sqrt{n}} 

\sum_{i=k+1}^{k+(n-k)}{\tilde{\mu_i}} 

 

 

Critical values for this test statistic are taken from:: 

 

A. Zeileis. p values and alternative boundaries for CUSUM tests. 

Working Paper 78, SFB "Adaptive Information Systems and Modelling 

in Economics and Management Science", December 2000b. 

 

Parameters 

---------- 

X : array like 

2D (n_obs x n_features) design matrix 

y : array like 

1D (n_obs) indepdent variable 

alpha : float 

Test threshold 

 

Returns 

------- 

StructuralBreakResult 

A named tuple include the the test name, change point (index of 

``y``), the test ``score`` and ``pvalue``, and a boolean testing 

if the CUSUM score is significant at the given ``alpha`` 

 

""" 

_X = X.values if isinstance(X, PANDAS_LIKE) else X 

_y = y.values.ravel() if isinstance(y, PANDAS_LIKE) else y.ravel() 

 

process = _cusum_rec_efp(_X, _y) 

stat = _cusum_rec_sctest(process) 

stat_pvalue = _brownian_motion_pvalue(stat, 1) 

 

pvalue_crit = _cusum_rec_test_crit(alpha) 

if stat_pvalue < alpha: 

boundary = _cusum_rec_boundary(process, alpha) 

idx = np.where(np.abs(process) > boundary)[0].min() 

else: 

idx = np.abs(process).max() 

 

if isinstance(y, PANDAS_LIKE): 

if isinstance(y, (pd.Series, pd.DataFrame)): 

index = y.index 

idx = index[idx] 

elif isinstance(y, xr.DataArray): 

index = y.to_series().index 

idx = index[idx] 

process = pd.Series(data=process, index=index, name='REC-CUSUM') 

boundary = pd.Series(data=boundary, index=index, name='Boundary') 

 

return StructuralBreakResult(method='REC-CUSUM', 

process=process, 

boundary=boundary, 

index=idx, 

pvalue=stat_pvalue, 

score=stat, 

signif=stat_pvalue < pvalue_crit)