Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

""" Commonly used, basic statistics functions 

""" 

import math 

 

import numpy as np 

 

from .compat import HAS_JIT, jit 

 

 

@jit(nopython=True, nogil=True) 

def mad(x, c=1.4826): 

""" Calculate Median-Absolute-Deviation (MAD) of x 

 

Parameters 

---------- 

x : array-like 

1D array 

c : float 

Scale factor to get to ~standard normal (default: 1.4826) 

(i.e. 1 / 0.75iCDF ~= 1.4826) 

 

Returns 

------- 

float 

MAD 'robust' variance estimate 

 

Reference 

--------- 

http://en.wikipedia.org/wiki/Median_absolute_deviation 

""" 

return np.median(np.fabs(x - np.median(x))) * 1.4826 

 

 

# Numba-compatible (if we have it) var/std with ddof != 0 

@jit(nopython=True, nogil=True) 

def _var(x, ddof=0): 

""" Calculate variance for an array 

 

Uses Welford's algorithm[1] for online variance calculation. 

 

Parameters 

---------- 

x : array-like 

The data 

ddof : int 

Degrees of freedom 

 

References 

---------- 

.. [1] https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm 

""" 

n = 0.0 

mean_ = M2 = 0.0 

 

for x_ in x: 

n += 1 

delta = x_ - mean_ 

mean_ += delta / n 

delta2 = x_ - mean_ 

M2 += delta * delta2 

return M2 / max(n - ddof, 0) 

 

 

@jit(nopython=True, nogil=True) 

def _std(x, ddof=0): 

return _var(x, ddof=ddof) ** 0.5 

 

 

# ONLY USE THESE IF WE HAVE NUMBA 

if HAS_JIT: 

var = _var 

std = _std 

else: 

var = np.var 

std = np.std 

 

 

# Model criteria 

@jit(nopython=True, nogil=True) 

def rmse(resid): 

""" Calculate RMSE from either y and yhat or residuals 

 

Parameters 

---------- 

resid : np.ndarray 

Residuals 

 

Returns 

------- 

float 

Root mean squared error 

""" 

return ((resid ** 2).sum() / y.size) ** 0.5 

 

 

# INFORMATION CRITERION 

def AIC(loglik, k): 

""" Akaike Information Criterion 

 

Parameters 

---------- 

loglik : float 

Log likelihood 

k : int 

Parameters estimated 

 

Returns 

------- 

float 

AIC, the smaller the better 

""" 

return -2 * loglik + 2 * k 

 

 

def BIC(loglik, k, n): 

""" Bayesian Information Criterion 

 

Parameters 

---------- 

loglik : float 

Log likelihood 

k : int 

Parameters estimated 

n : int 

Number of observations 

 

Parameters 

---------- 

float 

BIC, the smaller the better 

""" 

return -2 * loglik + math.log(n) * k