-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistributions.py
79 lines (56 loc) · 2.14 KB
/
distributions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
:mod:`distributions` -- calculate empirical/fitted distributions for obs
========================================================================
.. module:: distributions
:synopsis: Calculate empirical and/or fitted distributions for data
.. moduleauthor:: Craig Arthur <[email protected]>
"""
import numpy as np
from scipy.stats import genpareto
from statsmodels.nonparametric.kde import KDEUnivariate
import logging
LOG = logging.getLogger(__name__)
def empiricalPDF(data):
    """
    Evaluate a probability density function using kernel density
    estimation for input data.

    The data are sorted in ascending order, a
    :class:`statsmodels.nonparametric.kde.KDEUnivariate` estimator is
    fitted to them with its default settings, and the density is then
    evaluated at each of the sorted data values.

    :param data: :class:`numpy.ndarray` of data values.

    :returns: :class:`numpy.ndarray` of PDF values at the sorted data
              points. If evaluating the density raises
              :class:`MemoryError`, an array of zeros of the same
              length is returned instead and a warning is logged.
    """
    LOG.debug("Calculating empirical PDF")
    sortedmax = np.sort(data)
    kde = KDEUnivariate(sortedmax)
    kde.fit()
    try:
        res = kde.evaluate(sortedmax)
    except MemoryError:
        # Best-effort fallback: keep returning zeros so callers still get
        # an array of the expected length, but record that these values
        # are a placeholder and not an estimated density.
        LOG.warning("MemoryError evaluating empirical PDF for %d values; "
                    "returning zeros", len(sortedmax))
        res = np.zeros(len(sortedmax))
    return res
def fittedPDF(data, mu, sigma, xi):
    """
    Calculate probability density function values of a fitted
    generalised Pareto distribution (GPD) for the data values that
    exceed the threshold.

    Only values strictly greater than ``mu`` are retained, and they are
    sorted in ascending order before the density is evaluated.

    :param data: :class:`numpy.ndarray` of data values.
    :param float mu: Location parameter of the fitted GPD.
    :param float sigma: Shape parameter of the fitted GPD.
    :param float xi: Scale parameter of the fitted GPD.

    :returns: :class:`numpy.ndarray` of PDF values at the sorted data
              points above ``mu``.
    """
    LOG.debug("Calculating fitted GPD PDF")
    exceedances = data[data > mu]
    ordered = np.sort(exceedances)
    # ``sigma`` is handed to scipy as the positional shape argument and
    # ``xi`` as the ``scale`` keyword, matching the parameter docs above.
    return genpareto.pdf(ordered, sigma, loc=mu, scale=xi)
def generateDistributions(data, mu, sigma, xi):
    """
    Generate empirical and fitted PDF values for selected data, based on
    threshold, shape and scale parameters.

    The empirical PDF is evaluated on the data values strictly greater
    than ``mu``; the fitted PDF is evaluated by :func:`fittedPDF`, which
    applies the same threshold to ``data`` itself.

    :param data: :class:`numpy.ndarray` of data values.
    :param float mu: Location parameter of the fitted GPD.
    :param float sigma: Shape parameter of the fitted GPD.
    :param float xi: Scale parameter of the fitted GPD.

    :returns: tuple ``(emppdf, gpdf)`` holding the empirical PDF values
              and the fitted GPD PDF values respectively.

    :raises ValueError: if ``mu`` is greater than the maximum value in
                        ``data``.
    """
    if mu > data.max():
        raise ValueError("Threshold greater than maximum data value")

    # empiricalPDF takes the data array as its only argument, so the
    # threshold is applied here and not passed through.
    emppdf = empiricalPDF(data[data > mu])
    gpdf = fittedPDF(data, mu, sigma, xi)
    return emppdf, gpdf