easyvvuq.comparison.validate

Validation by comparing QoI distributions.

  1"""Validation by comparing QoI distributions.
  2"""
  3import numpy as np
  4import scipy.stats as st
  5from . import BaseComparisonElement
  6
  7
  8__copyright__ = """
  9
 10    Copyright 2018 Robin A. Richardson, David W. Wright
 11
 12    This file is part of EasyVVUQ
 13
 14    EasyVVUQ is free software: you can redistribute it and/or modify
 15    it under the terms of the Lesser GNU General Public License as published by
 16    the Free Software Foundation, either version 3 of the License, or
 17    (at your option) any later version.
 18
 19    EasyVVUQ is distributed in the hope that it will be useful,
 20    but WITHOUT ANY WARRANTY; without even the implied warranty of
 21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 22    Lesser GNU General Public License for more details.
 23
 24    You should have received a copy of the Lesser GNU General Public License
 25    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 26
 27"""
 28__author__ = 'Jalal Lakhlili'
 29__license__ = "LGPL"
 30
 31
 32class ValidateSimilarity(BaseComparisonElement):
 33
 34    def __init__(self):
 35        pass
 36
 37    def dist(self, p, q):
 38        raise NotImplementedError
 39
 40    def compare(self, dataframe1, dataframe2):
 41        """Perform comparison between two lists or arrays
 42        of discrete distributions.
 43
 44        Parameters
 45        ----------
 46        dataframe1 : NumPy array or list
 47        dataframe2 : NumPy array or list
 48
 49        Returns
 50        -------
 51        A list of distances between two lists of discrete distributions,
 52        dataframe1 and dataframe2.
 53        """
 54
 55        if len(dataframe1) != len(dataframe2):
 56            raise RuntimeError("Input dataframe sizes are not equal")
 57
 58        shape = np.shape(dataframe1)
 59        if len(shape) == 2:
 60            results = []
 61            for i in range(len(dataframe1)):
 62                p1 = np.array(dataframe1[i])
 63                p2 = np.array(dataframe2[i])
 64                d = self.dist(p1, p2)
 65                results.append(d)
 66        else:
 67            p1 = np.array(dataframe1)
 68            p2 = np.array(dataframe2)
 69            results = self.dist(p1, p2)
 70
 71        return results
 72
 73
 74class ValidateSimilarityHellinger(ValidateSimilarity):
 75    def element_name(self):
 76        return "validate_similarity_hellinger"
 77
 78    def element_version(self):
 79        return "0.1"
 80
 81    def dist(self, p, q):
 82        """ Compute Hellinger distance between two discrete probability
 83        distributions (PDF). The Hellinger distance metric gives an
 84        output in the range [0,1] with values closer to 0 meaning the
 85        PDFs are more similar.
 86
 87        Parameters
 88        ----------
 89        p : NumPy array
 90        q : NumPy array
 91
 92        Returns
 93        -------
 94        Hellinger distance between distributions p and q.
 95        https://en.wikipedia.org/wiki/Hellinger_distance
 96        """
 97        p /= p.sum()
 98        q /= q.sum()
 99        return np.sqrt(1. - np.sqrt(p * q).sum())
100
101
class ValidateSimilarityJensenShannon(ValidateSimilarity):
    """Validation element using the Jensen-Shannon distance between PDFs."""

    def element_name(self):
        return "validate_similarity_jensen_shannon"

    def element_version(self):
        return "0.1"

    def dist(self, p, q):
        """ Compute Jensen-Shannon distance between two discrete
        probability distributions (PDF). It is based on Kullback–Leibler
        divergence and gives an output metric in the range [0,1] with
        values closer to 0 meaning the PDFs are more similar.

        Parameters
        ----------
        p : NumPy array
        q : NumPy array

        Returns
        -------
        Jensen-Shannon divergence between distributions p and q.
        https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
        https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
        """
        # Normalize out-of-place: the previous in-place `p /= p.sum()`
        # mutated the caller's arrays and raised a casting TypeError for
        # integer-dtype input.
        p = np.asarray(p, dtype=float)
        q = np.asarray(q, dtype=float)
        p = p / p.sum()
        q = q / q.sum()
        # Mixture distribution for the symmetrized KL divergence.
        m = 0.5 * (p + q)
        div = 0.5 * (st.entropy(p, m) + st.entropy(q, m))
        # Divide by log(2) so the distance is normalized to [0, 1].
        return np.sqrt(div / np.log(2))
131
132
class ValidateSimilarityWasserstein(ValidateSimilarity):
    """Validation element using the first Wasserstein (earth mover's)
    distance between CDFs."""

    def element_name(self):
        return "validate_similarity_wasserstein"

    def element_version(self):
        return "0.1"

    def dist(self, p, q):
        """Compute Wasserstein distance between two discrete cumulative
        distributions (CDF). The Wasserstein distance has an
        unrestricted range with a lower limit of 0. A smaller distance
        indicates a stronger similarity between CDFs.

        Parameters
        ----------
        p : NumPy array
        q : NumPy array

        Returns
        -------
        Wasserstein distance between distributions p and q.
        https://en.wikipedia.org/wiki/Wasserstein_metric
        """
        # Delegate directly to SciPy's implementation.
        return st.wasserstein_distance(p, q)
class ValidateSimilarity(easyvvuq.comparison.base.BaseComparisonElement):
33class ValidateSimilarity(BaseComparisonElement):
34
35    def __init__(self):
36        pass
37
38    def dist(self, p, q):
39        raise NotImplementedError
40
41    def compare(self, dataframe1, dataframe2):
42        """Perform comparison between two lists or arrays
43        of discrete distributions.
44
45        Parameters
46        ----------
47        dataframe1 : NumPy array or list
48        dataframe2 : NumPy array or list
49
50        Returns
51        -------
52        A list of distances between two lists of discrete distributions,
53        dataframe1 and dataframe2.
54        """
55
56        if len(dataframe1) != len(dataframe2):
57            raise RuntimeError("Input dataframe sizes are not equal")
58
59        shape = np.shape(dataframe1)
60        if len(shape) == 2:
61            results = []
62            for i in range(len(dataframe1)):
63                p1 = np.array(dataframe1[i])
64                p2 = np.array(dataframe2[i])
65                d = self.dist(p1, p2)
66                results.append(d)
67        else:
68            p1 = np.array(dataframe1)
69            p2 = np.array(dataframe2)
70            results = self.dist(p1, p2)
71
72        return results

Baseclass for all EasyVVUQ comparison elements.

Attributes

def dist(self, p, q):
38    def dist(self, p, q):
39        raise NotImplementedError
def compare(self, dataframe1, dataframe2):
41    def compare(self, dataframe1, dataframe2):
42        """Perform comparison between two lists or arrays
43        of discrete distributions.
44
45        Parameters
46        ----------
47        dataframe1 : NumPy array or list
48        dataframe2 : NumPy array or list
49
50        Returns
51        -------
52        A list of distances between two lists of discrete distributions,
53        dataframe1 and dataframe2.
54        """
55
56        if len(dataframe1) != len(dataframe2):
57            raise RuntimeError("Input dataframe sizes are not equal")
58
59        shape = np.shape(dataframe1)
60        if len(shape) == 2:
61            results = []
62            for i in range(len(dataframe1)):
63                p1 = np.array(dataframe1[i])
64                p2 = np.array(dataframe2[i])
65                d = self.dist(p1, p2)
66                results.append(d)
67        else:
68            p1 = np.array(dataframe1)
69            p2 = np.array(dataframe2)
70            results = self.dist(p1, p2)
71
72        return results

Perform comparison between two lists or arrays of discrete distributions.

Parameters
  • dataframe1 (NumPy array or list):

  • dataframe2 (NumPy array or list):

Returns
  • A list of distances between two lists of discrete distributions,
  • dataframe1 and dataframe2.
class ValidateSimilarityHellinger(ValidateSimilarity):
 75class ValidateSimilarityHellinger(ValidateSimilarity):
 76    def element_name(self):
 77        return "validate_similarity_hellinger"
 78
 79    def element_version(self):
 80        return "0.1"
 81
 82    def dist(self, p, q):
 83        """ Compute Hellinger distance between two discrete probability
 84        distributions (PDF). The Hellinger distance metric gives an
 85        output in the range [0,1] with values closer to 0 meaning the
 86        PDFs are more similar.
 87
 88        Parameters
 89        ----------
 90        p : NumPy array
 91        q : NumPy array
 92
 93        Returns
 94        -------
 95        Hellinger distance between distributions p and q.
 96        https://en.wikipedia.org/wiki/Hellinger_distance
 97        """
 98        p /= p.sum()
 99        q /= q.sum()
100        return np.sqrt(1. - np.sqrt(p * q).sum())

Baseclass for all EasyVVUQ comparison elements.

Attributes

def element_name(self):
76    def element_name(self):
77        return "validate_similarity_hellinger"
def element_version(self):
79    def element_version(self):
80        return "0.1"
def dist(self, p, q):
 82    def dist(self, p, q):
 83        """ Compute Hellinger distance between two discrete probability
 84        distributions (PDF). The Hellinger distance metric gives an
 85        output in the range [0,1] with values closer to 0 meaning the
 86        PDFs are more similar.
 87
 88        Parameters
 89        ----------
 90        p : NumPy array
 91        q : NumPy array
 92
 93        Returns
 94        -------
 95        Hellinger distance between distributions p and q.
 96        https://en.wikipedia.org/wiki/Hellinger_distance
 97        """
 98        p /= p.sum()
 99        q /= q.sum()
100        return np.sqrt(1. - np.sqrt(p * q).sum())

Compute Hellinger distance between two discrete probability distributions (PDF). The Hellinger distance metric gives an output in the range [0,1] with values closer to 0 meaning the PDFs are more similar.

Parameters
  • p (NumPy array):

  • q (NumPy array):

Returns
  • Hellinger distance between distributions p and q.
  • https://en.wikipedia.org/wiki/Hellinger_distance
class ValidateSimilarityJensenShannon(ValidateSimilarity):
103class ValidateSimilarityJensenShannon(ValidateSimilarity):
104    def element_name(self):
105        return "validate_similarity_jensen_shannon"
106
107    def element_version(self):
108        return "0.1"
109
110    def dist(self, p, q):
111        """ Compute Jensen-Shannon distance between two discrete
112        probability distributions (PDF). It is based on Kullback–Leibler
113        divergence and gives an output metric un the range [0,1] with
114        values closer to 0 meaning the PDFs are more similar.
115
116        Parameters
117        ----------
118        p : NumPy array
119        q : NumPy array
120
121        Returns
122        -------
123        Jensen-Shannon divergence between distributions p and q.
124        https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
125        https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
126        """
127        p /= p.sum()
128        q /= q.sum()
129        m = 0.5 * (p + q)
130        div = 0.5 * (st.entropy(p, m) + st.entropy(q, m))
131        return np.sqrt(div / np.log(2))

Baseclass for all EasyVVUQ comparison elements.

Attributes

def element_name(self):
104    def element_name(self):
105        return "validate_similarity_jensen_shannon"
def element_version(self):
107    def element_version(self):
108        return "0.1"
def dist(self, p, q):
110    def dist(self, p, q):
111        """ Compute Jensen-Shannon distance between two discrete
112        probability distributions (PDF). It is based on Kullback–Leibler
113        divergence and gives an output metric un the range [0,1] with
114        values closer to 0 meaning the PDFs are more similar.
115
116        Parameters
117        ----------
118        p : NumPy array
119        q : NumPy array
120
121        Returns
122        -------
123        Jensen-Shannon divergence between distributions p and q.
124        https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
125        https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
126        """
127        p /= p.sum()
128        q /= q.sum()
129        m = 0.5 * (p + q)
130        div = 0.5 * (st.entropy(p, m) + st.entropy(q, m))
131        return np.sqrt(div / np.log(2))

Compute Jensen-Shannon distance between two discrete probability distributions (PDF). It is based on Kullback–Leibler divergence and gives an output metric in the range [0,1] with values closer to 0 meaning the PDFs are more similar.

Parameters
  • p (NumPy array):

  • q (NumPy array):

Returns
  • Jensen-Shannon divergence between distributions p and q.
  • https (//en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence):

  • https (//en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence):

class ValidateSimilarityWasserstein(ValidateSimilarity):
134class ValidateSimilarityWasserstein(ValidateSimilarity):
135    def element_name(self):
136        return "validate_similarity_wasserstein"
137
138    def element_version(self):
139        return "0.1"
140
141    def dist(self, p, q):
142        """ Compute Wasserstein distance between two discrete cumulative
143        distributions (CDF). The Wasserstein distance has an
144        unrestricted range with a lower limit of 0. A smaller distance
145        indicates a stronger similarity between between CFDs.
146
147        Parameters
148        ----------
149        p : NumPy array
150        q : NumPy array
151
152        Returns
153        -------
154        Wasserstein distance between distributions p and q.
155        https://en.wikipedia.org/wiki/Wasserstein_metric
156        """
157        return st.wasserstein_distance(p, q)

Baseclass for all EasyVVUQ comparison elements.

Attributes

def element_name(self):
135    def element_name(self):
136        return "validate_similarity_wasserstein"
def element_version(self):
138    def element_version(self):
139        return "0.1"
def dist(self, p, q):
141    def dist(self, p, q):
142        """ Compute Wasserstein distance between two discrete cumulative
143        distributions (CDF). The Wasserstein distance has an
144        unrestricted range with a lower limit of 0. A smaller distance
145        indicates a stronger similarity between between CFDs.
146
147        Parameters
148        ----------
149        p : NumPy array
150        q : NumPy array
151
152        Returns
153        -------
154        Wasserstein distance between distributions p and q.
155        https://en.wikipedia.org/wiki/Wasserstein_metric
156        """
157        return st.wasserstein_distance(p, q)

Compute Wasserstein distance between two discrete cumulative distributions (CDF). The Wasserstein distance has an unrestricted range with a lower limit of 0. A smaller distance indicates a stronger similarity between CDFs.

Parameters
  • p (NumPy array):

  • q (NumPy array):

Returns
  • Wasserstein distance between distributions p and q.
  • https://en.wikipedia.org/wiki/Wasserstein_metric