easyvvuq.comparison.validate
Validation by comparing QoI distributions.
1"""Validation by comparing QoI distributions. 2""" 3import numpy as np 4import scipy.stats as st 5from . import BaseComparisonElement 6 7 8__copyright__ = """ 9 10 Copyright 2018 Robin A. Richardson, David W. Wright 11 12 This file is part of EasyVVUQ 13 14 EasyVVUQ is free software: you can redistribute it and/or modify 15 it under the terms of the Lesser GNU General Public License as published by 16 the Free Software Foundation, either version 3 of the License, or 17 (at your option) any later version. 18 19 EasyVVUQ is distributed in the hope that it will be useful, 20 but WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 Lesser GNU General Public License for more details. 23 24 You should have received a copy of the Lesser GNU General Public License 25 along with this program. If not, see <https://www.gnu.org/licenses/>. 26 27""" 28__author__ = 'Jalal Lakhlili' 29__license__ = "LGPL" 30 31 32class ValidateSimilarity(BaseComparisonElement): 33 34 def __init__(self): 35 pass 36 37 def dist(self, p, q): 38 raise NotImplementedError 39 40 def compare(self, dataframe1, dataframe2): 41 """Perform comparison between two lists or arrays 42 of discrete distributions. 43 44 Parameters 45 ---------- 46 dataframe1 : NumPy array or list 47 dataframe2 : NumPy array or list 48 49 Returns 50 ------- 51 A list of distances between two lists of discrete distributions, 52 dataframe1 and dataframe2. 53 """ 54 55 if len(dataframe1) != len(dataframe2): 56 raise RuntimeError("Input dataframe sizes are not equal") 57 58 shape = np.shape(dataframe1) 59 if len(shape) == 2: 60 results = [] 61 for i in range(len(dataframe1)): 62 p1 = np.array(dataframe1[i]) 63 p2 = np.array(dataframe2[i]) 64 d = self.dist(p1, p2) 65 results.append(d) 66 else: 67 p1 = np.array(dataframe1) 68 p2 = np.array(dataframe2) 69 results = self.dist(p1, p2) 70 71 return results 72 73 74class ValidateSimilarityHellinger(ValidateSimilarity): 75 def element_name(self): 76 return "validate_similarity_hellinger" 77 78 def element_version(self): 79 return "0.1" 80 81 def dist(self, p, q): 82 """ Compute Hellinger distance between two discrete probability 83 distributions (PDF). The Hellinger distance metric gives an 84 output in the range [0,1] with values closer to 0 meaning the 85 PDFs are more similar. 86 87 Parameters 88 ---------- 89 p : NumPy array 90 q : NumPy array 91 92 Returns 93 ------- 94 Hellinger distance between distributions p and q. 95 https://en.wikipedia.org/wiki/Hellinger_distance 96 """ 97 p /= p.sum() 98 q /= q.sum() 99 return np.sqrt(1. - np.sqrt(p * q).sum()) 100 101 102class ValidateSimilarityJensenShannon(ValidateSimilarity): 103 def element_name(self): 104 return "validate_similarity_jensen_shannon" 105 106 def element_version(self): 107 return "0.1" 108 109 def dist(self, p, q): 110 """ Compute Jensen-Shannon distance between two discrete 111 probability distributions (PDF). It is based on Kullback–Leibler 112 divergence and gives an output metric un the range [0,1] with 113 values closer to 0 meaning the PDFs are more similar. 114 115 Parameters 116 ---------- 117 p : NumPy array 118 q : NumPy array 119 120 Returns 121 ------- 122 Jensen-Shannon divergence between distributions p and q. 
123 https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence 124 https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence 125 """ 126 p /= p.sum() 127 q /= q.sum() 128 m = 0.5 * (p + q) 129 div = 0.5 * (st.entropy(p, m) + st.entropy(q, m)) 130 return np.sqrt(div / np.log(2)) 131 132 133class ValidateSimilarityWasserstein(ValidateSimilarity): 134 def element_name(self): 135 return "validate_similarity_wasserstein" 136 137 def element_version(self): 138 return "0.1" 139 140 def dist(self, p, q): 141 """ Compute Wasserstein distance between two discrete cumulative 142 distributions (CDF). The Wasserstein distance has an 143 unrestricted range with a lower limit of 0. A smaller distance 144 indicates a stronger similarity between between CFDs. 145 146 Parameters 147 ---------- 148 p : NumPy array 149 q : NumPy array 150 151 Returns 152 ------- 153 Wasserstein distance between distributions p and q. 154 https://en.wikipedia.org/wiki/Wasserstein_metric 155 """ 156 return st.wasserstein_distance(p, q)
class ValidateSimilarity(BaseComparisonElement)
Base class for all EasyVVUQ comparison elements.
compare(dataframe1, dataframe2)
Perform a comparison between two lists or arrays of discrete distributions.

Parameters
    dataframe1 : NumPy array or list
    dataframe2 : NumPy array or list

Returns
    A list of distances between the corresponding discrete distributions in
    dataframe1 and dataframe2, or a single distance if the inputs are
    one-dimensional.
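For orientation, a minimal usage sketch (not part of the library documentation): the histogram values below are made-up bin counts for two QoIs, and the import path simply follows the module path shown above. Because each dist() implementation normalizes its inputs in place with /=, the values should be floating point.

    import numpy as np
    from easyvvuq.comparison.validate import ValidateSimilarityHellinger

    # Two made-up sets of QoI histograms, one row per QoI; the bin counts
    # are normalized into discrete PDFs inside dist().
    hist_a = [[10., 20., 40., 20., 10.],
              [5., 25., 40., 25., 5.]]
    hist_b = [[12., 18., 38., 22., 10.],
              [6., 24., 41., 24., 5.]]

    validator = ValidateSimilarityHellinger()
    distances = validator.compare(hist_a, hist_b)  # one distance per QoI (row)
    print(distances)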
class ValidateSimilarityHellinger(ValidateSimilarity)
dist(p, q)
Compute the Hellinger distance between two discrete probability
distributions (PDFs). The Hellinger distance lies in the range [0, 1],
with values closer to 0 meaning the PDFs are more similar.

Parameters
    p : NumPy array
    q : NumPy array

Returns
    Hellinger distance between distributions p and q.
    See https://en.wikipedia.org/wiki/Hellinger_distance
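As a quick sanity check of the formula above, here is a self-contained NumPy re-implementation (a sketch mirroring the library code, with made-up test vectors):

    import numpy as np

    def hellinger(p, q):
        # Same formula as ValidateSimilarityHellinger.dist: normalize both
        # inputs to PDFs, then take sqrt(1 - sum(sqrt(p_i * q_i))).
        p = np.asarray(p, dtype=float)
        q = np.asarray(q, dtype=float)
        p = p / p.sum()
        q = q / q.sum()
        return np.sqrt(1. - np.sqrt(p * q).sum())

    print(hellinger([1, 2, 3], [1, 2, 3]))  # identical PDFs -> 0.0
    print(hellinger([1, 0, 0], [0, 0, 1]))  # disjoint support -> 1.0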
class ValidateSimilarityJensenShannon(ValidateSimilarity)
dist(p, q)
Compute the Jensen-Shannon distance between two discrete probability
distributions (PDFs). It is based on the Kullback–Leibler divergence and
gives a metric in the range [0, 1], with values closer to 0 meaning the
PDFs are more similar.

Parameters
    p : NumPy array
    q : NumPy array

Returns
    Jensen-Shannon distance between distributions p and q.
    See https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
    and https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
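For cross-checking, the same quantity can be reproduced with SciPy's own helper. The snippet below is a sketch with made-up probability vectors and assumes SciPy >= 1.2, which provides scipy.spatial.distance.jensenshannon:

    import numpy as np
    import scipy.stats as st
    from scipy.spatial.distance import jensenshannon  # SciPy >= 1.2

    p = np.array([0.1, 0.4, 0.5])
    q = np.array([0.2, 0.3, 0.5])

    # Same computation as ValidateSimilarityJensenShannon.dist for
    # already-normalized inputs.
    m = 0.5 * (p + q)
    d_module = np.sqrt(0.5 * (st.entropy(p, m) + st.entropy(q, m)) / np.log(2))

    # SciPy's helper with base=2 yields the same [0, 1]-normalized distance.
    d_scipy = jensenshannon(p, q, base=2)
    print(d_module, d_scipy)  # should agree to floating-point precision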
class ValidateSimilarityWasserstein(ValidateSimilarity)
dist(p, q)
Compute the Wasserstein distance between two discrete cumulative
distributions (CDFs). The Wasserstein distance has an unrestricted range
with a lower limit of 0; a smaller distance indicates a stronger
similarity between the CDFs.

Parameters
    p : NumPy array
    q : NumPy array

Returns
    Wasserstein distance between distributions p and q.
    See https://en.wikipedia.org/wiki/Wasserstein_metric
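A minimal usage sketch with made-up sample sets (import path assumed as above). Note that scipy.stats.wasserstein_distance, to which dist() forwards its arguments unchanged, treats them as observed values of two empirical distributions, so the distance between a sample set and a copy shifted by c is exactly |c|:

    import numpy as np
    from easyvvuq.comparison.validate import ValidateSimilarityWasserstein

    rng = np.random.default_rng(42)

    # Two made-up one-dimensional QoI sample sets; with 1-D input, compare()
    # makes a single dist() call, which forwards to
    # scipy.stats.wasserstein_distance.
    samples_a = rng.normal(loc=0.0, scale=1.0, size=1000)
    samples_b = rng.normal(loc=0.1, scale=1.1, size=1000)

    validator = ValidateSimilarityWasserstein()
    print(validator.compare(samples_a, samples_b))        # small positive value
    print(validator.compare(samples_a, samples_a + 1.0))  # 1.0 (pure shift by 1)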