easyvvuq.analysis.qmc_analysis
Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.
Please refer to the article below for the basic approach used here. https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis
1"""Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis. 2 3Please refer to the article below for the basic approach used here. 4https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis 5""" 6import logging 7import numpy as np 8from easyvvuq import OutputType 9from .base import BaseAnalysisElement 10from easyvvuq.sampling import QMCSampler 11from .results import AnalysisResults 12from easyvvuq.sampling import MCSampler 13from .ensemble_boot import confidence_interval 14 15__author__ = 'Jalal Lakhlili' 16__license__ = "LGPL" 17 18logger = logging.getLogger(__name__) 19 20 21class QMCAnalysisResults(AnalysisResults): 22 """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class 23 documentation for details on using it. 24 """ 25 26 def _get_sobols_first(self, qoi, input_): 27 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first']) 28 return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0] 29 30 def _get_sobols_total(self, qoi, input_): 31 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total']) 32 return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0] 33 34 def _get_sobols_second(self, qoi, input_): 35 raise NotImplementedError 36 37 def _get_sobols_first_conf(self, qoi, input_): 38 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first']) 39 return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0], 40 raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]] 41 42 def _get_sobols_total_conf(self, qoi, input_): 43 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total']) 44 return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0], 45 raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]] 46 47 def supported_stats(self): 48 """Types of statistics supported by the describe method. 49 50 Returns 51 ------- 52 list of str 53 """ 54 return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%'] 55 56 def _describe(self, qoi, statistic): 57 if statistic not in self.supported_stats(): 58 raise NotImplementedError 59 if statistic == '1%': 60 return self.raw_data['percentiles'][qoi]['p1'] 61 if statistic == '10%': 62 return self.raw_data['percentiles'][qoi]['p10'] 63 elif statistic == '50%': 64 return self.raw_data['percentiles'][qoi]['p50'] 65 elif statistic == '90%': 66 return self.raw_data['percentiles'][qoi]['p90'] 67 elif statistic == '99%': 68 return self.raw_data['percentiles'][qoi]['p99'] 69 else: 70 return self.raw_data['statistical_moments'][qoi][statistic][0] 71 72 73class QMCAnalysis(BaseAnalysisElement): 74 def __init__(self, sampler, qoi_cols=None): 75 """Analysis element for Quasi-Monte Carlo (QMC). 76 77 Parameters 78 ---------- 79 sampler : easyvvuq.sampling.qmc.QMCSampler 80 Sampler used to initiate the QMC analysis 81 qoi_cols : list or None 82 Column names for quantities of interest (for which analysis is to be 83 performed). 84 """ 85 if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler): 86 raise RuntimeError( 87 'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component') 88 if qoi_cols is None: 89 self.qoi_cols = list(sampler.vary.get_keys()) 90 else: 91 self.qoi_cols = qoi_cols 92 self.output_type = OutputType.SUMMARY 93 self.sampler = sampler 94 95 def element_name(self): 96 """Name for this element. 97 98 Return 99 ------ 100 str: 101 "QMC_Analysis" 102 """ 103 return "QMC_Analysis" 104 105 def element_version(self): 106 """Version of this element. 107 108 Return 109 ------ 110 str: 111 Element version. 112 """ 113 return "0.2" 114 115 def contains_nan(self, values): 116 """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if 117 any there are at least one occurrence of ``None`` or ``numpy.nan``. 118 Parameters 119 ---------- 120 values : array_like, list, number 121 `values` where to check for occurrences of ``None`` or ``np.nan``. 122 Can be irregular and have any number of nested elements. 123 Returns 124 ------- 125 bool 126 ``True`` if `values` has at least one occurrence of ``None`` or 127 ``numpy.nan``. 128 """ 129 # To speed up we first try the fast option np.any(np.isnan(values)) 130 try: 131 return np.any(np.isnan(values)) 132 except (ValueError, TypeError): 133 if values is None or values is np.nan: 134 return True 135 # To solve the problem of float/int as well as numpy int/flaot 136 elif np.isscalar(values) and np.isnan(values): 137 return True 138 elif hasattr(values, "__iter__"): 139 for value in values: 140 if self.contains_nan(value): 141 return True 142 143 return False 144 else: 145 return False 146 147 def create_mask(self, samples): 148 """ 149 Mask samples that do not give results (anything but np.nan or None). 150 Parameters 151 ---------- 152 samples : array_like 153 Evaluations for the model. 154 Returns 155 ------- 156 masked_samples : list 157 The evaluations that have results (not numpy.nan or None). 158 mask : boolean array 159 The mask itself, used to create the masked arrays. 160 """ 161 masked_samples = [] 162 mask = np.ones(len(samples), dtype=bool) 163 164 for i, result in enumerate(samples): 165 # if np.any(np.isnan(result)): 166 if self.contains_nan(result): 167 mask[i] = False 168 else: 169 masked_samples.append(result) 170 171 return masked_samples, mask 172 173 def analyse(self, data_frame): 174 """Perform QMC analysis on a given pandas DataFrame. 175 176 Parameters 177 ---------- 178 data_frame : pandas DataFrame 179 Input data for analysis. 180 181 Returns 182 ------- 183 easyvvuq.analysis.qmc.QMCAnalysisResults 184 AnalysisResults object for QMC. 185 """ 186 if data_frame.empty: 187 raise RuntimeError( 188 "No data in data frame passed to analyse element") 189 190 qoi_cols = self.qoi_cols 191 192 results = { 193 'statistical_moments': {k: {} for k in qoi_cols}, 194 'percentiles': {k: {} for k in qoi_cols}, 195 'sobols_first': {k: {} for k in qoi_cols}, 196 'sobols_total': {k: {} for k in qoi_cols}, 197 'conf_sobols_first': {k: {} for k in qoi_cols}, 198 'conf_sobols_total': {k: {} for k in qoi_cols} 199 } 200 201 # Extract output values for each quantity of interest from Dataframe 202 samples = self.get_samples(data_frame) 203 204 # Compute descriptive statistics for each quantity of interest 205 for k in qoi_cols: 206 # Find NaNs and create a mask excluding these samples from the analysis 207 # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532 208 masked_samples, mask = self.create_mask(samples[k]) 209 210 results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0), 211 'var': np.var(masked_samples, axis=0), 212 'std': np.std(masked_samples, axis=0), 213 'min': np.min(masked_samples, axis=0), 214 'max': np.max(masked_samples, axis=0), 215 'median': np.median(masked_samples, axis=0), 216 } 217 results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0], 218 'p10': np.percentile(masked_samples, 10, 0)[0], 219 'p50': np.percentile(masked_samples, 50, 0)[0], 220 'p90': np.percentile(masked_samples, 90, 0)[0], 221 'p99': np.percentile(masked_samples, 99, 0)[0]} 222 223 # Replace Nan values by the mean before proceeding with the SA 224 indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean 225 for i in indices: 226 samples[k][i] = results['statistical_moments'][k]['mean'] 227 228 if not np.all(mask): 229 print("Warning: QoI \"{}\" only yields ".format(k) + 230 "results for {}/{} ".format(sum(mask), len(mask)) + 231 "parameter combinations. " + 232 "Runs {} are not valid. ".format(indices+1) + 233 "NaN results are set to the mean when calculating the Sobol indices. " + 234 "This might affect the Sobol indices.") 235 236 sobols_first, conf_first, sobols_total, conf_total = \ 237 self.sobol_bootstrap(samples[k]) 238 results['sobols_first'][k] = sobols_first 239 results['sobols_total'][k] = sobols_total 240 results['conf_sobols_first'][k] = conf_first 241 results['conf_sobols_total'][k] = conf_total 242 243 return QMCAnalysisResults(raw_data=results, samples=data_frame, 244 qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys())) 245 246 def get_samples(self, data_frame): 247 """ 248 Converts the Pandas dataframe into a dictionary. 249 250 Parameters 251 ---------- 252 data_frame : pandas DataFrame 253 the EasyVVUQ Pandas dataframe from collation. 254 255 Returns 256 ------- 257 dict : 258 A dictionary with the QoI names as keys. 259 Each element is a list of code evaluations. 260 """ 261 samples = {k: [] for k in self.qoi_cols} 262 for run_id in data_frame['run_id'].squeeze().unique(): 263 for k in self.qoi_cols: 264 data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k] 265 samples[k].append(data.values) 266 return samples 267 268 def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000): 269 """ 270 Computes the first order and total order Sobol indices using Saltelli's 271 method. To assess the sampling inaccuracy, bootstrap confidence intervals 272 are also computed. 273 274 Reference: A. Saltelli, Making best use of model evaluations to compute 275 sensitivity indices, Computer Physics Communications, 2002. 276 277 Parameters 278 ---------- 279 samples : list 280 The samples for a given QoI. 281 alpha: float 282 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 283 n_samples: int 284 The number of bootstrap samples. The default is 1000. 285 286 Returns 287 ------- 288 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 289 dictionaries containing the first- and total-order Sobol indices for all 290 parameters, and (1-alpha)*100 lower and upper confidence bounds. 291 292 """ 293 assert len(samples) > 0 294 assert alpha > 0.0 295 assert alpha < 1.0 296 assert n_samples > 0 297 298 # convert to array 299 samples = np.array(samples) 300 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 301 # and the size of the QoI 302 n_params = self.sampler.n_params 303 n_mc = self.sampler.n_mc_samples 304 n_qoi = samples[0].size 305 sobols_first_dict = {} 306 conf_first_dict = {} 307 sobols_total_dict = {} 308 conf_total_dict = {} 309 310 for j, param_name in enumerate(self.sampler.vary.get_keys()): 311 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 312 # see reference above. 313 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 314 # our point estimate for the 1st and total order Sobol indices 315 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 316 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 317 # array for resampled estimates 318 sobols_first = np.zeros([n_samples, n_qoi]) 319 sobols_total = np.zeros([n_samples, n_qoi]) 320 for i in range(n_samples): 321 # resample, must be done on already seperated output due to 322 # the specific order in samples 323 idx = np.random.randint(0, n_mc - 1, n_mc) 324 f_M2_resample = f_M2[idx] 325 f_M1_resample = f_M1[idx] 326 f_Ni_resample = f_Ni[idx] 327 # recompute Sobol indices 328 sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample, 329 f_Ni_resample[:, j]) 330 sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample, 331 f_Ni_resample[:, j]) 332 # compute confidence intervals 333 _, low_first, high_first = confidence_interval(sobols_first, value_first, 334 alpha, pivotal=True) 335 _, low_total, high_total = confidence_interval(sobols_total, value_total, 336 alpha, pivotal=True) 337 # store results 338 sobols_first_dict[param_name] = value_first 339 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 340 sobols_total_dict[param_name] = value_total 341 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 342 343 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict 344 345 def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): 346 """ 347 Computes the first order and total order Sobol indices using Saltelli's 348 method. To assess the sampling inaccuracy, bootstrap confidence intervals 349 are also computed. 350 351 Reference: A. Saltelli, Making best use of model evaluations to compute 352 sensitivity indices, Computer Physics Communications, 2002. 353 354 Parameters 355 ---------- 356 samples : list 357 The samples for a given QoI. 358 alpha: float 359 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 360 n_samples: int 361 The number of bootstrap samples. The default is 1000. 362 363 Returns 364 ------- 365 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 366 dictionaries containing the first- and total-order Sobol indices for all 367 parameters, and (1-alpha)*100 lower and upper confidence bounds. 368 369 """ 370 assert len(samples) > 0 371 assert alpha > 0.0 372 assert alpha < 1.0 373 assert n_bootstrap > 0 374 375 # convert to array 376 samples = np.array(samples) 377 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 378 # and the size of the QoI 379 n_params = self.sampler.n_params 380 # n_mc = self.sampler.n_mc_samples 381 n_mc = int(samples.shape[0]/(n_params + 2)) 382 n_qoi = samples[0].size 383 sobols_first_dict = {} 384 conf_first_dict = {} 385 sobols_total_dict = {} 386 conf_total_dict = {} 387 388 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 389 # see reference above. 390 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 391 r = np.random.randint(n_mc, size=(n_mc, n_bootstrap)) 392 393 for j, param_name in enumerate(self.sampler.vary.get_keys()): 394 395 # our point estimate for the 1st and total order Sobol indices 396 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 397 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 398 399 # sobols computed from resampled data points 400 if n_mc * n_bootstrap * n_qoi <= 10**7: 401 #this is a vectorized computation, Is fast, but f_M2[r] will be of size 402 #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 403 #especially when dealing with large QoI (n_qoi >> 1). So this is only done 404 #when n_mc * n_bootstrap * n_qoi <= 10**7 405 print("Vectorized bootstrapping") 406 sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) 407 sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) 408 else: 409 #array for resampled estimates 410 sobols_first = np.zeros([n_bootstrap, n_qoi]) 411 sobols_total = np.zeros([n_bootstrap, n_qoi]) 412 print("Sequential bootstrapping") 413 #non-vectorized implementation 414 for i in range(n_bootstrap): 415 #resampled sample matrices of size (n_mc, n_qoi) 416 indices = r[:, i] 417 sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 418 sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 419 420 # compute confidence intervals based on percentiles 421 _, low_first, high_first = confidence_interval(sobols_first, value_first, 422 alpha, pivotal=True) 423 _, low_total, high_total = confidence_interval(sobols_total, value_total, 424 alpha, pivotal=True) 425 # store results 426 sobols_first_dict[param_name] = value_first 427 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 428 sobols_total_dict[param_name] = value_total 429 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 430 431 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict 432 433 434 # Adapted from SALib 435 @staticmethod 436 def _separate_output_values(samples, n_params, n_mc_samples): 437 """There are n_params + 2 different input matrices: M1, M2, N_i, 438 i=1,...,n_params. (see reference under sobol_bootstrap). The 439 EasyVVUQ dataframe is stored in the order: 440 441 [sample from M2, sample from N1, N2, ... sample from N_n_params, 442 sample from M1, repeat]. 443 444 This subroutine separates the output values into the contributions 445 of the different input matrices. 446 447 Parameters 448 ---------- 449 samples: list 450 The samples for a given QoI 451 n_params: int 452 The number of uncertain input parameters. 453 n_mc_samples: int 454 The number of MC samples per input matrix, i.e. the 455 number of rows in M1, M2 or Ni. 456 457 Returns 458 ------- 459 NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where 460 f_Ni contains n_params entries corresponding to the n_params Ni matrices. 461 462 """ 463 evaluations = np.array(samples) 464 465 shape = (n_mc_samples, n_params) + evaluations[0].shape 466 step = n_params + 2 467 f_Ni = np.zeros(shape) 468 469 f_M2 = evaluations[0:evaluations.shape[0]:step] 470 f_M1 = evaluations[(step - 1):evaluations.shape[0]:step] 471 472 for i in range(n_params): 473 f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step] 474 475 return f_M2, f_M1, f_Ni 476 477 @staticmethod 478 def _first_order(f_M2, f_M1, f_Ni): 479 """Calculate first order sensitivity indices. 480 481 Parameters 482 ---------- 483 f_M2: NumPy array 484 Array of code evaluations on input array M2 485 f_M1: NumPy array 486 Array of code evaluations on input array M1 487 f_Ni: NumPy array 488 Array of code evaluations on input array Ni, i=1,...,n_params 489 490 Returns 491 ------- 492 A NumPy array of the n_params first-order Sobol indices. 493 """ 494 V = np.var(np.r_[f_M2, f_M1], axis=0) 495 return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0) 496 497 @staticmethod 498 def _total_order(f_M2, f_M1, f_Ni): 499 """Calculate total order sensitivity indices. See also: 500 501 A Saltelli et al, Variance based sensitivity analysis of model output. 502 Design and estimator for the total sensitivity index, 2009. 503 504 Parameters 505 ---------- 506 f_M2: NumPy array 507 Array of code evaluations on input array M2 (matrix A in ref above) 508 f_M1: NumPy array 509 Array of code evaluations on input array M1 (matrix B in ref above) 510 f_Ni: NumPy array 511 Array of code evaluations on input array Ni, i=1,...,n_params 512 (matrix AB in ref above) 513 514 Returns 515 ------- 516 A NumPy array of the n_params total-order Sobol indices. 517 """ 518 V = np.var(np.r_[f_M2, f_M1], axis=0) 519 return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)
22class QMCAnalysisResults(AnalysisResults): 23 """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class 24 documentation for details on using it. 25 """ 26 27 def _get_sobols_first(self, qoi, input_): 28 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first']) 29 return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0] 30 31 def _get_sobols_total(self, qoi, input_): 32 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total']) 33 return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0] 34 35 def _get_sobols_second(self, qoi, input_): 36 raise NotImplementedError 37 38 def _get_sobols_first_conf(self, qoi, input_): 39 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first']) 40 return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0], 41 raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]] 42 43 def _get_sobols_total_conf(self, qoi, input_): 44 raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total']) 45 return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0], 46 raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]] 47 48 def supported_stats(self): 49 """Types of statistics supported by the describe method. 50 51 Returns 52 ------- 53 list of str 54 """ 55 return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%'] 56 57 def _describe(self, qoi, statistic): 58 if statistic not in self.supported_stats(): 59 raise NotImplementedError 60 if statistic == '1%': 61 return self.raw_data['percentiles'][qoi]['p1'] 62 if statistic == '10%': 63 return self.raw_data['percentiles'][qoi]['p10'] 64 elif statistic == '50%': 65 return self.raw_data['percentiles'][qoi]['p50'] 66 elif statistic == '90%': 67 return self.raw_data['percentiles'][qoi]['p90'] 68 elif statistic == '99%': 69 return self.raw_data['percentiles'][qoi]['p99'] 70 else: 71 return self.raw_data['statistical_moments'][qoi][statistic][0]
Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class documentation for details on using it.
48 def supported_stats(self): 49 """Types of statistics supported by the describe method. 50 51 Returns 52 ------- 53 list of str 54 """ 55 return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']
Types of statistics supported by the describe method.
Returns
- list of str
74class QMCAnalysis(BaseAnalysisElement): 75 def __init__(self, sampler, qoi_cols=None): 76 """Analysis element for Quasi-Monte Carlo (QMC). 77 78 Parameters 79 ---------- 80 sampler : easyvvuq.sampling.qmc.QMCSampler 81 Sampler used to initiate the QMC analysis 82 qoi_cols : list or None 83 Column names for quantities of interest (for which analysis is to be 84 performed). 85 """ 86 if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler): 87 raise RuntimeError( 88 'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component') 89 if qoi_cols is None: 90 self.qoi_cols = list(sampler.vary.get_keys()) 91 else: 92 self.qoi_cols = qoi_cols 93 self.output_type = OutputType.SUMMARY 94 self.sampler = sampler 95 96 def element_name(self): 97 """Name for this element. 98 99 Return 100 ------ 101 str: 102 "QMC_Analysis" 103 """ 104 return "QMC_Analysis" 105 106 def element_version(self): 107 """Version of this element. 108 109 Return 110 ------ 111 str: 112 Element version. 113 """ 114 return "0.2" 115 116 def contains_nan(self, values): 117 """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if 118 any there are at least one occurrence of ``None`` or ``numpy.nan``. 119 Parameters 120 ---------- 121 values : array_like, list, number 122 `values` where to check for occurrences of ``None`` or ``np.nan``. 123 Can be irregular and have any number of nested elements. 124 Returns 125 ------- 126 bool 127 ``True`` if `values` has at least one occurrence of ``None`` or 128 ``numpy.nan``. 129 """ 130 # To speed up we first try the fast option np.any(np.isnan(values)) 131 try: 132 return np.any(np.isnan(values)) 133 except (ValueError, TypeError): 134 if values is None or values is np.nan: 135 return True 136 # To solve the problem of float/int as well as numpy int/flaot 137 elif np.isscalar(values) and np.isnan(values): 138 return True 139 elif hasattr(values, "__iter__"): 140 for value in values: 141 if self.contains_nan(value): 142 return True 143 144 return False 145 else: 146 return False 147 148 def create_mask(self, samples): 149 """ 150 Mask samples that do not give results (anything but np.nan or None). 151 Parameters 152 ---------- 153 samples : array_like 154 Evaluations for the model. 155 Returns 156 ------- 157 masked_samples : list 158 The evaluations that have results (not numpy.nan or None). 159 mask : boolean array 160 The mask itself, used to create the masked arrays. 161 """ 162 masked_samples = [] 163 mask = np.ones(len(samples), dtype=bool) 164 165 for i, result in enumerate(samples): 166 # if np.any(np.isnan(result)): 167 if self.contains_nan(result): 168 mask[i] = False 169 else: 170 masked_samples.append(result) 171 172 return masked_samples, mask 173 174 def analyse(self, data_frame): 175 """Perform QMC analysis on a given pandas DataFrame. 176 177 Parameters 178 ---------- 179 data_frame : pandas DataFrame 180 Input data for analysis. 181 182 Returns 183 ------- 184 easyvvuq.analysis.qmc.QMCAnalysisResults 185 AnalysisResults object for QMC. 186 """ 187 if data_frame.empty: 188 raise RuntimeError( 189 "No data in data frame passed to analyse element") 190 191 qoi_cols = self.qoi_cols 192 193 results = { 194 'statistical_moments': {k: {} for k in qoi_cols}, 195 'percentiles': {k: {} for k in qoi_cols}, 196 'sobols_first': {k: {} for k in qoi_cols}, 197 'sobols_total': {k: {} for k in qoi_cols}, 198 'conf_sobols_first': {k: {} for k in qoi_cols}, 199 'conf_sobols_total': {k: {} for k in qoi_cols} 200 } 201 202 # Extract output values for each quantity of interest from Dataframe 203 samples = self.get_samples(data_frame) 204 205 # Compute descriptive statistics for each quantity of interest 206 for k in qoi_cols: 207 # Find NaNs and create a mask excluding these samples from the analysis 208 # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532 209 masked_samples, mask = self.create_mask(samples[k]) 210 211 results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0), 212 'var': np.var(masked_samples, axis=0), 213 'std': np.std(masked_samples, axis=0), 214 'min': np.min(masked_samples, axis=0), 215 'max': np.max(masked_samples, axis=0), 216 'median': np.median(masked_samples, axis=0), 217 } 218 results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0], 219 'p10': np.percentile(masked_samples, 10, 0)[0], 220 'p50': np.percentile(masked_samples, 50, 0)[0], 221 'p90': np.percentile(masked_samples, 90, 0)[0], 222 'p99': np.percentile(masked_samples, 99, 0)[0]} 223 224 # Replace Nan values by the mean before proceeding with the SA 225 indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean 226 for i in indices: 227 samples[k][i] = results['statistical_moments'][k]['mean'] 228 229 if not np.all(mask): 230 print("Warning: QoI \"{}\" only yields ".format(k) + 231 "results for {}/{} ".format(sum(mask), len(mask)) + 232 "parameter combinations. " + 233 "Runs {} are not valid. ".format(indices+1) + 234 "NaN results are set to the mean when calculating the Sobol indices. " + 235 "This might affect the Sobol indices.") 236 237 sobols_first, conf_first, sobols_total, conf_total = \ 238 self.sobol_bootstrap(samples[k]) 239 results['sobols_first'][k] = sobols_first 240 results['sobols_total'][k] = sobols_total 241 results['conf_sobols_first'][k] = conf_first 242 results['conf_sobols_total'][k] = conf_total 243 244 return QMCAnalysisResults(raw_data=results, samples=data_frame, 245 qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys())) 246 247 def get_samples(self, data_frame): 248 """ 249 Converts the Pandas dataframe into a dictionary. 250 251 Parameters 252 ---------- 253 data_frame : pandas DataFrame 254 the EasyVVUQ Pandas dataframe from collation. 255 256 Returns 257 ------- 258 dict : 259 A dictionary with the QoI names as keys. 260 Each element is a list of code evaluations. 261 """ 262 samples = {k: [] for k in self.qoi_cols} 263 for run_id in data_frame['run_id'].squeeze().unique(): 264 for k in self.qoi_cols: 265 data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k] 266 samples[k].append(data.values) 267 return samples 268 269 def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000): 270 """ 271 Computes the first order and total order Sobol indices using Saltelli's 272 method. To assess the sampling inaccuracy, bootstrap confidence intervals 273 are also computed. 274 275 Reference: A. Saltelli, Making best use of model evaluations to compute 276 sensitivity indices, Computer Physics Communications, 2002. 277 278 Parameters 279 ---------- 280 samples : list 281 The samples for a given QoI. 282 alpha: float 283 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 284 n_samples: int 285 The number of bootstrap samples. The default is 1000. 286 287 Returns 288 ------- 289 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 290 dictionaries containing the first- and total-order Sobol indices for all 291 parameters, and (1-alpha)*100 lower and upper confidence bounds. 292 293 """ 294 assert len(samples) > 0 295 assert alpha > 0.0 296 assert alpha < 1.0 297 assert n_samples > 0 298 299 # convert to array 300 samples = np.array(samples) 301 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 302 # and the size of the QoI 303 n_params = self.sampler.n_params 304 n_mc = self.sampler.n_mc_samples 305 n_qoi = samples[0].size 306 sobols_first_dict = {} 307 conf_first_dict = {} 308 sobols_total_dict = {} 309 conf_total_dict = {} 310 311 for j, param_name in enumerate(self.sampler.vary.get_keys()): 312 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 313 # see reference above. 314 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 315 # our point estimate for the 1st and total order Sobol indices 316 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 317 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 318 # array for resampled estimates 319 sobols_first = np.zeros([n_samples, n_qoi]) 320 sobols_total = np.zeros([n_samples, n_qoi]) 321 for i in range(n_samples): 322 # resample, must be done on already seperated output due to 323 # the specific order in samples 324 idx = np.random.randint(0, n_mc - 1, n_mc) 325 f_M2_resample = f_M2[idx] 326 f_M1_resample = f_M1[idx] 327 f_Ni_resample = f_Ni[idx] 328 # recompute Sobol indices 329 sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample, 330 f_Ni_resample[:, j]) 331 sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample, 332 f_Ni_resample[:, j]) 333 # compute confidence intervals 334 _, low_first, high_first = confidence_interval(sobols_first, value_first, 335 alpha, pivotal=True) 336 _, low_total, high_total = confidence_interval(sobols_total, value_total, 337 alpha, pivotal=True) 338 # store results 339 sobols_first_dict[param_name] = value_first 340 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 341 sobols_total_dict[param_name] = value_total 342 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 343 344 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict 345 346 def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): 347 """ 348 Computes the first order and total order Sobol indices using Saltelli's 349 method. To assess the sampling inaccuracy, bootstrap confidence intervals 350 are also computed. 351 352 Reference: A. Saltelli, Making best use of model evaluations to compute 353 sensitivity indices, Computer Physics Communications, 2002. 354 355 Parameters 356 ---------- 357 samples : list 358 The samples for a given QoI. 359 alpha: float 360 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 361 n_samples: int 362 The number of bootstrap samples. The default is 1000. 363 364 Returns 365 ------- 366 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 367 dictionaries containing the first- and total-order Sobol indices for all 368 parameters, and (1-alpha)*100 lower and upper confidence bounds. 369 370 """ 371 assert len(samples) > 0 372 assert alpha > 0.0 373 assert alpha < 1.0 374 assert n_bootstrap > 0 375 376 # convert to array 377 samples = np.array(samples) 378 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 379 # and the size of the QoI 380 n_params = self.sampler.n_params 381 # n_mc = self.sampler.n_mc_samples 382 n_mc = int(samples.shape[0]/(n_params + 2)) 383 n_qoi = samples[0].size 384 sobols_first_dict = {} 385 conf_first_dict = {} 386 sobols_total_dict = {} 387 conf_total_dict = {} 388 389 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 390 # see reference above. 391 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 392 r = np.random.randint(n_mc, size=(n_mc, n_bootstrap)) 393 394 for j, param_name in enumerate(self.sampler.vary.get_keys()): 395 396 # our point estimate for the 1st and total order Sobol indices 397 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 398 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 399 400 # sobols computed from resampled data points 401 if n_mc * n_bootstrap * n_qoi <= 10**7: 402 #this is a vectorized computation, Is fast, but f_M2[r] will be of size 403 #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 404 #especially when dealing with large QoI (n_qoi >> 1). So this is only done 405 #when n_mc * n_bootstrap * n_qoi <= 10**7 406 print("Vectorized bootstrapping") 407 sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) 408 sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) 409 else: 410 #array for resampled estimates 411 sobols_first = np.zeros([n_bootstrap, n_qoi]) 412 sobols_total = np.zeros([n_bootstrap, n_qoi]) 413 print("Sequential bootstrapping") 414 #non-vectorized implementation 415 for i in range(n_bootstrap): 416 #resampled sample matrices of size (n_mc, n_qoi) 417 indices = r[:, i] 418 sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 419 sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 420 421 # compute confidence intervals based on percentiles 422 _, low_first, high_first = confidence_interval(sobols_first, value_first, 423 alpha, pivotal=True) 424 _, low_total, high_total = confidence_interval(sobols_total, value_total, 425 alpha, pivotal=True) 426 # store results 427 sobols_first_dict[param_name] = value_first 428 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 429 sobols_total_dict[param_name] = value_total 430 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 431 432 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict 433 434 435 # Adapted from SALib 436 @staticmethod 437 def _separate_output_values(samples, n_params, n_mc_samples): 438 """There are n_params + 2 different input matrices: M1, M2, N_i, 439 i=1,...,n_params. (see reference under sobol_bootstrap). The 440 EasyVVUQ dataframe is stored in the order: 441 442 [sample from M2, sample from N1, N2, ... sample from N_n_params, 443 sample from M1, repeat]. 444 445 This subroutine separates the output values into the contributions 446 of the different input matrices. 447 448 Parameters 449 ---------- 450 samples: list 451 The samples for a given QoI 452 n_params: int 453 The number of uncertain input parameters. 454 n_mc_samples: int 455 The number of MC samples per input matrix, i.e. the 456 number of rows in M1, M2 or Ni. 457 458 Returns 459 ------- 460 NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where 461 f_Ni contains n_params entries corresponding to the n_params Ni matrices. 462 463 """ 464 evaluations = np.array(samples) 465 466 shape = (n_mc_samples, n_params) + evaluations[0].shape 467 step = n_params + 2 468 f_Ni = np.zeros(shape) 469 470 f_M2 = evaluations[0:evaluations.shape[0]:step] 471 f_M1 = evaluations[(step - 1):evaluations.shape[0]:step] 472 473 for i in range(n_params): 474 f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step] 475 476 return f_M2, f_M1, f_Ni 477 478 @staticmethod 479 def _first_order(f_M2, f_M1, f_Ni): 480 """Calculate first order sensitivity indices. 481 482 Parameters 483 ---------- 484 f_M2: NumPy array 485 Array of code evaluations on input array M2 486 f_M1: NumPy array 487 Array of code evaluations on input array M1 488 f_Ni: NumPy array 489 Array of code evaluations on input array Ni, i=1,...,n_params 490 491 Returns 492 ------- 493 A NumPy array of the n_params first-order Sobol indices. 494 """ 495 V = np.var(np.r_[f_M2, f_M1], axis=0) 496 return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0) 497 498 @staticmethod 499 def _total_order(f_M2, f_M1, f_Ni): 500 """Calculate total order sensitivity indices. See also: 501 502 A Saltelli et al, Variance based sensitivity analysis of model output. 503 Design and estimator for the total sensitivity index, 2009. 504 505 Parameters 506 ---------- 507 f_M2: NumPy array 508 Array of code evaluations on input array M2 (matrix A in ref above) 509 f_M1: NumPy array 510 Array of code evaluations on input array M1 (matrix B in ref above) 511 f_Ni: NumPy array 512 Array of code evaluations on input array Ni, i=1,...,n_params 513 (matrix AB in ref above) 514 515 Returns 516 ------- 517 A NumPy array of the n_params total-order Sobol indices. 518 """ 519 V = np.var(np.r_[f_M2, f_M1], axis=0) 520 return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)
Base class for all EasyVVUQ analysis elements.
Attributes
75 def __init__(self, sampler, qoi_cols=None): 76 """Analysis element for Quasi-Monte Carlo (QMC). 77 78 Parameters 79 ---------- 80 sampler : easyvvuq.sampling.qmc.QMCSampler 81 Sampler used to initiate the QMC analysis 82 qoi_cols : list or None 83 Column names for quantities of interest (for which analysis is to be 84 performed). 85 """ 86 if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler): 87 raise RuntimeError( 88 'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component') 89 if qoi_cols is None: 90 self.qoi_cols = list(sampler.vary.get_keys()) 91 else: 92 self.qoi_cols = qoi_cols 93 self.output_type = OutputType.SUMMARY 94 self.sampler = sampler
Analysis element for Quasi-Monte Carlo (QMC).
Parameters
- sampler (easyvvuq.sampling.qmc.QMCSampler): Sampler used to initiate the QMC analysis
- qoi_cols (list or None): Column names for quantities of interest (for which analysis is to be performed).
96 def element_name(self): 97 """Name for this element. 98 99 Return 100 ------ 101 str: 102 "QMC_Analysis" 103 """ 104 return "QMC_Analysis"
Name for this element.
Return
str: "QMC_Analysis"
106 def element_version(self): 107 """Version of this element. 108 109 Return 110 ------ 111 str: 112 Element version. 113 """ 114 return "0.2"
Version of this element.
Return
str: Element version.
116 def contains_nan(self, values): 117 """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if 118 any there are at least one occurrence of ``None`` or ``numpy.nan``. 119 Parameters 120 ---------- 121 values : array_like, list, number 122 `values` where to check for occurrences of ``None`` or ``np.nan``. 123 Can be irregular and have any number of nested elements. 124 Returns 125 ------- 126 bool 127 ``True`` if `values` has at least one occurrence of ``None`` or 128 ``numpy.nan``. 129 """ 130 # To speed up we first try the fast option np.any(np.isnan(values)) 131 try: 132 return np.any(np.isnan(values)) 133 except (ValueError, TypeError): 134 if values is None or values is np.nan: 135 return True 136 # To solve the problem of float/int as well as numpy int/flaot 137 elif np.isscalar(values) and np.isnan(values): 138 return True 139 elif hasattr(values, "__iter__"): 140 for value in values: 141 if self.contains_nan(value): 142 return True 143 144 return False 145 else: 146 return False
Checks if None or numpy.nan exists in values. Returns True if
any there are at least one occurrence of None or numpy.nan.
Parameters
- values (array_like, list, number):
valueswhere to check for occurrences ofNoneornp.nan. Can be irregular and have any number of nested elements.
Returns
- bool:
Trueifvalueshas at least one occurrence ofNoneornumpy.nan.
148 def create_mask(self, samples): 149 """ 150 Mask samples that do not give results (anything but np.nan or None). 151 Parameters 152 ---------- 153 samples : array_like 154 Evaluations for the model. 155 Returns 156 ------- 157 masked_samples : list 158 The evaluations that have results (not numpy.nan or None). 159 mask : boolean array 160 The mask itself, used to create the masked arrays. 161 """ 162 masked_samples = [] 163 mask = np.ones(len(samples), dtype=bool) 164 165 for i, result in enumerate(samples): 166 # if np.any(np.isnan(result)): 167 if self.contains_nan(result): 168 mask[i] = False 169 else: 170 masked_samples.append(result) 171 172 return masked_samples, mask
Mask samples that do not give results (anything but np.nan or None).
Parameters
- samples (array_like): Evaluations for the model.
Returns
- masked_samples (list): The evaluations that have results (not numpy.nan or None).
- mask (boolean array): The mask itself, used to create the masked arrays.
174 def analyse(self, data_frame): 175 """Perform QMC analysis on a given pandas DataFrame. 176 177 Parameters 178 ---------- 179 data_frame : pandas DataFrame 180 Input data for analysis. 181 182 Returns 183 ------- 184 easyvvuq.analysis.qmc.QMCAnalysisResults 185 AnalysisResults object for QMC. 186 """ 187 if data_frame.empty: 188 raise RuntimeError( 189 "No data in data frame passed to analyse element") 190 191 qoi_cols = self.qoi_cols 192 193 results = { 194 'statistical_moments': {k: {} for k in qoi_cols}, 195 'percentiles': {k: {} for k in qoi_cols}, 196 'sobols_first': {k: {} for k in qoi_cols}, 197 'sobols_total': {k: {} for k in qoi_cols}, 198 'conf_sobols_first': {k: {} for k in qoi_cols}, 199 'conf_sobols_total': {k: {} for k in qoi_cols} 200 } 201 202 # Extract output values for each quantity of interest from Dataframe 203 samples = self.get_samples(data_frame) 204 205 # Compute descriptive statistics for each quantity of interest 206 for k in qoi_cols: 207 # Find NaNs and create a mask excluding these samples from the analysis 208 # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532 209 masked_samples, mask = self.create_mask(samples[k]) 210 211 results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0), 212 'var': np.var(masked_samples, axis=0), 213 'std': np.std(masked_samples, axis=0), 214 'min': np.min(masked_samples, axis=0), 215 'max': np.max(masked_samples, axis=0), 216 'median': np.median(masked_samples, axis=0), 217 } 218 results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0], 219 'p10': np.percentile(masked_samples, 10, 0)[0], 220 'p50': np.percentile(masked_samples, 50, 0)[0], 221 'p90': np.percentile(masked_samples, 90, 0)[0], 222 'p99': np.percentile(masked_samples, 99, 0)[0]} 223 224 # Replace Nan values by the mean before proceeding with the SA 225 indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean 226 for i in indices: 227 samples[k][i] = results['statistical_moments'][k]['mean'] 228 229 if not np.all(mask): 230 print("Warning: QoI \"{}\" only yields ".format(k) + 231 "results for {}/{} ".format(sum(mask), len(mask)) + 232 "parameter combinations. " + 233 "Runs {} are not valid. ".format(indices+1) + 234 "NaN results are set to the mean when calculating the Sobol indices. " + 235 "This might affect the Sobol indices.") 236 237 sobols_first, conf_first, sobols_total, conf_total = \ 238 self.sobol_bootstrap(samples[k]) 239 results['sobols_first'][k] = sobols_first 240 results['sobols_total'][k] = sobols_total 241 results['conf_sobols_first'][k] = conf_first 242 results['conf_sobols_total'][k] = conf_total 243 244 return QMCAnalysisResults(raw_data=results, samples=data_frame, 245 qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
Perform QMC analysis on a given pandas DataFrame.
Parameters
- data_frame (pandas DataFrame): Input data for analysis.
Returns
- easyvvuq.analysis.qmc.QMCAnalysisResults: AnalysisResults object for QMC.
247 def get_samples(self, data_frame): 248 """ 249 Converts the Pandas dataframe into a dictionary. 250 251 Parameters 252 ---------- 253 data_frame : pandas DataFrame 254 the EasyVVUQ Pandas dataframe from collation. 255 256 Returns 257 ------- 258 dict : 259 A dictionary with the QoI names as keys. 260 Each element is a list of code evaluations. 261 """ 262 samples = {k: [] for k in self.qoi_cols} 263 for run_id in data_frame['run_id'].squeeze().unique(): 264 for k in self.qoi_cols: 265 data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k] 266 samples[k].append(data.values) 267 return samples
Converts the Pandas dataframe into a dictionary.
Parameters
- data_frame (pandas DataFrame): the EasyVVUQ Pandas dataframe from collation.
Returns
- dict :: A dictionary with the QoI names as keys. Each element is a list of code evaluations.
269 def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000): 270 """ 271 Computes the first order and total order Sobol indices using Saltelli's 272 method. To assess the sampling inaccuracy, bootstrap confidence intervals 273 are also computed. 274 275 Reference: A. Saltelli, Making best use of model evaluations to compute 276 sensitivity indices, Computer Physics Communications, 2002. 277 278 Parameters 279 ---------- 280 samples : list 281 The samples for a given QoI. 282 alpha: float 283 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 284 n_samples: int 285 The number of bootstrap samples. The default is 1000. 286 287 Returns 288 ------- 289 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 290 dictionaries containing the first- and total-order Sobol indices for all 291 parameters, and (1-alpha)*100 lower and upper confidence bounds. 292 293 """ 294 assert len(samples) > 0 295 assert alpha > 0.0 296 assert alpha < 1.0 297 assert n_samples > 0 298 299 # convert to array 300 samples = np.array(samples) 301 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 302 # and the size of the QoI 303 n_params = self.sampler.n_params 304 n_mc = self.sampler.n_mc_samples 305 n_qoi = samples[0].size 306 sobols_first_dict = {} 307 conf_first_dict = {} 308 sobols_total_dict = {} 309 conf_total_dict = {} 310 311 for j, param_name in enumerate(self.sampler.vary.get_keys()): 312 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 313 # see reference above. 314 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 315 # our point estimate for the 1st and total order Sobol indices 316 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 317 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 318 # array for resampled estimates 319 sobols_first = np.zeros([n_samples, n_qoi]) 320 sobols_total = np.zeros([n_samples, n_qoi]) 321 for i in range(n_samples): 322 # resample, must be done on already seperated output due to 323 # the specific order in samples 324 idx = np.random.randint(0, n_mc - 1, n_mc) 325 f_M2_resample = f_M2[idx] 326 f_M1_resample = f_M1[idx] 327 f_Ni_resample = f_Ni[idx] 328 # recompute Sobol indices 329 sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample, 330 f_Ni_resample[:, j]) 331 sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample, 332 f_Ni_resample[:, j]) 333 # compute confidence intervals 334 _, low_first, high_first = confidence_interval(sobols_first, value_first, 335 alpha, pivotal=True) 336 _, low_total, high_total = confidence_interval(sobols_total, value_total, 337 alpha, pivotal=True) 338 # store results 339 sobols_first_dict[param_name] = value_first 340 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 341 sobols_total_dict[param_name] = value_total 342 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 343 344 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals are also computed.
Reference: A. Saltelli, Making best use of model evaluations to compute sensitivity indices, Computer Physics Communications, 2002.
Parameters
- samples (list): The samples for a given QoI.
- alpha (float): The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
- n_samples (int): The number of bootstrap samples. The default is 1000.
Returns
- sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
- dictionaries containing the first- and total-order Sobol indices for all
- parameters, and (1-alpha)*100 lower and upper confidence bounds.
346 def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): 347 """ 348 Computes the first order and total order Sobol indices using Saltelli's 349 method. To assess the sampling inaccuracy, bootstrap confidence intervals 350 are also computed. 351 352 Reference: A. Saltelli, Making best use of model evaluations to compute 353 sensitivity indices, Computer Physics Communications, 2002. 354 355 Parameters 356 ---------- 357 samples : list 358 The samples for a given QoI. 359 alpha: float 360 The (1 - alpha) * 100 confidence interval parameter. The default is 0.05. 361 n_samples: int 362 The number of bootstrap samples. The default is 1000. 363 364 Returns 365 ------- 366 sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict: 367 dictionaries containing the first- and total-order Sobol indices for all 368 parameters, and (1-alpha)*100 lower and upper confidence bounds. 369 370 """ 371 assert len(samples) > 0 372 assert alpha > 0.0 373 assert alpha < 1.0 374 assert n_bootstrap > 0 375 376 # convert to array 377 samples = np.array(samples) 378 # the number of parameter and the number of MC samples in n_mc * (n_params + 2) 379 # and the size of the QoI 380 n_params = self.sampler.n_params 381 # n_mc = self.sampler.n_mc_samples 382 n_mc = int(samples.shape[0]/(n_params + 2)) 383 n_qoi = samples[0].size 384 sobols_first_dict = {} 385 conf_first_dict = {} 386 sobols_total_dict = {} 387 conf_total_dict = {} 388 389 # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params 390 # see reference above. 391 f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) 392 r = np.random.randint(n_mc, size=(n_mc, n_bootstrap)) 393 394 for j, param_name in enumerate(self.sampler.vary.get_keys()): 395 396 # our point estimate for the 1st and total order Sobol indices 397 value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) 398 value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) 399 400 # sobols computed from resampled data points 401 if n_mc * n_bootstrap * n_qoi <= 10**7: 402 #this is a vectorized computation, Is fast, but f_M2[r] will be of size 403 #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 404 #especially when dealing with large QoI (n_qoi >> 1). So this is only done 405 #when n_mc * n_bootstrap * n_qoi <= 10**7 406 print("Vectorized bootstrapping") 407 sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) 408 sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) 409 else: 410 #array for resampled estimates 411 sobols_first = np.zeros([n_bootstrap, n_qoi]) 412 sobols_total = np.zeros([n_bootstrap, n_qoi]) 413 print("Sequential bootstrapping") 414 #non-vectorized implementation 415 for i in range(n_bootstrap): 416 #resampled sample matrices of size (n_mc, n_qoi) 417 indices = r[:, i] 418 sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 419 sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j]) 420 421 # compute confidence intervals based on percentiles 422 _, low_first, high_first = confidence_interval(sobols_first, value_first, 423 alpha, pivotal=True) 424 _, low_total, high_total = confidence_interval(sobols_total, value_total, 425 alpha, pivotal=True) 426 # store results 427 sobols_first_dict[param_name] = value_first 428 conf_first_dict[param_name] = {'low': low_first, 'high': high_first} 429 sobols_total_dict[param_name] = value_total 430 conf_total_dict[param_name] = {'low': low_total, 'high': high_total} 431 432 return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals are also computed.
Reference: A. Saltelli, Making best use of model evaluations to compute sensitivity indices, Computer Physics Communications, 2002.
Parameters
- samples (list): The samples for a given QoI.
- alpha (float): The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
- n_samples (int): The number of bootstrap samples. The default is 1000.
Returns
- sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
- dictionaries containing the first- and total-order Sobol indices for all
- parameters, and (1-alpha)*100 lower and upper confidence bounds.