easyvvuq.analysis.qmc_analysis

Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.

Please refer to the article below for the basic approach used here. https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis

  1"""Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.
  2
  3Please refer to the article below for the basic approach used here.
  4https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis
  5"""
  6import logging
  7import numpy as np
  8from easyvvuq import OutputType
  9from .base import BaseAnalysisElement
 10from easyvvuq.sampling import QMCSampler
 11from .results import AnalysisResults
 12from easyvvuq.sampling import MCSampler
 13from .ensemble_boot import confidence_interval
 14
 15__author__ = 'Jalal Lakhlili'
 16__license__ = "LGPL"
 17
 18logger = logging.getLogger(__name__)
 19
 20
 21class QMCAnalysisResults(AnalysisResults):
 22    """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class
 23    documentation for details on using it.
 24    """
 25
 26    def _get_sobols_first(self, qoi, input_):
 27        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
 28        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
 29
 30    def _get_sobols_total(self, qoi, input_):
 31        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total'])
 32        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
 33
 34    def _get_sobols_second(self, qoi, input_):
 35        raise NotImplementedError
 36
 37    def _get_sobols_first_conf(self, qoi, input_):
 38        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first'])
 39        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
 40                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
 41
 42    def _get_sobols_total_conf(self, qoi, input_):
 43        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total'])
 44        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
 45                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
 46
 47    def supported_stats(self):
 48        """Types of statistics supported by the describe method.
 49
 50        Returns
 51        -------
 52        list of str
 53        """
 54        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']
 55
 56    def _describe(self, qoi, statistic):
 57        if statistic not in self.supported_stats():
 58            raise NotImplementedError
 59        if statistic == '1%':
 60            return self.raw_data['percentiles'][qoi]['p1']
 61        if statistic == '10%':
 62            return self.raw_data['percentiles'][qoi]['p10']
 63        elif statistic == '50%':
 64            return self.raw_data['percentiles'][qoi]['p50']
 65        elif statistic == '90%':
 66            return self.raw_data['percentiles'][qoi]['p90']
 67        elif statistic == '99%':
 68            return self.raw_data['percentiles'][qoi]['p99']
 69        else:
 70            return self.raw_data['statistical_moments'][qoi][statistic][0]
 71
 72
 73class QMCAnalysis(BaseAnalysisElement):
 74    def __init__(self, sampler, qoi_cols=None):
 75        """Analysis element for Quasi-Monte Carlo (QMC).
 76
 77        Parameters
 78        ----------
 79        sampler : easyvvuq.sampling.qmc.QMCSampler
 80            Sampler used to initiate the QMC analysis
 81        qoi_cols : list or None
 82            Column names for quantities of interest (for which analysis is to be
 83            performed).
 84        """
 85        if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler):
 86            raise RuntimeError(
 87                'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component')
 88        if qoi_cols is None:
 89            self.qoi_cols = list(sampler.vary.get_keys())
 90        else:
 91            self.qoi_cols = qoi_cols
 92        self.output_type = OutputType.SUMMARY
 93        self.sampler = sampler
 94
 95    def element_name(self):
 96        """Name for this element.
 97
 98        Return
 99        ------
100        str:
101            "QMC_Analysis"
102        """
103        return "QMC_Analysis"
104
105    def element_version(self):
106        """Version of this element.
107
108        Return
109        ------
110        str:
111            Element version.
112        """
113        return "0.2"
114
115    def contains_nan(self, values):
116        """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if
117        any there are at least one occurrence of ``None`` or ``numpy.nan``.
118        Parameters
119        ----------
120        values : array_like, list, number
121            `values` where to check for occurrences of ``None`` or ``np.nan``.
122            Can be irregular and have any number of nested elements.
123        Returns
124        -------
125        bool
126            ``True`` if `values` has at least one occurrence of ``None`` or
127            ``numpy.nan``.
128        """
129        # To speed up we first try the fast option np.any(np.isnan(values))
130        try:
131            return np.any(np.isnan(values))
132        except (ValueError, TypeError):
133            if values is None or values is np.nan:
134                return True
135            # To solve the problem of float/int as well as numpy int/flaot
136            elif np.isscalar(values) and np.isnan(values):
137                return True
138            elif hasattr(values, "__iter__"):
139                for value in values:
140                    if self.contains_nan(value):
141                        return True
142
143                return False
144            else:
145                return False
146
147    def create_mask(self, samples):
148        """
149        Mask samples that do not give results (anything but np.nan or None).
150        Parameters
151        ----------
152        samples : array_like
153            Evaluations for the model.
154        Returns
155        -------
156        masked_samples : list
157            The evaluations that have results (not numpy.nan or None).
158        mask : boolean array
159            The mask itself, used to create the masked arrays.
160        """
161        masked_samples = []
162        mask = np.ones(len(samples), dtype=bool)
163
164        for i, result in enumerate(samples):
165            # if np.any(np.isnan(result)):
166            if self.contains_nan(result):
167                mask[i] = False
168            else:
169                masked_samples.append(result)
170
171        return masked_samples, mask     
172
173    def analyse(self, data_frame):
174        """Perform QMC analysis on a given pandas DataFrame.
175
176        Parameters
177        ----------
178        data_frame : pandas DataFrame
179            Input data for analysis.
180
181        Returns
182        -------
183        easyvvuq.analysis.qmc.QMCAnalysisResults
184            AnalysisResults object for QMC.
185        """
186        if data_frame.empty:
187            raise RuntimeError(
188                "No data in data frame passed to analyse element")
189
190        qoi_cols = self.qoi_cols
191
192        results = {
193            'statistical_moments': {k: {} for k in qoi_cols},
194            'percentiles': {k: {} for k in qoi_cols},
195            'sobols_first': {k: {} for k in qoi_cols},
196            'sobols_total': {k: {} for k in qoi_cols},
197            'conf_sobols_first': {k: {} for k in qoi_cols},
198            'conf_sobols_total': {k: {} for k in qoi_cols}
199        }
200
201        # Extract output values for each quantity of interest from Dataframe
202        samples = self.get_samples(data_frame)
203
204        # Compute descriptive statistics for each quantity of interest
205        for k in qoi_cols:
206            # Find NaNs and create a mask excluding these samples from the analysis
207            # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532
208            masked_samples, mask = self.create_mask(samples[k])
209
210            results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0),
211                                                 'var': np.var(masked_samples, axis=0),
212                                                 'std': np.std(masked_samples, axis=0),
213                                                 'min': np.min(masked_samples, axis=0),
214                                                 'max': np.max(masked_samples, axis=0),
215                                                 'median': np.median(masked_samples, axis=0),
216                                                 }
217            results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0],
218                                         'p10': np.percentile(masked_samples, 10, 0)[0],
219                                         'p50': np.percentile(masked_samples, 50, 0)[0],
220                                         'p90': np.percentile(masked_samples, 90, 0)[0],
221                                         'p99': np.percentile(masked_samples, 99, 0)[0]}
222
223            # Replace Nan values by the mean before proceeding with the SA
224            indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean
225            for i in indices:
226                samples[k][i] = results['statistical_moments'][k]['mean']
227
228            if not np.all(mask):
229                print("Warning: QoI \"{}\" only yields ".format(k) +
230                    "results for {}/{} ".format(sum(mask), len(mask)) +
231                    "parameter combinations. " +
232                    "Runs {} are not valid. ".format(indices+1) +
233                    "NaN results are set to the mean when calculating the Sobol indices. " +
234                    "This might affect the Sobol indices.")
235            
236            sobols_first, conf_first, sobols_total, conf_total = \
237                self.sobol_bootstrap(samples[k])
238            results['sobols_first'][k] = sobols_first
239            results['sobols_total'][k] = sobols_total
240            results['conf_sobols_first'][k] = conf_first
241            results['conf_sobols_total'][k] = conf_total
242
243        return QMCAnalysisResults(raw_data=results, samples=data_frame,
244                                  qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
245
246    def get_samples(self, data_frame):
247        """
248        Converts the Pandas dataframe into a dictionary.
249
250        Parameters
251        ----------
252        data_frame : pandas DataFrame
253            the EasyVVUQ Pandas dataframe from collation.
254
255        Returns
256        -------
257        dict :
258            A dictionary with the QoI names as keys.
259            Each element is a list of code evaluations.
260        """
261        samples = {k: [] for k in self.qoi_cols}
262        for run_id in data_frame['run_id'].squeeze().unique():
263            for k in self.qoi_cols:
264                data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k]
265                samples[k].append(data.values)
266        return samples
267
268    def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
269        """
270        Computes the first order and total order Sobol indices using Saltelli's
271        method. To assess the sampling inaccuracy, bootstrap confidence intervals
272        are also computed.
273
274        Reference: A. Saltelli, Making best use of model evaluations to compute
275        sensitivity indices, Computer Physics Communications, 2002.
276
277        Parameters
278        ----------
279        samples : list
280            The samples for a given QoI.
281        alpha: float
282            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
283        n_samples: int
284            The number of bootstrap samples. The default is 1000.
285
286        Returns
287        -------
288        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
289        dictionaries containing the first- and total-order Sobol indices for all
290        parameters, and (1-alpha)*100 lower and upper confidence bounds.
291
292        """
293        assert len(samples) > 0
294        assert alpha > 0.0
295        assert alpha < 1.0
296        assert n_samples > 0
297
298        # convert to array
299        samples = np.array(samples)
300        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
301        # and the size of the QoI
302        n_params = self.sampler.n_params
303        n_mc = self.sampler.n_mc_samples
304        n_qoi = samples[0].size
305        sobols_first_dict = {}
306        conf_first_dict = {}
307        sobols_total_dict = {}
308        conf_total_dict = {}
309
310        for j, param_name in enumerate(self.sampler.vary.get_keys()):
311            # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
312            # see reference above.
313            f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
314            # our point estimate for the 1st and total order Sobol indices
315            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
316            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
317            # array for resampled estimates
318            sobols_first = np.zeros([n_samples, n_qoi])
319            sobols_total = np.zeros([n_samples, n_qoi])
320            for i in range(n_samples):
321                # resample, must be done on already seperated output due to
322                # the specific order in samples
323                idx = np.random.randint(0, n_mc - 1, n_mc)
324                f_M2_resample = f_M2[idx]
325                f_M1_resample = f_M1[idx]
326                f_Ni_resample = f_Ni[idx]
327                # recompute Sobol indices
328                sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
329                                                    f_Ni_resample[:, j])
330                sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
331                                                    f_Ni_resample[:, j])
332            # compute confidence intervals
333            _, low_first, high_first = confidence_interval(sobols_first, value_first,
334                                                           alpha, pivotal=True)
335            _, low_total, high_total = confidence_interval(sobols_total, value_total,
336                                                           alpha, pivotal=True)
337            # store results
338            sobols_first_dict[param_name] = value_first
339            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
340            sobols_total_dict[param_name] = value_total
341            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
342
343        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
344    
345    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
346        """
347        Computes the first order and total order Sobol indices using Saltelli's
348        method. To assess the sampling inaccuracy, bootstrap confidence intervals
349        are also computed.
350
351        Reference: A. Saltelli, Making best use of model evaluations to compute
352        sensitivity indices, Computer Physics Communications, 2002.
353
354        Parameters
355        ----------
356        samples : list
357            The samples for a given QoI.
358        alpha: float
359            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
360        n_samples: int
361            The number of bootstrap samples. The default is 1000.
362
363        Returns
364        -------
365        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
366        dictionaries containing the first- and total-order Sobol indices for all
367        parameters, and (1-alpha)*100 lower and upper confidence bounds.
368
369        """
370        assert len(samples) > 0
371        assert alpha > 0.0
372        assert alpha < 1.0
373        assert n_bootstrap > 0
374
375        # convert to array
376        samples = np.array(samples)
377        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
378        # and the size of the QoI
379        n_params = self.sampler.n_params
380        # n_mc = self.sampler.n_mc_samples
381        n_mc = int(samples.shape[0]/(n_params + 2))
382        n_qoi = samples[0].size
383        sobols_first_dict = {}
384        conf_first_dict = {}
385        sobols_total_dict = {}
386        conf_total_dict = {}
387
388        # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
389        # see reference above.
390        f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
391        r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))
392
393        for j, param_name in enumerate(self.sampler.vary.get_keys()):
394
395            # our point estimate for the 1st and total order Sobol indices
396            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
397            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
398
399            # sobols computed from resampled data points
400            if n_mc * n_bootstrap * n_qoi <= 10**7:
401                #this is a vectorized computation, Is fast, but f_M2[r] will be of size
402                #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 
403                #especially when dealing with large QoI (n_qoi >> 1). So this is only done
404                #when n_mc * n_bootstrap * n_qoi <= 10**7
405                print("Vectorized bootstrapping")
406                sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
407                sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
408            else:
409                #array for resampled estimates
410                sobols_first = np.zeros([n_bootstrap, n_qoi])
411                sobols_total = np.zeros([n_bootstrap, n_qoi])
412                print("Sequential bootstrapping")
413                #non-vectorized implementation
414                for i in range(n_bootstrap):
415                    #resampled sample matrices of size (n_mc, n_qoi)
416                    indices = r[:, i]  
417                    sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
418                    sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
419
420            # compute confidence intervals based on percentiles
421            _, low_first, high_first = confidence_interval(sobols_first, value_first,
422                                                           alpha, pivotal=True)
423            _, low_total, high_total = confidence_interval(sobols_total, value_total,
424                                                           alpha, pivotal=True)
425            # store results
426            sobols_first_dict[param_name] = value_first
427            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
428            sobols_total_dict[param_name] = value_total
429            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
430
431        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
432
433
434    # Adapted from SALib
435    @staticmethod
436    def _separate_output_values(samples, n_params, n_mc_samples):
437        """There are n_params + 2 different input matrices: M1, M2, N_i,
438        i=1,...,n_params.  (see reference under sobol_bootstrap). The
439        EasyVVUQ dataframe is stored in the order:
440
441        [sample from M2, sample from N1, N2, ... sample from N_n_params,
442         sample from M1, repeat].
443
444        This subroutine separates the output values into the contributions
445        of the different input matrices.
446
447        Parameters
448        ----------
449        samples: list
450            The samples for a given QoI
451        n_params: int
452            The number of uncertain input parameters.
453        n_mc_samples: int
454            The number of MC samples per input matrix, i.e. the
455          number of rows in M1, M2 or Ni.
456
457        Returns
458        -------
459        NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where
460        f_Ni contains n_params entries corresponding to the n_params Ni matrices.
461
462        """
463        evaluations = np.array(samples)
464
465        shape = (n_mc_samples, n_params) + evaluations[0].shape
466        step = n_params + 2
467        f_Ni = np.zeros(shape)
468
469        f_M2 = evaluations[0:evaluations.shape[0]:step]
470        f_M1 = evaluations[(step - 1):evaluations.shape[0]:step]
471
472        for i in range(n_params):
473            f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step]
474
475        return f_M2, f_M1, f_Ni
476
477    @staticmethod
478    def _first_order(f_M2, f_M1, f_Ni):
479        """Calculate first order sensitivity indices.
480
481        Parameters
482        ----------
483        f_M2: NumPy array
484            Array of code evaluations on input array M2
485        f_M1: NumPy array
486            Array of code evaluations on input array M1
487        f_Ni: NumPy array
488            Array of code evaluations on input array Ni, i=1,...,n_params
489
490        Returns
491        -------
492        A NumPy array of the n_params first-order Sobol indices.
493        """
494        V = np.var(np.r_[f_M2, f_M1], axis=0)
495        return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
496
497    @staticmethod
498    def _total_order(f_M2, f_M1, f_Ni):
499        """Calculate total order sensitivity indices. See also:
500
501        A Saltelli et al, Variance based sensitivity analysis of model output.
502        Design and estimator for the total sensitivity index, 2009.
503
504        Parameters
505        ----------
506        f_M2: NumPy array
507            Array of code evaluations on input array M2 (matrix A in ref above)
508        f_M1: NumPy array
509            Array of code evaluations on input array M1 (matrix B in ref above)
510        f_Ni: NumPy array
511            Array of code evaluations on input array Ni, i=1,...,n_params
512          (matrix AB in ref above)
513
514        Returns
515        -------
516        A NumPy array of the n_params total-order Sobol indices.
517        """
518        V = np.var(np.r_[f_M2, f_M1], axis=0)
519        return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)
logger = <Logger easyvvuq.analysis.qmc_analysis (DEBUG)>
class QMCAnalysisResults(easyvvuq.analysis.results.AnalysisResults):
22class QMCAnalysisResults(AnalysisResults):
23    """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class
24    documentation for details on using it.
25    """
26
27    def _get_sobols_first(self, qoi, input_):
28        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
29        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
30
31    def _get_sobols_total(self, qoi, input_):
32        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total'])
33        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
34
35    def _get_sobols_second(self, qoi, input_):
36        raise NotImplementedError
37
38    def _get_sobols_first_conf(self, qoi, input_):
39        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first'])
40        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
41                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
42
43    def _get_sobols_total_conf(self, qoi, input_):
44        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total'])
45        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
46                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
47
48    def supported_stats(self):
49        """Types of statistics supported by the describe method.
50
51        Returns
52        -------
53        list of str
54        """
55        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']
56
57    def _describe(self, qoi, statistic):
58        if statistic not in self.supported_stats():
59            raise NotImplementedError
60        if statistic == '1%':
61            return self.raw_data['percentiles'][qoi]['p1']
62        if statistic == '10%':
63            return self.raw_data['percentiles'][qoi]['p10']
64        elif statistic == '50%':
65            return self.raw_data['percentiles'][qoi]['p50']
66        elif statistic == '90%':
67            return self.raw_data['percentiles'][qoi]['p90']
68        elif statistic == '99%':
69            return self.raw_data['percentiles'][qoi]['p99']
70        else:
71            return self.raw_data['statistical_moments'][qoi][statistic][0]

Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class documentation for details on using it.

def supported_stats(self):
48    def supported_stats(self):
49        """Types of statistics supported by the describe method.
50
51        Returns
52        -------
53        list of str
54        """
55        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']

Types of statistics supported by the describe method.

Returns
  • list of str
class QMCAnalysis(easyvvuq.analysis.base.BaseAnalysisElement):
 74class QMCAnalysis(BaseAnalysisElement):
 75    def __init__(self, sampler, qoi_cols=None):
 76        """Analysis element for Quasi-Monte Carlo (QMC).
 77
 78        Parameters
 79        ----------
 80        sampler : easyvvuq.sampling.qmc.QMCSampler
 81            Sampler used to initiate the QMC analysis
 82        qoi_cols : list or None
 83            Column names for quantities of interest (for which analysis is to be
 84            performed).
 85        """
 86        if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler):
 87            raise RuntimeError(
 88                'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component')
 89        if qoi_cols is None:
 90            self.qoi_cols = list(sampler.vary.get_keys())
 91        else:
 92            self.qoi_cols = qoi_cols
 93        self.output_type = OutputType.SUMMARY
 94        self.sampler = sampler
 95
 96    def element_name(self):
 97        """Name for this element.
 98
 99        Return
100        ------
101        str:
102            "QMC_Analysis"
103        """
104        return "QMC_Analysis"
105
106    def element_version(self):
107        """Version of this element.
108
109        Return
110        ------
111        str:
112            Element version.
113        """
114        return "0.2"
115
116    def contains_nan(self, values):
117        """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if
118        any there are at least one occurrence of ``None`` or ``numpy.nan``.
119        Parameters
120        ----------
121        values : array_like, list, number
122            `values` where to check for occurrences of ``None`` or ``np.nan``.
123            Can be irregular and have any number of nested elements.
124        Returns
125        -------
126        bool
127            ``True`` if `values` has at least one occurrence of ``None`` or
128            ``numpy.nan``.
129        """
130        # To speed up we first try the fast option np.any(np.isnan(values))
131        try:
132            return np.any(np.isnan(values))
133        except (ValueError, TypeError):
134            if values is None or values is np.nan:
135                return True
136            # To solve the problem of float/int as well as numpy int/flaot
137            elif np.isscalar(values) and np.isnan(values):
138                return True
139            elif hasattr(values, "__iter__"):
140                for value in values:
141                    if self.contains_nan(value):
142                        return True
143
144                return False
145            else:
146                return False
147
148    def create_mask(self, samples):
149        """
150        Mask samples that do not give results (anything but np.nan or None).
151        Parameters
152        ----------
153        samples : array_like
154            Evaluations for the model.
155        Returns
156        -------
157        masked_samples : list
158            The evaluations that have results (not numpy.nan or None).
159        mask : boolean array
160            The mask itself, used to create the masked arrays.
161        """
162        masked_samples = []
163        mask = np.ones(len(samples), dtype=bool)
164
165        for i, result in enumerate(samples):
166            # if np.any(np.isnan(result)):
167            if self.contains_nan(result):
168                mask[i] = False
169            else:
170                masked_samples.append(result)
171
172        return masked_samples, mask     
173
174    def analyse(self, data_frame):
175        """Perform QMC analysis on a given pandas DataFrame.
176
177        Parameters
178        ----------
179        data_frame : pandas DataFrame
180            Input data for analysis.
181
182        Returns
183        -------
184        easyvvuq.analysis.qmc.QMCAnalysisResults
185            AnalysisResults object for QMC.
186        """
187        if data_frame.empty:
188            raise RuntimeError(
189                "No data in data frame passed to analyse element")
190
191        qoi_cols = self.qoi_cols
192
193        results = {
194            'statistical_moments': {k: {} for k in qoi_cols},
195            'percentiles': {k: {} for k in qoi_cols},
196            'sobols_first': {k: {} for k in qoi_cols},
197            'sobols_total': {k: {} for k in qoi_cols},
198            'conf_sobols_first': {k: {} for k in qoi_cols},
199            'conf_sobols_total': {k: {} for k in qoi_cols}
200        }
201
202        # Extract output values for each quantity of interest from Dataframe
203        samples = self.get_samples(data_frame)
204
205        # Compute descriptive statistics for each quantity of interest
206        for k in qoi_cols:
207            # Find NaNs and create a mask excluding these samples from the analysis
208            # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532
209            masked_samples, mask = self.create_mask(samples[k])
210
211            results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0),
212                                                 'var': np.var(masked_samples, axis=0),
213                                                 'std': np.std(masked_samples, axis=0),
214                                                 'min': np.min(masked_samples, axis=0),
215                                                 'max': np.max(masked_samples, axis=0),
216                                                 'median': np.median(masked_samples, axis=0),
217                                                 }
218            results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0],
219                                         'p10': np.percentile(masked_samples, 10, 0)[0],
220                                         'p50': np.percentile(masked_samples, 50, 0)[0],
221                                         'p90': np.percentile(masked_samples, 90, 0)[0],
222                                         'p99': np.percentile(masked_samples, 99, 0)[0]}
223
224            # Replace Nan values by the mean before proceeding with the SA
225            indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean
226            for i in indices:
227                samples[k][i] = results['statistical_moments'][k]['mean']
228
229            if not np.all(mask):
230                print("Warning: QoI \"{}\" only yields ".format(k) +
231                    "results for {}/{} ".format(sum(mask), len(mask)) +
232                    "parameter combinations. " +
233                    "Runs {} are not valid. ".format(indices+1) +
234                    "NaN results are set to the mean when calculating the Sobol indices. " +
235                    "This might affect the Sobol indices.")
236            
237            sobols_first, conf_first, sobols_total, conf_total = \
238                self.sobol_bootstrap(samples[k])
239            results['sobols_first'][k] = sobols_first
240            results['sobols_total'][k] = sobols_total
241            results['conf_sobols_first'][k] = conf_first
242            results['conf_sobols_total'][k] = conf_total
243
244        return QMCAnalysisResults(raw_data=results, samples=data_frame,
245                                  qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
246
247    def get_samples(self, data_frame):
248        """
249        Converts the Pandas dataframe into a dictionary.
250
251        Parameters
252        ----------
253        data_frame : pandas DataFrame
254            the EasyVVUQ Pandas dataframe from collation.
255
256        Returns
257        -------
258        dict :
259            A dictionary with the QoI names as keys.
260            Each element is a list of code evaluations.
261        """
262        samples = {k: [] for k in self.qoi_cols}
263        for run_id in data_frame['run_id'].squeeze().unique():
264            for k in self.qoi_cols:
265                data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k]
266                samples[k].append(data.values)
267        return samples
268
269    def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
270        """
271        Computes the first order and total order Sobol indices using Saltelli's
272        method. To assess the sampling inaccuracy, bootstrap confidence intervals
273        are also computed.
274
275        Reference: A. Saltelli, Making best use of model evaluations to compute
276        sensitivity indices, Computer Physics Communications, 2002.
277
278        Parameters
279        ----------
280        samples : list
281            The samples for a given QoI.
282        alpha: float
283            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
284        n_samples: int
285            The number of bootstrap samples. The default is 1000.
286
287        Returns
288        -------
289        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
290        dictionaries containing the first- and total-order Sobol indices for all
291        parameters, and (1-alpha)*100 lower and upper confidence bounds.
292
293        """
294        assert len(samples) > 0
295        assert alpha > 0.0
296        assert alpha < 1.0
297        assert n_samples > 0
298
299        # convert to array
300        samples = np.array(samples)
301        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
302        # and the size of the QoI
303        n_params = self.sampler.n_params
304        n_mc = self.sampler.n_mc_samples
305        n_qoi = samples[0].size
306        sobols_first_dict = {}
307        conf_first_dict = {}
308        sobols_total_dict = {}
309        conf_total_dict = {}
310
311        for j, param_name in enumerate(self.sampler.vary.get_keys()):
312            # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
313            # see reference above.
314            f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
315            # our point estimate for the 1st and total order Sobol indices
316            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
317            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
318            # array for resampled estimates
319            sobols_first = np.zeros([n_samples, n_qoi])
320            sobols_total = np.zeros([n_samples, n_qoi])
321            for i in range(n_samples):
322                # resample, must be done on already seperated output due to
323                # the specific order in samples
324                idx = np.random.randint(0, n_mc - 1, n_mc)
325                f_M2_resample = f_M2[idx]
326                f_M1_resample = f_M1[idx]
327                f_Ni_resample = f_Ni[idx]
328                # recompute Sobol indices
329                sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
330                                                    f_Ni_resample[:, j])
331                sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
332                                                    f_Ni_resample[:, j])
333            # compute confidence intervals
334            _, low_first, high_first = confidence_interval(sobols_first, value_first,
335                                                           alpha, pivotal=True)
336            _, low_total, high_total = confidence_interval(sobols_total, value_total,
337                                                           alpha, pivotal=True)
338            # store results
339            sobols_first_dict[param_name] = value_first
340            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
341            sobols_total_dict[param_name] = value_total
342            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
343
344        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
345    
346    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
347        """
348        Computes the first order and total order Sobol indices using Saltelli's
349        method. To assess the sampling inaccuracy, bootstrap confidence intervals
350        are also computed.
351
352        Reference: A. Saltelli, Making best use of model evaluations to compute
353        sensitivity indices, Computer Physics Communications, 2002.
354
355        Parameters
356        ----------
357        samples : list
358            The samples for a given QoI.
359        alpha: float
360            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
361        n_samples: int
362            The number of bootstrap samples. The default is 1000.
363
364        Returns
365        -------
366        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
367        dictionaries containing the first- and total-order Sobol indices for all
368        parameters, and (1-alpha)*100 lower and upper confidence bounds.
369
370        """
371        assert len(samples) > 0
372        assert alpha > 0.0
373        assert alpha < 1.0
374        assert n_bootstrap > 0
375
376        # convert to array
377        samples = np.array(samples)
378        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
379        # and the size of the QoI
380        n_params = self.sampler.n_params
381        # n_mc = self.sampler.n_mc_samples
382        n_mc = int(samples.shape[0]/(n_params + 2))
383        n_qoi = samples[0].size
384        sobols_first_dict = {}
385        conf_first_dict = {}
386        sobols_total_dict = {}
387        conf_total_dict = {}
388
389        # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
390        # see reference above.
391        f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
392        r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))
393
394        for j, param_name in enumerate(self.sampler.vary.get_keys()):
395
396            # our point estimate for the 1st and total order Sobol indices
397            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
398            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
399
400            # sobols computed from resampled data points
401            if n_mc * n_bootstrap * n_qoi <= 10**7:
402                #this is a vectorized computation, Is fast, but f_M2[r] will be of size
403                #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 
404                #especially when dealing with large QoI (n_qoi >> 1). So this is only done
405                #when n_mc * n_bootstrap * n_qoi <= 10**7
406                print("Vectorized bootstrapping")
407                sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
408                sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
409            else:
410                #array for resampled estimates
411                sobols_first = np.zeros([n_bootstrap, n_qoi])
412                sobols_total = np.zeros([n_bootstrap, n_qoi])
413                print("Sequential bootstrapping")
414                #non-vectorized implementation
415                for i in range(n_bootstrap):
416                    #resampled sample matrices of size (n_mc, n_qoi)
417                    indices = r[:, i]  
418                    sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
419                    sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
420
421            # compute confidence intervals based on percentiles
422            _, low_first, high_first = confidence_interval(sobols_first, value_first,
423                                                           alpha, pivotal=True)
424            _, low_total, high_total = confidence_interval(sobols_total, value_total,
425                                                           alpha, pivotal=True)
426            # store results
427            sobols_first_dict[param_name] = value_first
428            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
429            sobols_total_dict[param_name] = value_total
430            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
431
432        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
433
434
435    # Adapted from SALib
436    @staticmethod
437    def _separate_output_values(samples, n_params, n_mc_samples):
438        """There are n_params + 2 different input matrices: M1, M2, N_i,
439        i=1,...,n_params.  (see reference under sobol_bootstrap). The
440        EasyVVUQ dataframe is stored in the order:
441
442        [sample from M2, sample from N1, N2, ... sample from N_n_params,
443         sample from M1, repeat].
444
445        This subroutine separates the output values into the contributions
446        of the different input matrices.
447
448        Parameters
449        ----------
450        samples: list
451            The samples for a given QoI
452        n_params: int
453            The number of uncertain input parameters.
454        n_mc_samples: int
455            The number of MC samples per input matrix, i.e. the
456          number of rows in M1, M2 or Ni.
457
458        Returns
459        -------
460        NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where
461        f_Ni contains n_params entries corresponding to the n_params Ni matrices.
462
463        """
464        evaluations = np.array(samples)
465
466        shape = (n_mc_samples, n_params) + evaluations[0].shape
467        step = n_params + 2
468        f_Ni = np.zeros(shape)
469
470        f_M2 = evaluations[0:evaluations.shape[0]:step]
471        f_M1 = evaluations[(step - 1):evaluations.shape[0]:step]
472
473        for i in range(n_params):
474            f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step]
475
476        return f_M2, f_M1, f_Ni
477
478    @staticmethod
479    def _first_order(f_M2, f_M1, f_Ni):
480        """Calculate first order sensitivity indices.
481
482        Parameters
483        ----------
484        f_M2: NumPy array
485            Array of code evaluations on input array M2
486        f_M1: NumPy array
487            Array of code evaluations on input array M1
488        f_Ni: NumPy array
489            Array of code evaluations on input array Ni, i=1,...,n_params
490
491        Returns
492        -------
493        A NumPy array of the n_params first-order Sobol indices.
494        """
495        V = np.var(np.r_[f_M2, f_M1], axis=0)
496        return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
497
498    @staticmethod
499    def _total_order(f_M2, f_M1, f_Ni):
500        """Calculate total order sensitivity indices. See also:
501
502        A Saltelli et al, Variance based sensitivity analysis of model output.
503        Design and estimator for the total sensitivity index, 2009.
504
505        Parameters
506        ----------
507        f_M2: NumPy array
508            Array of code evaluations on input array M2 (matrix A in ref above)
509        f_M1: NumPy array
510            Array of code evaluations on input array M1 (matrix B in ref above)
511        f_Ni: NumPy array
512            Array of code evaluations on input array Ni, i=1,...,n_params
513          (matrix AB in ref above)
514
515        Returns
516        -------
517        A NumPy array of the n_params total-order Sobol indices.
518        """
519        V = np.var(np.r_[f_M2, f_M1], axis=0)
520        return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)

Base class for all EasyVVUQ analysis elements.

Attributes

QMCAnalysis(sampler, qoi_cols=None)
75    def __init__(self, sampler, qoi_cols=None):
76        """Analysis element for Quasi-Monte Carlo (QMC).
77
78        Parameters
79        ----------
80        sampler : easyvvuq.sampling.qmc.QMCSampler
81            Sampler used to initiate the QMC analysis
82        qoi_cols : list or None
83            Column names for quantities of interest (for which analysis is to be
84            performed).
85        """
86        if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler):
87            raise RuntimeError(
88                'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component')
89        if qoi_cols is None:
90            self.qoi_cols = list(sampler.vary.get_keys())
91        else:
92            self.qoi_cols = qoi_cols
93        self.output_type = OutputType.SUMMARY
94        self.sampler = sampler

Analysis element for Quasi-Monte Carlo (QMC).

Parameters
  • sampler (easyvvuq.sampling.qmc.QMCSampler): Sampler used to initiate the QMC analysis
  • qoi_cols (list or None): Column names for quantities of interest (for which analysis is to be performed).
output_type
sampler
def element_name(self):
 96    def element_name(self):
 97        """Name for this element.
 98
 99        Return
100        ------
101        str:
102            "QMC_Analysis"
103        """
104        return "QMC_Analysis"

Name for this element.

Return

str: "QMC_Analysis"

def element_version(self):
106    def element_version(self):
107        """Version of this element.
108
109        Return
110        ------
111        str:
112            Element version.
113        """
114        return "0.2"

Version of this element.

Return

str: Element version.

def contains_nan(self, values):
116    def contains_nan(self, values):
117        """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if
118        any there are at least one occurrence of ``None`` or ``numpy.nan``.
119        Parameters
120        ----------
121        values : array_like, list, number
122            `values` where to check for occurrences of ``None`` or ``np.nan``.
123            Can be irregular and have any number of nested elements.
124        Returns
125        -------
126        bool
127            ``True`` if `values` has at least one occurrence of ``None`` or
128            ``numpy.nan``.
129        """
130        # To speed up we first try the fast option np.any(np.isnan(values))
131        try:
132            return np.any(np.isnan(values))
133        except (ValueError, TypeError):
134            if values is None or values is np.nan:
135                return True
136            # To solve the problem of float/int as well as numpy int/flaot
137            elif np.isscalar(values) and np.isnan(values):
138                return True
139            elif hasattr(values, "__iter__"):
140                for value in values:
141                    if self.contains_nan(value):
142                        return True
143
144                return False
145            else:
146                return False

Checks if None or numpy.nan exists in values. Returns True if any there are at least one occurrence of None or numpy.nan.

Parameters
  • values (array_like, list, number): values where to check for occurrences of None or np.nan. Can be irregular and have any number of nested elements.
Returns
  • bool: True if values has at least one occurrence of None or numpy.nan.
def create_mask(self, samples):
148    def create_mask(self, samples):
149        """
150        Mask samples that do not give results (anything but np.nan or None).
151        Parameters
152        ----------
153        samples : array_like
154            Evaluations for the model.
155        Returns
156        -------
157        masked_samples : list
158            The evaluations that have results (not numpy.nan or None).
159        mask : boolean array
160            The mask itself, used to create the masked arrays.
161        """
162        masked_samples = []
163        mask = np.ones(len(samples), dtype=bool)
164
165        for i, result in enumerate(samples):
166            # if np.any(np.isnan(result)):
167            if self.contains_nan(result):
168                mask[i] = False
169            else:
170                masked_samples.append(result)
171
172        return masked_samples, mask     

Mask samples that do not give results (anything but np.nan or None).

Parameters
  • samples (array_like): Evaluations for the model.
Returns
  • masked_samples (list): The evaluations that have results (not numpy.nan or None).
  • mask (boolean array): The mask itself, used to create the masked arrays.
def analyse(self, data_frame):
174    def analyse(self, data_frame):
175        """Perform QMC analysis on a given pandas DataFrame.
176
177        Parameters
178        ----------
179        data_frame : pandas DataFrame
180            Input data for analysis.
181
182        Returns
183        -------
184        easyvvuq.analysis.qmc.QMCAnalysisResults
185            AnalysisResults object for QMC.
186        """
187        if data_frame.empty:
188            raise RuntimeError(
189                "No data in data frame passed to analyse element")
190
191        qoi_cols = self.qoi_cols
192
193        results = {
194            'statistical_moments': {k: {} for k in qoi_cols},
195            'percentiles': {k: {} for k in qoi_cols},
196            'sobols_first': {k: {} for k in qoi_cols},
197            'sobols_total': {k: {} for k in qoi_cols},
198            'conf_sobols_first': {k: {} for k in qoi_cols},
199            'conf_sobols_total': {k: {} for k in qoi_cols}
200        }
201
202        # Extract output values for each quantity of interest from Dataframe
203        samples = self.get_samples(data_frame)
204
205        # Compute descriptive statistics for each quantity of interest
206        for k in qoi_cols:
207            # Find NaNs and create a mask excluding these samples from the analysis
208            # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532
209            masked_samples, mask = self.create_mask(samples[k])
210
211            results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0),
212                                                 'var': np.var(masked_samples, axis=0),
213                                                 'std': np.std(masked_samples, axis=0),
214                                                 'min': np.min(masked_samples, axis=0),
215                                                 'max': np.max(masked_samples, axis=0),
216                                                 'median': np.median(masked_samples, axis=0),
217                                                 }
218            results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0],
219                                         'p10': np.percentile(masked_samples, 10, 0)[0],
220                                         'p50': np.percentile(masked_samples, 50, 0)[0],
221                                         'p90': np.percentile(masked_samples, 90, 0)[0],
222                                         'p99': np.percentile(masked_samples, 99, 0)[0]}
223
224            # Replace Nan values by the mean before proceeding with the SA
225            indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean
226            for i in indices:
227                samples[k][i] = results['statistical_moments'][k]['mean']
228
229            if not np.all(mask):
230                print("Warning: QoI \"{}\" only yields ".format(k) +
231                    "results for {}/{} ".format(sum(mask), len(mask)) +
232                    "parameter combinations. " +
233                    "Runs {} are not valid. ".format(indices+1) +
234                    "NaN results are set to the mean when calculating the Sobol indices. " +
235                    "This might affect the Sobol indices.")
236            
237            sobols_first, conf_first, sobols_total, conf_total = \
238                self.sobol_bootstrap(samples[k])
239            results['sobols_first'][k] = sobols_first
240            results['sobols_total'][k] = sobols_total
241            results['conf_sobols_first'][k] = conf_first
242            results['conf_sobols_total'][k] = conf_total
243
244        return QMCAnalysisResults(raw_data=results, samples=data_frame,
245                                  qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))

Perform QMC analysis on a given pandas DataFrame.

Parameters
  • data_frame (pandas DataFrame): Input data for analysis.
Returns
def get_samples(self, data_frame):
247    def get_samples(self, data_frame):
248        """
249        Converts the Pandas dataframe into a dictionary.
250
251        Parameters
252        ----------
253        data_frame : pandas DataFrame
254            the EasyVVUQ Pandas dataframe from collation.
255
256        Returns
257        -------
258        dict :
259            A dictionary with the QoI names as keys.
260            Each element is a list of code evaluations.
261        """
262        samples = {k: [] for k in self.qoi_cols}
263        for run_id in data_frame['run_id'].squeeze().unique():
264            for k in self.qoi_cols:
265                data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k]
266                samples[k].append(data.values)
267        return samples

Converts the Pandas dataframe into a dictionary.

Parameters
  • data_frame (pandas DataFrame): the EasyVVUQ Pandas dataframe from collation.
Returns
  • dict :: A dictionary with the QoI names as keys. Each element is a list of code evaluations.
def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
269    def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
270        """
271        Computes the first order and total order Sobol indices using Saltelli's
272        method. To assess the sampling inaccuracy, bootstrap confidence intervals
273        are also computed.
274
275        Reference: A. Saltelli, Making best use of model evaluations to compute
276        sensitivity indices, Computer Physics Communications, 2002.
277
278        Parameters
279        ----------
280        samples : list
281            The samples for a given QoI.
282        alpha: float
283            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
284        n_samples: int
285            The number of bootstrap samples. The default is 1000.
286
287        Returns
288        -------
289        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
290        dictionaries containing the first- and total-order Sobol indices for all
291        parameters, and (1-alpha)*100 lower and upper confidence bounds.
292
293        """
294        assert len(samples) > 0
295        assert alpha > 0.0
296        assert alpha < 1.0
297        assert n_samples > 0
298
299        # convert to array
300        samples = np.array(samples)
301        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
302        # and the size of the QoI
303        n_params = self.sampler.n_params
304        n_mc = self.sampler.n_mc_samples
305        n_qoi = samples[0].size
306        sobols_first_dict = {}
307        conf_first_dict = {}
308        sobols_total_dict = {}
309        conf_total_dict = {}
310
311        for j, param_name in enumerate(self.sampler.vary.get_keys()):
312            # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
313            # see reference above.
314            f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
315            # our point estimate for the 1st and total order Sobol indices
316            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
317            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
318            # array for resampled estimates
319            sobols_first = np.zeros([n_samples, n_qoi])
320            sobols_total = np.zeros([n_samples, n_qoi])
321            for i in range(n_samples):
322                # resample, must be done on already seperated output due to
323                # the specific order in samples
324                idx = np.random.randint(0, n_mc - 1, n_mc)
325                f_M2_resample = f_M2[idx]
326                f_M1_resample = f_M1[idx]
327                f_Ni_resample = f_Ni[idx]
328                # recompute Sobol indices
329                sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
330                                                    f_Ni_resample[:, j])
331                sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
332                                                    f_Ni_resample[:, j])
333            # compute confidence intervals
334            _, low_first, high_first = confidence_interval(sobols_first, value_first,
335                                                           alpha, pivotal=True)
336            _, low_total, high_total = confidence_interval(sobols_total, value_total,
337                                                           alpha, pivotal=True)
338            # store results
339            sobols_first_dict[param_name] = value_first
340            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
341            sobols_total_dict[param_name] = value_total
342            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
343
344        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict

Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals are also computed.

Reference: A. Saltelli, Making best use of model evaluations to compute sensitivity indices, Computer Physics Communications, 2002.

Parameters
  • samples (list): The samples for a given QoI.
  • alpha (float): The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
  • n_samples (int): The number of bootstrap samples. The default is 1000.
Returns
  • sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
  • dictionaries containing the first- and total-order Sobol indices for all
  • parameters, and (1-alpha)*100 lower and upper confidence bounds.
def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
346    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
347        """
348        Computes the first order and total order Sobol indices using Saltelli's
349        method. To assess the sampling inaccuracy, bootstrap confidence intervals
350        are also computed.
351
352        Reference: A. Saltelli, Making best use of model evaluations to compute
353        sensitivity indices, Computer Physics Communications, 2002.
354
355        Parameters
356        ----------
357        samples : list
358            The samples for a given QoI.
359        alpha: float
360            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
361        n_samples: int
362            The number of bootstrap samples. The default is 1000.
363
364        Returns
365        -------
366        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
367        dictionaries containing the first- and total-order Sobol indices for all
368        parameters, and (1-alpha)*100 lower and upper confidence bounds.
369
370        """
371        assert len(samples) > 0
372        assert alpha > 0.0
373        assert alpha < 1.0
374        assert n_bootstrap > 0
375
376        # convert to array
377        samples = np.array(samples)
378        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
379        # and the size of the QoI
380        n_params = self.sampler.n_params
381        # n_mc = self.sampler.n_mc_samples
382        n_mc = int(samples.shape[0]/(n_params + 2))
383        n_qoi = samples[0].size
384        sobols_first_dict = {}
385        conf_first_dict = {}
386        sobols_total_dict = {}
387        conf_total_dict = {}
388
389        # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
390        # see reference above.
391        f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
392        r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))
393
394        for j, param_name in enumerate(self.sampler.vary.get_keys()):
395
396            # our point estimate for the 1st and total order Sobol indices
397            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
398            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
399
400            # sobols computed from resampled data points
401            if n_mc * n_bootstrap * n_qoi <= 10**7:
402                #this is a vectorized computation, Is fast, but f_M2[r] will be of size
403                #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 
404                #especially when dealing with large QoI (n_qoi >> 1). So this is only done
405                #when n_mc * n_bootstrap * n_qoi <= 10**7
406                print("Vectorized bootstrapping")
407                sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
408                sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
409            else:
410                #array for resampled estimates
411                sobols_first = np.zeros([n_bootstrap, n_qoi])
412                sobols_total = np.zeros([n_bootstrap, n_qoi])
413                print("Sequential bootstrapping")
414                #non-vectorized implementation
415                for i in range(n_bootstrap):
416                    #resampled sample matrices of size (n_mc, n_qoi)
417                    indices = r[:, i]  
418                    sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
419                    sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
420
421            # compute confidence intervals based on percentiles
422            _, low_first, high_first = confidence_interval(sobols_first, value_first,
423                                                           alpha, pivotal=True)
424            _, low_total, high_total = confidence_interval(sobols_total, value_total,
425                                                           alpha, pivotal=True)
426            # store results
427            sobols_first_dict[param_name] = value_first
428            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
429            sobols_total_dict[param_name] = value_total
430            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
431
432        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict

Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals are also computed.

Reference: A. Saltelli, Making best use of model evaluations to compute sensitivity indices, Computer Physics Communications, 2002.

Parameters
  • samples (list): The samples for a given QoI.
  • alpha (float): The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
  • n_samples (int): The number of bootstrap samples. The default is 1000.
Returns
  • sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
  • dictionaries containing the first- and total-order Sobol indices for all
  • parameters, and (1-alpha)*100 lower and upper confidence bounds.