easyvvuq.analysis.qmc_analysis

Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.

Please refer to the article below for the basic approach used here. https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis

View Source

  1"""Analysis element for Quasi-Monte Carlo (QMC) sensitivity analysis.
  2
  3Please refer to the article below for the basic approach used here.
  4https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis
  5"""
  6import logging
  7import numpy as np
  8from easyvvuq import OutputType
  9from .base import BaseAnalysisElement
 10from easyvvuq.sampling import QMCSampler
 11from .results import AnalysisResults
 12from easyvvuq.sampling import MCSampler
 13from .ensemble_boot import confidence_interval
 14
 15__author__ = 'Jalal Lakhlili'
 16__license__ = "LGPL"
 17
 18logger = logging.getLogger(__name__)
 19
 20
 21class QMCAnalysisResults(AnalysisResults):
 22    """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class
 23    documentation for details on using it.
 24    """
 25
 26    def _get_sobols_first(self, qoi, input_):
 27        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
 28        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
 29
 30    def _get_sobols_total(self, qoi, input_):
 31        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total'])
 32        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
 33
 34    def _get_sobols_second(self, qoi, input_):
 35        raise NotImplementedError
 36
 37    def _get_sobols_first_conf(self, qoi, input_):
 38        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first'])
 39        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
 40                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
 41
 42    def _get_sobols_total_conf(self, qoi, input_):
 43        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total'])
 44        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
 45                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
 46
 47    def supported_stats(self):
 48        """Types of statistics supported by the describe method.
 49
 50        Returns
 51        -------
 52        list of str
 53        """
 54        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']
 55
 56    def _describe(self, qoi, statistic):
 57        if statistic not in self.supported_stats():
 58            raise NotImplementedError
 59        if statistic == '1%':
 60            return self.raw_data['percentiles'][qoi]['p1']
 61        if statistic == '10%':
 62            return self.raw_data['percentiles'][qoi]['p10']
 63        elif statistic == '50%':
 64            return self.raw_data['percentiles'][qoi]['p50']
 65        elif statistic == '90%':
 66            return self.raw_data['percentiles'][qoi]['p90']
 67        elif statistic == '99%':
 68            return self.raw_data['percentiles'][qoi]['p99']
 69        else:
 70            return self.raw_data['statistical_moments'][qoi][statistic][0]
 71
 72
 73class QMCAnalysis(BaseAnalysisElement):
 74    def __init__(self, sampler, qoi_cols=None):
 75        """Analysis element for Quasi-Monte Carlo (QMC).
 76
 77        Parameters
 78        ----------
 79        sampler : easyvvuq.sampling.qmc.QMCSampler
 80            Sampler used to initiate the QMC analysis
 81        qoi_cols : list or None
 82            Column names for quantities of interest (for which analysis is to be
 83            performed).
 84        """
 85        if not isinstance(sampler, QMCSampler) and not isinstance(sampler, MCSampler):
 86            raise RuntimeError(
 87                'QMCAnalysis class relies on the QMCSampler or MCSampler as its sampling component')
 88        if qoi_cols is None:
 89            self.qoi_cols = list(sampler.vary.get_keys())
 90        else:
 91            self.qoi_cols = qoi_cols
 92        self.output_type = OutputType.SUMMARY
 93        self.sampler = sampler
 94
 95    def element_name(self):
 96        """Name for this element.
 97
 98        Return
 99        ------
100        str:
101            "QMC_Analysis"
102        """
103        return "QMC_Analysis"
104
105    def element_version(self):
106        """Version of this element.
107
108        Return
109        ------
110        str:
111            Element version.
112        """
113        return "0.2"
114
115    def contains_nan(self, values):
116        """Checks if ``None`` or ``numpy.nan`` exists in `values`. Returns ``True`` if
117        any there are at least one occurrence of ``None`` or ``numpy.nan``.
118        Parameters
119        ----------
120        values : array_like, list, number
121            `values` where to check for occurrences of ``None`` or ``np.nan``.
122            Can be irregular and have any number of nested elements.
123        Returns
124        -------
125        bool
126            ``True`` if `values` has at least one occurrence of ``None`` or
127            ``numpy.nan``.
128        """
129        # To speed up we first try the fast option np.any(np.isnan(values))
130        try:
131            return np.any(np.isnan(values))
132        except (ValueError, TypeError):
133            if values is None or values is np.nan:
134                return True
135            # To solve the problem of float/int as well as numpy int/flaot
136            elif np.isscalar(values) and np.isnan(values):
137                return True
138            elif hasattr(values, "__iter__"):
139                for value in values:
140                    if self.contains_nan(value):
141                        return True
142
143                return False
144            else:
145                return False
146
147    def create_mask(self, samples):
148        """
149        Mask samples that do not give results (anything but np.nan or None).
150        Parameters
151        ----------
152        samples : array_like
153            Evaluations for the model.
154        Returns
155        -------
156        masked_samples : list
157            The evaluations that have results (not numpy.nan or None).
158        mask : boolean array
159            The mask itself, used to create the masked arrays.
160        """
161        masked_samples = []
162        mask = np.ones(len(samples), dtype=bool)
163
164        for i, result in enumerate(samples):
165            # if np.any(np.isnan(result)):
166            if self.contains_nan(result):
167                mask[i] = False
168            else:
169                masked_samples.append(result)
170
171        return masked_samples, mask     
172
173    def analyse(self, data_frame):
174        """Perform QMC analysis on a given pandas DataFrame.
175
176        Parameters
177        ----------
178        data_frame : pandas DataFrame
179            Input data for analysis.
180
181        Returns
182        -------
183        easyvvuq.analysis.qmc.QMCAnalysisResults
184            AnalysisResults object for QMC.
185        """
186        if data_frame.empty:
187            raise RuntimeError(
188                "No data in data frame passed to analyse element")
189
190        qoi_cols = self.qoi_cols
191
192        results = {
193            'statistical_moments': {k: {} for k in qoi_cols},
194            'percentiles': {k: {} for k in qoi_cols},
195            'sobols_first': {k: {} for k in qoi_cols},
196            'sobols_total': {k: {} for k in qoi_cols},
197            'conf_sobols_first': {k: {} for k in qoi_cols},
198            'conf_sobols_total': {k: {} for k in qoi_cols}
199        }
200
201        # Extract output values for each quantity of interest from Dataframe
202        samples = self.get_samples(data_frame)
203
204        # Compute descriptive statistics for each quantity of interest
205        for k in qoi_cols:
206            # Find NaNs and create a mask excluding these samples from the analysis
207            # https://github.com/simetenn/uncertainpy/blob/ffb2400289743066265b9a8561cdf3b72e478a28/src/uncertainpy/core/uncertainty_calculations.py#L1532
208            masked_samples, mask = self.create_mask(samples[k])
209
210            results['statistical_moments'][k] = {'mean': np.mean(masked_samples, axis=0),
211                                                 'var': np.var(masked_samples, axis=0),
212                                                 'std': np.std(masked_samples, axis=0),
213                                                 'min': np.min(masked_samples, axis=0),
214                                                 'max': np.max(masked_samples, axis=0),
215                                                 'median': np.median(masked_samples, axis=0),
216                                                 }
217            results['percentiles'][k] = {'p1': np.percentile(masked_samples, 1, 0)[0],
218                                         'p10': np.percentile(masked_samples, 10, 0)[0],
219                                         'p50': np.percentile(masked_samples, 50, 0)[0],
220                                         'p90': np.percentile(masked_samples, 90, 0)[0],
221                                         'p99': np.percentile(masked_samples, 99, 0)[0]}
222
223            # Replace Nan values by the mean before proceeding with the SA
224            indices = np.where(mask == 0)[0] # samples[~mask] = results[k].mean
225            for i in indices:
226                samples[k][i] = results['statistical_moments'][k]['mean']
227
228            if not np.all(mask):
229                print("Warning: QoI \"{}\" only yields ".format(k) +
230                    "results for {}/{} ".format(sum(mask), len(mask)) +
231                    "parameter combinations. " +
232                    "Runs {} are not valid. ".format(indices+1) +
233                    "NaN results are set to the mean when calculating the Sobol indices. " +
234                    "This might affect the Sobol indices.")
235            
236            sobols_first, conf_first, sobols_total, conf_total = \
237                self.sobol_bootstrap(samples[k])
238            results['sobols_first'][k] = sobols_first
239            results['sobols_total'][k] = sobols_total
240            results['conf_sobols_first'][k] = conf_first
241            results['conf_sobols_total'][k] = conf_total
242
243        return QMCAnalysisResults(raw_data=results, samples=data_frame,
244                                  qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
245
246    def get_samples(self, data_frame):
247        """
248        Converts the Pandas dataframe into a dictionary.
249
250        Parameters
251        ----------
252        data_frame : pandas DataFrame
253            the EasyVVUQ Pandas dataframe from collation.
254
255        Returns
256        -------
257        dict :
258            A dictionary with the QoI names as keys.
259            Each element is a list of code evaluations.
260        """
261        samples = {k: [] for k in self.qoi_cols}
262        for run_id in data_frame['run_id'].squeeze().unique():
263            for k in self.qoi_cols:
264                data = data_frame.loc[data_frame['run_id'].squeeze() == run_id][k]
265                samples[k].append(data.values)
266        return samples
267
268    def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
269        """
270        Computes the first order and total order Sobol indices using Saltelli's
271        method. To assess the sampling inaccuracy, bootstrap confidence intervals
272        are also computed.
273
274        Reference: A. Saltelli, Making best use of model evaluations to compute
275        sensitivity indices, Computer Physics Communications, 2002.
276
277        Parameters
278        ----------
279        samples : list
280            The samples for a given QoI.
281        alpha: float
282            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
283        n_samples: int
284            The number of bootstrap samples. The default is 1000.
285
286        Returns
287        -------
288        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
289        dictionaries containing the first- and total-order Sobol indices for all
290        parameters, and (1-alpha)*100 lower and upper confidence bounds.
291
292        """
293        assert len(samples) > 0
294        assert alpha > 0.0
295        assert alpha < 1.0
296        assert n_samples > 0
297
298        # convert to array
299        samples = np.array(samples)
300        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
301        # and the size of the QoI
302        n_params = self.sampler.n_params
303        n_mc = self.sampler.n_mc_samples
304        n_qoi = samples[0].size
305        sobols_first_dict = {}
306        conf_first_dict = {}
307        sobols_total_dict = {}
308        conf_total_dict = {}
309
310        for j, param_name in enumerate(self.sampler.vary.get_keys()):
311            # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
312            # see reference above.
313            f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
314            # our point estimate for the 1st and total order Sobol indices
315            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
316            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
317            # array for resampled estimates
318            sobols_first = np.zeros([n_samples, n_qoi])
319            sobols_total = np.zeros([n_samples, n_qoi])
320            for i in range(n_samples):
321                # resample, must be done on already seperated output due to
322                # the specific order in samples
323                idx = np.random.randint(0, n_mc - 1, n_mc)
324                f_M2_resample = f_M2[idx]
325                f_M1_resample = f_M1[idx]
326                f_Ni_resample = f_Ni[idx]
327                # recompute Sobol indices
328                sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
329                                                    f_Ni_resample[:, j])
330                sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
331                                                    f_Ni_resample[:, j])
332            # compute confidence intervals
333            _, low_first, high_first = confidence_interval(sobols_first, value_first,
334                                                           alpha, pivotal=True)
335            _, low_total, high_total = confidence_interval(sobols_total, value_total,
336                                                           alpha, pivotal=True)
337            # store results
338            sobols_first_dict[param_name] = value_first
339            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
340            sobols_total_dict[param_name] = value_total
341            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
342
343        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
344    
345    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
346        """
347        Computes the first order and total order Sobol indices using Saltelli's
348        method. To assess the sampling inaccuracy, bootstrap confidence intervals
349        are also computed.
350
351        Reference: A. Saltelli, Making best use of model evaluations to compute
352        sensitivity indices, Computer Physics Communications, 2002.
353
354        Parameters
355        ----------
356        samples : list
357            The samples for a given QoI.
358        alpha: float
359            The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
360        n_samples: int
361            The number of bootstrap samples. The default is 1000.
362
363        Returns
364        -------
365        sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
366        dictionaries containing the first- and total-order Sobol indices for all
367        parameters, and (1-alpha)*100 lower and upper confidence bounds.
368
369        """
370        assert len(samples) > 0
371        assert alpha > 0.0
372        assert alpha < 1.0
373        assert n_bootstrap > 0
374
375        # convert to array
376        samples = np.array(samples)
377        # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
378        # and the size of the QoI
379        n_params = self.sampler.n_params
380        # n_mc = self.sampler.n_mc_samples
381        n_mc = int(samples.shape[0]/(n_params + 2))
382        n_qoi = samples[0].size
383        sobols_first_dict = {}
384        conf_first_dict = {}
385        sobols_total_dict = {}
386        conf_total_dict = {}
387
388        # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
389        # see reference above.
390        f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
391        r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))
392
393        for j, param_name in enumerate(self.sampler.vary.get_keys()):
394
395            # our point estimate for the 1st and total order Sobol indices
396            value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
397            value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
398
399            # sobols computed from resampled data points
400            if n_mc * n_bootstrap * n_qoi <= 10**7:
401                #this is a vectorized computation, Is fast, but f_M2[r] will be of size
402                #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 
403                #especially when dealing with large QoI (n_qoi >> 1). So this is only done
404                #when n_mc * n_bootstrap * n_qoi <= 10**7
405                print("Vectorized bootstrapping")
406                sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
407                sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
408            else:
409                #array for resampled estimates
410                sobols_first = np.zeros([n_bootstrap, n_qoi])
411                sobols_total = np.zeros([n_bootstrap, n_qoi])
412                print("Sequential bootstrapping")
413                #non-vectorized implementation
414                for i in range(n_bootstrap):
415                    #resampled sample matrices of size (n_mc, n_qoi)
416                    indices = r[:, i]  
417                    sobols_first[i] = self._first_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
418                    sobols_total[i] = self._total_order(f_M2[indices], f_M1[indices], f_Ni[indices, j])
419
420            # compute confidence intervals based on percentiles
421            _, low_first, high_first = confidence_interval(sobols_first, value_first,
422                                                           alpha, pivotal=True)
423            _, low_total, high_total = confidence_interval(sobols_total, value_total,
424                                                           alpha, pivotal=True)
425            # store results
426            sobols_first_dict[param_name] = value_first
427            conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
428            sobols_total_dict[param_name] = value_total
429            conf_total_dict[param_name] = {'low': low_total, 'high': high_total}
430
431        return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict
432
433
434    # Adapted from SALib
435    @staticmethod
436    def _separate_output_values(samples, n_params, n_mc_samples):
437        """There are n_params + 2 different input matrices: M1, M2, N_i,
438        i=1,...,n_params.  (see reference under sobol_bootstrap). The
439        EasyVVUQ dataframe is stored in the order:
440
441        [sample from M2, sample from N1, N2, ... sample from N_n_params,
442         sample from M1, repeat].
443
444        This subroutine separates the output values into the contributions
445        of the different input matrices.
446
447        Parameters
448        ----------
449        samples: list
450            The samples for a given QoI
451        n_params: int
452            The number of uncertain input parameters.
453        n_mc_samples: int
454            The number of MC samples per input matrix, i.e. the
455          number of rows in M1, M2 or Ni.
456
457        Returns
458        -------
459        NumPy arrays of the separated code evaluations: f_M2, f_M1, f_Ni, where
460        f_Ni contains n_params entries corresponding to the n_params Ni matrices.
461
462        """
463        evaluations = np.array(samples)
464
465        shape = (n_mc_samples, n_params) + evaluations[0].shape
466        step = n_params + 2
467        f_Ni = np.zeros(shape)
468
469        f_M2 = evaluations[0:evaluations.shape[0]:step]
470        f_M1 = evaluations[(step - 1):evaluations.shape[0]:step]
471
472        for i in range(n_params):
473            f_Ni[:, i] = evaluations[(i + 1):evaluations.shape[0]:step]
474
475        return f_M2, f_M1, f_Ni
476
477    @staticmethod
478    def _first_order(f_M2, f_M1, f_Ni):
479        """Calculate first order sensitivity indices.
480
481        Parameters
482        ----------
483        f_M2: NumPy array
484            Array of code evaluations on input array M2
485        f_M1: NumPy array
486            Array of code evaluations on input array M1
487        f_Ni: NumPy array
488            Array of code evaluations on input array Ni, i=1,...,n_params
489
490        Returns
491        -------
492        A NumPy array of the n_params first-order Sobol indices.
493        """
494        V = np.var(np.r_[f_M2, f_M1], axis=0)
495        return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
496
497    @staticmethod
498    def _total_order(f_M2, f_M1, f_Ni):
499        """Calculate total order sensitivity indices. See also:
500
501        A Saltelli et al, Variance based sensitivity analysis of model output.
502        Design and estimator for the total sensitivity index, 2009.
503
504        Parameters
505        ----------
506        f_M2: NumPy array
507            Array of code evaluations on input array M2 (matrix A in ref above)
508        f_M1: NumPy array
509            Array of code evaluations on input array M1 (matrix B in ref above)
510        f_Ni: NumPy array
511            Array of code evaluations on input array Ni, i=1,...,n_params
512          (matrix AB in ref above)
513
514        Returns
515        -------
516        A NumPy array of the n_params total-order Sobol indices.
517        """
518        V = np.var(np.r_[f_M2, f_M1], axis=0)
519        return 0.5 * np.mean((f_M2 - f_Ni) ** 2, axis=0) / (V + (V == 0)) * (V != 0)

logger = <Logger easyvvuq.analysis.qmc_analysis (DEBUG)>

class QMCAnalysisResults(easyvvuq.analysis.results.AnalysisResults): View Source

22class QMCAnalysisResults(AnalysisResults):
23    """Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class
24    documentation for details on using it.
25    """
26
27    def _get_sobols_first(self, qoi, input_):
28        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_first'])
29        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
30
31    def _get_sobols_total(self, qoi, input_):
32        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['sobols_total'])
33        return raw_dict[AnalysisResults._to_tuple(qoi)][input_][0]
34
35    def _get_sobols_second(self, qoi, input_):
36        raise NotImplementedError
37
38    def _get_sobols_first_conf(self, qoi, input_):
39        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_first'])
40        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
41                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
42
43    def _get_sobols_total_conf(self, qoi, input_):
44        raw_dict = AnalysisResults._keys_to_tuples(self.raw_data['conf_sobols_total'])
45        return [raw_dict[AnalysisResults._to_tuple(qoi)][input_]['low'][0],
46                raw_dict[AnalysisResults._to_tuple(qoi)][input_]['high'][0]]
47
48    def supported_stats(self):
49        """Types of statistics supported by the describe method.
50
51        Returns
52        -------
53        list of str
54        """
55        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']
56
57    def _describe(self, qoi, statistic):
58        if statistic not in self.supported_stats():
59            raise NotImplementedError
60        if statistic == '1%':
61            return self.raw_data['percentiles'][qoi]['p1']
62        if statistic == '10%':
63            return self.raw_data['percentiles'][qoi]['p10']
64        elif statistic == '50%':
65            return self.raw_data['percentiles'][qoi]['p50']
66        elif statistic == '90%':
67            return self.raw_data['percentiles'][qoi]['p90']
68        elif statistic == '99%':
69            return self.raw_data['percentiles'][qoi]['p99']
70        else:
71            return self.raw_data['statistical_moments'][qoi][statistic][0]

Analysis results for the QMCAnalysis Method. Refer to the AnalysisResults base class documentation for details on using it.

def supported_stats(self): View Source

48    def supported_stats(self):
49        """Types of statistics supported by the describe method.
50
51        Returns
52        -------
53        list of str
54        """
55        return ['mean', 'var', 'std', 'min', 'max', 'median', 'percentiles', '1%', '10%', '50%', '90%', '99%']

Types of statistics supported by the describe method.

Returns

list of str

Inherited Members

easyvvuq.analysis.results.AnalysisResults: AnalysisResults; raw_data; samples; qois; inputs; derivatives_first; sobols_first; sobols_second; sobols_total; surrogate; describe; plot_sobols_treemap; plot_sobols_first; plot_moments; get_distribution

easyvvuq.analysis.qmc_analysis

Returns

Inherited Members

Attributes

Parameters

Return

Return

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Returns

Inherited Members